├── .gitignore ├── LICENSE ├── README.md ├── act ├── README.md ├── check_all_artworks.ipynb ├── compare_metadata_sources.ipynb ├── create_act_items.ipynb ├── disambiguate_prior_to_phase_2b.ipynb └── remove_problematic_rows_before_upload.ipynb ├── appointments ├── appointments-to-write copy.csv ├── appointments-to-write.csv ├── csv-metadata copy.json ├── csv-metadata.json └── vb6_upload_wikidata.py ├── clinical_trials ├── clinical_trials.ipynb ├── csv-metadata.json ├── links_missing_qids.csv ├── match_fail.csv └── non_vanderbilt.csv ├── commonsbot ├── README.md ├── all_fields.json ├── art_photo.json ├── artwork.json ├── artwork_metadata.csv ├── commons_data.ipynb ├── commons_images.csv ├── commonsbot.ipynb ├── commonstool.py ├── commonstool_config.yml ├── config.json ├── convert_dng_to_tiff.py ├── convert_tiff_to_smaller.py ├── convert_to_pyramidal_tiled_tiff.ipynb ├── convert_to_pyramidal_tiled_tiff.py ├── csv-metadata.json ├── depicts │ ├── config.yaml │ ├── csv-metadata.json │ ├── depicts.csv │ └── example_response.json ├── extract_image_metadata.ipynb ├── extract_image_metadata.py ├── fields_comparison.csv ├── fields_comparison_sort.csv ├── images.csv ├── information.json ├── items_status_abbrev.csv ├── minimal_manifest.py ├── photograph.json ├── transfer_to_vanderbot.ipynb ├── transfer_to_vanderbot.py ├── upload_artwork.sh ├── upload_metadata.csv ├── wcqs │ └── wcqs_query.py └── works_multiprop.csv ├── etd ├── config.yaml └── theses.csv ├── gallery ├── classification_mappings.csv ├── country_mappings.csv ├── creators.csv ├── csv-metadata.json ├── gallery_works_to_write.csv ├── get_image_dimensions.ipynb ├── materials.csv ├── medium.csv ├── process_gallery.ipynb ├── properties.csv ├── screen_creators.py └── vb_common_code.py ├── image_analysis ├── README.md └── google_cloud_vision │ ├── README.md │ ├── deprecated_google_cloud_vision.ipynb │ ├── face_detection.csv │ ├── google_cloud_vision.py │ ├── label_detection.csv │ ├── object_localization.csv │ └── 
text_detection.csv ├── json_schema ├── bluffton_presidents.csv ├── bluffton_presidents.json ├── bluffton_presidents.ttl ├── count_entities.py ├── csv-metadata.json ├── csv-metadata_globecoordinate.json ├── csv-metadata_monolingual.json ├── csv-metadata_quantity.json ├── federated_test.ipynb ├── globecoordinate.ttl ├── globecoordinate_test.csv ├── journal-div-qids.csv ├── journal-div-qids_small.csv ├── monolingual.ttl ├── monolingualstring_test.csv ├── property_labels.csv ├── quantity.ttl └── quantity_test.csv ├── neptune ├── README.md ├── config │ ├── named_graphs_config.json │ └── prefixes.txt ├── drop_time.jpg ├── graph_file_associations.csv ├── load_neptune.py ├── load_time.jpg ├── named_graphs.csv ├── service_description_model.png └── trigger.txt ├── publications ├── README.md ├── add_missing_references.py ├── apis.md ├── crosscheck-publications.ipynb ├── crossref │ ├── articles.csv │ ├── author_strings.csv │ ├── authors.csv │ ├── config.json │ ├── crossref_errors.txt │ ├── csv-metadata.json │ ├── department_labels.csv │ ├── departments.csv │ ├── doi_source.csv │ ├── editors.csv │ ├── old_xquery │ │ ├── README.md │ │ ├── crossref-get.py │ │ ├── crossref-get.xq │ │ ├── merge-doi.xq │ │ └── vanderbilt-doi.csv │ ├── publishers.csv │ ├── researchers.csv │ ├── retrieve_doi_data.ipynb │ ├── screens.json │ ├── stored_retrieved_authors.csv │ └── vanderbilt_wikidata_altlabels.csv ├── csv-metadata.json ├── data │ ├── automate_collect_data.ipynb │ ├── collect_data.ipynb │ ├── data-collection-error-times.txt │ ├── last_run.txt │ ├── units_men.csv │ ├── units_orcid.csv │ ├── units_total.csv │ ├── units_women.csv │ ├── units_works.csv │ ├── units_works_men.csv │ ├── units_works_women.csv │ ├── vanderbilt_units.csv │ ├── vandycite_edit_data.csv │ ├── vandycite_item_data.csv │ └── vandycite_users.csv ├── default_label_desc.json ├── delete-references.py ├── department-configuration.json ├── departments │ ├── a-and-s-to-write.csv │ ├── a-and-s.csv │ ├── csv-metadata.json │ ├── 
engineering-to-write.csv │ ├── engineering.csv │ ├── example-response-department.json │ ├── medical-departments_full.csv │ ├── medicine-response.json │ ├── medicine-source.csv │ ├── medicine-to-write.csv │ ├── peabody-to-write.csv │ ├── process_a_and_s.ipynb │ ├── process_engineering.ipynb │ ├── process_medicine.ipynb │ └── uva │ │ ├── config.json │ │ ├── csv-metadata.json │ │ ├── departments.csv │ │ └── graph_pattern.txt ├── divinity-law │ ├── __pycache__ │ │ └── vb_common_code.cpython-37.pyc │ ├── acquire_wikidata_metadata.ipynb │ ├── determine_ref_properties.ipynb │ ├── divinity-law.ipynb │ ├── response_example.json │ ├── vandycite_statistics.md │ └── vb_common_code.py ├── elsevier │ ├── data │ │ ├── https%3A%2F%2Fapi.elsevier.com%2Fcontent%2Fabstract%2Fscopus_id%2F84872135457.json │ │ └── https%3A%2F%2Fapi.elsevier.com%2Fcontent%2Farticle%2Fpii%2FS1674927814000082.json │ ├── dump.json │ ├── elsevier_api.ipynb │ └── logs │ │ └── elsapy-20210309.log ├── example-response-person-claims.json ├── orcid │ ├── README.md │ ├── match_bsci_orcid.ipynb │ ├── orcid-get-json.ipynb │ ├── orcid-get.py │ ├── orcid-id-get.xq │ ├── orcid-record-get.xq │ ├── people.rdf │ ├── vanderbilt-orcid.csv │ ├── vandy-people-all.xq │ └── vandy-people-rdf-xml.xq ├── process_csv_metadata_full.py ├── process_department.ipynb ├── pubmed │ ├── README.md │ ├── covid_pubmed.ipynb │ ├── covid_results.csv │ ├── falsePos.csv │ ├── search.py │ ├── truePos.csv │ └── truePosCount.txt ├── redcap │ └── redcap_api.ipynb ├── scrape-bsci.ipynb ├── wikidata │ ├── README.md │ ├── affiliation.json │ ├── alt_label copy.csv │ ├── alt_label.csv │ ├── csv-metadata-real-props.json │ ├── csv-metadata.json │ ├── department.json │ ├── download-vanderbilt-people-altlabels.py │ ├── download-vanderbilt-people.py │ ├── match_bsci_wikidata.ipynb │ ├── process_csv │ │ ├── csv-metadata.json │ │ ├── process_csv_metadata_full.py │ │ └── researcher.csv │ ├── process_csv_metadata.py │ ├── process_csv_metadata_simplified.py │ ├── 
researcher copy.csv │ ├── researcher-real-ids.csv │ ├── researcher-with-ids.csv │ ├── researcher.csv │ ├── wikidata-employee.ttl │ ├── wikidata-student.ttl │ ├── wikidata-to-csv.sparql │ ├── wikidata-to-turtle.sparql │ ├── work.csv │ └── work_author_join.csv ├── work-person-figure.png ├── work-person-figure.pptx ├── wos │ ├── wos.py │ └── wos_lite.ipynb └── zenodo │ └── zenodo_api.ipynb ├── sparql ├── prefixes.txt ├── sparql_gui.py └── sparql_results.csv ├── swj ├── csv-metadata.json ├── example1 │ ├── bluffton_presidents.csv │ ├── csv-metadata.json │ └── output.ttl ├── example2 │ ├── bluffton_employees.csv │ ├── csv-metadata.json │ └── output.ttl ├── example3 │ ├── bluffton_positions.csv │ ├── csv-metadata.json │ └── output.ttl ├── example4.py ├── label_example.csv └── label_example_output.ttl ├── vanderbot ├── README.md ├── acquire_wikidata.md ├── acquire_wikidata_metadata.py ├── config.yaml ├── config_gallery.json ├── config_journals.json ├── convert-config.md ├── convert_config_to_metadata_schema.py ├── convert_json_to_metadata_schema.py ├── count_entities.md ├── count_entities.py ├── csv-metadata.json ├── department-configuration.json ├── generate_direct_props.ipynb ├── generate_direct_props.py ├── graph.txt ├── medicine-employees-to-write.csv ├── medicine-employees-with-wikidata.csv ├── medicine-employees.csv ├── properties_to_add.csv ├── researcher-project.md ├── vanderbot.py ├── vanderdeletebot.ipynb ├── vanderdeletebot.md ├── vanderdeletebot.py ├── vanderpropertybot.md ├── vanderpropertybot.py ├── vb1_process_department.ipynb ├── vb2_match_orcid.py ├── vb3_match_wikidata.py ├── vb4_download_wikidata.py ├── vb5_check_labels_descriptions.py ├── vb6_upload_wikidata.py ├── vb_common_code.py ├── vu_authors.txt ├── wikidata-csv2rdf-metadata.css ├── wikidata-csv2rdf-metadata.html └── wikidata-csv2rdf-metadata.js └── wikibase ├── README.md ├── interaction-diagram.png ├── interaction-diagram.pptx ├── load-fac-wikibase.py ├── vanderbot ├── README.md ├── config.yaml 
├── config_wikidata.yaml ├── csv-metadata.json ├── elements │ ├── README.md │ ├── config.yaml │ └── elements_raw.csv ├── hstatues.csv ├── qids.csv ├── states │ ├── README.md │ ├── config.yaml │ ├── config_capital.yaml │ └── states_raw.csv ├── statues.csv ├── statues_added_ready.csv ├── statues_downloaded.csv └── statues_raw.csv └── vu-faculty.json /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | Thumbs.db 3 | .ipynb_checkpoints 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # linked-data 2 | Documentation and Data related to the Linked Data and Wikidata Working Groups 3 | 4 | ## Repo structure 5 | 6 | ``` 7 | ├── publications : content related to project to collect metadata about Vanderbilt people and works 8 | │   ├── apis.md : list of APIs for data about people and works 9 | │   ├── crossref : scripts and data related to retrieving DOI metadata 10 | │   ├── orcid : scripts and data related to retrieving ORCID metadata 11 | │   ├── pubmed : work done with Philip Walker to retrieve publication data from PubMed 12 | │   ├── wikidata : scripts and data related to retrieving data on Vanderbilt people from Wikidata (using SPARQL) 13 | │   └── work-person-figure.png : a diagram showing a data model for works, persons, and institutions with properties of each 14 | └── wikibase : content related to project to get a Wikibase instance up online and automate content input 15 | ``` 16 | -------------------------------------------------------------------------------- /act/README.md: -------------------------------------------------------------------------------- 1 | # Wikiproject Art in the Christian Tradition (ACT) processing scripts 2 | 3 | This directory contains some of the scripts used to process output from the ACT database and data scraped from Wikimedia Commons. 
It is purpose-built and therefore not necessarily usable by others. However, you may find some of the code useful to adapt for your own purposes. 4 | 5 | ## Script descriptions 6 | 7 | `create_act_items.ipynb` - This is the primary script used to create Wikidata items for ACT artwork. Its main source files were act_data_fix.csv (data output from the ACT database) and commons_data_fix.csv (data output from scraping Commons), but it also used clean_ids.csv to join tables by shared identifiers. Some useful code includes disambiguating artists using fuzzy string matching. 8 | 9 | `compare_metadata_sources.ipynb` - contains some useful code for discovering links to Wikidata items based on the tiny Wikidata flag links on Commons pages that use the Artwork template. 10 | 11 | `check_all_artworks.ipynb` - checked potential artwork uploads against the labels of all artworks by the artist in Wikidata using fuzzy string matching. 12 | 13 | `disambiguate_prior_to_phase_2b.ipynb` - various checks including whether Commons page URLs dereference, retrieving various sorts of data programmatically using SPARQL queries, and checking whether Commons pages have links to Wikidata. 14 | 15 | `remove_problematic_rows_before_upload.ipynb` - various screening routines, including fuzzy matching against existing labels and displaying Commons images in the output to assist the user in deciding about the image (last cell).
16 | 17 | ---- 18 | Revised 2022-06-24 19 | -------------------------------------------------------------------------------- /appointments/appointments-to-write copy.csv: -------------------------------------------------------------------------------- 1 | name,wikidataId,academicAppointmentUuid,academicAppointment,academicAppointmentHash,academicAppointmentSourceUrl,academicAppointmentReferenceRetrieved,academicAppointmentStartTime,academicAppointmentEndTime,academicAppointmentOrdinal 2 | Ken Catania,Q6389929,CB3F25D5-5B75-499D-9911-2DE13285FCF3,Q78041310,f8ec4d80911685cb41294aeef07527be752365bb,https://as.vanderbilt.edu/biosci/people/index.php?group=primary-training-faculty,2020-07-18T00:00:00Z,,,1 3 | -------------------------------------------------------------------------------- /appointments/appointments-to-write.csv: -------------------------------------------------------------------------------- 1 | name,wikidataId,worldcatIdUuid,worldcatId,worldcatIdHash,worldcatIdViafId,worldcatIdReferenceRetrieved,worldcatIdStatedIn 2 | Suzana Herculano-Houzel,Q10375239,15948DE3-92F8-4642-8616-C49C18E7B0ED,viaf-208186153,212c597031b99b13eb8838c8682bdba529590cc8,208186153,2020-07-18T00:00:00Z,Q54919 3 | -------------------------------------------------------------------------------- /appointments/csv-metadata copy.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "appointments-to-write.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles": "name", 11 | "name": "name", 12 | "datatype": "string", 13 | "suppressOutput": true 14 | }, 15 | { 16 | "titles": "wikidataId", 17 | "name": "wikidataId", 18 | "datatype": "string", 19 | "suppressOutput": true 20 | }, 21 | { 22 | "titles": "academicAppointmentUuid", 23 | "name": "academicAppointmentUuid", 24 | "datatype": "string", 25 | "aboutUrl": 
"http://www.wikidata.org/entity/{wikidataId}", 26 | "propertyUrl": "http://www.wikidata.org/prop/P8413", 27 | "valueUrl": "http://www.wikidata.org/entity/statement/{academicAppointmentUuid}" 28 | }, 29 | { 30 | "titles": "academicAppointment", 31 | "name": "academicAppointment", 32 | "datatype": "string", 33 | "aboutUrl": "http://www.wikidata.org/entity/{wikidataId}", 34 | "propertyUrl": "http://www.wikidata.org/prop/direct/P8413", 35 | "valueUrl": "http://www.wikidata.org/entity/{academicAppointment}" 36 | }, 37 | { 38 | "titles": "academicAppointmentHash", 39 | "name": "academicAppointmentHash", 40 | "datatype": "string", 41 | "aboutUrl": "http://www.wikidata.org/entity/statement/{academicAppointmentUuid}", 42 | "propertyUrl": "prov:wasDerivedFrom", 43 | "valueUrl": "http://www.wikidata.org/reference/{academicAppointmentHash}" 44 | }, 45 | { 46 | "titles": "academicAppointmentSourceUrl", 47 | "name": "academicAppointmentSourceUrl", 48 | "datatype": "string", 49 | "aboutUrl": "http://www.wikidata.org/reference/{academicAppointmentHash}", 50 | "propertyUrl": "http://www.wikidata.org/prop/reference/P854", 51 | "valueUrl": "{academicAppointmentSourceUrl}" 52 | }, 53 | { 54 | "titles": "academicAppointmentReferenceRetrieved", 55 | "name": "academicAppointmentReferenceRetrieved", 56 | "datatype": "dateTime", 57 | "aboutUrl": "http://www.wikidata.org/reference/{academicAppointmentHash}", 58 | "propertyUrl": "http://www.wikidata.org/prop/reference/P813" 59 | }, 60 | { 61 | "titles": "academicAppointmentStartTime", 62 | "name": "academicAppointmentStartTime", 63 | "datatype": "dateTime", 64 | "aboutUrl": "http://www.wikidata.org/entity/statement/{academicAppointmentUuid}", 65 | "propertyUrl": "http://www.wikidata.org/prop/qualifier/P580" 66 | }, 67 | { 68 | "titles": "academicAppointmentEndTime", 69 | "name": "academicAppointmentEndTime", 70 | "datatype": "dateTime", 71 | "aboutUrl": "http://www.wikidata.org/entity/statement/{academicAppointmentUuid}", 72 | "propertyUrl": 
"http://www.wikidata.org/prop/qualifier/P582" 73 | }, 74 | { 75 | "titles": "academicAppointmentOrdinal", 76 | "name": "academicAppointmentOrdinal", 77 | "datatype": "literal", 78 | "aboutUrl": "http://www.wikidata.org/entity/statement/{academicAppointmentUuid}", 79 | "propertyUrl": "http://www.wikidata.org/prop/qualifier/P1545" 80 | } 81 | ] 82 | } 83 | } 84 | ] 85 | } -------------------------------------------------------------------------------- /appointments/csv-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "appointments-to-write.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles": "name", 11 | "name": "name", 12 | "datatype": "string", 13 | "suppressOutput": true 14 | }, 15 | { 16 | "titles": "wikidataId", 17 | "name": "wikidataId", 18 | "datatype": "string", 19 | "suppressOutput": true 20 | }, 21 | { 22 | "titles": "worldcatIdUuid", 23 | "name": "worldcatIdUuid", 24 | "datatype": "string", 25 | "aboutUrl": "http://www.wikidata.org/entity/{wikidataId}", 26 | "propertyUrl": "http://www.wikidata.org/prop/P7859", 27 | "valueUrl": "http://www.wikidata.org/entity/statement/{worldcatIdUuid}" 28 | }, 29 | { 30 | "titles": "worldcatId", 31 | "name": "worldcatId", 32 | "datatype": "string", 33 | "aboutUrl": "http://www.wikidata.org/entity/{wikidataId}", 34 | "propertyUrl": "http://www.wikidata.org/prop/direct/P7859", 35 | "valueUrl": "http://www.wikidata.org/entity/{worldcatId}" 36 | }, 37 | { 38 | "titles": "worldcatIdHash", 39 | "name": "worldcatIdHash", 40 | "datatype": "string", 41 | "aboutUrl": "http://www.wikidata.org/entity/statement/{worldcatIdUuid}", 42 | "propertyUrl": "prov:wasDerivedFrom", 43 | "valueUrl": "http://www.wikidata.org/reference/{worldcatIdHash}" 44 | }, 45 | { 46 | "titles": "worldcatIdViafId", 47 | "name": "worldcatIdViafId", 48 | "datatype": "string", 49 | "aboutUrl": 
"http://www.wikidata.org/reference/{worldcatIdHash}", 50 | "propertyUrl": "http://www.wikidata.org/prop/reference/P214" 51 | }, 52 | { 53 | "titles": "worldcatIdReferenceRetrieved", 54 | "name": "worldcatIdReferenceRetrieved", 55 | "datatype": "dateTime", 56 | "aboutUrl": "http://www.wikidata.org/reference/{worldcatIdHash}", 57 | "propertyUrl": "http://www.wikidata.org/prop/reference/P813" 58 | }, 59 | { 60 | "titles": "worldcatIdStatedIn", 61 | "name": "worldcatIdStatedIn", 62 | "datatype": "string", 63 | "aboutUrl": "http://www.wikidata.org/reference/{worldcatIdHash}", 64 | "propertyUrl": "http://www.wikidata.org/prop/reference/P248", 65 | "valueUrl": "http://www.wikidata.org/entity/{worldcatIdStatedIn}" 66 | } 67 | ] 68 | } 69 | } 70 | ] 71 | } -------------------------------------------------------------------------------- /clinical_trials/csv-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "links_edited.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles": "NCTId", 11 | "name": "NCTId", 12 | "datatype": "string", 13 | "suppressOutput": true 14 | }, 15 | { 16 | "titles": "wikidataId", 17 | "name": "wikidataId", 18 | "datatype": "string", 19 | "suppressOutput": true 20 | }, 21 | { 22 | "titles": "pi_uuid", 23 | "name": "pi_uuid", 24 | "datatype": "string", 25 | "aboutUrl": "http://www.wikidata.org/entity/{wikidataId}", 26 | "propertyUrl": "http://www.wikidata.org/prop/P8329", 27 | "valueUrl": "http://www.wikidata.org/entity/statement/{pi_uuid}" 28 | }, 29 | { 30 | "titles": "pi", 31 | "name": "pi", 32 | "datatype": "string", 33 | "aboutUrl": "http://www.wikidata.org/entity/{wikidataId}", 34 | "propertyUrl": "http://www.wikidata.org/prop/direct/P8329", 35 | "valueUrl": "http://www.wikidata.org/entity/{pi}" 36 | }, 37 | { 38 | "titles": "pi_reference_hash", 39 | "name": "pi_reference_hash", 40 | 
"datatype": "string", 41 | "aboutUrl": "http://www.wikidata.org/entity/statement/{pi_uuid}", 42 | "propertyUrl": "prov:wasDerivedFrom", 43 | "valueUrl": "http://www.wikidata.org/reference/{pi_reference_hash}" 44 | }, 45 | { 46 | "titles": "pi_reference_url", 47 | "name": "pi_reference_url", 48 | "datatype": "string", 49 | "aboutUrl": "http://www.wikidata.org/reference/{pi_reference_hash}", 50 | "propertyUrl": "http://www.wikidata.org/prop/reference/P854", 51 | "valueUrl": "{pi_reference_url}" 52 | }, 53 | { 54 | "titles": "pi_retrieved", 55 | "name": "pi_retrieved", 56 | "datatype": "dateTime", 57 | "aboutUrl": "http://www.wikidata.org/reference/{pi_reference_hash}", 58 | "propertyUrl": "http://www.wikidata.org/prop/reference/P813" 59 | } 60 | ] 61 | } 62 | } 63 | ] 64 | } -------------------------------------------------------------------------------- /clinical_trials/links_missing_qids.csv: -------------------------------------------------------------------------------- 1 | NCTId,wikidataId,pi_uuid,pi,pi_reference_hash,pi_reference_url,pi_retrieved 2 | NCT04464070,,,Q88144488,,https://clinicaltrials.gov/ct2/show/NCT04464070,2020-07-15T00:00:00Z 3 | NCT04454476,,,Q85236648,,https://clinicaltrials.gov/ct2/show/NCT04454476,2020-07-15T00:00:00Z 4 | NCT04456517,,,Q91133452,,https://clinicaltrials.gov/ct2/show/NCT04456517,2020-07-15T00:00:00Z 5 | NCT04451980,,,Q57011109,,https://clinicaltrials.gov/ct2/show/NCT04451980,2020-07-15T00:00:00Z 6 | NCT04449003,,,Q87412279,,https://clinicaltrials.gov/ct2/show/NCT04449003,2020-07-15T00:00:00Z 7 | NCT04449003,,,Q91486086,,https://clinicaltrials.gov/ct2/show/NCT04449003,2020-07-15T00:00:00Z 8 | -------------------------------------------------------------------------------- /commonsbot/all_fields.json: -------------------------------------------------------------------------------- 1 | ["artist", 2 | "author", 3 | "title", 4 | "description", 5 | "depicted people", 6 | "depicted place", 7 | "date", 8 | "medium", 9 | "dimensions", 
10 | "institution", 11 | "department", 12 | "accession number", 13 | "place of creation", 14 | "place of discovery", 15 | "object history", 16 | "exhibition history", 17 | "credit line", 18 | "inscriptions", 19 | "notes", 20 | "references", 21 | "source", 22 | "permission", 23 | "other_versions", 24 | "wikidata", 25 | "other fields", 26 | "other versions", 27 | "photographer", 28 | "camera coord", 29 | "attribution", 30 | "collection", 31 | "genre", 32 | "object type", 33 | "source/photographer", 34 | "current location"] -------------------------------------------------------------------------------- /commonsbot/art_photo.json: -------------------------------------------------------------------------------- 1 | ["wikidata", 2 | "artwork license", 3 | "photo description", 4 | "photo date", 5 | "photographer", 6 | "source", 7 | "photo license", 8 | "other_versions"] -------------------------------------------------------------------------------- /commonsbot/artwork.json: -------------------------------------------------------------------------------- 1 | ["artist", 2 | "author", 3 | "title", 4 | "description", 5 | "depicted people", 6 | "depicted place", 7 | "date", 8 | "medium", 9 | "dimensions", 10 | "institution", 11 | "department", 12 | "accession number", 13 | "place of creation", 14 | "place of discovery", 15 | "object history", 16 | "exhibition history", 17 | "credit line", 18 | "inscriptions", 19 | "notes", 20 | "references", 21 | "source", 22 | "permission", 23 | "other_versions", 24 | "wikidata"] -------------------------------------------------------------------------------- /commonsbot/artwork_metadata.csv: -------------------------------------------------------------------------------- 1 | qid,inventory_number,dimension,type,label,creator,inception_val,inception_earliest_date_val,inception_latest_date_val,style_period,rights,media_url,status 2 | Q103304554,1970.040,2D,document,Leaf from Italian Book of Hours,Q4233718,1500-01-01T00:00:00Z,,,,NO KNOWN 
COPYRIGHT,https://forum.jstor.org/assets/26753665/representation-view,inception prior to copyright cutoff 3 | Q102961245,1992.083,2D,print,"A Ghost Painting Coming to Life in the Studio of the Painter Okyō, from the series Yoshitoshi ryakuga (Sketches by Yoshitoshi)",Q467337,1882-01-01T00:00:00Z,,,,,https://forum.jstor.org/assets/26756398/representation-view,inception prior to copyright cutoff 4 | -------------------------------------------------------------------------------- /commonsbot/commons_images.csv: -------------------------------------------------------------------------------- 1 | qid,commons_id,local_identifier,label_en,directory,local_filename,rank,image_name,iiif_manifest,notes 2 | Q102961245,M122562112,1992.083,"A Ghost Painting Coming to Life in the Studio of the Painter Okyō, from the series Yoshitoshi ryakuga (Sketches by Yoshitoshi)",1992,1992.083.tif,primary,"A Ghost Painting Coming to Life in the Studio of the Painter Okyō, from the series Yoshitoshi ryakuga (Sketches by Yoshitoshi) - Vanderbilt Fine Arts Gallery - 1992.083.tif",https://iiif-manifest.library.vanderbilt.edu/gallery/1992/1992.083.json, 3 | Q103304554,M122746189,1970.040,Leaf from Italian Book of Hours - recto,1970,1970.040_recto_003.tif,primary,Leaf from Italian Book of Hours - recto - Vanderbilt Fine Arts Gallery - 1970.040 recto 003.tif,https://iiif-manifest.library.vanderbilt.edu/gallery/1970/1970.040.json, 4 | Q103304554,M122746245,1970.040,Leaf from Italian Book of Hours - verso,1970,1970.040_verso_004.tif,secondary,Leaf from Italian Book of Hours - verso - Vanderbilt Fine Arts Gallery - 1970.040 verso 004.tif,https://iiif-manifest.library.vanderbilt.edu/gallery/1970/1970.040.json, 5 | -------------------------------------------------------------------------------- /commonsbot/convert_dng_to_tiff.py: -------------------------------------------------------------------------------- 1 | # convert_dng_to_tiff.py, a Python script for converting digital negative (DNG) files to 
TIFF files 2 | 3 | # (c) 2023 Vanderbilt University. This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0 4 | # Author: Steve Baskauf 5 | 6 | # NOTE: This script requires ImageMagick to be installed on the system before it can be run. 7 | # See https://imagemagick.org/script/download.php 8 | 9 | import os 10 | 11 | def image_magick_convert_dng(in_path: str, out_path: str, log_path: str): 12 | """Convert a DNG file to a TIFF file using ImageMagick. 13 | 14 | Parameters 15 | ---------- 16 | in_path: path to the input DNG file 17 | out_path: path to the output TIFF file 18 | log_path: path to a text log file to which warnings, errors, and other output will be appended. 19 | """ 20 | # Note: need to enclose file paths in quotes because filenames sometimes include spaces. 21 | command_string = 'convert dng:"' + in_path + '" tif:"' + out_path + '" 2>> "' + log_path + '"' 22 | #print(command_string) 23 | os.system(command_string) 24 | 25 | # ------------------- 26 | # Main routine 27 | # ------------------- 28 | 29 | # Set the path to the directory containing the DNG files to be converted 30 | # NOTE: this script attempts to convert every file in the directory and does not filter by file type, so the directory should contain only DNG files. 31 | # The path should end with a slash. 32 | in_dir = '/Users/baskausj/dng/input/' 33 | 34 | # Set the path to the directory where the converted files will be written. The path should end with a slash.
35 | out_dir = '/Users/baskausj/raw_tiffs/' 36 | 37 | # Set the path to the log file 38 | log_path = out_dir + 'log.txt' 39 | 40 | # Get a list of the files in the input directory 41 | in_files = os.listdir(in_dir) 42 | 43 | # Loop through the files in the input directory 44 | for in_file in in_files: 45 | # Print the name of the file being processed (no file-type check is performed) 46 | print(in_file) 47 | 48 | # Set the path to the input file 49 | in_path = in_dir + in_file 50 | 51 | # Change the file extension to .tif 52 | out_file = in_file.replace('.dng', '.tif') 53 | 54 | # Set the path to the output file 55 | out_path = out_dir + out_file 56 | 57 | # Write the file name to the log file 58 | with open(log_path, 'a') as log_file: 59 | log_file.write(in_file + '\n') 60 | 61 | # Convert the file 62 | image_magick_convert_dng(in_path, out_path, log_path) 63 | 64 | print('done') 65 | 66 | -------------------------------------------------------------------------------- /commonsbot/convert_tiff_to_smaller.py: -------------------------------------------------------------------------------- 1 | # convert_tiff_to_smaller.py, a Python script for resizing TIFF files to a smaller size 2 | 3 | # (c) 2023 Vanderbilt University. This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0 4 | # Author: Steve Baskauf 5 | 6 | # NOTE: This script requires ImageMagick to be installed on the system before it can be run. 7 | # See https://imagemagick.org/script/download.php 8 | 9 | import os 10 | 11 | def image_magick_convert_smaller(in_path: str, out_path: str, log_path: str): 12 | """Resize a TIFF file to a smaller TIFF file using ImageMagick. 13 | 14 | Parameters 15 | ---------- 16 | in_path: path to the input TIFF file 17 | out_path: path to the output (resized) TIFF file 18 | log_path: path to a text log file to which warnings, errors, and other output will be appended. 19 | """ 20 | # Note: need to enclose file paths in quotes because filenames sometimes include spaces.
21 | command_string = 'convert tif:"' + in_path + '" -resize 73% tif:"' + out_path + '" 2>> "' + log_path + '"' 22 | #print(command_string) 23 | os.system(command_string) 24 | 25 | # ------------------- 26 | # Main routine 27 | # ------------------- 28 | 29 | # Set the path to the directory containing the TIFF files to be converted 30 | # NOTE: this script will convert all TIFF files in the directory, and will ignore other filetypes. 31 | # The path should end with a slash. 32 | in_dir = '/Users/baskausj/raw_tiffs/' 33 | 34 | # Set the path to the directory where the converted files will be written. The path should end with a slash. 35 | out_dir = '/Users/baskausj/raw_tiffs/smaller/' 36 | 37 | # Set the path to the log file 38 | log_path = out_dir + 'log.txt' 39 | 40 | # Get a list of the files in the input directory 41 | in_files = os.listdir(in_dir) 42 | 43 | # Loop through the files in the input directory 44 | for in_file in in_files: 45 | # Check to see if the file is a TIFF file 46 | print(in_file) 47 | if not in_file.endswith('.tif'): 48 | # If not, skip to the next file 49 | continue 50 | 51 | # Set the path to the input file 52 | in_path = in_dir + in_file 53 | 54 | # Set the path to the output file 55 | out_path = out_dir + in_file 56 | 57 | # Write the file name to the log file 58 | with open(log_path, 'a') as log_file: 59 | log_file.write(in_file + '\n') 60 | 61 | # Convert the file 62 | image_magick_convert_smaller(in_path, out_path, log_path) 63 | 64 | print('done') 65 | 66 | -------------------------------------------------------------------------------- /commonsbot/convert_to_pyramidal_tiled_tiff.py: -------------------------------------------------------------------------------- 1 | # convert_to_pyramidal_tiled_tiff.py, a Python script for converting single-image TIFFs to pyramidal tiled TIFFs 2 | 3 | # (c) 2023 Vanderbilt University. 
This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0 4 | # Author: Steve Baskauf 5 | 6 | # NOTE: This script requires ImageMagick to be installed on the system before it can be run. 7 | # See https://imagemagick.org/script/download.php 8 | 9 | # See also a key post at https://legacy.imagemagick.org/discourse-server/viewtopic.php?t=20193 for 10 | # background on using command-line ImageMagick to do the conversion. 11 | 12 | import os 13 | 14 | def image_magick_convert_tiff(in_path: str, out_path: str, log_path: str): 15 | """Convert a TIFF file to a pyramidal tiled TIFF file using ImageMagick. 16 | 17 | Parameters 18 | ---------- 19 | in_path: path to the input TIFF file 20 | out_path: path to the output pyramidal tiled TIFF file 21 | log_path: path to a text log file to which warnings, errors, and other output will be appended. 22 | 23 | Based on practical experience, most errors can be ignored.""" 24 | # Note: need to enclose file paths in quotes because filenames sometimes include spaces. 25 | # 256x256 is the tile size and seems to be a good choice for IIIF viewers. 26 | # The depth 8 setting should not be changed as it is necessary to view all of the tiles in Gimp and 27 | # for preview to work properly in Mac Preview. 28 | # The number of images included in the pyramid will depend on the filesize of the original image. 29 | command_string = 'convert "' + in_path + '" -define tiff:tile-geometry=256x256 -depth 8 ptif:"' + out_path + '" 2>> "' + log_path + '"' 30 | #print(command_string) 31 | os.system(command_string) 32 | 33 | # ------------------- 34 | # Main routine 35 | # ------------------- 36 | 37 | # Set the path to the directory containing the TIFF files to be converted 38 | # NOTE: this script will convert all TIFF files in the directory, and will ignore other filetypes. 39 | # The path should end with a slash. 
40 | in_dir = '/Users/baskausj/raw_tiffs/' 41 | 42 | # Set the path to the directory where the converted files will be written. The path should end with a slash. 43 | out_dir = '/Users/baskausj/pyramidal_tiffs/' 44 | 45 | # Set the path to the log file 46 | log_path = out_dir + 'log.txt' 47 | 48 | # Get a list of the files in the input directory 49 | in_files = os.listdir(in_dir) 50 | 51 | # Loop through the files in the input directory 52 | for in_file in in_files: 53 | # Check to see if the file is a TIFF file 54 | if in_file.lower().endswith('.tif') or in_file.lower().endswith('.tiff'): 55 | print(in_file) 56 | 57 | # Set the path to the input file 58 | in_path = in_dir + in_file 59 | 60 | # Set the path to the output file 61 | out_path = out_dir + in_file 62 | 63 | # Write the file name to the log file 64 | with open(log_path, 'a') as log_file: 65 | log_file.write(in_file + '\n') 66 | 67 | # Convert the file 68 | image_magick_convert_tiff(in_path, out_path, log_path) 69 | 70 | print('done') 71 | 72 | -------------------------------------------------------------------------------- /commonsbot/depicts/config.yaml: -------------------------------------------------------------------------------- 1 | data_path: "" 2 | item_pattern_file: graph_pattern.txt 3 | item_source_csv: "" 4 | outfiles: 5 | - output_file_name: depicts.csv 6 | label_description_language_list: 7 | - en 8 | manage_descriptions: false 9 | ignore: 10 | - depicts_label 11 | prop_list: 12 | - variable: depicts 13 | value_type: item 14 | pid: P180 15 | qual: [] 16 | ref: [] 17 | 18 | -------------------------------------------------------------------------------- /commonsbot/depicts/csv-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "depicts.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles": "qid", 11 | "name": "qid", 12 | 
"datatype": "string", 13 | "suppressOutput": true 14 | }, 15 | { 16 | "titles": "depicts_label", 17 | "name": "depicts_label", 18 | "datatype": "string", 19 | "suppressOutput": true 20 | }, 21 | { 22 | "titles": "label_en", 23 | "name": "label_en", 24 | "datatype": "string", 25 | "suppressOutput": true 26 | }, 27 | { 28 | "titles": "depicts_uuid", 29 | "name": "depicts_uuid", 30 | "datatype": "string", 31 | "aboutUrl": "http://www.wikidata.org/entity/{qid}", 32 | "propertyUrl": "http://www.wikidata.org/prop/P180", 33 | "valueUrl": "http://www.wikidata.org/entity/statement/{qid}-{depicts_uuid}" 34 | }, 35 | { 36 | "titles": "depicts", 37 | "name": "depicts", 38 | "datatype": "string", 39 | "aboutUrl": "http://www.wikidata.org/entity/statement/{qid}-{depicts_uuid}", 40 | "propertyUrl": "http://www.wikidata.org/prop/statement/P180", 41 | "valueUrl": "http://www.wikidata.org/entity/{depicts}" 42 | } 43 | ] 44 | } 45 | } 46 | ] 47 | } -------------------------------------------------------------------------------- /commonsbot/depicts/depicts.csv: -------------------------------------------------------------------------------- 1 | qid,depicts_label,label_en,depicts_uuid,depicts 2 | M113161207,John the Baptist,"Madonna and Child with St. Elizabeth and infant John the Baptist, painting by Artist Unknown",2183B4D4-A898-4BF9-BCF0-1CFFAE2048EE,Q40662 3 | M113161207,Jesus,"Madonna and Child with St. Elizabeth and infant John the Baptist, painting by Artist Unknown",ACCCD1E3-586C-4533-8E2C-8AEB83BFF25C,Q302 4 | M113161207,Virgin Mary,"Madonna and Child with St. Elizabeth and infant John the Baptist, painting by Artist Unknown",F42462D1-0222-4990-8427-69656475C3E4,Q345 5 | M113161207,St. Elizabeth,"Madonna and Child with St. 
Elizabeth and infant John the Baptist, painting by Artist Unknown",,Q235849 6 | -------------------------------------------------------------------------------- /commonsbot/extract_image_metadata.py: -------------------------------------------------------------------------------- 1 | # extract_image_metadata.py, a Python script for extracting metadata from the EXIF of images in a directory. 2 | 3 | # (c) 2023 Vanderbilt University. This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0 4 | # Author: Steve Baskauf 5 | 6 | script_version = '0.1.0' 7 | version_modified = '2023-12-04' 8 | 9 | # Install a pip package in the current Jupyter kernel 10 | #import sys 11 | #!{sys.executable} -m pip install Pillow # Use with Jupyter Notebook 12 | 13 | import pandas as pd 14 | from PIL import Image 15 | import os 16 | import datetime 17 | import exifread # https://github.com/ianare/exif-py 18 | 19 | working_directory = os.getcwd() 20 | #working_directory = str(Path.home()) # gets path to home directory 21 | image_dir = '/users/baskausj/raw_tiffs/smaller/' 22 | 23 | # Create an empty dataframe to store the metadata 24 | image_df = pd.DataFrame(columns=['name', 'accession', 'kilobytes', 'height', 'width', 'create_date', 'extension']) 25 | 26 | items = os.listdir(image_dir) 27 | # list comprehension to extract only files from the listed items 28 | image_names = [x for x in items if os.path.isfile(os.path.join(image_dir, x))] 29 | for image_name in image_names: 30 | image_path = image_dir + image_name 31 | print(image_path) 32 | 33 | if image_name[0] == '.': # skip hidden files 34 | continue 35 | image = {} 36 | 37 | image['name'] = image_name 38 | rest_pieces = image_name.split('.') # separate into pieces by full stops 39 | extension = rest_pieces[len(rest_pieces)-1] # the last piece will be the file extension 40 | rest = '.'.join(rest_pieces[:-1]) # re-assemble the other pieces again, restoring the periods 41 | image['accession'] =
rest 42 | 43 | # trap errors when the file isn't an image 44 | try: 45 | with Image.open(image_path) as img: 46 | width, height = img.size 47 | except: 48 | width = 0 49 | height = 0 50 | 51 | try: 52 | # First try to get the actual image creation date from the EXIF 53 | # Code from https://stackoverflow.com/questions/23064549/get-date-and-time-when-photo-was-taken-from-exif-data-using-pil 54 | with open(image_path, 'rb') as fh: 55 | tags = exifread.process_file(fh, stop_tag='EXIF DateTimeOriginal') 56 | date_taken = tags['EXIF DateTimeOriginal'] 57 | create_date_string = str(date_taken)[:10].replace(':', '-') 58 | #print('EXIF DateTimeOriginal', create_date_string) 59 | if create_date_string == '0000-00-00': 60 | raise Exception('Bad date') 61 | #print('image date') 62 | except: 63 | # If that's unavailable, then use the file creation date. 64 | # Note: this code is Mac/Linux-specific and would need to be modified if run on Windows. 65 | timestamp = os.stat(image_path).st_birthtime 66 | time_object = datetime.datetime.fromtimestamp(timestamp) 67 | create_date_string = time_object.strftime("%Y-%m-%d") 68 | #print('file date', create_date_string) 69 | 70 | 71 | if create_date_string == '1969-12-31': 72 | timestamp = os.stat(image_path).st_mtime 73 | time_object = datetime.datetime.fromtimestamp(timestamp) 74 | create_date_string = time_object.strftime("%Y-%m-%d") 75 | #print('file modified', create_date_string) 76 | 77 | #print(image_path, create_date_string) 78 | 79 | #print(height, width) 80 | #print() 81 | image['kilobytes'] = round(os.path.getsize(image_path)/1024) 82 | image['height'] = height 83 | image['width'] = width 84 | image['create_date'] = create_date_string 85 | image['extension'] = extension 86 | 87 | image_df = image_df.append(image, ignore_index=True) 88 | 89 | image_df.to_csv('image_metadata.csv', index=False) 90 | 91 | print('done') -------------------------------------------------------------------------------- /commonsbot/fields_comparison.csv: 
-------------------------------------------------------------------------------- 1 | field,wikidata_label,wikidata_qid,information,artwork,photograph,art_photo,credit_line,location,object_location 2 | accession number,inventory number/catalog code,P217/P528,,x,x,,,, 3 | artist,creator,P170,,x,,,,, 4 | artwork license,,,,,,x,,, 5 | author,creator,P170,x,x,,,x,, 6 | camera coord,coordinates of the point of view,P1259,,,x,,,, 7 | credit line,,,,x,x,,,, 8 | date,inception,P571,x,x,x,,,, 9 | department,,,,x,x,,,, 10 | depicted people,depicts,P180,,x,x,,,, 11 | depicted place,depicts,P180,,x,x,,,, 12 | description,(the description field itself),,x,x,x,,,, 13 | dimensions,height/width/thickness,P2048/P2049/P2610,,x,x,,,, 14 | exhibition history,exhibition history,P608,,x,x,,,, 15 | inscriptions,inscription,P1684,,x,x,,,, 16 | institution,collection,P195,,x,x,,,, 17 | License,,,,,,,x,, 18 | medium,made from material,P186,,x,x,,,, 19 | notes,,,,x,x,,,, 20 | object history,,,,x,x,,,, 21 | Other,,,,,,,x,, 22 | other_fields,,,x,,,,,, 23 | other_versions,,,,x,x,x,,, 24 | permission,,,x,x,x,,,, 25 | photo date,,,,,,x,,, 26 | photo description,,,,,,x,,, 27 | photo license,,,,,,x,,, 28 | photographer,creator,P170,,,x,x,,, 29 | place of creation,location of creation,P1071,,x,,,,, 30 | place of discovery,,,,x,,,,, 31 | references,,,,x,x,,,, 32 | source,based on,P144,x,x,x,x,,, 33 | title,title,P1476,,x,x,,,, 34 | wikidata,,,,x,x,x,,x, 35 | 1,coordinate location,P625,,,,,,x, 36 | 2,coordinate location,P625,,,,,,x, 37 | 3,coordinate location,P625,,,,,,x, 38 | prec,,,,,,,,x, 39 | ,instance of,P31,,,,,,, 40 | ,creator's signature,P7457,,,,,,, 41 | ,depicts Iconclass notation,P1257,,,,,,, 42 | ,fabrication method,P2079,,,,,,, 43 | ,comissioned by,P88,,,,,,, 44 | ,owned by,P127,,,,,,, 45 | (the location template is used for this),location,P276,,,,,,, 46 | (this is the link to the commons item itself),image,P18,,,,,,, 47 | ,image with frame,P7420,,,,,,, 48 | ,image of backside,P7417,,,,,,, 
49 | ,genre,P135,,,,,,, 50 | ,movement,P135,,,,,,, 51 | ,main subject,P921,,,,,,, 52 | ,inspired by,P1941,,,,,,, 53 | -------------------------------------------------------------------------------- /commonsbot/fields_comparison_sort.csv: -------------------------------------------------------------------------------- 1 | field,wikidata_label,wikidata_qid,information,artwork,photograph,art_photo,credit_line,location,object_location 2 | date,inception,P571,x,x,x,,,, 3 | description,(the description field itself),,x,x,x,,,, 4 | permission,,,x,x,x,,,, 5 | source,based on,P144,x,x,x,x,,, 6 | author,creator,P170,x,x,,,x,, 7 | accession number,inventory number/catalog code,P217/P528,,x,x,,,, 8 | credit line,,,,x,x,,,, 9 | department,,,,x,x,,,, 10 | depicted people,depicts,P180,,x,x,,,, 11 | depicted place,depicts,P180,,x,x,,,, 12 | dimensions,height/width/thickness,P2048/P2049/P2610,,x,x,,,, 13 | exhibition history,exhibition history,P608,,x,x,,,, 14 | inscriptions,inscription,P1684,,x,x,,,, 15 | institution,collection,P195,,x,x,,,, 16 | medium,made from material,P186,,x,x,,,, 17 | notes,,,,x,x,,,, 18 | object history,,,,x,x,,,, 19 | other_versions,,,,x,x,x,,, 20 | references,,,,x,x,,,, 21 | title,title,P1476,,x,x,,,, 22 | wikidata,,,,x,x,x,,x, 23 | artist,creator,P170,,x,,,,, 24 | place of creation,location of creation,P1071,,x,,,,, 25 | place of discovery,,,,x,,,,, 26 | other_fields,,,x,,,,,, 27 | camera coord,coordinates of the point of view,P1259,,,x,,,, 28 | photographer,creator,P170,,,x,x,,, 29 | artwork license,,,,,,x,,, 30 | License,,,,,,,x,, 31 | Other,,,,,,,x,, 32 | photo date,,,,,,x,,, 33 | photo description,,,,,,x,,, 34 | photo license,,,,,,x,,, 35 | 1,coordinate location,P625,,,,,,x, 36 | 2,coordinate location,P625,,,,,,x, 37 | 3,coordinate location,P625,,,,,,x, 38 | prec,,,,,,,,x, 39 | ,instance of,P31,,,,,,, 40 | ,creator's signature,P7457,,,,,,, 41 | ,depicts Iconclass notation,P1257,,,,,,, 42 | ,fabrication method,P2079,,,,,,, 43 | ,comissioned 
by,P88,,,,,,, 44 | ,owned by,P127,,,,,,, 45 | (the location template is used for this),location,P276,,,,,,, 46 | (this is the link to the commons item itself),image,P18,,,,,,, 47 | ,image with frame,P7420,,,,,,, 48 | ,image of backside,P7417,,,,,,, 49 | ,genre,P135,,,,,,, 50 | ,movement,P135,,,,,,, 51 | ,main subject,P921,,,,,,, 52 | ,inspired by,P1941,,,,,,, 53 | -------------------------------------------------------------------------------- /commonsbot/images.csv: -------------------------------------------------------------------------------- 1 | qid,local_filename,local_identifier,rank,label,notes,kilobytes,height,width,photo_inception,subdir 2 | Q103304554,1970.040_verso_004.tif,1970.040,secondary,verso,,38205,4234,3080,2018-09-10,1970 3 | Q103304554,1970.040_recto_003.tif,1970.040,primary,recto,,38205,4234,3080,2018-09-10,1970 4 | Q102961245,1992.083.jpg,1992.083,suppress,,resolution too low,72,388,506,2001-11-27,1992 5 | Q102961245,1992.083.tif,1992.083,primary,,,18754,2215,2890,2012-07-30,1992 6 | -------------------------------------------------------------------------------- /commonsbot/information.json: -------------------------------------------------------------------------------- 1 | ["description", 2 | "date", 3 | "source", 4 | "author", 5 | "permission"] -------------------------------------------------------------------------------- /commonsbot/items_status_abbrev.csv: -------------------------------------------------------------------------------- 1 | accession_number,qid,label_en,description_en,creator,instance_of,inception_val,inception_earliest_date_val,inception_latest_date_val,height_val,width_val,thickness_val,diameter_val,style_period,rights,media_url,status 2 | 1979.0656P,Q94696638,St. 
Sebastian,Painting by Liberale da Verona,Q1374872,Q3305213,1525-01-01T00:00:00Z,,,26.5,19.5,,,,,https://forum.jstor.org/assets/26754868/representation-view,inception prior to copyright cutoff 3 | -------------------------------------------------------------------------------- /commonsbot/photograph.json: -------------------------------------------------------------------------------- 1 | ["photographer", 2 | "title", 3 | "description", 4 | "depicted people", 5 | "depicted place", 6 | "date", 7 | "medium", 8 | "dimensions", 9 | "institution", 10 | "department", 11 | "references", 12 | "object history", 13 | "exhibition history", 14 | "credit line", 15 | "inscriptions", 16 | "notes", 17 | "accession number", 18 | "source", 19 | "permission", 20 | "other_versions", 21 | "wikidata", 22 | "camera coord"] -------------------------------------------------------------------------------- /commonsbot/upload_artwork.sh: -------------------------------------------------------------------------------- 1 | python3 ../../../linked-data/commonsbot/commonstool.py 2 | python3 ../../../linked-data/commonsbot/transfer_to_vanderbot.py 3 | python3 ../../../linked-data/vanderbot/vanderbot.py --log error_log.txt --terse true 4 | -------------------------------------------------------------------------------- /etd/theses.csv: -------------------------------------------------------------------------------- 1 | 
qid,label_en,description_en,instance_of_uuid,instance_of,handle_uuid,handle,handle_ref1_hash,handle_ref1_retrieved_nodeId,handle_ref1_retrieved_val,handle_ref1_retrieved_prec,full_text_available_uuid,full_text_available,full_text_available_ref1_hash,full_text_available_ref1_retrieved_nodeId,full_text_available_ref1_retrieved_val,full_text_available_ref1_retrieved_prec,author_string_uuid,author_string,author_string_ref1_hash,author_string_ref1_referenceUrl,author_string_ref1_retrieved_nodeId,author_string_ref1_retrieved_val,author_string_ref1_retrieved_prec,published_uuid,published_nodeId,published_val,published_prec,published_ref1_hash,published_ref1_referenceUrl,published_ref1_retrieved_nodeId,published_ref1_retrieved_val,published_ref1_retrieved_prec,title_uuid,title,title_ref1_hash,title_ref1_referenceUrl,title_ref1_retrieved_nodeId,title_ref1_retrieved_val,title_ref1_retrieved_prec,language_uuid,language,language_ref1_hash,language_ref1_referenceUrl,language_ref1_retrieved_nodeId,language_ref1_retrieved_val,language_ref1_retrieved_prec,dissert_submit_to_uuid,dissert_submit_to,dissert_submit_to_ref1_hash,dissert_submit_to_ref1_referenceUrl,dissert_submit_to_ref1_retrieved_nodeId,dissert_submit_to_ref1_retrieved_val,dissert_submit_to_ref1_retrieved_prec 2 | Q4115189,"Erasing God: Carolingians, Controversy, and the Ashburnham Pentateuch",doctoral dissertation,,Q187685,,1803/11165,,,2022-02-18,,,https://hdl.handle.net/1803/11165,,,2022-02-18,,,Jennifer Cecelia Awes-Freeman,,https://hdl.handle.net/1803/11165,,2022-02-18,,,,2016,,,https://hdl.handle.net/1803/11165,,2022-02-18,,,"Erasing God: Carolingians, Controversy, and the Ashburnham Pentateuch",,https://hdl.handle.net/1803/11165,,2022-02-18,,,Q1860,,https://hdl.handle.net/1803/11165,,2022-02-18,,,Q29052,,https://hdl.handle.net/1803/11165,,2022-02-18, 3 | -------------------------------------------------------------------------------- /gallery/classification_mappings.csv: 
-------------------------------------------------------------------------------- 1 | string,qid,label 2 | Applied Arts,Q838948,work of art 3 | Artifact,Q8205328,artificial physical object 4 | Artists' Books,Q1062404,artist's book 5 | Artists' Books24.,Q1062404,artist's book 6 | Ceramics,Q13464614,ceramics 7 | Coins,Q41207,coin 8 | Currency,Q8142,currency 9 | Decorative Arts,Q838948,work of art 10 | Decorative Arts-Sculpture,Q860861,sculpture 11 | Documents,Q49848,document 12 | Furniture,Q14745,furniture 13 | Graphic Art-Printmaking,Q11060274,print 14 | Graphic Arts,Q478798,image 15 | Graphic Arts - Printmaking,Q11060274,print 16 | Graphic Arts-Drawing,Q93184,drawing 17 | Graphic Arts-Painting,Q3305213,painting 18 | Graphic Arts-Photography,Q125191,photograph 19 | Graphic Arts-Printmaking,Q11060274,print 20 | Graphic Arts-printmaking,Q11060274,print 21 | Manuscript,Q87167,manuscript 22 | Masks,Q161524,mask 23 | Painting,Q3305213,painting 24 | Paintings,Q3305213,painting 25 | Paintings ,Q3305213,painting 26 | Photography,Q125191,photograph 27 | Photography-Graphic Arts,Q125191,photograph 28 | Poster,Q429785,poster 29 | Pottery,Q11642,pottery 30 | Printing Block,Q20820214,printing block 31 | Printing Blocks,Q20820214,printing block 32 | Printmaking,Q11060274,print 33 | Prints,Q11060274,print 34 | Scroll Paintings,Q19969434,scroll painting 35 | Scroll paintings,Q19969434,scroll painting 36 | Sculpture,Q860861,sculpture 37 | Sound Recording,Q3302947,audio recording 38 | Stone,Q22731,stone 39 | Textiles,Q28823,textile 40 | Tool,Q39546,tool 41 | Tools,Q39546,tool 42 | Weapons,Q728,weapon 43 | artists' books,Q1062404,artist's book 44 | bracelet,Q201664,bracelet 45 | ceramics,Q13464614,ceramics 46 | ceramics ,Q13464614,ceramics 47 | ceramics (objects),Q13464614,ceramics 48 | ceremonial masks,Q161524,mask 49 | ceremonial weapons,Q728,weapon 50 | decorative arts,Q838948,work of art 51 | drawing,Q93184,drawing 52 | drawing (image-making),Q93184,drawing 53 | 
manuscript,Q87167,manuscript 54 | masks (costume),Q161524,mask 55 | musical instruments,Q34379,musical instrument 56 | paintings,Q3305213,painting 57 | paintings (visual works),Q3305213,painting 58 | printmaking,Q11060274,print 59 | scroll paintings,Q19969434,scroll painting 60 | sculpture (visual work),Q860861,sculpture 61 | seating furniture,Q14745,furniture 62 | shields (armor),Q131559,shield 63 | shoes (footwear),Q22676,shoe 64 | weapons,Q728,weapon 65 | yarn,Q49007,yarn 66 | -------------------------------------------------------------------------------- /gallery/country_mappings.csv: -------------------------------------------------------------------------------- 1 | string,qid 2 | "Acoma Pueblo, Cibola, New Mexico, United States", 3 | Africa, 4 | "Africa, Central Zaire",Q974 5 | "Africa, Ivory Coast",Q1008 6 | "Africa, Ivory Coast or Ghana", 7 | "Africa, Ivory Coast, Baule People", 8 | "Africa, Nakanai, Bubu Village", 9 | "Africa, Nigeria",Q1033 10 | "Africa, Nigeria, Yoruba People", 11 | "Africa, Nigeria, Yoruba Tribe", 12 | "Africa, Zaire",Q974 13 | "African, Nakamai, Bubu Village", 14 | Ancient Rome, 15 | Angola,Q916 16 | Annam,Q10828323 17 | Annam (Present Day Vietnam),Q10828323 18 | "Annam, Viet Nam",Q10828323 19 | Apulia (Present day Italy), 20 | "Apulia, Italy",Q38 21 | Argentina,Q414 22 | Asia, 23 | Australia,Q408 24 | "Australia, Arnhenhaha", 25 | "Australia, North Central, Aboriginal", 26 | Austria,Q40 27 | Belgium,Q31 28 | Benin,Q962 29 | Bhutan,Q917 30 | "Bologna, Emilia-Romagna, Italy",Q38 31 | Brazil,Q155 32 | Brussels (Southern Netherlandish),Q31 33 | "Cahokia Mounds, Saint Clair, Illinois, United States", 34 | Cambodia,Q424 35 | Canada,Q16 36 | Central America, 37 | "Central America, Chipicuaro, Guanaguato, Mexico", 38 | "Central America, Coastal Mexico, Veracruz",Q96 39 | "Central America, Mexico",Q96 40 | "Central America, West Mexico, Colima",Q96 41 | Chile,Q298 42 | "Chimbote, Ancash, Peru",Q419 43 | China,Q148 44 | China or Thailand, 45 
| Cochiti, 46 | "Cochiti Pueblo, Sandoval, New Mexico, United States", 47 | Colombia,Q739 48 | "Congo, Democratic Republic of the",Q974 49 | Costa Rica,Q800 50 | Cuba,Q241 51 | Czech Republic,Q213 52 | Czechoslovakia,Q33946 53 | Côte d'Ivoire,Q1008 54 | Eastern Africa, 55 | Ecuador,Q736 56 | Egypt,Q79 57 | England,Q21 58 | "England, United Kingdom",Q21 59 | Ethiopia,Q115 60 | Etruria (Present day Italy), 61 | Europe, 62 | "Eyre, Lake, South Australia, Australia",Q408 63 | "Firenze, Tuscany, Italy",Q38 64 | Flanders,Q234 65 | France,Q142 66 | France (Northern),Q142 67 | France (Southern),Q142 68 | France or Italy, 69 | "France, Aquitaine, France", 70 | Germany,Q183 71 | Greco-Roman, 72 | Greece,Q41 73 | Guatemala,Q774 74 | Haiti,Q790 75 | Hungary,Q28 76 | India,Q668 77 | "India, Northwestern",Q668 78 | Indonesia,Q252 79 | Iran,Q794 80 | Iran and United States, 81 | Iraq,Q796 82 | Ireland,Q27 83 | Israel,Q801 84 | Italy,Q38 85 | "Italy, Roman", 86 | "Ivory Coast, Côte d'Ivoire",Q1008 87 | Japan,Q17 88 | "Japan, United States", 89 | Kenya,Q114 90 | "Kingdom of Etruria, Italy",Q223793 91 | Korea,Q18097 92 | Latvia,Q211 93 | "Limoges, Haute-Vienne, Limousin, France",Q142 94 | "Lorestān, Iran",Q794 95 | Luxembourg,Q32 96 | Macedonia,Q221 97 | Mali,Q912 98 | Melanesia, 99 | "Melville Island, Northern Territory, Australia",Q408 100 | Mexico,Q96 101 | Middle East, 102 | "Milan, Milano, Lombardy, Italy",Q38 103 | Morocco,Q1028 104 | Mozambique,Q1029 105 | Myanmar,Q836 106 | Netherlands,Q55 107 | "Netherlands, Ukraine", 108 | New Guinea, 109 | "New Ireland, New Ireland, Papua New Guinea",Q691 110 | Nicaragua,Q811 111 | Nigeria,Q1033 112 | North Africa, 113 | Northern Europe, 114 | Pakistan,Q843 115 | Palestine,Q219060 116 | Papua New Guinea,Q691 117 | Persia,Q794 118 | Peru,Q419 119 | Poland,Q36 120 | Portugal,Q45 121 | "Republic of Florence, Tuscany, Italy",Q148540 122 | "Rhodes, Rhodes, Sporades, Aegean Islands, Greece", 123 | Romania,Q218 124 | Russia,Q159 125 | 
Scotland,Q22 126 | Scotland and England, 127 | "Scotland, United Kingdom",Q22 128 | Serbia and Montenegro,Q37024 129 | "Serbia, Serbia and Montenegro", 130 | "Siena, Tuscany, Italy",Q38 131 | Solomon Islands,Q685 132 | South Africa,Q258 133 | South America, 134 | "South America, Ecuador, Valdivia", 135 | South Korea,Q884 136 | Southeast Asia, 137 | Soviet Union,Q15180 138 | Spain,Q29 139 | Sri Lanka,Q854 140 | "Sumatra, Lampung District",Q252 141 | Sweden,Q34 142 | Switzerland,Q39 143 | Syria,Q858 144 | Taiwan,Q865 145 | Tanzania,Q924 146 | Thailand,Q869 147 | "Thessaloníki, Thessaloníki, Macedonia, Greece", 148 | Tibet,Q2444884 149 | Turkey,Q43 150 | Turkey (ancient Byzantium), 151 | Turkey (ancient Syria), 152 | U.S.A.,Q30 153 | U.S.A. and South Korea, 154 | Ukraine,Q212 155 | United Kingdom,Q145 156 | United States,Q30 157 | United States and Poland, 158 | "United States, France", 159 | "Venice, Venezia, Veneto, Italy",Q38 160 | Western Panama & adjoining areas of Costa Rica, 161 | Yugoslavia,Q36704 162 | "Zaire, Angola", 163 | -------------------------------------------------------------------------------- /gallery/materials.csv: -------------------------------------------------------------------------------- 1 | qid,searched,matches,material 2 | Q2259977,yes,yes,acrylic 3 | Q4691480,yes,yes,agate 4 | Q14552475,yes,yes,alabaster 5 | Q663,yes,,aluminum 6 | Q17291407,yes,yes,amber 7 | Q181908,yes,,andesite 8 | Q836062,yes,,argillite 9 | Q670887,yes,yes,bamboo 10 | Q1112,yes,,barium 11 | Q29817539,yes,yes,bark 12 | Q1053956,yes,no,beads 13 | Q13184,yes,no,berries 14 | Q804070,yes,yes,bisque 15 | Q173350,yes,yes,board 16 | Q55347942,yes,yes,bone 17 | Q101579652,yes,yes,book 18 | Q2828250,yes,yes,brass 19 | Q2778382,yes,yes,bronze 20 | Q171917,yes,,calcite 21 | Q12321255,yes,yes,canvas 22 | Q6432723,yes,yes,card 23 | Q193970,yes,,casein 24 | Q45621,yes,,ceramic 25 | Q170448,yes,,chalcedony 26 | Q95965284,yes,yes,chalk 27 | Q177463,yes,yes,charcoal 28 | 
Q619865,yes,no,china 29 | Q29814964,yes,yes,cinnabar 30 | Q42302,yes,yes,clay 31 | Q5849500,yes,,cloth 32 | ,yes,yes,color 33 | Q18667172,yes,,composition board 34 | Q583353,yes,yes,copper 35 | Q97011139,yes,yes,coral 36 | Q8231603,yes,yes,cotton 37 | Q99826317,yes,,crayon 38 | Q55352534,yes,yes,crystal 39 | Q41159,yes,yes,diamond 40 | Q1377111,yes,,earthenware 41 | Q73344494,yes,yes,emerald 42 | Q1102226,yes,yes,enamel 43 | Q81025,yes,,feathers 44 | Q189964,yes,,felt 45 | Q66559339,yes,yes,fiber 46 | Q2042162,yes,no,foam core 47 | Q23422698,yes,yes,fur 48 | Q1514256,yes,yes,gesso 49 | Q1334300,yes,,gilding 50 | Q70722524,yes,no,gilt 51 | Q56369598,yes,yes,glass 52 | Q1242466,yes,yes,glaze 53 | Q70722524,yes,yes,gold 54 | Q21281546,yes,yes,gouache 55 | Q41177,yes,yes,granite 56 | Q5309,yes,,graphite 57 | Q43238,yes,yes,grass 58 | Q4670978,yes,yes,gum arabic 59 | Q5656329,yes,no,hardstone 60 | Q90801961,yes,yes,horn 61 | Q99900954,yes,,illustration board 62 | Q127418,yes,yes,ink 63 | Q6072283,yes,yes,iron 64 | Q90278705,yes,yes,ivory 65 | Q60733799,yes,yes,jade 66 | Q107211,yes,,jute 67 | Q29719333,yes,yes,lace 68 | Q11236878,yes,,lacquer 69 | Q1936519,yes,yes,lead 70 | Q286,yes,,leather 71 | Q185006,yes,yes,limestone 72 | Q47089651,yes,yes,linen 73 | Q108728,yes,,linoleum 74 | Q200199,yes,yes,marble 75 | Q1808397,yes,,masonite 76 | Q38848,yes,yes,metal 77 | Q114675,yes,no,mica 78 | Q215865,yes,,mother of pearl 79 | Q890914,yes,yes,mud 80 | Q138979,yes,,nephrite 81 | Q84942208,yes,yes,oak 82 | Q22656,yes,yes,oil 83 | Q174219,yes,yes,paint 84 | Q13442814,yes,yes,paper 85 | Q8353,yes,no,paper clips 86 | Q226697,yes,yes,parchment 87 | Q747457,yes,yes,pastel 88 | Q12190,yes,yes,pen 89 | Q15736718,yes,yes,pencil 90 | P462,yes,yes,pigment 91 | Q2050624,yes,yes,pitch 92 | Q216054,yes,yes,plaster 93 | Q11474,yes,yes,plastic 94 | Q219803,yes,,plywood 95 | Q56625581,yes,yes,polymer 96 | Q130693,yes,yes,porcelain 97 | Q50769,yes,,pyrite 98 | Q14251138,yes,,raffia 99 | 
Q323021,yes,,rattan 100 | Q76626,yes,yes,resin 101 | Q1134817,yes,yes,root 102 | Q543923,yes,yes,rust 103 | Q13085,yes,,sandstone 104 | Q29809132,yes,yes,serpentine 105 | Q1537820,yes,yes,shell 106 | Q429659,yes,,shellac 107 | Q11614515,yes,no,shikishi 108 | Q10475001,yes,yes,silk 109 | Q29809750,yes,yes,silver 110 | Q707896,yes,,soapstone 111 | Q55368248,yes,yes,staple 112 | Q25593491,yes,yes,steel 113 | Q617079,yes,yes,stone 114 | Q50478641,yes,yes,string 115 | Q29167534,yes,yes,tapestry 116 | Q553,yes,yes,teeth 117 | Q175166,yes,,tempera 118 | Q60424,yes,,terracotta 119 | Q28823,yes,,textile 120 | Q29809500,yes,yes,thread 121 | Q2141546,yes,,tortoise shell 122 | Q5960345,yes,yes,turquoise 123 | Q96745747,yes,yes,vellum 124 | Q394001,yes,yes,wire 125 | Q287,yes,yes,wood 126 | Q42329,yes,yes,wool 127 | -------------------------------------------------------------------------------- /gallery/properties.csv: -------------------------------------------------------------------------------- 1 | field_name,property_name,PID,value_type 2 | what do we use for the references on everything???,,, 3 | ssid,,,local primary key 4 | filename,,, 5 | title,title,P1476,monolingual text (en) 6 | label,label,rdfs:label,append accession number to title as necessary to make unique 7 | description,description,schema:description,construct from instance of and creator 8 | creator_string,,,"suppress output, map to creator item" 9 | creator,creator,P170,item 10 | date,,, 11 | classification,,,"suppress output, map to instance_of item" 12 | instance_of,instance of,P31,item 13 | medium,,,suppress output 14 | material_used,material used,P186,"item, derive this from medium (manage separately since multiple materials)" 15 | measurements,,,"suppress output, parsed to dimensions" 16 | style_period,,, 17 | country_culture,,,"suppress output, map to country (don't attempt to manage culture at the moment)" 18 | country,country,P17,"item, duplicate this value with country of origin: P495" 19 | 
seals_inscriptions,inscription,P1684,"monolingual text, use zxx for no linguistic content (too much cleanup for now)" 20 | signature,,, 21 | description,,, 22 | publications,,, 23 | exhibitions,,, 24 | accession_number,inventory number,P217,string 25 | [hard code],collection,P195,item qualifier of inventory number with constant Q18563658 (VU Fine Arts Gallery) 26 | date_acquired,,,"suppress, clean up to get start date for collection" 27 | gift_of,,, 28 | purchased_from,,, 29 | credit_line,,, 30 | provenance,,, 31 | collection,,, 32 | last_change,,, 33 | notes,,, 34 | rights,,, 35 | media_url,,, 36 | height,height,P2048, 37 | width,width,P2049, 38 | depth,thickness,P2610, 39 | diameter,diameter,P2386, 40 | inception,,,suppress output 41 | inception_val,inception,P571,date 42 | inception_prec,inception,P571,date 43 | earliest_date_val,earliest date,P1319,date qualifier of inception 44 | earliest_date_prec,earliest date,P1319,date qualifier of inception 45 | latest_date_val,latest date,P1326,date qualifier of inception 46 | latest_date_prec,latest date,P1326,date qualifier of inception 47 | sourcing_circumstances,sourcing circumstances,P1480,item qualifier of inception with constant Q5727902 (circa) 48 | ?,image,P18,leave for now 49 | [hard code],collection,P195,item with constant Q18563658 (VU Fine Arts Gallery) 50 | collection_start_time,start time,P580,date qualifier of collection 51 | [hard code],location,P276,item with constant Q29052 (Vanderbilt University) 52 | -------------------------------------------------------------------------------- /image_analysis/README.md: -------------------------------------------------------------------------------- 1 | # Scripts for image analysis 2 | 3 | This directory includes subdirectories that contain scripts for image analysis. Currently there is only one.
4 | 5 | ## google_cloud_vision 6 | 7 | ### google_cloud_vision.ipynb 8 | 9 | This [script](google_cloud_vision/google_cloud_vision.ipynb) was written to carry out image analysis on artwork images from the Vanderbilt University Fine Arts Gallery. The main analysis is carried out with the Google Cloud Vision API using the FACE_DETECTION, LABEL_DETECTION, OBJECT_LOCALIZATION, and TEXT_DETECTION features. These were the features deemed useful in the artwork context. 10 | 11 | In order to provide images for analysis that had sufficient resolution but that were not larger than the limits imposed by the Vision API, the script uses an IIIF image server to generate JPEG images that were 1000 pixels in their smallest dimension (or full resolution if the original images were smaller than that). The first part of the script contains code for that. 12 | 13 | To display localized objects, the last part of the script generates an IIIF annotation file that can be linked to a manifest for the analyzed image. This allows a human to view the detected object by displaying its bounding box and the textual label assigned to the localized object. 14 | 15 | ### Output files 16 | 17 | There are four CSV files that contain the output of analysis of about 1500 gallery images. 
18 | 19 | [face_detection.csv](google_cloud_vision/face_detection.csv) 20 | 21 | [label_detection.csv](google_cloud_vision/label_detection.csv) 22 | 23 | [object_localization.csv](google_cloud_vision/object_localization.csv) 24 | 25 | [text_detection.csv](google_cloud_vision/text_detection.csv) 26 | 27 | ---- 28 | Last modified: 2023-03-27 -------------------------------------------------------------------------------- /image_analysis/google_cloud_vision/face_detection.csv: -------------------------------------------------------------------------------- 1 | image_filename,score,abs_left_x,abs_right_x,abs_top_y,abs_bottom_y,roll_angle,pan_angle,tilt_angle,"landmarking, confidence",joy_likelihood,sorrow_likelihood,anger_likelihood,surprise_likelihood,under_exposed_likelihood,blurred_likelihood,headwear_likelihood,landmarking_confidence 2 | 1979.1140P.tif.jpg,0.54296875,247,327,27,119,-1.2418436,-6.9631257,3.4942808,,1,1,1,1,1,1,1,0.6479626 3 | -------------------------------------------------------------------------------- /image_analysis/google_cloud_vision/label_detection.csv: -------------------------------------------------------------------------------- 1 | image_filename,mid,description,score,topicality 2 | 1979.1140P.tif.jpg,/m/047vlmn,Outerwear,0.9522831,0.9522831 3 | 1979.1140P.tif.jpg,/m/062581,Sleeve,0.8724492,0.8724492 4 | 1979.1140P.tif.jpg,/m/01n5jq,Poster,0.82310665,0.82310665 5 | 1979.1140P.tif.jpg,/m/0jjw,Art,0.7093869,0.7093869 6 | 1979.1140P.tif.jpg,/m/011s0,Advertising,0.7002993,0.7002993 7 | 1979.1140P.tif.jpg,/m/0bt_c3,Book,0.7000526,0.7000526 8 | 1979.1140P.tif.jpg,/m/01kr8f,Illustration,0.69645387,0.69645387 9 | 1979.1140P.tif.jpg,/m/08v4gh,Vintage clothing,0.69622165,0.69622165 10 | 1979.1140P.tif.jpg,/m/02w3_2,Formal wear,0.6948315,0.6948315 11 | 1979.1140P.tif.jpg,/m/03tvvb,Overcoat,0.68086326,0.68086326 12 | -------------------------------------------------------------------------------- 
/image_analysis/google_cloud_vision/object_localization.csv: -------------------------------------------------------------------------------- 1 | image_filename,description,score,rel_left_x,rel_right_x,rel_top_y,rel_bottom_y 2 | 1979.1140P.tif.jpg,Person,0.7397425,0.086612225,0.9310081,0.031645376,0.9316656 3 | 1979.1140P.tif.jpg,Outerwear,0.6133001,0.26430628,0.9030248,0.1569512,0.83101875 4 | 1979.1140P.tif.jpg,Clothing,0.58644634,0.109607734,0.928936,0.06095144,0.9136492 5 | 1979.1140P.tif.jpg,Luggage & bags,0.5370669,0.75118846,0.93071,0.67792517,0.8153662 6 | -------------------------------------------------------------------------------- /image_analysis/google_cloud_vision/text_detection.csv: -------------------------------------------------------------------------------- 1 | image_filename,locale,description,abs_left_x,abs_right_x,abs_top_y,abs_bottom_y 2 | 1979.1140P.tif.jpg,en,"God bless the 3 | churches and 4 | blessed be God, 5 | Who in this our 6 | great trial giveth 7 | us the churches 8 | ALincoln, 9 | (Œ†) 10 | INTERCHURCH 11 | WORLD MOVEMENT",24,440,25,580 12 | 1979.1140P.tif.jpg,,God,38,94,25,52 13 | 1979.1140P.tif.jpg,,bless,97,152,27,53 14 | 1979.1140P.tif.jpg,,the,156,195,28,55 15 | 1979.1140P.tif.jpg,,churches,38,146,58,78 16 | 1979.1140P.tif.jpg,,and,151,196,58,78 17 | 1979.1140P.tif.jpg,,blessed,38,117,83,104 18 | 1979.1140P.tif.jpg,,be,117,143,83,104 19 | 1979.1140P.tif.jpg,,God,147,196,83,104 20 | 1979.1140P.tif.jpg,,",",196,203,83,104 21 | 1979.1140P.tif.jpg,,Who,36,88,109,130 22 | 1979.1140P.tif.jpg,,in,93,112,109,129 23 | 1979.1140P.tif.jpg,,this,115,156,109,130 24 | 1979.1140P.tif.jpg,,our,159,197,110,130 25 | 1979.1140P.tif.jpg,,great,37,89,134,159 26 | 1979.1140P.tif.jpg,,trial,90,133,134,158 27 | 1979.1140P.tif.jpg,,giveth,134,197,133,158 28 | 1979.1140P.tif.jpg,,us,39,63,160,181 29 | 1979.1140P.tif.jpg,,the,65,99,160,180 30 | 1979.1140P.tif.jpg,,churches,103,194,159,178 31 | 1979.1140P.tif.jpg,,ALincoln,76,171,191,210 32 | 
1979.1140P.tif.jpg,,",",165,181,186,208 33 | 1979.1140P.tif.jpg,,(,378,386,48,62 34 | 1979.1140P.tif.jpg,,Œ,379,389,48,62 35 | 1979.1140P.tif.jpg,,†,380,393,48,62 36 | 1979.1140P.tif.jpg,,),391,397,48,62 37 | 1979.1140P.tif.jpg,,INTERCHURCH,24,438,512,548 38 | 1979.1140P.tif.jpg,,WORLD,24,173,553,580 39 | 1979.1140P.tif.jpg,,MOVEMENT,190,440,553,580 40 | -------------------------------------------------------------------------------- /json_schema/csv-metadata_globecoordinate.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "globecoordinate_test.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles": "qid", 11 | "name": "qid", 12 | "datatype": "string", 13 | "suppressOutput": true 14 | }, 15 | { 16 | "titles": "coordinateLocation_uuid", 17 | "name": "coordinateLocation_uuid", 18 | "datatype": "string", 19 | "aboutUrl": "http://www.wikidata.org/entity/{qid}", 20 | "propertyUrl": "http://www.wikidata.org/prop/P625", 21 | "valueUrl": "http://www.wikidata.org/entity/statement/{qid}-{coordinateLocation_uuid}" 22 | }, 23 | { 24 | "titles": "coordinateLocation_nodeId", 25 | "name": "coordinateLocation_nodeId", 26 | "datatype": "string", 27 | "aboutUrl": "http://www.wikidata.org/entity/statement/{qid}-{coordinateLocation_uuid}", 28 | "propertyUrl": "http://www.wikidata.org/prop/statement/value/P625", 29 | "valueUrl": "http://example.com/.well-known/genid/{coordinateLocation_nodeId}" 30 | }, 31 | { 32 | "titles": "coordinateLocation_val", 33 | "name": "coordinateLocation_val", 34 | "datatype": "float", 35 | "aboutUrl": "http://example.com/.well-known/genid/{coordinateLocation_nodeId}", 36 | "propertyUrl": "http://wikiba.se/ontology#geoLatitude" 37 | }, 38 | { 39 | "titles": "coordinateLocation_long", 40 | "name": "coordinateLocation_long", 41 | "datatype": "float", 42 | "aboutUrl": 
"http://example.com/.well-known/genid/{coordinateLocation_nodeId}", 43 | "propertyUrl": "http://wikiba.se/ontology#geoLongitude" 44 | }, 45 | { 46 | "titles": "coordinateLocation_prec", 47 | "name": "coordinateLocation_prec", 48 | "datatype": "float", 49 | "aboutUrl": "http://example.com/.well-known/genid/{coordinateLocation_nodeId}", 50 | "propertyUrl": "http://wikiba.se/ontology#geoPrecision" 51 | }, 52 | { 53 | "titles": "coordinateLocation_ref1_hash", 54 | "name": "coordinateLocation_ref1_hash", 55 | "datatype": "string", 56 | "aboutUrl": "http://www.wikidata.org/entity/statement/{qid}-{coordinateLocation_uuid}", 57 | "propertyUrl": "prov:wasDerivedFrom", 58 | "valueUrl": "http://www.wikidata.org/reference/{coordinateLocation_ref1_hash}" 59 | }, 60 | { 61 | "titles": "coordinateLocation_ref1_importedFromWikipedia", 62 | "name": "coordinateLocation_ref1_importedFromWikipedia", 63 | "datatype": "string", 64 | "aboutUrl": "http://www.wikidata.org/reference/{coordinateLocation_ref1_hash}", 65 | "propertyUrl": "http://www.wikidata.org/prop/reference/P143", 66 | "valueUrl": "http://www.wikidata.org/entity/{coordinateLocation_ref1_importedFromWikipedia}" 67 | }, 68 | { 69 | "titles": "coordinateLocation_ref1_importUrl", 70 | "name": "coordinateLocation_ref1_importUrl", 71 | "datatype": "string", 72 | "aboutUrl": "http://www.wikidata.org/reference/{coordinateLocation_ref1_hash}", 73 | "propertyUrl": "http://www.wikidata.org/prop/reference/P4656", 74 | "valueUrl": "{+coordinateLocation_ref1_importUrl}" 75 | } 76 | ] 77 | } 78 | } 79 | ] 80 | } -------------------------------------------------------------------------------- /json_schema/csv-metadata_monolingual.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "monolingualstring_test.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles": "qid", 11 | "name": "qid", 
12 | "datatype": "string", 13 | "suppressOutput": true 14 | }, 15 | { 16 | "titles": "title_uuid", 17 | "name": "title_uuid", 18 | "datatype": "string", 19 | "aboutUrl": "http://www.wikidata.org/entity/{qid}", 20 | "propertyUrl": "http://www.wikidata.org/prop/P1476", 21 | "valueUrl": "http://www.wikidata.org/entity/statement/{qid}-{title_uuid}" 22 | }, 23 | { 24 | "titles": "title", 25 | "name": "title", 26 | "datatype": "string", 27 | "aboutUrl": "http://www.wikidata.org/entity/statement/{qid}-{title_uuid}", 28 | "propertyUrl": "http://www.wikidata.org/prop/statement/P1476", 29 | "lang": "nl" 30 | } 31 | ] 32 | } 33 | } 34 | ] 35 | } -------------------------------------------------------------------------------- /json_schema/globecoordinate.ttl: -------------------------------------------------------------------------------- 1 | @prefix prov: . 2 | @prefix xsd: . 3 | 4 | . 5 | 6 | "39.333611"^^xsd:float; 7 | "-85.973611"^^xsd:float; 8 | "0.01"^^xsd:float . 9 | 10 | prov:wasDerivedFrom ; 11 | . 12 | 13 | ; 14 | . 
15 | -------------------------------------------------------------------------------- /json_schema/globecoordinate_test.csv: -------------------------------------------------------------------------------- 1 | qid,coordinateLocation_uuid,coordinateLocation_nodeId,coordinateLocation_val,coordinateLocation_long,coordinateLocation_prec,coordinateLocation_ref1_hash,coordinateLocation_ref1_importedFromWikipedia,coordinateLocation_ref1_importUrl 2 | Q86754582,EA0D64DE-9661-41AA-9AE9-812AB2799C39,f46b537c-676f-4a57-88f8-fed3c052c7b8,39.333611,-85.973611,0.01,de5758ff0250a2f1746e381bc79d3b2ca4bfecff,Q328,"https://en.wikipedia.org/w/index.php?title=Pleasant_View_Village,_Indiana&oldid=942655251" 3 | -------------------------------------------------------------------------------- /json_schema/journal-div-qids_small.csv: -------------------------------------------------------------------------------- 1 | qid,label 2 | Q100718707,Achaemenid Research on Texts and Archaeology 3 | Q97446840,Acme: annali della Facoltà di Lettere e Filosofia dell'Università degli Studi di Milano. 4 | Q11956877,Acta ad archaeologiam et atrium historiam pertinentia 5 | Q100694040,Alive Now 6 | -------------------------------------------------------------------------------- /json_schema/monolingual.ttl: -------------------------------------------------------------------------------- 1 | @prefix rdf: . 2 | 3 | . 4 | 5 | "Religion Compass"@nl . 
6 | -------------------------------------------------------------------------------- /json_schema/monolingualstring_test.csv: -------------------------------------------------------------------------------- 1 | qid,title_uuid,title 2 | Q15749660,40C38DAE-F06A-4D56-8563-C0074728BA01,Religion Compass 3 | -------------------------------------------------------------------------------- /json_schema/property_labels.csv: -------------------------------------------------------------------------------- 1 | pid,label 2 | P127,owned by 3 | P135,movement 4 | P136,genre 5 | P17,country 6 | P18,image 7 | P180,depicts 8 | P186,material used 9 | P195,collection 10 | P217,inventory number 11 | P2596,culture 12 | P276,location 13 | P31,instance of 14 | P528,catalog code 15 | P571,inception 16 | P921,main subject 17 | P973,described at URL 18 | P734,family name 19 | P108,employer 20 | P735,given name 21 | P170,creator 22 | P361,part of 23 | P495,country of origin 24 | P1476,title 25 | P2049,width 26 | P2048,height 27 | -------------------------------------------------------------------------------- /json_schema/quantity.ttl: -------------------------------------------------------------------------------- 1 | @prefix prov: . 2 | @prefix xsd: . 3 | 4 | ; 5 | . 6 | 7 | 51.25; 8 | . 9 | 10 | 11; 11 | "2020-11-10T00:00:00Z"^^xsd:dateTime . 12 | 13 | 11; 14 | "2020-11-10T00:00:00Z"^^xsd:dateTime . 15 | 16 | 28.0; 17 | . 18 | 19 | prov:wasDerivedFrom ; 20 | . 21 | 22 | prov:wasDerivedFrom ; 23 | . 24 | 25 | ; 26 | , 27 | . 
28 | -------------------------------------------------------------------------------- /json_schema/quantity_test.csv: -------------------------------------------------------------------------------- 1 | qid,width_uuid,width_nodeId,width_val,width_unit,width_ref1_hash,width_ref1_referenceUrl,width_ref1_retrieved_nodeId,width_ref1_retrieved_val,width_ref1_retrieved_prec,height_uuid,height_nodeId,height_val,height_unit,height_ref1_hash,height_ref1_referenceUrl,height_ref1_retrieved_nodeId,height_ref1_retrieved_val,height_ref1_retrieved_prec 2 | Q80566046,BBAF8E0E-2FA5-4E8A-A00C-47FC5C1817B2,851dee0b-341c-478c-930a-062b479cfe89,28,Q218593,b113c41e47fc3426c8bc9bad706985043b5f85ea,https://library-artstor-org.proxy.library.vanderbilt.edu/asset/26754871,4b854bcd-f5cb-4a4b-b988-5525122b9b83,2020-11-10T00:00:00Z,11,C3479869-73B3-4147-81A5-EB878026C033,364b9dca-163a-4911-ba31-8aede61a24a5,51.25,Q218593,b113c41e47fc3426c8bc9bad706985043b5f85ea,https://library-artstor-org.proxy.library.vanderbilt.edu/asset/26754871,78cc1e9e-68a4-4c4d-88fa-5487b519d98d,2020-11-10T00:00:00Z,11 3 | -------------------------------------------------------------------------------- /neptune/config/named_graphs_config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "column_header": "sd:name", 4 | "object_type": "iri" 5 | }, 6 | { 7 | "column_header": "dcterms:issued", 8 | "object_type": "literal", 9 | "datatype": "xsd:date" 10 | }, 11 | { 12 | "column_header": "dc:publisher", 13 | "object_type": "literal" 14 | }, 15 | { 16 | "column_header": "rdf:type", 17 | "object_type": "curie" 18 | }, 19 | { 20 | "column_header": "dcterms:isPartOf", 21 | "object_type": "iri" 22 | }, 23 | { 24 | "column_header": "tdwgutility:status", 25 | "object_type": "literal" 26 | } 27 | ] 28 | -------------------------------------------------------------------------------- /neptune/config/prefixes.txt: 
-------------------------------------------------------------------------------- 1 | prefix rdf: 2 | prefix rdfs: 3 | prefix xsd: 4 | prefix sd: 5 | prefix void: 6 | prefix dc: 7 | prefix dcterms: 8 | prefix tdwgutility: 9 | -------------------------------------------------------------------------------- /neptune/drop_time.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeardLibrary/linked-data/9b0c7c70de9061e126f68da1f570846afa39537e/neptune/drop_time.jpg -------------------------------------------------------------------------------- /neptune/graph_file_associations.csv: -------------------------------------------------------------------------------- 1 | sd:name,sd:graph,filename,elapsed_time,graph_load_status 2 | http://vocab.getty.edu/aat/msc/,http://AATOut_Notations,AATOut_Notations.nt,18.781338,load complete in 3.012247s 3 | http://vocab.getty.edu/aat/msc/,http://AATOut_LCSHAlignment,AATOut_LCSHAlignment.nt,30.521203,load complete in 5.642851s 4 | http://vocab.getty.edu/aat/msc/,http://AATOut_Lang_sameAs,AATOut_Lang_sameAs.nt,40.061934,load complete in 3.388141s 5 | http://vocab.getty.edu/aat/msc/,http://AATOut_Contribs,AATOut_Contribs.nt,49.596593,load complete in 3.450805s 6 | http://vocab.getty.edu/aat/msc/,http://AATOut_ObsoleteSubjects,AATOut_ObsoleteSubjects.nt,61.051823,load complete in 3.587236s 7 | http://vocab.getty.edu/aat/msc/,http://AATOut_OrderedCollections,AATOut_OrderedCollections.nt,75.883127,load complete in 8.774357s 8 | http://vocab.getty.edu/aat/msc/,http://AATOut_WikidataCoref,AATOut_WikidataCoref.nt,94.34295,load complete in 12.295558s 9 | http://vocab.getty.edu/aat/msc/,http://AATOut_AssociativeRels,AATOut_AssociativeRels.nt,140.766459,load complete in 40.110157s 10 | http://vocab.getty.edu/aat/msc/,http://AATOut_SemanticLinks,AATOut_SemanticLinks.nt,206.142259,load complete in 59.189885s 11 | 
http://vocab.getty.edu/aat/msc/,http://AATOut_HierarchicalRels,AATOut_HierarchicalRels.nt,261.286129,load complete in 48.995635s 12 | http://vocab.getty.edu/aat/msc/,http://AATOut_Sources,AATOut_Sources.nt,409.531723,load complete in 141.691312s 13 | -------------------------------------------------------------------------------- /neptune/load_time.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeardLibrary/linked-data/9b0c7c70de9061e126f68da1f570846afa39537e/neptune/load_time.jpg -------------------------------------------------------------------------------- /neptune/named_graphs.csv: -------------------------------------------------------------------------------- 1 | sd:name,dcterms:issued,dc:publisher,rdf:type,dcterms:isPartOf,tdwgutility:status,load_status 2 | http://vocab.getty.edu/aat/msc/,2024-04-12,J. Paul Getty Trust,sd:NamedGraph,https://www.getty.edu/,production,update complete 3 | -------------------------------------------------------------------------------- /neptune/service_description_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeardLibrary/linked-data/9b0c7c70de9061e126f68da1f570846afa39537e/neptune/service_description_model.png -------------------------------------------------------------------------------- /neptune/trigger.txt: -------------------------------------------------------------------------------- 1 | load -------------------------------------------------------------------------------- /publications/README.md: -------------------------------------------------------------------------------- 1 | # Publications and authors metadata 2 | 3 | This directory contains work related to harvesting information about authors and their publications. 4 | 5 | ## VanderBot 6 | 7 | See [this repository](../vanderbot/) for current information about Vanderbot. 
The VanderBot code in this repository is no longer maintained and is out of date. 8 | 9 | ## Other stuff 10 | 11 | The publications part is related to assembling publication metadata and identifiers (DOIs, Handles, Wikidata, etc.) and associating those publications with their authors. The `crossref` directory has some work on this. 12 | 13 | The `work-person-figure.png` and PowerPoint files show an RDF model to represent institutions (Vanderbilt), people, and their works. 14 | 15 | ---- 16 | Revised 2020-04-20 17 | -------------------------------------------------------------------------------- /publications/apis.md: -------------------------------------------------------------------------------- 1 | # APIs for retrieving data about people and publications 2 | 3 | 4 | | name | endpoint URL | API documentation | description | 5 | |------|----------|-------------------|-------------| 6 | | ORCID RDF API | https://pub.orcid.org/experimental_rdf_v1/ | | ORCID experimental API for retrieving RDF data | 7 | | ORCID records | https://orcid.org/{id} | | retrieval of metadata through content negotiation (request appropriate Content-type) | 8 | | CrossRef | http://dx.doi.org/{doi} | | retrieval of RDF metadata through content negotiation (request Content-type: application/rdf+xml)| 9 | | NCBI Entrez (PubMed) | https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi | https://www.ncbi.nlm.nih.gov/books/NBK25501/ | Programmatic searches of NCBI datasets including PubMed | 10 | | Web Of Science | | https://developer.clarivate.com/ | Clarivate Developer Portal (Web of Science API Lite) | 11 | | OpenAlex | https://api.openalex.org/ | https://docs.openalex.org/api | Open catalog of scholarly papers, authors, etc. 
| 12 | 13 | 14 | ---- 15 | Revised 2022-02-05 16 | -------------------------------------------------------------------------------- /publications/crossref/articles.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,description_en,instance_of_uuid,instance_of,doi_uuid,doi,doi_ref1_hash,doi_ref1_referenceUrl,doi_ref1_retrieved_nodeId,doi_ref1_retrieved_val,doi_ref1_retrieved_prec,pmid_uuid,pmid,pmid_ref1_hash,pmid_ref1_referenceUrl,pmid_ref1_retrieved_nodeId,pmid_ref1_retrieved_val,pmid_ref1_retrieved_prec,published_uuid,published_nodeId,published_val,published_prec,published_ref1_hash,published_ref1_referenceUrl,published_ref1_retrieved_nodeId,published_ref1_retrieved_val,published_ref1_retrieved_prec,title_en_uuid,title_en,title_en_ref1_hash,title_en_ref1_referenceUrl,title_en_ref1_retrieved_nodeId,title_en_ref1_retrieved_val,title_en_ref1_retrieved_prec,journal_uuid,journal,journal_ref1_hash,journal_ref1_referenceUrl,journal_ref1_retrieved_nodeId,journal_ref1_retrieved_val,journal_ref1_retrieved_prec,volume_uuid,volume,volume_ref1_hash,volume_ref1_referenceUrl,volume_ref1_retrieved_nodeId,volume_ref1_retrieved_val,volume_ref1_retrieved_prec,page_uuid,page,page_ref1_hash,page_ref1_referenceUrl,page_ref1_retrieved_nodeId,page_ref1_retrieved_val,page_ref1_retrieved_prec,issue_uuid,issue,issue_ref1_hash,issue_ref1_referenceUrl,issue_ref1_retrieved_nodeId,issue_ref1_retrieved_val,issue_ref1_retrieved_prec,isbn10_uuid,isbn10,isbn10_ref1_hash,isbn10_ref1_referenceUrl,isbn10_ref1_retrieved_nodeId,isbn10_ref1_retrieved_val,isbn10_ref1_retrieved_prec,isbn13_uuid,isbn13,isbn13_ref1_hash,isbn13_ref1_referenceUrl,isbn13_ref1_retrieved_nodeId,isbn13_ref1_retrieved_val,isbn13_ref1_retrieved_prec,publisher_uuid,publisher,publisher_ref1_hash,publisher_ref1_referenceUrl,publisher_ref1_retrieved_nodeId,publisher_ref1_retrieved_val,publisher_ref1_retrieved_prec 2 | ,The microbiome impacts host hybridization and speciation,journal 
article,,Q18918145,,10.1371/JOURNAL.PBIO.3001417,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06,,,34699520,,https://pubmed.ncbi.nlm.nih.gov/34699520/,,2021-11-06,,,,2021-10-26,,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06,,,The microbiome impacts host hybridization and speciation,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06,,,Q1771695,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06,,,19,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06,,,e3001417,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06,,,10,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06,,,,,,,,,,,,,,,,,,,,,, 3 | -------------------------------------------------------------------------------- /publications/crossref/author_strings.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,author_string_uuid,author_string,author_string_series_ordinal,author_string_ref1_hash,author_string_ref1_referenceUrl,author_string_ref1_retrieved_nodeId,author_string_ref1_retrieved_val,author_string_ref1_retrieved_prec 2 | ,The microbiome impacts host hybridization and speciation,,Asia K. Miller,1,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06, 3 | ,The microbiome impacts host hybridization and speciation,,Camille S. Westlake,2,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06, 4 | -------------------------------------------------------------------------------- /publications/crossref/authors.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,author_uuid,author,author_series_ordinal,author_stated_as,author_ref1_hash,author_ref1_referenceUrl,author_ref1_retrieved_nodeId,author_ref1_retrieved_val,author_ref1_retrieved_prec 2 | ,The microbiome impacts host hybridization and speciation,,Q77516238,3,Karissa L. 
Cross,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06, 3 | ,The microbiome impacts host hybridization and speciation,,Q77516371,4,Brittany A. Leigh,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06, 4 | ,The microbiome impacts host hybridization and speciation,,Q45943775,5,Seth R. Bordenstein,,http://doi.org/10.1371/JOURNAL.PBIO.3001417,,2021-11-06, 5 | -------------------------------------------------------------------------------- /publications/crossref/crossref_errors.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeardLibrary/linked-data/9b0c7c70de9061e126f68da1f570846afa39537e/publications/crossref/crossref_errors.txt -------------------------------------------------------------------------------- /publications/crossref/departments.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,affiliation_uuid,affiliation,affiliation_start_time_nodeId,affiliation_start_time_val,affiliation_start_time_prec,affiliation_end_time_nodeId,affiliation_end_time_val,affiliation_end_time_prec,affiliation_ref1_hash,affiliation_ref1_referenceUrl,affiliation_ref1_retrieved_nodeId,affiliation_ref1_retrieved_val,affiliation_ref1_retrieved_prec 2 | Q45943775,Seth R. Bordenstein,5B2F1843-88F2-4AC9-A1C7-70F6B9635592,Q78041310,,,,,,,63084c503d28acea296eaa7cab8f654683594f03,https://as.vanderbilt.edu/biosci/people/index.php?group=primary-training-faculty,af1d0b824820d36603409c8aadad11e0,2019-12-10T00:00:00Z,11 3 | Q45943775,Seth R. Bordenstein,3f823f57-4d7d-32a5-af8f-92c646045a36,Q91645674,,,,,,,237d7c7ca4c3e4881c901493f20439c6dcc2502d,https://www.vumc.org/viiii/person/seth-r-bordenstein-phd,b3bbbaf00b7ee7f20987fa45fa87e2f1,2020-05-12T00:00:00Z,11 4 | Q45943775,Seth R. 
Bordenstein,9e7b61dd-4300-64ee-fb41-c0117eb54065,Q7914466,,,,,,,50d5048ae7928e33d7e5fa648ec1066f1eb41ff6,https://wag.app.vanderbilt.edu/PublicPage/Faculty/Details/26362,b3bbbaf00b7ee7f20987fa45fa87e2f1,2020-05-12T00:00:00Z,11 5 | -------------------------------------------------------------------------------- /publications/crossref/doi_source.csv: -------------------------------------------------------------------------------- 1 | author,doi 2 | bordenstein et al.,10.1371/journal.pbio.3001417 3 | -------------------------------------------------------------------------------- /publications/crossref/editors.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,editor_uuid,editor,editor_series_ordinal,editor_stated_as,editor_ref1_hash,editor_ref1_referenceUrl,editor_ref1_retrieved_nodeId,editor_ref1_retrieved_val,editor_ref1_retrieved_prec 2 | -------------------------------------------------------------------------------- /publications/crossref/old_xquery/README.md: -------------------------------------------------------------------------------- 1 | # Munging data from CrossRef 2 | 3 | | file | description | 4 | |------|-------------| 5 | | crossref-get.py | a Python script that gets RDF data from Crossref by dereferencing DOIs | 6 | | crossref-get.xq | XQuery script that dereferences DOIs to get RDF/XML, then just merges it into files of 1000 pubs each | 7 | | merge-doi.xq | After all of the 1000 pub files are loaded into a BaseX database, this XQuery script merges them into a single RDF/XML document that can be loaded into the SPARQL endpoint | 8 | | vanderbilt-doi.csv | This is just a list of all of the DOIs for Vanderbilt people publications; derived from their ORCID profiles | 9 | | doi.rdf | This file isn't present in this directory - not sure what happened to it. 
It may have been too big to put on GitHub | 10 | 11 | # Baskauf notes on munging data from Crossref - 2017-09-10 12 | 13 | ## This part of the procedures deals with the scripts and data in this directory (see the orcid directory for the first part) 14 | 15 | 8\. Retrieve RDF data from CrossRef using the crossref-get.py program. 16 | 17 | 9\. Load doi.rdf into the SPARQL endpoint. 18 | 19 | 10\. Extract desired data about the works using this query: 20 | 21 | ``` 22 | prefix dcterms: 23 | prefix rdfs: 24 | prefix foaf: 25 | prefix bibo: 26 | 27 | SELECT DISTINCT ?s ?aTitle ?date ?aVolume ?start ?end ?journal ?jTitle ?publisher WHERE { 28 | graph { 29 | ?s dcterms:title ?aTitle. 30 | ?s dcterms:date ?date. 31 | OPTIONAL { 32 | ?s bibo:volume ?aVolume. 33 | } 34 | OPTIONAL { 35 | ?s bibo:pageStart ?start. 36 | } 37 | OPTIONAL { 38 | ?s bibo:pageEnd ?end. 39 | } 40 | OPTIONAL { 41 | ?s dcterms:isPartOf ?journal. 42 | ?journal dcterms:title ?jTitle. 43 | } 44 | OPTIONAL { 45 | ?s dcterms:publisher ?publisher. 46 | } 47 | } 48 | } 49 | ``` 50 | 51 | 11\. Copy output table and save as a CSV. 52 | 53 | 12\. Get Vanderbilt information from https://www.grid.ac/institutes/grid.152326.1 as text/turtle and fill in the CSV manually. 
54 | 55 | ---- 56 | Revised 2019-05-01 57 | -------------------------------------------------------------------------------- /publications/crossref/old_xquery/crossref-get.py: -------------------------------------------------------------------------------- 1 | # had to install rdflib using "pip install rdflib" before running the first time 2 | 3 | # see https://rdflib.readthedocs.org/en/latest/gettingstarted.html 4 | import rdflib 5 | 6 | # this is Python's built-in XML processor 7 | import xml.etree.ElementTree as etree 8 | 9 | # results.xml is the SPARQL results file that I saved after querying for GeoNames IRIs 10 | tree = etree.parse('escape-doi.xml') 11 | 12 | # I searched the XML to find the "path" elements (ORCID ID strings), then put them in an array 13 | resultsArray=tree.findall('.//uri') 14 | 15 | #builtGraph is where I'm going to accumulate triples that I've scraped 16 | builtGraph=rdflib.Graph() 17 | 18 | #addedGraph contains triples that I got from a particular GeoNames RDF file 19 | addedGraph=rdflib.Graph() 20 | 21 | fileIndex=0 22 | while fileIndex 24 | return http:send-request($request) 25 | }; 26 | 27 | declare function local:generate-description-element($uri as xs:string) 28 | { 29 | let $redirectUri := local:get-redirect($uri) 30 | return if ($redirectUri = "error") 31 | then element rdf:Description { 32 | attribute rdf:about {$uri}, 33 | {substring-after($uri,"http://dx.doi.org/")}, 34 | {substring-after($uri,"http://dx.doi.org/")}, 35 | bad doi 36 | } 37 | else local:query-endpoint($redirectUri)[2]/rdf:RDF/rdf:Description 38 | }; 39 | 40 | (: let $textDoi := http:send-request()[2] :) 41 | let $textDoi := file:read-text('file:///c:/test/vanderbilt-doi.csv') 42 | let $xmlDoi := csv:parse($textDoi, map { 'header' : true(),'separator' : "," }) 43 | 44 | let $numberOfResults := count($xmlDoi/csv/record) 45 | let $pages := $numberOfResults idiv 1000 (: pages are sets of 1000 results :) 46 | let $remainder := $numberOfResults mod 1000 47 | 48 | 
return ( 49 | for $page in (0 to $pages - 1) 50 | return 51 | file:write("c:\test\doi\doi"||string($page)||".rdf", 52 | { 53 | for $record in (1 to 1000) 54 | let $uri := $xmlDoi/csv/record[$page * 1000 + $record]/work/text() 55 | return local:generate-description-element($uri) 56 | } 57 | ) 58 | , 59 | 60 | file:write("c:\test\doi\doi"||string($pages)||".rdf", 61 | { 62 | for $record in (1 to $remainder) 63 | let $uri := $xmlDoi/csv/record[$pages * 1000 + $record]/work/text() 64 | return local:generate-description-element($uri) 65 | } 66 | ) 67 | ) 68 | 69 | -------------------------------------------------------------------------------- /publications/crossref/old_xquery/merge-doi.xq: -------------------------------------------------------------------------------- 1 | xquery version "3.1"; 2 | declare namespace rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; 3 | 4 | let $records := fn:collection('doi') 5 | 6 | (: wrap all of the merged rdf:Description elements inside a single rdf:RDF element :) 7 | return file:write("c:\test\doi\doi-all.rdf",{ 8 | for $description in $records/rdf:RDF/rdf:Description 9 | return $description 10 | } 11 | ) -------------------------------------------------------------------------------- /publications/crossref/researchers.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,description_en,instance_of_uuid,instance_of,gender_uuid,gender,orcid_uuid,orcid,orcid_ref1_hash,orcid_ref1_retrieved_nodeId,orcid_ref1_retrieved_val,orcid_ref1_retrieved_prec,employer_uuid,employer,employer_start_time_nodeId,employer_start_time_val,employer_start_time_prec,employer_end_time_nodeId,employer_end_time_val,employer_end_time_prec,employer_ref1_hash,employer_ref1_referenceUrl,employer_ref1_retrieved_nodeId,employer_ref1_retrieved_val,employer_ref1_retrieved_prec 2 | Q45943775,Seth R. 
Bordenstein,researcher,24AE5893-359C-4FD1-AF38-74059E63EC66,Q5,c051b4b9-441d-c3a0-90db-d23cc65a7dca,Q6581097,3746c124-4a86-bd76-587c-016f66b353cd,0000-0001-7346-0954,8eb6208639efa82b5e7e4c709b7d18cbfca67411,742521f02b14bf1a6cbf7d4bc599eb77,2019-12-14T00:00:00Z,11,B5B28E75-B0A3-4884-B788-C1DF10799A36,Q29052,,,,,,,f9c309a55265fcddd2cb0be62a530a1787c3783e,https://as.vanderbilt.edu/biosci/people/index.php?group=primary-training-faculty,0fc804bbc48ff257da07bd3a5ac7f2c0,2019-12-09T00:00:00Z,11 3 | -------------------------------------------------------------------------------- /publications/crossref/screens.json: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | { 4 | "property": "P31", 5 | "entity": "Q5", 6 | "lang": "", 7 | "position": "object", 8 | "require": "include", 9 | "filter_type": "", 10 | "filter_string": "" 11 | }, 12 | { 13 | "property": "description", 14 | "entity": "", 15 | "lang": "", 16 | "position": "object", 17 | "require": "exclude", 18 | "filter_type": "in", 19 | "filter_string": "dynasty person" 20 | }, 21 | { 22 | "property": "description", 23 | "entity": "", 24 | "lang": "", 25 | "position": "object", 26 | "require": "exclude", 27 | "filter_type": "in", 28 | "filter_string": "Peerage person ID=" 29 | }, 30 | { 31 | "property": "P570", 32 | "entity": "", 33 | "lang": "", 34 | "position": "object", 35 | "require": "exclude", 36 | "filter_type": "<", 37 | "filter_string": "2000" 38 | }, 39 | { 40 | "property": "P569", 41 | "entity": "", 42 | "lang": "", 43 | "position": "object", 44 | "require": "exclude", 45 | "filter_type": "<", 46 | "filter_string": "1920" 47 | } 48 | ] 49 | ] 50 | -------------------------------------------------------------------------------- /publications/crossref/stored_retrieved_authors.csv: -------------------------------------------------------------------------------- 1 | doi,authors 2 | 10.1371/JOURNAL.PBIO.3001417,"[{""orcid"": ""0000-0003-0378-232X"", ""sequence"": 
""first"", ""givenName"": ""Asia K."", ""familyName"": ""Miller"", ""affiliation"": []}, {""orcid"": ""0000-0001-7847-8643"", ""sequence"": ""additional"", ""givenName"": ""Camille S."", ""familyName"": ""Westlake"", ""affiliation"": []}, {""orcid"": ""0000-0001-5618-7642"", ""sequence"": ""additional"", ""givenName"": ""Karissa L."", ""familyName"": ""Cross"", ""affiliation"": []}, {""orcid"": """", ""sequence"": ""additional"", ""givenName"": ""Brittany A."", ""familyName"": ""Leigh"", ""affiliation"": []}, {""orcid"": ""0000-0001-7346-0954"", ""sequence"": ""additional"", ""givenName"": ""Seth R."", ""familyName"": ""Bordenstein"", ""affiliation"": []}]" 3 | -------------------------------------------------------------------------------- /publications/crossref/vanderbilt_wikidata_altlabels.csv: -------------------------------------------------------------------------------- 1 | qid,altLabel,source 2 | Q45943775,Seth Bordenstein,wikidata 3 | Q45943775,SR Bordenstein,wikidata 4 | -------------------------------------------------------------------------------- /publications/data/data-collection-error-times.txt: -------------------------------------------------------------------------------- 1 | Time checked: 2020-12-26T14:35:51.869354 2 | Date last run: 2020-12-26 3 | UTC date now is: 2020-12-26 4 | 5 | Time checked: 2020-12-26T15:35:52.470429 6 | Date last run: 2020-12-26 7 | UTC date now is: 2020-12-26 8 | 9 | Time checked: 2020-12-26T16:35:52.753359 10 | Error occurred, trying again in 10 minutes 11 | Time checked: 2020-12-26T16:46:52.800914 12 | Error occurred, trying again in 10 minutes 13 | Time checked: 2020-12-26T16:57:52.820885 14 | Error occurred, trying again in 10 minutes 15 | Time checked: 2020-12-26T17:08:52.831336 16 | Error occurred, trying again in 10 minutes 17 | Time checked: 2020-12-26T17:19:52.840139 18 | Error occurred, trying again in 10 minutes 19 | Time checked: 2020-12-26T17:30:52.850903 20 | Error occurred, trying again in 10 minutes 21 | 
Time checked: 2020-12-26T17:41:52.859840 22 | Error occurred, trying again in 10 minutes 23 | Time checked: 2020-12-26T17:52:52.866867 24 | Error occurred, trying again in 10 minutes 25 | Time checked: 2020-12-26T18:03:52.885197 26 | Error occurred, trying again in 10 minutes 27 | Time checked: 2020-12-26T18:14:52.908675 28 | Error occurred, trying again in 10 minutes 29 | Time checked: 2020-12-26T18:25:52.925752 30 | Error occurred, trying again in 10 minutes 31 | Time checked: 2020-12-26T18:36:52.947317 32 | Date last run: 2020-12-26 33 | UTC date now is: 2020-12-26 34 | 35 | Time checked: 2020-12-26T19:36:53.635689 36 | Error occurred, trying again in 10 minutes 37 | Time checked: 2020-12-26T19:47:53.659869 38 | Error occurred, trying again in 10 minutes 39 | Time checked: 2020-12-26T19:58:53.681610 40 | Date last run: 2020-12-26 41 | UTC date now is: 2020-12-26 42 | 43 | Time checked: 2020-12-26T20:59:34.327049 44 | Error occurred, trying again in 10 minutes 45 | Time checked: 2020-12-26T21:10:34.338726 46 | Date last run: 2020-12-26 47 | UTC date now is: 2020-12-26 48 | 49 | Time checked: 2020-12-26T22:11:08.788009 50 | Date last run: 2020-12-26 51 | UTC date now is: 2020-12-26 52 | 53 | Time checked: 2020-12-26T23:11:09.505177 54 | Date last run: 2020-12-26 55 | UTC date now is: 2020-12-26 56 | 57 | Time checked: 2020-12-27T00:11:10.285793 58 | Date last run: 2020-12-26 59 | UTC date now is: 2020-12-27 60 | Item counts (university-wide): 61 | vu_total 62 | vu_men 63 | vu_women 64 | vu_orcid 65 | vu_works 66 | vu_men_works 67 | vu_women_works 68 | {'commit': Commit(sha="1d56ef9048bce14aac48df7cda4a2df80a32071b"), 'content': ContentFile(path="publications/data/vandycite_item_data.csv")} 69 | 70 | Item counts by unit: 71 | units_total 72 | {'commit': Commit(sha="d3c58ebcb684eca6df442c8e216cb8e6eb81d37c"), 'content': ContentFile(path="publications/data/units_total.csv")} 73 | units_women 74 | {'commit': Commit(sha="17e7c4e305a59d586da0d610c80674b2c1e2a791"), 
'content': ContentFile(path="publications/data/units_women.csv")} 75 | units_men 76 | {'commit': Commit(sha="770f4c9e7dbbe8facb57595abf1ba4f4d7a8761e"), 'content': ContentFile(path="publications/data/units_men.csv")} 77 | units_orcid 78 | {'commit': Commit(sha="6a82c9c488641260747124cd602a48b3220342cc"), 'content': ContentFile(path="publications/data/units_orcid.csv")} 79 | units_works 80 | {'commit': Commit(sha="8df1bf90f763214c456982c44c5e99b8eb221a49"), 'content': ContentFile(path="publications/data/units_works.csv")} 81 | units_works_men 82 | {'commit': Commit(sha="4934ec734943c3a2ea297a01436ea98d509ac908"), 'content': ContentFile(path="publications/data/units_works_men.csv")} 83 | units_works_women 84 | {'commit': Commit(sha="a03d665752272da0687ccb806bf2521633452c87"), 'content': ContentFile(path="publications/data/units_works_women.csv")} 85 | 86 | Contributions 87 | Clifford_Anderson 88 | Baskaufs 89 | Fmlester 90 | Ramonavromero 91 | Talinum 92 | Celiaswalker 93 | CatonMA2 94 | Gridersd 95 | JeffBTaylor 96 | Marjans74 97 | Charlotte_Y._Lew 98 | KukanaLuika 99 | VanderBot 100 | {'commit': Commit(sha="67ecfefade928ea6d31843fd43d31fce3a1f2c42"), 'content': ContentFile(path="publications/data/vandycite_edit_data.csv")} 101 | done 102 | 103 | Time checked: 2020-12-27T01:12:50.760783 104 | Date last run: 2020-12-27 105 | UTC date now is: 2020-12-27 106 | 107 | Time checked: 2020-12-27T02:12:51.194540 108 | Date last run: 2020-12-27 109 | UTC date now is: 2020-12-27 -------------------------------------------------------------------------------- /publications/data/last_run.txt: -------------------------------------------------------------------------------- 1 | 2021-02-08 -------------------------------------------------------------------------------- /publications/data/vandycite_users.csv: -------------------------------------------------------------------------------- 1 | username 2 | Clifford_Anderson 3 | Baskaufs 4 | Fmlester 5 | Ramonavromero 6 | Talinum 7 | 
Celiaswalker 8 | CatonMA2 9 | Gridersd 10 | JeffBTaylor 11 | Marjans74 12 | Charlotte_Y._Lew 13 | KukanaLuika 14 | VanderBot 15 | -------------------------------------------------------------------------------- /publications/default_label_desc.json: -------------------------------------------------------------------------------- 1 | {"labels": 2 | { 3 | "source": "column", 4 | "value": "name" 5 | }, 6 | "descriptions": 7 | { 8 | "source": "constant", 9 | "value": "biology researcher" 10 | } 11 | } -------------------------------------------------------------------------------- /publications/department-configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "deptShortName": "classics", 3 | "aads": { 4 | "categories": [ 5 | "" 6 | ], 7 | "baseUrl": "https://as.vanderbilt.edu/aads/people/", 8 | "nTables": 1, 9 | "departmentSearchString": "African American and Diaspora Studies", 10 | "departmentQId": "Q79117444", 11 | "testAuthorAffiliation": "African American Diaspora Studies Vanderbilt", 12 | "labels": { 13 | "source": "column", 14 | "value": "name" 15 | }, 16 | "descriptions": { 17 | "source": "constant", 18 | "value": "African American and Diaspora Studies scholar" 19 | } 20 | }, 21 | "anthropology": { 22 | "categories": [ 23 | "faculty", 24 | "affiliated-faculty", 25 | "emeriti", 26 | "postdoctoral-scholar", 27 | "research-assistant" 28 | ], 29 | "baseUrl": "https://as.vanderbilt.edu/anthropology/people/index.php?group=", 30 | "nTables": 1, 31 | "departmentSearchString": "Anthropology", 32 | "departmentQId": "Q79117746", 33 | "testAuthorAffiliation": "Anthropology Vanderbilt", 34 | "labels": { 35 | "source": "column", 36 | "value": "name" 37 | }, 38 | "descriptions": { 39 | "source": "constant", 40 | "value": "Anthropology researcher" 41 | } 42 | }, 43 | "bsci": { 44 | "categories": [ 45 | "primary-training-faculty", 46 | "research-and-teaching-faculty", 47 | "secondary-faculty", 48 | "postdoc-fellows", 49 | 
"emeriti" 50 | ], 51 | "baseUrl": "https://as.vanderbilt.edu/biosci/people/index.php?group=", 52 | "nTables": 1, 53 | "departmentSearchString": "Biological Sciences", 54 | "departmentQId": "Q78041310", 55 | "testAuthorAffiliation": "Biological Sciences Vanderbilt", 56 | "labels": { 57 | "source": "column", 58 | "value": "name" 59 | }, 60 | "descriptions": { 61 | "source": "constant", 62 | "value": "biology researcher" 63 | } 64 | }, 65 | "classics": { 66 | "categories": [ 67 | "faculty", 68 | "affiliated-faculty", 69 | "emeriti" 70 | ], 71 | "baseUrl": "https://as.vanderbilt.edu/classics/people/index.php?group=", 72 | "nTables": 1, 73 | "departmentSearchString": "Classical and Mediterranean Studies", 74 | "departmentQId": "Q79117777", 75 | "testAuthorAffiliation": "Classical Mediterranean Studies Vanderbilt", 76 | "labels": { 77 | "source": "column", 78 | "value": "name" 79 | }, 80 | "descriptions": { 81 | "source": "constant", 82 | "value": "classical studies scholar" 83 | } 84 | }, 85 | "english": { 86 | "categories": [ 87 | "faculty" 88 | ], 89 | "baseUrl": "https://as.vanderbilt.edu/english/people/index.php?group=", 90 | "nTables": 2, 91 | "departmentSearchString": "English", 92 | "departmentQId": "Q79117817", 93 | "testAuthorAffiliation": "English Vanderbilt", 94 | "labels": { 95 | "source": "column", 96 | "value": "name" 97 | }, 98 | "descriptions": { 99 | "source": "constant", 100 | "value": "scholar of English" 101 | } 102 | }, 103 | "ees": { 104 | "categories": [ 105 | "faculty.php", 106 | "affiliatedfaculty.php", 107 | "postdocs.php", 108 | "emeriti.php" 109 | ], 110 | "baseUrl": "https://www.vanderbilt.edu/ees/people/", 111 | "nTables": 1, 112 | "departmentSearchString": "Earth and Environmental Sciences", 113 | "departmentQId": "Q79117803", 114 | "testAuthorAffiliation": "Earth Environmental Sciences Vanderbilt", 115 | "labels": { 116 | "source": "column", 117 | "value": "name" 118 | }, 119 | "descriptions": { 120 | "source": "constant", 121 | "value": 
"earth and environmental sciences researcher" 122 | } 123 | }, 124 | "physics": { 125 | "categories": [ 126 | "faculty", 127 | "emeritus-faculty", 128 | "academic-research-staff" 129 | ], 130 | "baseUrl": "https://as.vanderbilt.edu/physics/people/index.php?group=", 131 | "nTables": 1, 132 | "departmentSearchString": "Physics Astronomy", 133 | "departmentQId": "Q78779260", 134 | "testAuthorAffiliation": "Physics Astronomy Vanderbilt", 135 | "labels": { 136 | "source": "column", 137 | "value": "name" 138 | }, 139 | "descriptions": { 140 | "source": "constant", 141 | "value": "physics/astronomy researcher" 142 | } 143 | } 144 | } -------------------------------------------------------------------------------- /publications/departments/engineering.csv: -------------------------------------------------------------------------------- 1 | wikidataId,name,labelEn,alias,description,parentUnitReferenceRetrieved,officialWebsite 2 | ,Biomedical Engineering,Vanderbilt Department of Biomedical Engineering,"[""Vanderbilt Biomedical Engineering Department""]","department that integrates approaches in data science, artificial intelligence and engineering to drive medical or biological discovery",+2020-02-02T00:00:00Z,https://engineering.vanderbilt.edu/bme/index.php 3 | ,Chemical and Biomolecular Engineering,Vanderbilt Department of Chemical and Biomolecular Engineering,"[""Vanderbilt Chemical and Biomolecular Engineering Department""]",engineering department at the interface of chemistry and molecular biology,+2020-02-02T00:00:00Z,https://engineering.vanderbilt.edu/chbe/index.php 4 | ,Civil and Environmental Engineering,Vanderbilt Department of Civil and Environmental Engineering,"[""Vanderbilt Civil and Environmental Engineering Department""]",engineering department focused on the design of structures and solving environmental issues,+2020-02-02T00:00:00Z,https://engineering.vanderbilt.edu/cee/index.php 5 | ,Electrical Engineering and Computer Science,Vanderbilt Department of 
Electrical Engineering and Computer Science,"[""Vanderbilt Electrical Engineering and Computer Science Department""]",engineering department focused on design and problem solving in computer and electrical systems,+2020-02-02T00:00:00Z,https://engineering.vanderbilt.edu/eecs/index.php 6 | ,Mechanical Engineering,Vanderbilt Department of Mechanical Engineering,"[""Vanderbilt Mechanical Engineering Department""]","engineering department focused on materials, mechanical robotics, and related subjects",+2020-02-02T00:00:00Z,https://engineering.vanderbilt.edu/me/index.php 7 | ,Division of General Engineering,Vanderbilt Department of Division of General Engineering,"[""Vanderbilt Division of General Engineering Department""]",interdisciplinary department that includes engineering science and management,+2020-02-02T00:00:00Z,https://engineering.vanderbilt.edu/ge/index.php 8 | ,Interdisciplinary Materials Science Program,Vanderbilt Interdisciplinary Materials Science Program,,"an interdisciplinary program involving engineering, chemistry, physics, and medicine",+2020-02-02T00:00:00Z,https://engineering.vanderbilt.edu/materials-science/ 9 | -------------------------------------------------------------------------------- /publications/departments/medical-departments_full.csv: -------------------------------------------------------------------------------- 1 | department_string, 2 | Anesthesiology, 3 | Basic Sciences,"umbrella division, only one person" 4 | Biochemistry,Basic Sciences division 5 | Biological Sciences, 6 | Biomedical Engineering, 7 | Biomedical Informatics, 8 | Biostatistics, 9 | Cardiac Surgery,section of Surgical Sciences 10 | Cell & Developmental Biology,Basic Sciences division 11 | Chemical & Biomolecular Engineering, 12 | Chemistry, 13 | College of Arts & Science, 14 | Dermatology, 15 | Electrical Engineering & Computer Science, 16 | Emergency Medicine, 17 | Health Policy, 18 | Hearing & Speech Sciences, 19 | Mathematics, 20 | Medical Education & 
Administration (VU),seems to be a title rather than a department 21 | Medical Education & Administration (VUMC),seems to be a title rather than a department 22 | Medicine,"has a bunch of divisions like Nephrology, Infectious Diseases, Epidemiology, etc." 23 | Meharry, 24 | Molecular Physiology & Biophysics,Basic Sciences division 25 | Neurological Surgery,section of Surgical Sciences 26 | Neurology, 27 | Obstetrics & Gynecology, 28 | Ophthalmology & Visual Sciences, 29 | Oral & Maxillofacial Surgery,section of Surgical Sciences 30 | Orthopaedic Surgery, 31 | Otolaryngology - Head and Neck Surgery, 32 | "Pathology, Microbiology and Immunology", 33 | Pediatric Surgery,section of Surgical Sciences 34 | Pediatrics, 35 | Pharmacology,Basic Sciences division 36 | Physical Medicine and Rehabilitation, 37 | Physics & Astronomy, 38 | Plastic Surgery,section of Surgical Sciences 39 | Psychiatry and Behavioral Sciences, 40 | Psychology, 41 | Psychology & Human Development, 42 | Radiation Oncology, 43 | Radiology & Radiological Sciences, 44 | Special Education, 45 | Surgery,section of Surgical Sciences 46 | Thoracic Surgery,section of Surgical Sciences 47 | Urology, 48 | Vanderbilt Kennedy Center, 49 | , 50 | Cancer Biology,"a program, not a department" 51 | Preventitive Medicine,might be part of the department of Health Policy 52 | -------------------------------------------------------------------------------- /publications/departments/medicine-source.csv: -------------------------------------------------------------------------------- 1 | wikidataId,directory_string,short_name,search_string,test_affil,description 2 | Q89951871,Anesthesiology,anesthesiology,Anesthesiology,Anesthesiology Vanderbilt,anesthesiologist 3 | Q89953896,Biochemistry,biochemistry,Biochemistry,Biochemistry Vanderbilt,biochemist 4 | Q89953900,Biomedical Informatics,bioinformatics,Biomedical Informatics,Biomedical Informatics Vanderbilt,bioinformatician 5 | 
Q89953905,Biostatistics,biostats,Biostatistics,Biostatistics Vanderbilt,biostatistician 6 | Q89953909,Cardiac Surgery,cardiacsurg,Cardiac Surgery,Cardiac Surgery Vanderbilt,cardiac surgeon 7 | Q89953912,Cell & Developmental Biology,cell,Cell and Developmental Biology,Cell Developmental Biology Vanderbilt,cell biologist 8 | Q89953915,Dermatology,dermatology,Dermatology,Dermatology Vanderbilt,dermatologist 9 | Q89953919,Emergency Medicine,emergency,Emergency Medicine,Emergency Medicine Vanderbilt,emergency medicine physician 10 | Q89953922,Health Policy,healthpol,Health Policy,Health Policy Vanderbilt,health policy researcher 11 | Q89953926,Hearing & Speech Sciences,hearing,Hearing and Speech Sciences,Hearing Speech Sciences Vanderbilt,hearing and speech specialist 12 | Q89953931,Medicine,medicine,Medicine,Medicine Vanderbilt,physician 13 | Q89953935,Molecular Physiology & Biophysics,molphys,Molecular Physiology and Biophysics,Molecular Physiology Biophysics Vanderbilt,molecular physiologist 14 | Q89953940,Neurological Surgery,neurosurg,Neurological Surgery,Neurological Surgery Vanderbilt,neurosurgeon 15 | Q89953944,Neurology,neurology,Neurology,Neurology Vanderbilt,neurologist 16 | Q89953947,Obstetrics & Gynecology,obgyn,Obstetrics and Gynecology,Obstetrics Gynecology Vanderbilt,gynecologist 17 | Q89953948,Ophthalmology & Visual Sciences,opthalmology,Ophthalmology and Visual Sciences,Ophthalmology Visual Sciences Vanderbilt,ophthalmologist 18 | Q89953951,Oral & Maxillofacial Surgery,oralsurg,Oral and Maxillofacial Surgery,Oral Maxillofacial Surgery Vanderbilt,oral surgeon 19 | Q89953955,Orthopaedic Surgery,orthosurg,Orthopaedic Surgery,Orthopaedic Surgery Vanderbilt,orthopaedic surgeon 20 | Q89953958,Otolaryngology - Head and Neck Surgery,otolaryn,Otolaryngology,Otolaryngology Vanderbilt,otolaryngologist 21 | Q89953964,"Pathology, Microbiology and Immunology",pathology,"Pathology, Microbiology and Immunology",Pathology Microbiology Immunology Vanderbilt,pathologist 
22 | Q89953971,Pediatric Surgery,pedsurg,Pediatric Surgery,Pediatric Surgery Vanderbilt,pediatric surgeon 23 | Q89953976,Pediatrics,pediatrics,Pediatrics,Pediatrics Vanderbilt,pediatrician 24 | Q89953981,Pharmacology,pharma,Pharmacology,Pharmacology Vanderbilt,pharmacologist 25 | Q89953984,Physical Medicine and Rehabilitation,rehab,Physical Medicine and Rehabilitation,Physical Medicine Rehabilitation Vanderbilt,rehabilitation physician 26 | Q89953990,Plastic Surgery,plastic,Plastic Surgery,Plastic Surgery Vanderbilt,plastic surgeon 27 | Q89953993,Psychiatry and Behavioral Sciences,psychiatry,Psychiatry and Behavioral Sciences,Psychiatry Behavioral Sciences Vanderbilt,psychiatrist 28 | Q89953999,Radiation Oncology,radoncology,Radiation Oncology,Radiation Oncology Vanderbilt,radiation oncologist 29 | Q89954005,Radiology & Radiological Sciences,radiology,Radiology and Radiological Sciences,Radiology Radiological Sciences Vanderbilt,radiologist 30 | Q89954010,Surgery,surgery,Surgery,Surgery Vanderbilt,surgeon 31 | Q89954015,Thoracic Surgery,thoracicsurg,Thoracic Surgery,Thoracic Surgery Vanderbilt,thoracic surgeon 32 | Q89954019,Urology,urology,Urology,Urology Vanderbilt,urologist 33 | -------------------------------------------------------------------------------- /publications/departments/peabody-to-write.csv: -------------------------------------------------------------------------------- 1 | wikidataId,name,labelEn,alias,description,parentUnitStatementUuid,parentUnit,parentUnitReferenceHash,parentUnitReferenceSourceUrl,parentUnitReferenceRetrieved,officialWebsiteStatementUuid,officialWebsite,officialWebsiteReferenceHash,officialWebsiteReferenceRetrieved,instanceOfStatementUuid,instanceOf,instanceOfReferenceHash,instanceOfReferenceSourceUrl,instanceOfReferenceRetrieved 2 | Q83549841,Human and Organizational Development,Vanderbilt Peabody Department of Human and Organizational Development,"[""Vanderbilt Peabody Human and Organizational Development 
Department""]",group of interdisciplinary programs that share a focus on organizations and the people within them,FC113937-9943-4821-9218-46ABA3D5183F,Q7157226,d733b428a7eaa32fbc831998f90b22e2057e0bc7,https://peabody.vanderbilt.edu/departments/,+2020-01-24T00:00:00Z,369CFD71-5F5B-4412-B212-7A4EBC2038BC,https://peabody.vanderbilt.edu/departments/hod/,818c1ab9b87d646066ced29697070542bc9b8d39,+2020-01-24T00:00:00Z,C2FB29FD-1619-499C-BC3C-6ED51ECFA4F5,Q2467461,ec89ad81a31f26e0a2da9fc4aa1c571721c2af09,https://peabody.vanderbilt.edu/departments/hod/,+2020-01-24T00:00:00Z 3 | Q83550305,"Leadership, Policy, and Organizations","Vanderbilt Peabody Department of Leadership, Policy, and Organizations","[""Vanderbilt Peabody Leadership, Policy, and Organizations Department""]",academic department focused on educational leadership and policy,43C5B544-580D-4A08-B195-FD524F30548B,Q7157226,d733b428a7eaa32fbc831998f90b22e2057e0bc7,https://peabody.vanderbilt.edu/departments/,+2020-01-24T00:00:00Z,F234284A-8DD5-4916-A4E4-B189168F6361,https://peabody.vanderbilt.edu/departments/lpo/,818c1ab9b87d646066ced29697070542bc9b8d39,+2020-01-24T00:00:00Z,517BBAE7-9745-4E15-B6C6-5530BF60492F,Q2467461,a0be23302f8629dd6955e85caf9d81a7579e4e30,https://peabody.vanderbilt.edu/departments/lpo/,+2020-01-24T00:00:00Z 4 | Q83550311,Psychology and Human Development,Vanderbilt Peabody Department of Psychology and Human Development,"[""Vanderbilt Peabody Psychology and Human Development Department""]","academic department working to increase understanding of basic psychological functioning in family, school, and other social contexts that influence 
development",B3332F41-B1B9-48F5-9A8F-126A7C0950A9,Q7157226,d733b428a7eaa32fbc831998f90b22e2057e0bc7,https://peabody.vanderbilt.edu/departments/,+2020-01-24T00:00:00Z,8FC900D3-286B-4C7B-A962-B03DFEEEEB26,https://peabody.vanderbilt.edu/departments/psych/,818c1ab9b87d646066ced29697070542bc9b8d39,+2020-01-24T00:00:00Z,08E44886-BCC4-4133-BD70-0F4E869FF7A9,Q2467461,e7f0f653edd2530d4337a7b0cb5133fe45b979bb,https://peabody.vanderbilt.edu/departments/psych/,+2020-01-24T00:00:00Z 5 | Q83550317,Special Education,Vanderbilt Peabody Department of Special Education,"[""Vanderbilt Peabody Special Education Department""]",academic department focused on research and practice in special education,417A50F9-0E5C-48D0-A2E0-F4643263CC4C,Q7157226,d733b428a7eaa32fbc831998f90b22e2057e0bc7,https://peabody.vanderbilt.edu/departments/,+2020-01-24T00:00:00Z,5EB5ADF6-BC83-40E8-BFC0-07B4D19B336E,https://peabody.vanderbilt.edu/departments/sped/,818c1ab9b87d646066ced29697070542bc9b8d39,+2020-01-24T00:00:00Z,E0CE3BF9-B3C5-4320-B630-A713D56A23E4,Q2467461,842f9630e63a3b9b052d0ef15b42cee5dbeccb3f,https://peabody.vanderbilt.edu/departments/sped/,+2020-01-24T00:00:00Z 6 | Q83550321,Teaching and Learning,Vanderbilt Peabody Department of Teaching and Learning,"[""Vanderbilt Peabody Teaching and Learning Department""]","academic department providing graduate, professional, and undergraduate training in teaching",ED4EFD3E-7BAE-435B-AF35-9D45F7190322,Q7157226,d733b428a7eaa32fbc831998f90b22e2057e0bc7,https://peabody.vanderbilt.edu/departments/,+2020-01-24T00:00:00Z,D7D8CA45-1EF6-4DCA-9A57-F3F9AD20CBDA,https://peabody.vanderbilt.edu/departments/tl/,818c1ab9b87d646066ced29697070542bc9b8d39,+2020-01-24T00:00:00Z,4E38A575-E811-4181-813F-6E2E022EF7DB,Q2467461,2f1e2b4e38abfde4f73a6def33c33ccab4913a30,https://peabody.vanderbilt.edu/departments/tl/,+2020-01-24T00:00:00Z 7 | -------------------------------------------------------------------------------- /publications/departments/uva/config.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "data_path": "", 3 | "item_source_csv": "", 4 | "item_pattern_file": "graph_pattern.txt", 5 | "outfiles": [ 6 | { 7 | "manage_descriptions": true, 8 | "label_description_language_list": [ 9 | "en" 10 | ], 11 | "output_file_name": "departments.csv", 12 | "prop_list": [ 13 | { 14 | "pid": "P31", 15 | "variable": "instance_of", 16 | "value_type": "item", 17 | "qual": [], 18 | "ref": [ 19 | { 20 | "pid": "P854", 21 | "variable": "referenceUrl", 22 | "value_type": "uri" 23 | }, 24 | { 25 | "pid": "P813", 26 | "variable": "retrieved", 27 | "value_type": "date" 28 | } 29 | ] 30 | }, 31 | { 32 | "pid": "P361", 33 | "variable": "part_of_school", 34 | "value_type": "item", 35 | "qual": [], 36 | "ref": [ 37 | { 38 | "pid": "P854", 39 | "variable": "referenceUrl", 40 | "value_type": "uri" 41 | }, 42 | { 43 | "pid": "P813", 44 | "variable": "retrieved", 45 | "value_type": "date" 46 | } 47 | ] 48 | }, 49 | { 50 | "pid": "P856", 51 | "variable": "website", 52 | "value_type": "uri", 53 | "qual": [ 54 | { 55 | "pid": "P407", 56 | "variable": "language", 57 | "value_type": "item" 58 | } 59 | ], 60 | "ref": [ 61 | { 62 | "pid": "P813", 63 | "variable": "retrieved", 64 | "value_type": "date" 65 | } 66 | ] 67 | } 68 | ] 69 | } 70 | ] 71 | } -------------------------------------------------------------------------------- /publications/departments/uva/graph_pattern.txt: -------------------------------------------------------------------------------- 1 | {?qid wdt:P361+ wd:Q213439.} 2 | union 3 | {?qid wdt:P361/wdt:P749 wd:Q213439.} 4 | -------------------------------------------------------------------------------- /publications/divinity-law/__pycache__/vb_common_code.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HeardLibrary/linked-data/9b0c7c70de9061e126f68da1f570846afa39537e/publications/divinity-law/__pycache__/vb_common_code.cpython-37.pyc -------------------------------------------------------------------------------- /publications/divinity-law/vandycite_statistics.md: -------------------------------------------------------------------------------- 1 | # Statistics on Vanderbilt Divinity School Publications in Wikidata 2 | 3 | Data as of 2023-03-29 4 | 5 | Total works in Wikidata with an author or editor affiliated with the Divinity School: 2889 6 | 7 | - Work items created by Python script using data from CrossRef: 426 8 | - Work items created by Python script using data from the Zotero database: 2681 9 | - Total work items added: 3107 * 10 | 11 | * This total is larger than the total number of works by Divinity School affiliates because it includes items created for books not edited by VU faculty that contained VU-authored chapters. 12 | 13 | ## Breakdown by type of work 14 | 15 | | type | number | 16 | | ---- | ------ | 17 | | journal article | 1305 | 18 | | book chapter | 776 | 19 | | book | 761 | 20 | | encyclopedia article | 122 | 21 | | dictionary entry | 86 | 22 | | monograph | 35 | 23 | | conference paper | 13 | 24 | | thesis | 7 | 25 | | encyclopedia | 2 | 26 | 27 | ## Summary by general type: 28 | - Journal articles and conference papers: 1318 29 | - Books, monographs, encyclopedias, and theses: 805 30 | - Encyclopedia articles and dictionary entries: 208 31 | 32 | # Total Wikidata edits by people involved in the VandyCite project: 33 | 34 | These numbers include all Wikidata edits (not just edits related to VandyCite). They include both manual edits and edits made using tools such as OpenRefine and Quickstatements, but exclude edits made using VanderBot. 
35 | 36 | | participant | edits | 37 | | ----------- | ----- | 38 | | Charlotte Lew | 25119 | 39 | | Greg Weldy | 17084 | 40 | | Jeff Taylor | 9346 | 41 | | Chris Benda | 8565 | 42 | | Steve Baskauf | 6323 | 43 | | Marymae Jansson | 3080 | 44 | | Cliff Anderson | 1831 | 45 | 46 | There were also 53420 edits made by Baskauf using the VanderBot tool. This number is not directly comparable to the other edits, since creating an item with VanderBot was considered a single edit even though it may have involved generating 10 or more statements and references. If created manually, each of those statements would have counted as separate edits. If made as separate edits, the number made by VanderBot would have been on the order of half a million. 47 | 48 | Total edits by VandyCite participants (including VanderBot): 125380 49 | 50 | Total new items created: 25782 51 | -------------------------------------------------------------------------------- /publications/elsevier/data/https%3A%2F%2Fapi.elsevier.com%2Fcontent%2Fabstract%2Fscopus_id%2F84872135457.json: -------------------------------------------------------------------------------- 1 | { 2 | "affiliation": [ 3 | { 4 | "affiliation-city": "Stanford", 5 | "affilname": "Stanford University School of Medicine", 6 | "affiliation-country": "United States" 7 | }, 8 | { 9 | "affiliation-city": "Palo Alto", 10 | "affilname": "VA Palo Alto Health Care System", 11 | "affiliation-country": "United States" 12 | }, 13 | { 14 | "affiliation-city": "Cambridge", 15 | "affilname": "Harvard University", 16 | "affiliation-country": "United States" 17 | }, 18 | { 19 | "affiliation-city": "Chevy Chase", 20 | "affilname": "Howard Hughes Medical Institute", 21 | "affiliation-country": "United States" 22 | }, 23 | { 24 | "affiliation-city": "Philadelphia", 25 | "affilname": "University of Pennsylvania", 26 | "affiliation-country": "United States" 27 | } 28 | ], 29 | "coredata": { 30 | "srctype": "j", 31 | "eid": "2-s2.0-84872135457", 32 | 
"pubmed-id": "23201690", 33 | "prism:coverDate": "2013-01-10", 34 | "prism:aggregationType": "Journal", 35 | "prism:url": "https://api.elsevier.com/content/abstract/scopus_id/84872135457", 36 | "dc:creator": { 37 | "author": [ 38 | { 39 | "ce:given-name": "Markus", 40 | "preferred-name": { 41 | "ce:given-name": "Markus", 42 | "ce:initials": "M.", 43 | "ce:surname": "Kretz", 44 | "ce:indexed-name": "Kretz M." 45 | }, 46 | "@seq": "1", 47 | "ce:initials": "M.", 48 | "@_fa": "true", 49 | "affiliation": { 50 | "@id": "60032838", 51 | "@href": "https://api.elsevier.com/content/affiliation/affiliation_id/60032838" 52 | }, 53 | "ce:surname": "Kretz", 54 | "@auid": "6701538998", 55 | "author-url": "https://api.elsevier.com/content/author/author_id/6701538998", 56 | "ce:indexed-name": "Kretz M." 57 | } 58 | ] 59 | }, 60 | "link": [ 61 | { 62 | "@_fa": "true", 63 | "@rel": "self", 64 | "@href": "https://api.elsevier.com/content/abstract/scopus_id/84872135457" 65 | }, 66 | { 67 | "@_fa": "true", 68 | "@rel": "scopus", 69 | "@href": "https://www.scopus.com/inward/record.uri?partnerID=HzOxMe3b&scp=84872135457&origin=inward" 70 | }, 71 | { 72 | "@_fa": "true", 73 | "@rel": "scopus-citedby", 74 | "@href": "https://www.scopus.com/inward/citedby.uri?partnerID=HzOxMe3b&scp=84872135457&origin=inward" 75 | } 76 | ], 77 | "source-id": "21206", 78 | "pii": "NATURE11661", 79 | "citedby-count": "561", 80 | "prism:volume": "493", 81 | "subtype": "ar", 82 | "dc:title": "Control of somatic tissue differentiation by the long non-coding RNA TINCR", 83 | "openaccess": "2", 84 | "prism:issn": "00280836 14764687", 85 | "prism:issueIdentifier": "7431", 86 | "subtypeDescription": "Article", 87 | "prism:publicationName": "Nature", 88 | "prism:pageRange": "231-235", 89 | "prism:endingPage": "235", 90 | "openaccessFlag": null, 91 | "prism:doi": "10.1038/nature11661", 92 | "prism:startingPage": "231", 93 | "dc:identifier": "SCOPUS_ID:84872135457" 94 | } 95 | } 
-------------------------------------------------------------------------------- /publications/orcid/README.md: -------------------------------------------------------------------------------- 1 | # Munging data from ORCID 2 | 3 | | file | description | 4 | |------|-------------| 5 | | orcid-get.py | uses the Python rdflib module to retrieve RDF data by dereferencing ORCID identifiers, puts it into RDF/XML | 6 | | orcid-id-get.xq | XQuery script to retrieve XML from ORCID on all people with affiliation-org-name='Vanderbilt University' | 7 | | orcid-record-get.xq | XQuery script to retrieve the XML metadata for a particular ORCID ID (needs modification for many) | 8 | | vandy-people-all.xq | XQuery script to retrieve XML data for all Vanderbilt people based on the list of ORCID IDs saved from orcid-id-get.xq | 9 | | vandy-people-rdf-xml.xq | XQuery script that sorts through XML data loaded as a database; pulls out relevant stuff and turns into RDF/XML | 10 | | vanderbilt-orcid.csv | the CSV file that was used to pull all of the Vanderbilt ORCID XML from the ORCID API | 11 | | people.rdf | the output from the vandy-people-rdf-xml.xq script | 12 | 13 | # Baskauf notes on munging data from ORCID - 2017-09-10 14 | 15 | ## This part deals with the scripts and data in this directory 16 | 17 | 1\. Get ORCID URIs for 100 Vanderbilt people using 18 | ``` 19 | https://pub.orcid.org/v2.0/search/?q=affiliation-org-name:"Vanderbilt+University" 20 | ``` 21 | Note 2018-01-22: see https://members.orcid.org/api/tutorial/search-orcid-registry for information about paging to get results beyond 100. See https://github.com/HeardLibrary/semantic-web/blob/master/2017-fall/data-from-sparql.md#xquery for an example of sending an HTTP request using XQuery - hacking this would probably be the easiest way to get the IDs from all 2000+ Vanderbilt people. 
22 | 23 | Note 2018-02-04: See https://github.com/HeardLibrary/semantic-web/blob/master/2018-spring/vu-people/orcid-id-get.xq for the ID retrieval script with paging. 24 | 25 | See https://github.com/HeardLibrary/semantic-web/blob/master/2018-spring/vu-people/orchid-record-get.xq for a stub script to retrieve a single record. See https://members.orcid.org/api/tutorial/reading-xml for details of what's in records. 26 | 27 | 2\. Retrieve RDF data from orcid.org using orcid-get.py program. 28 | 29 | 3\. Load output orcid.rdf into the SPARQL endpoint. 30 | 31 | 4\. Extract desired data using this query: 32 | 33 | ``` 34 | prefix rdfs: 35 | prefix foaf: 36 | prefix pav: 37 | 38 | SELECT DISTINCT ?s ?label ?given ?surname ?created ?modified WHERE { 39 | graph { 40 | ?s rdfs:label ?label. 41 | ?s foaf:givenName ?given. 42 | ?s foaf:familyName ?surname. 43 | ?doc foaf:maker ?s. 44 | ?doc pav:createdOn ?created. 45 | ?doc pav:lastUpdateOn ?modified. 46 | } 47 | } 48 | ``` 49 | 50 | 5\. Copy output table and save as a CSV. 51 | 52 | 6\. Manually copy DOIs from people on the list; look for social scientists and humanists. (2018-01-22 This really stinks, there needs to be a better way to match people with the DOIs of their works! Maybe through Wikidata? sjb) 53 | 54 | 7\. Munge DOIs into the URL form needed to download the RDF directly. 55 | 56 | _______________________ 57 | # This part of the procedures deals with the scripts and data in the crossref directory 58 | 59 | 8\. Retrieve RDF data from CrossRef using crossref-get.py program. 60 | 61 | 9\. Load doi.rdf into the SPARQL endpoint. 62 | 63 | 10\. Extract desired data about the works using this query: 64 | 65 | ``` 66 | prefix dcterms: 67 | prefix rdfs: 68 | prefix foaf: 69 | prefix bibo: 70 | 71 | SELECT DISTINCT ?s ?aTitle ?date ?aVolume ?start ?end ?journal ?jTitle ?publisher WHERE { 72 | graph { 73 | ?s dcterms:title ?aTitle. 74 | ?s dcterms:date ?date. 75 | OPTIONAL { 76 | ?s bibo:volume ?aVolume. 
77 | } 78 | OPTIONAL { 79 | ?s bibo:pageStart ?start. 80 | } 81 | OPTIONAL { 82 | ?s bibo:pageEnd ?end. 83 | } 84 | OPTIONAL { 85 | ?s dcterms:isPartOf ?journal. 86 | ?journal dcterms:title ?jTitle. 87 | } 88 | OPTIONAL { 89 | ?s dcterms:publisher ?publisher. 90 | } 91 | } 92 | } 93 | ``` 94 | 95 | 11\. Copy output table and save as a CSV. 96 | 97 | 12\. Get Vanderbilt information from https://www.grid.ac/institutes/grid.152326.1 as text/turtle and fill in the CSV manually. 98 | 99 | ---- 100 | Revised 2019-05-01 101 | -------------------------------------------------------------------------------- /publications/orcid/orcid-get.py: -------------------------------------------------------------------------------- 1 | # Used the following URL to retrieve 100 (out of 1955 possible) records of people affiliated with Vanderbilt: 2 | # https://pub.orcid.org/v2.0/search/?q=affiliation-org-name:"Vanderbilt+University" 3 | # see https://members.orcid.org/api/resources/find-myresearchers for details 4 | 5 | # The results came in an XML file with this format: 6 | 7 | # 8 | # 9 | # 10 | # http://orcid.org/0000-0001-7216-2664 11 | # 0000-0001-7216-2664 12 | # orcid.org 13 | # 14 | # 15 | # 16 | # 17 | # http://orcid.org/0000-0003-1923-7406 18 | # 0000-0003-1923-7406 19 | # orcid.org 20 | # 21 | # 22 | # ... 
23 | # 24 | 25 | # had to install rdflib using "pip install rdflib" before running the first time 26 | 27 | # see https://rdflib.readthedocs.org/en/latest/gettingstarted.html 28 | import rdflib 29 | 30 | # this is Python's built-in XML processor 31 | import xml.etree.ElementTree as etree 32 | 33 | # results.xml is the SPARQL results file that I saved after querying for GeoNames IRIs 34 | tree = etree.parse('orcid.xml') 35 | 36 | # I searched the XML to find the "path" elements (ORCID ID strings), then put them in an array 37 | resultsArray=tree.findall('.//{http://www.orcid.org/ns/common}path') 38 | 39 | #builtGraph is where I'm going to accumulate triples that I've scraped 40 | builtGraph=rdflib.Graph() 41 | 42 | #addedGraph contains triples that I got from a particular GeoNames RDF file 43 | addedGraph=rdflib.Graph() 44 | 45 | fileIndex=0 46 | while fileIndex 10 | return 11 | http:send-request($request) 12 | }; 13 | 14 | declare function local:get-hundred($start) 15 | { 16 | let $query := 'q=affiliation-org-name:"Vanderbilt+University"&start='||$start||'&rows=100' 17 | let $endpoint := 'https://pub.orcid.org/v2.0/search/' 18 | let $response := local:query-endpoint($endpoint,$query) 19 | for $result in $response[2]/search:search/search:result 20 | return $result/common:orcid-identifier/common:path/text() 21 | }; 22 | 23 | (: The initial query is just to determine the number of results :) 24 | let $query := 'q=affiliation-org-name:"Vanderbilt+University"&start=101&rows=100' 25 | let $endpoint := 'https://pub.orcid.org/v2.0/search/' 26 | let $response := local:query-endpoint($endpoint,$query) 27 | 28 | let $numberOfResults := number(data($response[2]/search:search/@num-found)) 29 | let $pages := ($numberOfResults idiv 100) (: pages are sets of 100 results :) 30 | 31 | (: retrieve ORCID IDs one hundred at a time :) 32 | for $page in (0 to $pages) (: for testing, replace $pages with 0 to get only the first 100 pages :) 33 | let $start := string(($page * 100) + 1) 34 | 
return local:get-hundred($start) 35 | -------------------------------------------------------------------------------- /publications/orcid/orcid-record-get.xq: -------------------------------------------------------------------------------- 1 | declare namespace search="http://www.orcid.org/ns/search"; 2 | declare namespace common="http://www.orcid.org/ns/common"; 3 | 4 | declare function local:query-endpoint($url) 5 | { 6 | (: Accept header can be specified explicitly as below. Options are "application/json" for JSON and "application/xml" for XML. :) 7 | let $acceptType := "application/xml" 8 | let $request := 9 | return 10 | http:send-request($request) 11 | }; 12 | 13 | 14 | (: For this test, I've used only a single person's ORCID ID. We'd want to run it for the whole VU set :) 15 | let $endpoint := 'https://orcid.org/0000-0003-3127-2722' 16 | let $response := local:query-endpoint($endpoint) 17 | 18 | return $response 19 | -------------------------------------------------------------------------------- /publications/orcid/vandy-people-all.xq: -------------------------------------------------------------------------------- 1 | (: Note: loop to retrieve all records is commented out. As is, will retrieve only one record for testing purposes. :) 2 | 3 | declare namespace search="http://www.orcid.org/ns/search"; 4 | declare namespace common="http://www.orcid.org/ns/common"; 5 | 6 | declare function local:query-endpoint($url) 7 | { 8 | (: Accept header can be specified explicitly as below. Options are "application/json" for JSON and "application/xml" for XML. 
:) 9 | let $acceptType := "application/xml" 10 | let $request := 11 | return 12 | http:send-request($request)[2] (: return only the data and not the HTTP response :) 13 | }; 14 | 15 | let $endpoint := 'https://orcid.org/' 16 | 17 | (: 18 | let $textOrcid := http:send-request()[2] 19 | let $xmlOrcid := csv:parse($textOrcid, map { 'header' : true(),'separator' : "|" }) 20 | 21 | for $orcidRecord in $xmlOrcid/csv/record 22 | let $orcidID := $orcidRecord/orcidId/text() 23 | 24 | return $orcidID 25 | :) 26 | 27 | let $orcidID := '0000-0003-3127-2722' 28 | let $URI := $endpoint||$orcidID 29 | 30 | let $response := local:query-endpoint($URI) 31 | 32 | return (file:write("c:\test\orcid\"||$orcidID||".xml",$response),$orcidID) -------------------------------------------------------------------------------- /publications/pubmed/README.md: -------------------------------------------------------------------------------- 1 | # PubMed search script 2 | 3 | | file | description | 4 | |------|-------------| 5 | | search.py | Python script to find publications using a PubMed search, then retrieve the data for each one of the hits. The hits were then sorted into false positives, true positives, etc., with the results output into CSVs | 6 | | falsePos.csv | false positive results | 7 | | truePos.csv | true positive results | 8 | | truePosCount.txt | placeholder file with count of true positives | 9 | 10 | This was from a project done in February 2019 to help Philip Walker sort out publications by the Dept of Biomedical Informatics at Vanderbilt.
11 | 12 | ---- 13 | Revised 2019-05-01 14 | -------------------------------------------------------------------------------- /publications/pubmed/covid_results.csv: -------------------------------------------------------------------------------- 1 | date,count 2 | 2020/03/11,58 3 | 2020/03/12,69 4 | 2020/03/13,68 5 | 2020/03/14,34 6 | 2020/03/15,19 7 | 2020/03/16,69 8 | 2020/03/17,71 9 | 2020/03/18,66 10 | 2020/03/19,66 11 | 2020/03/20,114 12 | 2020/03/21,50 13 | 2020/03/22,15 14 | 2020/03/23,79 15 | 2020/03/24,74 16 | 2020/03/25,90 17 | 2020/03/26,96 18 | 2020/03/27,141 19 | 2020/03/28,78 20 | 2020/03/29,32 21 | 2020/03/30,137 22 | 2020/03/31,170 23 | 2020/04/01,1188 24 | 2020/04/02,154 25 | 2020/04/03,198 26 | 2020/04/04,81 27 | 2020/04/05,28 28 | 2020/04/06,167 29 | 2020/04/07,135 30 | 2020/04/08,199 31 | 2020/04/09,186 32 | 2020/04/10,265 33 | 2020/04/11,146 34 | 2020/04/12,59 35 | 2020/04/13,143 36 | 2020/04/14,181 37 | 2020/04/15,253 38 | 2020/04/16,210 39 | 2020/04/17,272 40 | 2020/04/18,169 41 | 2020/04/19,53 42 | 2020/04/20,234 43 | 2020/04/21,279 44 | 2020/04/22,328 45 | 2020/04/23,314 46 | 2020/04/24,310 47 | 2020/04/25,149 48 | 2020/04/26,72 49 | 2020/04/27,322 50 | 2020/04/28,304 51 | 2020/04/29,283 52 | 2020/04/30,382 53 | 2020/05/01,2257 54 | 2020/05/02,112 55 | 2020/05/03,93 56 | 2020/05/04,254 57 | 2020/05/05,324 58 | 2020/05/06,321 59 | 2020/05/07,350 60 | 2020/05/08,392 61 | 2020/05/09,87 62 | 2020/05/10,71 63 | 2020/05/11,424 64 | 2020/05/12,359 65 | 2020/05/13,373 66 | 2020/05/14,402 67 | 2020/05/15,460 68 | 2020/05/16,238 69 | 2020/05/17,85 70 | 2020/05/18,337 71 | 2020/05/19,392 72 | 2020/05/20,306 73 | 2020/05/21,395 74 | 2020/05/22,373 75 | 2020/05/23,224 76 | 2020/05/24,78 77 | 2020/05/25,262 78 | 2020/05/26,374 79 | 2020/05/27,380 80 | 2020/05/28,417 81 | 2020/05/29,431 82 | 2020/05/30,288 83 | 2020/05/31,153 84 | 2020/06/01,4097 85 | 2020/06/02,347 86 | 2020/06/03,369 87 | 2020/06/04,369 88 | 2020/06/05,393 89 | 2020/06/06,204 90 | 
2020/06/07,140 91 | 2020/06/08,366 92 | 2020/06/09,373 93 | 2020/06/10,359 94 | 2020/06/11,420 95 | 2020/06/12,369 96 | 2020/06/13,178 97 | 2020/06/14,69 98 | 2020/06/15,361 99 | 2020/06/16,381 100 | 2020/06/17,328 101 | 2020/06/18,408 102 | 2020/06/19,312 103 | 2020/06/20,221 104 | 2020/06/21,102 105 | 2020/06/22,297 106 | 2020/06/23,371 107 | 2020/06/24,341 108 | 2020/06/25,495 109 | 2020/06/26,357 110 | 2020/06/27,180 111 | 2020/06/28,120 112 | 2020/06/29,278 113 | 2020/06/30,396 114 | 2020/07/01,5303 115 | 2020/07/02,346 116 | 2020/07/03,364 117 | 2020/07/04,166 118 | 2020/07/05,56 119 | 2020/07/06,296 120 | 2020/07/07,358 121 | 2020/07/08,302 122 | 2020/07/09,375 123 | 2020/07/10,392 124 | 2020/07/11,161 125 | 2020/07/12,108 126 | 2020/07/13,305 127 | 2020/07/14,373 128 | 2020/07/15,488 129 | 2020/07/16,362 130 | 2020/07/17,357 131 | 2020/07/18,141 132 | 2020/07/19,106 133 | 2020/07/20,278 134 | 2020/07/21,367 135 | 2020/07/22,296 136 | 2020/07/23,398 137 | 2020/07/24,306 138 | 2020/07/25,178 139 | 2020/07/26,61 140 | 2020/07/27,259 141 | 2020/07/28,399 142 | 2020/07/29,289 143 | 2020/07/30,372 144 | 2020/07/31,357 145 | 2020/08/01,4592 146 | 2020/08/02,84 147 | 2020/08/03,323 148 | 2020/08/04,375 149 | 2020/08/05,343 150 | 2020/08/06,378 151 | 2020/08/07,363 152 | 2020/08/08,152 153 | 2020/08/09,88 154 | 2020/08/10,288 155 | 2020/08/11,356 156 | 2020/08/12,298 157 | 2020/08/13,341 158 | 2020/08/14,339 159 | 2020/08/15,230 160 | 2020/08/16,76 161 | 2020/08/17,259 162 | 2020/08/18,411 163 | 2020/08/19,286 164 | 2020/08/20,396 165 | 2020/08/21,254 166 | 2020/08/22,117 167 | 2020/08/23,51 168 | 2020/08/24,281 169 | 2020/08/25,395 170 | 2020/08/26,336 171 | 2020/08/27,385 172 | 2020/08/28,404 173 | 2020/08/29,146 174 | 2020/08/30,72 175 | 2020/08/31,260 176 | 2020/09/01,4565 177 | 2020/09/02,327 178 | 2020/09/03,350 179 | 2020/09/04,284 180 | 2020/09/05,129 181 | 2020/09/06,108 182 | 2020/09/07,255 183 | 2020/09/08,279 184 | 2020/09/09,323 185 | 2020/09/10,338 186 
| 2020/09/11,319 187 | 2020/09/12,142 188 | 2020/09/13,55 189 | 2020/09/14,246 190 | 2020/09/15,465 191 | 2020/09/16,312 192 | 2020/09/17,283 193 | 2020/09/18,364 194 | 2020/09/19,132 195 | 2020/09/20,69 196 | 2020/09/21,324 197 | 2020/09/22,299 198 | 2020/09/23,307 199 | 2020/09/24,305 200 | 2020/09/25,319 201 | 2020/09/26,109 202 | 2020/09/27,61 203 | 2020/09/28,281 204 | 2020/09/29,272 205 | 2020/09/30,310 206 | 2020/10/01,3135 207 | 2020/10/02,176 208 | 2020/10/03,89 209 | 2020/10/04,61 210 | 2020/10/05,188 211 | 2020/10/06,260 212 | 2020/10/07,175 213 | 2020/10/08,118 214 | 2020/10/09,24 215 | -------------------------------------------------------------------------------- /publications/pubmed/truePosCount.txt: -------------------------------------------------------------------------------- 1 | 1240 -------------------------------------------------------------------------------- /publications/wikidata/README.md: -------------------------------------------------------------------------------- 1 | # Munging data from Wikidata 2 | 3 | | file | description | 4 | |------|-------------| 5 | | wikidata-to-turtle.sparql | SPARQL query to pull data about Vanderbilt people from Wikidata (including ORCID if there) | 6 | | wikidata-student.ttl | query results for Vanderbilt people who were students at Vanderbilt | 7 | | wikidata-employee.ttl | query results for Vanderbilt people who were employees of Vanderbilt | 8 | 9 | This was an attempt to identify people in Wikidata who were identified as affiliated with Vanderbilt and to match them with ORCIDs. However, few people had ORCIDs listed, so there was little overlap at the time when this was done (in 2017). 
10 | 11 | ---- 12 | Revised 2019-05-01 13 | -------------------------------------------------------------------------------- /publications/wikidata/affiliation.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "label":"Blair School of Music", 4 | "wikidataId":"Q4924165" 5 | }, 6 | { 7 | "label":"College of Arts & Science", 8 | "wikidataId":"Q7914451" 9 | }, 10 | { 11 | "label":"Divinity School", 12 | "wikidataId":"Q7914452" 13 | }, 14 | { 15 | "label":"Graduate School", 16 | "wikidataId":"Q7914453" 17 | }, 18 | { 19 | "label":"Law School", 20 | "wikidataId":"Q7914456" 21 | }, 22 | { 23 | "label":"Owen Graduate School of Management", 24 | "wikidataId":"Q14710143" 25 | }, 26 | { 27 | "label":"Peabody College of Education & Human Development", 28 | "wikidataId":"Q7157226" 29 | }, 30 | { 31 | "label":"School of Engineering", 32 | "wikidataId":"Q7914459" 33 | }, 34 | { 35 | "label":"School of Medicine", 36 | "wikidataId":"Q7914466" 37 | }, 38 | { 39 | "label":"School of Nursing", 40 | "wikidataId":"Q7914461" 41 | }, 42 | { 43 | "label":"Jean and Alexander Heard Libraries", 44 | "wikidataId":"Q16849893" 45 | } 46 | ] -------------------------------------------------------------------------------- /publications/wikidata/alt_label copy.csv: -------------------------------------------------------------------------------- 1 | wikidataId,altLabel 2 | Q38099106,Cliff Anderson 3 | Q38099106,Clifford Blake Anderson 4 | Q37371192,Brandt Eichman 5 | -------------------------------------------------------------------------------- /publications/wikidata/alt_label.csv: -------------------------------------------------------------------------------- 1 | wikidataId,altLabel 2 | Q210472,Cliff Anderson 3 | Q210472,Clifford Blake Anderson 4 | Q210474,Brandt Eichman 5 | -------------------------------------------------------------------------------- /publications/wikidata/csv-metadata-real-props.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [{ 5 | "url": "researcher.csv", 6 | "aboutUrl": "http://www.wikidata.org/entity/{wikidataId}", 7 | "tableSchema": { 8 | "columns": [{ 9 | "titles": "wikidataId", 10 | "name": "wikidataId", 11 | "datatype": "string", 12 | "suppressOutput": true 13 | },{ 14 | "titles": "orcid", 15 | "name": "orcid", 16 | "datatype": "string", 17 | "propertyUrl": "http://www.wikidata.org/prop/direct/P496" 18 | },{ 19 | "titles": "labelEn", 20 | "name": "labelEn", 21 | "datatype": "string", 22 | "propertyUrl": "rdfs:label", 23 | "lang": "en" 24 | },{ 25 | "titles": "givenNameString", 26 | "name": "givenNameString", 27 | "datatype": "string", 28 | "suppressOutput": true 29 | },{ 30 | "titles": "familyNameString", 31 | "name": "familyNameString", 32 | "datatype": "string", 33 | "suppressOutput": true 34 | },{ 35 | "titles": "affiliationP1416", 36 | "name": "affiliationP1416", 37 | "datatype": "string", 38 | "propertyUrl": "http://www.wikidata.org/prop/direct/P1416", 39 | "valueUrl": "http://www.wikidata.org/entity/{affiliationP1416}" 40 | },{ 41 | "titles": "fieldOfWorkP101", 42 | "name": "fieldOfWorkP101", 43 | "datatype": "string", 44 | "propertyUrl": "http://www.wikidata.org/prop/direct/P101", 45 | "valueUrl": "http://www.wikidata.org/entity/{fieldOfWorkP101}" 46 | },{ 47 | "titles": "notes", 48 | "name": "notes", 49 | "datatype": "string", 50 | "suppressOutput": true 51 | }] 52 | } 53 | }, { 54 | "url": "work.csv", 55 | "aboutUrl": "http://www.wikidata.org/entity/{wikidataId}", 56 | "tableSchema": { 57 | "columns": [{ 58 | "titles": "wikidataId", 59 | "name": "wikidataId", 60 | "datatype": "string", 61 | "suppressOutput": true 62 | },{ 63 | "titles": "doi", 64 | "name": "doi", 65 | "datatype": "string", 66 | "propertyUrl": "http://www.wikidata.org/prop/direct/P356" 67 | },{ 68 | "titles": "title", 69 | "name": 
"title", 70 | "datatype": "string", 71 | "suppressOutput": true 72 | }] 73 | } 74 | }, { 75 | "url": "alt_label.csv", 76 | "aboutUrl": "http://www.wikidata.org/entity/{wikidataId}", 77 | "tableSchema": { 78 | "columns": [{ 79 | "titles": "wikidataId", 80 | "name": "wikidataId", 81 | "datatype": "string", 82 | "suppressOutput": true 83 | },{ 84 | "titles": "altLabel", 85 | "name": "altLabel", 86 | "datatype": "string", 87 | "propertyUrl": "skos:altLabel", 88 | "lang": "en" 89 | }] 90 | } 91 | }, { 92 | "url": "work_author_join.csv", 93 | "aboutUrl": "http://www.wikidata.org/entity/{workWikidataId}", 94 | "tableSchema": { 95 | "columns": [{ 96 | "titles": "workWikidataId", 97 | "name": "workWikidataId", 98 | "datatype": "string", 99 | "suppressOutput": true 100 | },{ 101 | "titles": "authorWikidataId", 102 | "name": "authorWikidataId", 103 | "datatype": "string", 104 | "propertyUrl": "http://www.wikidata.org/prop/direct/P50", 105 | "valueUrl": "http://www.wikidata.org/entity/{authorWikidataId}" 106 | }] 107 | } 108 | }] 109 | } -------------------------------------------------------------------------------- /publications/wikidata/csv-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [{ 5 | "url": "researcher.csv", 6 | "aboutUrl": "http://www.wikidata.org/entity/{wikidataId}", 7 | "tableSchema": { 8 | "columns": [{ 9 | "titles": "wikidataId", 10 | "name": "wikidataId", 11 | "datatype": "string", 12 | "suppressOutput": true 13 | },{ 14 | "titles": "orcidP90468", 15 | "name": "orcidP90468", 16 | "datatype": "string", 17 | "propertyUrl": "http://www.wikidata.org/prop/direct/P90468" 18 | },{ 19 | "titles": "labelEn", 20 | "name": "labelEn", 21 | "datatype": "string", 22 | "propertyUrl": "rdfs:label", 23 | "lang": "en" 24 | },{ 25 | "titles": "alias", 26 | "name": "alias", 27 | "datatype": "string", 28 | "propertyUrl": "skos:altLabel", 
29 | "lang": "en" 30 | },{ 31 | "titles": "givenNameString", 32 | "name": "givenNameString", 33 | "datatype": "string", 34 | "suppressOutput": true 35 | },{ 36 | "titles": "familyNameString", 37 | "name": "familyNameString", 38 | "datatype": "string", 39 | "suppressOutput": true 40 | },{ 41 | "titles": "affiliationP95220", 42 | "name": "affiliationP95220", 43 | "datatype": "string", 44 | "propertyUrl": "http://www.wikidata.org/prop/direct/P95220", 45 | "valueUrl": "http://www.wikidata.org/entity/{affiliationP95220}" 46 | },{ 47 | "titles": "fieldOfWorkP128", 48 | "name": "fieldOfWorkP128", 49 | "datatype": "string", 50 | "propertyUrl": "http://www.wikidata.org/prop/direct/P128", 51 | "valueUrl": "http://www.wikidata.org/entity/{fieldOfWorkP128}" 52 | },{ 53 | "titles": "description", 54 | "name": "description", 55 | "datatype": "string", 56 | "propertyUrl": "schema:description", 57 | "lang": "en" 58 | },{ 59 | "titles": "notes", 60 | "name": "notes", 61 | "datatype": "string", 62 | "suppressOutput": true 63 | }] 64 | } 65 | }] 66 | } -------------------------------------------------------------------------------- /publications/wikidata/download-vanderbilt-people-altlabels.py: -------------------------------------------------------------------------------- 1 | import requests # best library to manage HTTP transactions 2 | import json 3 | import csv 4 | 5 | # function to write results to a file 6 | def writeCsv(fileName, array): 7 | fileObject = open(fileName, 'w', newline='', encoding='utf-8') 8 | writerObject = csv.writer(fileObject) 9 | for row in array: 10 | writerObject.writerow(row) 11 | fileObject.close() 12 | 13 | endpointUrl = 'https://query.wikidata.org/sparql' 14 | query = '''select distinct ?person ?altLabel where { 15 | ?person p:P108 ?statement. 16 | ?statement ps:P108 wd:Q29052. 17 | ?person skos:altLabel ?altLabel. 
18 | FILTER(lang(?altLabel)="en") 19 | }''' 20 | 21 | # The endpoint defaults to returning XML, so the Accept: header is required 22 | r = requests.get(endpointUrl, params={'query' : query}, headers={'Accept' : 'application/json'}) 23 | 24 | data = r.json() 25 | print(json.dumps(data,indent = 2)) 26 | 27 | table = [['wikidataIri', 'altLabel']] 28 | items = data['results']['bindings'] 29 | for item in items: 30 | wikidataIri = item['person']['value'] 31 | altLabel = '' 32 | if 'altLabel' in item: 33 | altLabel = item['altLabel']['value'] 34 | table.append([wikidataIri, altLabel]) 35 | 36 | fileName = 'vanderbilt_wikidata_altlabels.csv' 37 | writeCsv(fileName, table) 38 | -------------------------------------------------------------------------------- /publications/wikidata/download-vanderbilt-people.py: -------------------------------------------------------------------------------- 1 | import requests # best library to manage HTTP transactions 2 | import json 3 | import csv 4 | 5 | # function to write results to a file 6 | def writeCsv(fileName, array): 7 | fileObject = open(fileName, 'w', newline='', encoding='utf-8') 8 | writerObject = csv.writer(fileObject) 9 | for row in array: 10 | writerObject.writerow(row) 11 | fileObject.close() 12 | 13 | endpointUrl = 'https://query.wikidata.org/sparql' 14 | query = '''select distinct ?person ?name ?orcid ?startDate ?endDate ?description where { 15 | ?person p:P108 ?statement. 16 | ?statement ps:P108 wd:Q29052. 17 | optional{ 18 | ?person rdfs:label ?name. 19 | FILTER(lang(?name)="en") 20 | } 21 | optional{?statement pq:P580 ?startDate.} 22 | optional{?statement pq:P582 ?endDate.} 23 | optional{?person wdt:P496 ?orcid.} 24 | optional{ 25 | ?person schema:description ?description. 
26 | FILTER(lang(?description)="en") 27 | } 28 | }''' 29 | 30 | # The endpoint defaults to returning XML, so the Accept: header is required 31 | r = requests.get(endpointUrl, params={'query' : query}, headers={'Accept' : 'application/json'}) 32 | 33 | data = r.json() 34 | print(json.dumps(data,indent = 2)) 35 | 36 | table = [['wikidataIri', 'name', 'description', 'startDate', 'endDate', 'orcid']] 37 | items = data['results']['bindings'] 38 | for item in items: 39 | wikidataIri = item['person']['value'] 40 | name = '' 41 | if 'name' in item: 42 | name = item['name']['value'] 43 | description = '' 44 | if 'description' in item: 45 | description = item['description']['value'] 46 | startDate = '' 47 | if 'startDate' in item: 48 | startDate = item['startDate']['value'] 49 | endDate = '' 50 | if 'endDate' in item: 51 | endDate = item['endDate']['value'] 52 | orcid = '' 53 | if 'orcid' in item: 54 | orcid = item['orcid']['value'] 55 | table.append([wikidataIri, name, description, startDate, endDate, orcid]) 56 | 57 | fileName = 'vanderbilt_wikidata.csv' 58 | writeCsv(fileName, table) 59 | -------------------------------------------------------------------------------- /publications/wikidata/process_csv/researcher.csv: -------------------------------------------------------------------------------- 1 | wikidataId,labelEn,alias,description,givenNameString,familyNameString,affiliationP95220uuid,affiliationP95220,fieldOfWorkP128uuid,fieldOfWorkP128,orcidP90468uuid,orcidP90468,orcidP90468refHash,orcidP90468refUrlP93,orcidP90468refRetrievedP388,qualStartP355,qualEndP356,notes 2 | Q210953,jackalope22,,,,,1FE8E974-27CE-4376-B8F5-3B03FFF30E63,Q11,DD96694F-6B18-462F-853C-DEAAD6D0C9B6,Q10,B30D28E7-5A8D-476C-A6D1-6D080C4E46DC,0000-0002-7248-6551,f17aeb7506f9593757578add8ac9421d413693c0,https://orcid.org/0000-0001-6601-0234,2018,2014-10-01,2016-12, 3 | -------------------------------------------------------------------------------- /publications/wikidata/researcher copy.csv: 
-------------------------------------------------------------------------------- 1 | wikidataId,orcidP90468,labelEn,givenNameString,familyNameString,affiliationP95220,fieldOfWorkP128,description,notes 2 | ,0000-0002-7248-6551,Antonis L. Rokas,Antonis,Rokas,Q210476,Q210475,researcher,BSCI department faculty (multiple appointments) 3 | ,,Clifford L. Anderson,Clifford,Anderson,Q210477,,American librarian and professor of religious studies,"library, does not follow college departmental structure" 4 | ,,Amy-Jill L. Levine,Amy-Jill,Levine,Q210478,,American academic,"has multiple appointments (primary appointment in Div School with no depts, CAS Jewish Study department)" 5 | ,0000-0002-0965-2297,Brandt L. Eichman,Brandt,Eichman,Q210476,Q210475,researcher,BSCI department faculty (multiple appointments) 6 | -------------------------------------------------------------------------------- /publications/wikidata/researcher-real-ids.csv: -------------------------------------------------------------------------------- 1 | wikidataId,orcid,labelEn,givenNameString,familyNameString,affiliationP1416,fieldOfWorkP101,notes 2 | Q42352198,0000-0002-7248-6551,,Antonis,Rokas,Q7914451,Q420,BSCI department faculty (multiple appointments) 3 | Q38099106,0000-0003-0328-0792,Clifford B. 
Anderson,Clifford,Anderson,Q16849893,,"library, does not follow college departmental structure" 4 | Q4749053,,Amy-Jill Levine,Amy-Jill,Levine,Q7914452,,"has multiple appointments (primary appointment in Div School with no depts, CAS Jewish Study department)" 5 | Q37371192,0000-0002-0965-2297,Brandt F Eichman,Brandt,Eichman,Q7914451,Q420,BSCI department faculty (multiple appointments) 6 | -------------------------------------------------------------------------------- /publications/wikidata/researcher-with-ids.csv: -------------------------------------------------------------------------------- 1 | wikidataId,orcidP90468,labelEn,givenNameString,familyNameString,affiliationP95220,fieldOfWorkP128,description,notes 2 | Q210471,0000-0002-7248-6551,,Antonis,Rokas,Q210476,Q210475,researcher,BSCI department faculty (multiple appointments) 3 | Q210472,0000-0003-0328-0792,Clifford B. Anderson,Clifford,Anderson,Q210477,,American librarian and professor of religious studies,"library, does not follow college departmental structure" 4 | Q210473,,Amy-Jill Levine,Amy-Jill,Levine,Q210478,,American academic,"has multiple appointments (primary appointment in Div School with no depts, CAS Jewish Study department)" 5 | Q210474,0000-0002-0965-2297,Brandt F Eichman,Brandt,Eichman,Q210476,Q210475,researcher,BSCI department faculty (multiple appointments) 6 | -------------------------------------------------------------------------------- /publications/wikidata/researcher.csv: -------------------------------------------------------------------------------- 1 | wikidataId,orcidP90468,labelEn,alias,givenNameString,familyNameString,affiliationP95220,fieldOfWorkP128,description,notes 2 | ,0000-0002-7248-6551,Antonis S. Rokas,Antonis Rokas,Antonis,Rokas,Q210476,Q210475,researcher,BSCI department faculty (multiple appointments) 3 | ,,Clifford S. 
Anderson,Cliff Anderson,Clifford,Anderson,Q210477,,American librarian and professor of religious studies,"library, does not follow college departmental structure" 4 | ,,Amy-Jill S. Levine,,Amy-Jill,Levine,Q210478,,American academic,"has multiple appointments (primary appointment in Div School with no depts, CAS Jewish Study department)" 5 | ,0000-0002-0965-2297,Brandt S. Eichman,Brandt Eichman,Brandt,Eichman,Q210476,Q210475,researcher,BSCI department faculty (multiple appointments) 6 | -------------------------------------------------------------------------------- /publications/wikidata/wikidata-to-csv.sparql: -------------------------------------------------------------------------------- 1 | PREFIX dcterms: 2 | PREFIX foaf: 3 | PREFIX schema: 4 | PREFIX rdfs: 5 | 6 | SELECT DISTINCT ?person ?id ?label ?description ?birthDate ?givenName ?givenNameLabel ?familyName ?familyNameLabel ?gender ?interestedIn ?interestedInLabel ?studies ?studiesLabel ?fieldOfWork ?fieldOfWorkLabel ?occupation ?occupationLabel ?affiliation ?affiliationLabel ?orcidId 7 | WHERE { 8 | ?person wdt:P108 wd:Q29052. # use wdt:P69 "educated at" to find students 9 | ?person rdfs:label ?labelTagged. 10 | FILTER ( langMatches(lang(?labelTagged),"en" )) 11 | BIND (str(?labelTagged) AS ?label) 12 | 13 | OPTIONAL { 14 | ?person skos:altLabel ?altTagged. 15 | FILTER ( langMatches(lang(?altTagged),"en" )) 16 | BIND (str(?altTagged) AS ?alt) 17 | } 18 | 19 | OPTIONAL { 20 | ?person schema:description ?descriptionTagged. 21 | FILTER ( langMatches(lang(?descriptionTagged),"en" )) 22 | BIND (str(?descriptionTagged) AS ?description) 23 | } 24 | 25 | OPTIONAL {?person wdt:P569 ?birthDate.} 26 | 27 | OPTIONAL { 28 | ?person wdt:P735 ?givenName. # note: the values are URIs, not literals. 29 | ?givenName rdfs:label ?givenNameLabelTagged. 
30 | FILTER ( langMatches(lang(?givenNameLabelTagged),"en" )) 31 | BIND (str(?givenNameLabelTagged) AS ?givenNameLabel) 32 | } 33 | 34 | OPTIONAL { 35 | ?person wdt:P734 ?familyName. 36 | ?familyName rdfs:label ?familyNameLabelTagged. 37 | FILTER ( langMatches(lang(?familyNameLabelTagged),"en" )) 38 | BIND (str(?familyNameLabelTagged) AS ?familyNameLabel) 39 | } 40 | 41 | OPTIONAL { 42 | ?person wdt:P21 ?gender. # note: the values are URIs, not literals. 43 | ?gender rdfs:label ?genderLabelTagged. 44 | FILTER ( langMatches(lang(?genderLabelTagged),"en" )) 45 | BIND (str(?genderLabelTagged) AS ?genderLabel) 46 | } 47 | 48 | OPTIONAL { 49 | ?person wdt:P2650 ?interestedIn. 50 | ?interestedIn rdfs:label ?interestedInLabelTagged. 51 | FILTER ( langMatches(lang(?interestedInLabelTagged),"en" )) 52 | BIND (str(?interestedInLabelTagged) AS ?interestedInLabel) 53 | } 54 | 55 | OPTIONAL { 56 | ?person wdt:P2578 ?studies. 57 | ?studies rdfs:label ?studiesLabelTagged. 58 | FILTER ( langMatches(lang(?studiesLabelTagged),"en" )) 59 | BIND (str(?studiesLabelTagged) AS ?studiesLabel) 60 | } 61 | 62 | OPTIONAL { 63 | ?person wdt:P101 ?fieldOfWork. 64 | ?fieldOfWork rdfs:label ?fieldOfWorkLabelTagged. 65 | FILTER ( langMatches(lang(?fieldOfWorkLabelTagged),"en" )) 66 | BIND (str(?fieldOfWorkLabelTagged) AS ?fieldOfWorkLabel) 67 | } 68 | 69 | OPTIONAL { 70 | ?person wdt:P106 ?occupation. 71 | ?occupation rdfs:label ?occupationLabelTagged. 72 | FILTER ( langMatches(lang(?occupationLabelTagged),"en" )) 73 | BIND (str(?occupationLabelTagged) AS ?occupationLabel) 74 | } 75 | 76 | OPTIONAL { 77 | ?person wdt:P1416 ?affiliation. 78 | ?affiliation rdfs:label ?affiliationLabelTagged. 79 | FILTER ( langMatches(lang(?affiliationLabelTagged),"en" )) 80 | BIND (str(?affiliationLabelTagged) AS ?affiliationLabel) 81 | } 82 | 83 | OPTIONAL { 84 | ?person wdt:P69 ?educatedAt. 85 | ?educatedAt rdfs:label ?educatedAtLabelTagged.
86 | FILTER ( langMatches(lang(?educatedAtLabelTagged),"en" )) 87 | BIND (str(?educatedAtLabelTagged) AS ?educatedAtLabel) 88 | } 89 | OPTIONAL { 90 | ?person wdt:P496 ?orcidId. 91 | BIND (URI(CONCAT("http://orcid.org/",?orcidId)) AS ?orcidUri) # turn ORCID string into URI 92 | } 93 | 94 | BIND (SUBSTR(STR(?person),32) AS ?id) # remove domain name from Wikidata identifier, leaving only the "Q" part 95 | } -------------------------------------------------------------------------------- /publications/wikidata/wikidata-to-turtle.sparql: -------------------------------------------------------------------------------- 1 | PREFIX adhoc: 2 | PREFIX dcterms: 3 | PREFIX foaf: 4 | PREFIX schema: 5 | PREFIX rdfs: 6 | 7 | CONSTRUCT { 8 | ?person a foaf:Person. 9 | ?person a schema:Person. 10 | ?person adhoc:identifierSource "wikidata". 11 | ?person adhoc:status "employee". # use "student" when querying for P69 "educated at" 12 | ?person rdfs:label ?label. 13 | ?person foaf:name ?label. 14 | ?person foaf:givenName ?givenNameLabel. 15 | ?person schema:givenName ?givenNameLabel. 16 | ?person foaf:familyName ?familyNameLabel. 17 | ?person schema:familyName ?familyNameLabel. 18 | ?person foaf:name ?alt. 19 | ?person dcterms:description ?description. 20 | ?person dcterms:identifier ?id. 21 | ?person owl:sameAs ?orcidUri. 22 | ?person schema:birthDate ?birthDate. 23 | ?person schema:gender ?genderLabel. # note: schema.org expects http://schema.org/Male and http://schema.org/Female as values or a string. 24 | } 25 | WHERE { 26 | ?person wdt:P108 wd:Q29052. # use wdt:P69 "educated at" to find students 27 | ?person rdfs:label ?labelTagged. 28 | FILTER ( langMatches(lang(?labelTagged),"en" )) 29 | BIND (str(?labelTagged) AS ?label) 30 | 31 | OPTIONAL { 32 | ?person skos:altLabel ?altTagged. 33 | FILTER ( langMatches(lang(?altTagged),"en" )) 34 | BIND (str(?altTagged) AS ?alt) 35 | } 36 | 37 | ?person schema:description ?descriptionTagged. 
38 | FILTER ( langMatches(lang(?descriptionTagged),"en" )) 39 | BIND (str(?descriptionTagged) AS ?description) 40 | 41 | OPTIONAL {?person wdt:P569 ?birthDate.} 42 | 43 | OPTIONAL { 44 | ?person wdt:P735 ?givenName. # note: the values are URIs, not literals. 45 | ?givenName rdfs:label ?givenNameLabelTagged. 46 | FILTER ( langMatches(lang(?givenNameLabelTagged),"en" )) 47 | BIND (str(?givenNameLabelTagged) AS ?givenNameLabel) 48 | } 49 | 50 | OPTIONAL { 51 | ?person wdt:P734 ?familyName. 52 | ?familyName rdfs:label ?familyNameLabelTagged. 53 | FILTER ( langMatches(lang(?familyNameLabelTagged),"en" )) 54 | BIND (str(?familyNameLabelTagged) AS ?familyNameLabel) 55 | } 56 | 57 | OPTIONAL { 58 | ?person wdt:P21 ?gender. # note: the values are URIs, not literals. 59 | ?gender rdfs:label ?genderLabelTagged. 60 | FILTER ( langMatches(lang(?genderLabelTagged),"en" )) 61 | BIND (str(?genderLabelTagged) AS ?genderLabel) 62 | } 63 | 64 | OPTIONAL { 65 | ?person wdt:P496 ?orcidId. 66 | BIND (URI(CONCAT("http://orcid.org/",?orcidId)) AS ?orcidUri) # turn ORCID string into URI 67 | } 68 | 69 | BIND (SUBSTR(STR(?person),32) AS ?id) # remove domain name from Wikidata identifier, leaving only the "Q" part 70 | } 71 | -------------------------------------------------------------------------------- /publications/wikidata/work.csv: -------------------------------------------------------------------------------- 1 | wikidataId,doi,title 2 | Q64124033,10.1371/JOURNAL.PBIO.3000255,Extensive loss of cell-cycle and DNA repair genes in an ancient lineage of bipolar budding yeasts 3 | Q61000827,10.1177/003463730610300109,"""To all the Gentiles"": a Jewish perspective on the Great Commission" 4 | -------------------------------------------------------------------------------- /publications/wikidata/work_author_join.csv: -------------------------------------------------------------------------------- 1 | workWikidataId,authorWikidataId 2 | Q64124033,Q42352198 3 | Q64124033,Q37371192 4 | 
Q61000827,Q4749053 5 | -------------------------------------------------------------------------------- /publications/work-person-figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeardLibrary/linked-data/9b0c7c70de9061e126f68da1f570846afa39537e/publications/work-person-figure.png -------------------------------------------------------------------------------- /publications/work-person-figure.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeardLibrary/linked-data/9b0c7c70de9061e126f68da1f570846afa39537e/publications/work-person-figure.pptx -------------------------------------------------------------------------------- /publications/wos/wos.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import time 3 | import woslite_client 4 | from woslite_client.rest import ApiException 5 | from pprint import pprint 6 | from pathlib import Path 7 | 8 | # This function will load some credential from a text file, either in the home directory or current working directory 9 | # The value of the directory variable should be either 'home' or 'working' 10 | # Keeping the credential in the home directory prevents accidentally uploading it with the notebook. 11 | # The function returns a single string, so if there is more than one credential (e.g. key plus secret), additional 12 | # parsing of the return value may be required. 
13 | def load_credential(filename, directory): 14 | cred = '' 15 | # to change the script to look for the credential in the working directory, change the value of home to empty string 16 | if directory == 'home': 17 | home = str(Path.home()) #gets path to home directory; works for both Win and Mac 18 | credential_path = home + '/' + filename 19 | else: 20 | directory = 'working' 21 | credential_path = filename 22 | try: 23 | with open(credential_path, 'rt', encoding='utf-8') as file_object: 24 | cred = file_object.read() 25 | except: 26 | print(filename + ' file not found - is it in your ' + directory + ' directory?') 27 | exit() 28 | return(cred) 29 | 30 | clarivate_api_key = load_credential('clarivate_api_key.txt', 'home') 31 | 32 | # Configure API key authorization: key 33 | configuration = woslite_client.Configuration() 34 | configuration.api_key['X-ApiKey'] = clarivate_api_key 35 | 36 | # create an instance of the API class 37 | integration_api_instance = woslite_client.IntegrationApi(woslite_client.ApiClient(configuration)) 38 | search_api_instance = woslite_client.SearchApi(woslite_client.ApiClient(configuration)) 39 | database_id = 'WOS' # str | Database to search. Must be a valid database ID, one of the following: BCI/BIOABS/BIOSIS/CCC/DCI/DIIDW/MEDLINE/WOK/WOS/ZOOREC. WOK represents all databases. 40 | unique_id = 'WOS:000270372400005' # str | Primary item(s) id to be searched, ex: WOS:000270372400005. Cannot be null or an empty string. Multiple values are separated by comma. 41 | usr_query = 'TS=(cadmium)' # str | User query for requesting data, ex: TS=(cadmium). The query parser will return errors for invalid queries. 42 | count = 1 # int | Number of records returned in the request 43 | first_record = 1 # int | Specific record, if any within the result set to return. Cannot be less than 1 and greater than 100000. 44 | lang = 'en' # str | Language of search. This element can take only one value: en for English. 
If no language is specified, English is passed by default. (optional) 45 | sort_field = 'PY+D' # str | Order by field(s). Field name and order by clause separated by '+', use A for ASC and D for DESC, ex: PY+D. Multiple values are separated by comma. (optional) 46 | 47 | try: 48 | # Find record(s) by specific id 49 | api_response = integration_api_instance.id_unique_id_get(database_id, unique_id, count, first_record, lang=lang, 50 | sort_field=sort_field) 51 | # for more details look at the models 52 | firstAuthor = api_response.data[0].author.authors[0] 53 | print("Response: ") 54 | pprint(api_response) 55 | pprint("First author: " + firstAuthor) 56 | except ApiException as e: 57 | print("Exception when calling IntegrationApi->id_unique_id_get: %s\\n" % e) 58 | 59 | try: 60 | # Find record(s) by user query 61 | api_response = search_api_instance.root_get(database_id, usr_query, count, first_record, lang=lang, 62 | sort_field=sort_field) 63 | # for more details look at the models 64 | firstAuthor = api_response.data[0].author.authors[0] 65 | print("Response: ") 66 | pprint(api_response) 67 | pprint("First author: " + firstAuthor) 68 | except ApiException as e: 69 | print("Exception when calling SearchApi->root_get: %s\\n" % e) -------------------------------------------------------------------------------- /sparql/prefixes.txt: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX rdfs: 3 | PREFIX skos: 4 | PREFIX skosxl: 5 | PREFIX xsd: 6 | PREFIX dc: 7 | PREFIX dcterms: 8 | PREFIX dcat: 9 | PREFIX wd: 10 | PREFIX wdt: 11 | PREFIX aat: 12 | PREFIX gvp: 13 | 14 | -------------------------------------------------------------------------------- /sparql/sparql_results.csv: -------------------------------------------------------------------------------- 1 | s,p,o 2 | http://rs.tdwg.org/format/values/e005,http://purl.org/dc/terms/isPartOf,http://rs.tdwg.org/format/values/ 3 | 
http://rs.tdwg.org/format/values/e005,http://rs.tdwg.org/dwc/terms/attributes/decision,http://rs.tdwg.org/decisions/decision-2020-10-13_29 4 | http://rs.tdwg.org/format/values/m015,http://purl.org/dc/terms/isPartOf,http://rs.tdwg.org/format/values/ 5 | http://rs.tdwg.org/format/values/m015,http://rs.tdwg.org/dwc/terms/attributes/decision,http://rs.tdwg.org/decisions/decision-2020-10-13_29 6 | http://rs.tdwg.org/format/values/m033,http://purl.org/dc/terms/isPartOf,http://rs.tdwg.org/format/values/ 7 | http://rs.tdwg.org/format/values/m033,http://rs.tdwg.org/dwc/terms/attributes/decision,http://rs.tdwg.org/decisions/decision-2020-10-13_29 8 | http://rs.tdwg.org/format/values/e034,http://purl.org/dc/terms/isPartOf,http://rs.tdwg.org/format/values/ 9 | http://rs.tdwg.org/format/values/e034,http://rs.tdwg.org/dwc/terms/attributes/decision,http://rs.tdwg.org/decisions/decision-2020-10-13_29 10 | http://rs.tdwg.org/format/values/e037,http://purl.org/dc/terms/isPartOf,http://rs.tdwg.org/format/values/ 11 | http://rs.tdwg.org/format/values/e037,http://rs.tdwg.org/dwc/terms/attributes/decision,http://rs.tdwg.org/decisions/decision-2020-10-13_29 12 | -------------------------------------------------------------------------------- /swj/csv-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "label_example.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles":"Q_ID", 11 | "name":"qid", 12 | "datatype":"string", 13 | "suppressOutput":true 14 | }, 15 | { 16 | "titles":"English_label", 17 | "name":"labelEn", 18 | "datatype":"string", 19 | "aboutUrl":"http://www.wikidata.org/entity/{qid}", 20 | "propertyUrl":"rdfs:label", 21 | "lang":"en" 22 | } 23 | ] 24 | } 25 | } 26 | ]} 27 | -------------------------------------------------------------------------------- /swj/example1/bluffton_presidents.csv: 
-------------------------------------------------------------------------------- 1 | Q_ID,instanceOf_uuid,instanceOf 2 | Q98569123,EFF1EFC4-7ECD-48E0-BE78-CB3DB55136B1,Q5 3 | Q98569121,48750747-669C-4124-BB3C-EE8F8559A265,Q5 4 | -------------------------------------------------------------------------------- /swj/example1/csv-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "bluffton_presidents.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles":"Q_ID", 11 | "name":"qid", 12 | "datatype":"string", 13 | "suppressOutput":true 14 | }, 15 | { 16 | "titles":"instanceOf_uuid", 17 | "name":"instanceOf_uuid", 18 | "datatype":"string", 19 | "aboutUrl":"http://www.wikidata.org/entity/{qid}", 20 | "propertyUrl":"http://www.wikidata.org/prop/P31", 21 | "valueUrl":"http://www.wikidata.org/entity/statement/{qid}-{instanceOf_uuid}" 22 | }, 23 | { 24 | "titles":"instanceOf", 25 | "name":"instanceOf", 26 | "datatype":"string", 27 | "aboutUrl":"http://www.wikidata.org/entity/statement/{qid}-{instanceOf_uuid}", 28 | "propertyUrl":"http://www.wikidata.org/prop/statement/P31", 29 | "valueUrl":"http://www.wikidata.org/entity/{instanceOf}" 30 | } 31 | ]} 32 | } 33 | ]} 34 | -------------------------------------------------------------------------------- /swj/example1/output.ttl: -------------------------------------------------------------------------------- 1 | 2 | . 3 | 4 | . 5 | 6 | . 7 | 8 | . 
9 | -------------------------------------------------------------------------------- /swj/example2/bluffton_employees.csv: -------------------------------------------------------------------------------- 1 | Q_ID,employer_uuid,employer_ref1_hash,employer_ref1_referenceUrl 2 | Q98569123,D6C927AD-64B1-4212-A3EA-7FB6309F5A96,8201f36c07b460d76ca1b61a2cca6d09913500fd,https://www.bluffton.edu/about/leadership/past-presidents.aspx 3 | Q98569121,1A5B3DE9-92D1-41DD-9AFF-66701CA2892B,8201f36c07b460d76ca1b61a2cca6d09913500fd,https://www.bluffton.edu/about/leadership/past-presidents.aspx 4 | -------------------------------------------------------------------------------- /swj/example2/csv-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "bluffton_employees.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles":"Q_ID", 11 | "name":"qid", 12 | "datatype":"string", 13 | "suppressOutput":true 14 | }, 15 | { 16 | "titles":"employer_uuid", 17 | "name":"employer_uuid", 18 | "datatype":"string", 19 | "suppressOutput":true 20 | }, 21 | { 22 | "titles":"employer_ref1_hash", 23 | "name":"employer_ref1_hash", 24 | "datatype":"string", 25 | "aboutUrl":"http://www.wikidata.org/entity/statement/{qid}-{employer_uuid}", 26 | "propertyUrl":"prov:wasDerivedFrom", 27 | "valueUrl":"http://www.wikidata.org/reference/{employer_ref1_hash}" 28 | }, 29 | { 30 | "titles":"employer_ref1_referenceUrl", 31 | "name":"employer_ref1_referenceUrl", 32 | "datatype":"string", 33 | "aboutUrl":"http://www.wikidata.org/reference/{employer_ref1_hash}", 34 | "propertyUrl":"http://www.wikidata.org/prop/reference/P854", 35 | "valueUrl":"{+employer_ref1_referenceUrl}" 36 | } 37 | ]} 38 | } 39 | ]} 40 | -------------------------------------------------------------------------------- /swj/example2/output.ttl: 
-------------------------------------------------------------------------------- 1 | @prefix prov: . 2 | 3 | prov:wasDerivedFrom . 4 | 5 | prov:wasDerivedFrom . 6 | 7 | . 8 | -------------------------------------------------------------------------------- /swj/example3/bluffton_positions.csv: -------------------------------------------------------------------------------- 1 | Q_ID,positionHeld_uuid,positionHeld_startTime_nodeId,positionHeld_startTime_val,positionHeld_startTime_prec 2 | Q98569123,5B56773B-8730-4A9D-AD85-78F7ABA17225,3cf7cfe7-ee1d-49a9-b493-6a315b0ec219,1935-01-01T00:00:00Z,9 3 | -------------------------------------------------------------------------------- /swj/example3/csv-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@type": "TableGroup", 3 | "@context": "http://www.w3.org/ns/csvw", 4 | "tables": [ 5 | { 6 | "url": "bluffton_positions.csv", 7 | "tableSchema": { 8 | "columns": [ 9 | { 10 | "titles":"Q_ID", 11 | "name":"qid", 12 | "datatype":"string", 13 | "suppressOutput":true 14 | }, 15 | { 16 | "titles":"positionHeld_uuid", 17 | "name":"positionHeld_uuid", 18 | "datatype":"string", 19 | "aboutUrl":"http://www.wikidata.org/entity/{qid}", 20 | "propertyUrl":"http://www.wikidata.org/prop/P39", 21 | "valueUrl":"http://www.wikidata.org/entity/statement/{qid}-{positionHeld_uuid}" 22 | }, 23 | { 24 | "titles":"positionHeld_startTime_nodeId", 25 | "name":"positionHeld_startTime_nodeId", 26 | "datatype":"string", 27 | "aboutUrl":"http://www.wikidata.org/entity/statement/{qid}-{positionHeld_uuid}", 28 | "propertyUrl":"http://www.wikidata.org/prop/qualifier/value/P580", 29 | "valueUrl":"http://example.com/.well-known/genid/{positionHeld_startTime_nodeId}" 30 | }, 31 | { 32 | "titles":"positionHeld_startTime_val", 33 | "name":"positionHeld_startTime_val", 34 | "datatype":"dateTime", 35 | "aboutUrl":"http://example.com/.well-known/genid/{positionHeld_startTime_nodeId}", 36 | 
"propertyUrl":"http://wikiba.se/ontology#timeValue" 37 | }, 38 | { 39 | "titles":"positionHeld_startTime_prec", 40 | "name":"positionHeld_startTime_prec", 41 | "datatype":"integer", 42 | "aboutUrl":"http://example.com/.well-known/genid/{positionHeld_startTime_nodeId}", 43 | "propertyUrl":"http://wikiba.se/ontology#timePrecision" 44 | } 45 | ]} 46 | } 47 | ]} 48 | -------------------------------------------------------------------------------- /swj/example3/output.ttl: -------------------------------------------------------------------------------- 1 | @prefix xsd: . 2 | 3 | . 4 | 5 | 9; 6 | "1935-01-01T00:00:00Z"^^xsd:dateTime . 7 | 8 | . 9 | -------------------------------------------------------------------------------- /swj/example4.py: -------------------------------------------------------------------------------- 1 | # (c) 2020 Vanderbilt University. Author: Steve Baskauf (2020-11-28) 2 | # This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0 3 | 4 | import requests 5 | 6 | # port 3030 is used by a local installation of Apache Jena Fuseki 7 | dataset_name = 'data' 8 | graph_iri = 'http://bluffton' 9 | endpoint = 'http://localhost:3030/' + dataset_name + '/update' 10 | 11 | namespaces = ''' 12 | prefix rdfs: 13 | prefix prov: 14 | prefix wikibase: 15 | prefix wd: 16 | prefix wdt: 17 | prefix p: 18 | prefix pq: 19 | prefix pr: 20 | prefix ps: 21 | prefix pqv: 22 | prefix prv: 23 | prefix psv: 24 | ''' 25 | 26 | value_types = [ 27 | {'string': 'time', 28 | 'local_names': ['timeValue'], 29 | 'datatype':'http://www.w3.org/2001/XMLSchema#dateTime', 30 | 'bind': '?literal0'}, 31 | {'string': 'quantity', 32 | 'local_names': ['quantityAmount'], 33 | 'datatype': 'http://www.w3.org/2001/XMLSchema#decimal', 34 | 'bind': '?literal0'}, 35 | {'string': 'globecoordinate', 36 | 'local_names': ['geoLatitude', 'geoLongitude'], 37 | 'datatype': 'http://www.opengis.net/ont/geosparql#wktLiteral', 38 | 'bind': 'concat("Point(", 
str(?literal0), " ", str(?literal1), ")")'} 39 | ] 40 | 41 | property_types = ['statement', 'qualifier', 'reference'] 42 | 43 | # Insert the missing value statements using values from value nodes 44 | for value_type in value_types: 45 | for property_type in property_types: 46 | query = ''' 47 | WITH <''' + graph_iri + '''> 48 | INSERT {?reference ?directProp ?literal.} 49 | WHERE { 50 | ?reference ?pxv ?value. 51 | ''' 52 | for ln_index in range(len(value_type['local_names'])): 53 | query += ' ?value wikibase:' + value_type['local_names'][ln_index] + ' ?literal' + str(ln_index) + '''. 54 | ''' 55 | query += ' bind(' + value_type['bind'] + ''' as ?literal) 56 | FILTER(SUBSTR(STR(?pxv),1,45)="http://www.wikidata.org/prop/''' + property_type + '''/value/") 57 | BIND(SUBSTR(STR(?pxv),46) AS ?id) 58 | BIND(IRI(CONCAT("http://www.wikidata.org/prop/''' + property_type + '''/", ?id)) AS ?directProp) 59 | } 60 | ''' 61 | #print(query) 62 | print('updating', property_type, value_type['string']) 63 | response = requests.post(endpoint, headers={'Content-Type': 'application/sparql-update'}, data = namespaces + query) 64 | print('update complete') 65 | 66 | # Insert the missing "truthy" statements from statement value statements 67 | query = ''' 68 | WITH <''' + graph_iri + '''> 69 | INSERT {?item ?truthyProp ?value.} 70 | WHERE { 71 | ?item ?p ?statement. 72 | ?statement ?ps ?value. 
73 | FILTER(SUBSTR(STR(?ps),1,40)="http://www.wikidata.org/prop/statement/P") 74 | BIND(SUBSTR(STR(?ps),40) AS ?id) 75 | BIND(IRI(CONCAT("http://www.wikidata.org/prop/direct/", ?id)) AS ?truthyProp) 76 | } 77 | ''' 78 | #print(query) 79 | print ('updating truthy statements') 80 | response = requests.post(endpoint, headers={'Content-Type': 'application/sparql-update'}, data = namespaces + query) 81 | print('done') 82 | -------------------------------------------------------------------------------- /swj/label_example.csv: -------------------------------------------------------------------------------- 1 | Q_ID,English_label 2 | Q98569123,Arthur S. Rosenberger 3 | Q98569121,Samuel K. Mosiman 4 | -------------------------------------------------------------------------------- /swj/label_example_output.ttl: -------------------------------------------------------------------------------- 1 | @prefix rdf: . 2 | @prefix rdfs: . 3 | 4 | rdfs:label "Samuel K. Mosiman"@en . 5 | 6 | rdfs:label "Arthur S. Rosenberger"@en . 7 | -------------------------------------------------------------------------------- /vanderbot/department-configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "deptShortName": "medicine", 3 | "medicine": { 4 | "scrapeType": 0, 5 | "categories": [ 6 | "" 7 | ], 8 | "baseUrl": "https://wag.app.vanderbilt.edu//PublicPage/Faculty/PickLetter?letter=", 9 | "nTables": 1, 10 | "departmentSearchString": "Medicine", 11 | "departmentQId": "Q89953931", 12 | "testAuthorAffiliation": "Medicine Vanderbilt", 13 | "labels": { 14 | "source": "column", 15 | "value": "name" 16 | }, 17 | "descriptions": { 18 | "source": "constant", 19 | "value": "physician" 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /vanderbot/graph.txt: -------------------------------------------------------------------------------- 1 | ?qid wdt:P195 wd:Q18563658. 2 | ?qid wdt:P31 wd:Q3305213. 
-------------------------------------------------------------------------------- /vanderbot/medicine-employees-to-write.csv: -------------------------------------------------------------------------------- 1 | department,wikidataId,name,labelEn,alias,description,orcidStatementUuid,orcid,orcidReferenceHash,orcidReferenceValue_nodeId,orcidReferenceValue_val,orcidReferenceValue_prec,employerStatementUuid,employer,employerReferenceHash,employerReferenceSourceUrl,employerReferenceRetrieved_nodeId,employerReferenceRetrieved_val,employerReferenceRetrieved_prec,affiliationStatementUuid,affiliation,affiliationReferenceHash,affiliationReferenceSourceUrl,affiliationReferenceRetrieved_nodeId,affiliationReferenceRetrieved_val,affiliationReferenceRetrieved_prec,instanceOfUuid,instanceOf,sexOrGenderUuid,sexOrGenderQId,gender,degree,category,wikidataStatus,role 2 | medicine,Q88631361,Maria Blanca Piazuelo,Maria Blanca Piazuelo,"[""M. Blanca Piazuelo"",""Blanca Piazuelo""]",physician,60AEDD59-C71F-467E-8A23-E6550C3070EC,0000-0002-0000-1324,7f81f24a4148728c2eb8e030140bb2808f105296,ee3890ae-03fa-4ff7-b040-9cfe25d2c3d2,2020-04-20T00:00:00Z,11,538599CD-1564-4E56-839A-1BAB9BAF74DC,Q29052,4820fb8bbf4060c54dd4f0f0745fa6ddc12cff80,https://wag.app.vanderbilt.edu//PublicPage/Faculty/PickLetter?letter=P,21ddcb18-18be-4afd-927b-ae45dfa598d2,2020-04-20T00:00:00Z,11,0BB1C55B-A784-4826-9EA6-10453CB6FBE9,Q89953931,4820fb8bbf4060c54dd4f0f0745fa6ddc12cff80,https://wag.app.vanderbilt.edu//PublicPage/Faculty/PickLetter?letter=P,21ddcb18-18be-4afd-927b-ae45dfa598d2,2020-04-20T00:00:00Z,11,388A4B21-1D84-412B-B501-89B7206FA52C,Q5,888850E6-14E5-462E-9A86-CBC983A2D8F7,Q6581072,f,M.D.,P,5,"[{""title"": ""Research Associate Professor"", ""department"": ""Medicine""}]" 3 | -------------------------------------------------------------------------------- /vanderbot/medicine-employees-with-wikidata.csv: -------------------------------------------------------------------------------- 1 | 
wikidataId,name,gender,degree,category,orcid,wikidataStatus,role 2 | Q39050075,Margaret L. Salisbury,f,M.D.,S,0000-0001-8217-6955,11,"[{""title"": ""Assistant Professor"", ""department"": ""Medicine""}]" 3 | -------------------------------------------------------------------------------- /vanderbot/medicine-employees.csv: -------------------------------------------------------------------------------- 1 | name,degree,role,category 2 | Maria Blanca Piazuelo,M.D.,"[{""title"": ""Research Associate Professor"", ""department"": ""Medicine""}]",P 3 | -------------------------------------------------------------------------------- /vanderbot/properties_to_add.csv: -------------------------------------------------------------------------------- 1 | pid,datatype,label_en,description_en,label_fr,description_fr,label_es,description_es 2 | ,string,property 1 label,property 1 description,étiquette de propriété 1,description de propriété 1,, 3 | ,monolingualtext,subtitle,the second part of a longer title,,,subtítulo, 4 | ,quantity,mass,the amount of material in an object,,,, 5 | ,time,date of destruction,the date on which an object no longer existed,,,, 6 | ,bad,bad property,property with an invalid datatype,,,, 7 | ,globe-coordinate,observation location,the place where an entity was detected,,,, 8 | ,wikibase-item,in collection,the collection of which the subject item is a part,,,, 9 | ,url,homepage,the URL of the landing page of a website,,,, 10 | -------------------------------------------------------------------------------- /vanderbot/vb5_check_labels_descriptions.py: -------------------------------------------------------------------------------- 1 | # VanderBot v1.5 (2020-09-08) vb5_check_labels_descriptions.py 2 | # (c) 2020 Vanderbilt University. 
This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0 3 | # Author: Steve Baskauf 4 | # For more information, see https://github.com/HeardLibrary/linked-data/tree/master/vanderbot 5 | 6 | # See http://baskauf.blogspot.com/2020/02/vanderbot-python-script-for-writing-to.html 7 | # for a series of blog posts about VanderBot. 8 | 9 | # This script is the fifth in a series of five that are used to prepare researcher/scholar ("employee") data 10 | # for upload to Wikidata. It inputs data output from the previous script, vb4_download_wikidata.py. 11 | # 12 | # It outputs data into a file for ingestion by the script used to upload data to 13 | # Wikidata, vb6_upload_wikidata.py. 14 | 15 | # The last part of the script sets the deptShortName in the csv-metadata.json file, a necessary 16 | # precursor before running the upload script. 17 | # ----------------------------------------- 18 | # Version 1.1 change notes: 19 | # - no changes 20 | # ----------------------------------------- 21 | # Version 1.2 change notes: 22 | # - No substantive changes 23 | # ----------------------------------------- 24 | # Version 1.3 change notes (2020-08-06): 25 | # - no changes 26 | # ----------------------------------------- 27 | # Version 1.5 change notes (2020-09-08): 28 | # - no changes 29 | 30 | import json 31 | from time import sleep 32 | import csv 33 | 34 | import vb_common_code as vbc 35 | 36 | sparqlSleep = 0.25 37 | 38 | with open('department-configuration.json', 'rt', encoding='utf-8') as fileObject: 39 | text = fileObject.read() 40 | deptSettings = json.loads(text) 41 | deptShortName = deptSettings['deptShortName'] 42 | 43 | filename = deptShortName + '-employees-to-write.csv' 44 | employees = vbc.readDict(filename) 45 | 46 | for employeeIndex in range(0, len(employees)): 47 | if employees[employeeIndex]['wikidataId'] == '': 48 | #if employeeIndex == 1: 49 | #employees[employeeIndex]['labelEn'] = 'Muktar H Aliyu' 50 | 
#employees[employeeIndex]['description'] = 'researcher' 51 | query = '''select distinct ?entity where { 52 | ?entity rdfs:label "'''+ employees[employeeIndex]['labelEn'] + '''"@en. 53 | ?entity schema:description "'''+ employees[employeeIndex]['description'] + '''"@en. 54 | }''' 55 | print('Checking label: "' + employees[employeeIndex]['labelEn'] + '", description: "' + employees[employeeIndex]['description'] + '"') 56 | match = vbc.Query(uselabel = False, sleep=sparqlSleep).generic_query(query) 57 | if len(match) > 0: 58 | print('\nWarning! Row ' + str(employeeIndex + 2) + ' is the same as ' + match[0]) 59 | print('This must be fixed before writing to the API !!!\n') 60 | sleep(0.25) 61 | 62 | with open('csv-metadata.json', 'rt', encoding='utf-8') as inFileObject: 63 | text = inFileObject.read() 64 | schema = json.loads(text) 65 | schema['tables'][0]['url'] = deptShortName + '-employees-to-write.csv' 66 | outText = json.dumps(schema, indent = 2) 67 | with open('csv-metadata.json', 'wt', encoding='utf-8') as outFileObject: 68 | outFileObject.write(outText) 69 | print('Department to be written:', deptShortName) 70 | 71 | print('done') -------------------------------------------------------------------------------- /vanderbot/vu_authors.txt: -------------------------------------------------------------------------------- 1 | ?qid wdt:P108 wd:Q29052. 2 | ?article wdt:P50 ?qid. 3 | ?article wdt:P31 wd:Q13442814. 
-------------------------------------------------------------------------------- /vanderbot/wikidata-csv2rdf-metadata.css: -------------------------------------------------------------------------------- 1 | .form-container { 2 | display: flex; 3 | flex-direction: column; 4 | justify-content: space-between; 5 | } 6 | 7 | .form-section { 8 | display: flex; 9 | flex-direction: column; 10 | padding: 10px; 11 | } 12 | 13 | .item-entries { 14 | display: flex; 15 | flex-direction:column; 16 | } 17 | 18 | .add-button, .submit-button { 19 | width: fit-content; 20 | padding: 5px; 21 | margin: 5px; 22 | } 23 | 24 | .collapsible { 25 | background-color: #ddd; 26 | color: #444; 27 | cursor: pointer; 28 | border: none; 29 | text-align: left; 30 | outline: none; 31 | padding: 18px; 32 | } 33 | 34 | .subheader { 35 | background-color: #eee; 36 | padding: 5px; 37 | } 38 | 39 | .subsubheader { 40 | background-color: #f5f5f5; 41 | padding: 5px; 42 | } 43 | 44 | .section-item-headers { 45 | display: flex; 46 | flex-direction: row; 47 | } 48 | 49 | .section-contents { 50 | overflow: hidden; 51 | } 52 | 53 | .minimized:after { 54 | content: ' \02795'; 55 | } 56 | 57 | .maximized:after { 58 | content: " \2796"; 59 | } 60 | 61 | .output-file-contents { 62 | background-color: #eee; 63 | } 64 | 65 | .output-section { 66 | display: flex; 67 | flex-direction: column; 68 | } 69 | 70 | #output-json-contents { 71 | white-space: pre; 72 | font-family: "Lucida Console", Courier, monospace; 73 | } 74 | 75 | #output-csv-contents { 76 | overflow-wrap: anywhere; 77 | font-family: "Lucida Console", Courier, monospace; 78 | } 79 | 80 | .property-item { 81 | display: flex; 82 | flex-direction: row; 83 | justify-content: flex-end; 84 | border: 2px solid #bbb; 85 | margin: 3px; 86 | margin-bottom: 8px; 87 | padding: 3px; 88 | } 89 | 90 | .property-item-contents { 91 | flex-grow: 1; 92 | } 93 | 94 | .ref-item { 95 | display: flex; 96 | flex-direction: row; 97 | justify-content: flex-end; 98 | border: 1px 
solid #bbb; 99 | margin: 3px; 100 | margin-bottom: 8px; 101 | padding: 3px; 102 | } 103 | 104 | .ref-item-contents { 105 | flex-grow: 1; 106 | } 107 | 108 | .property-quals { 109 | display: flex; 110 | flex-direction: column; 111 | padding: 10px; 112 | margin-left: 5px; 113 | } 114 | 115 | .property-refs { 116 | display: flex; 117 | flex-direction: column; 118 | padding: 10px; 119 | margin-left: 5px; 120 | } 121 | 122 | .ref-properties { 123 | display: flex; 124 | flex-direction: column; 125 | padding: 10px; 126 | margin-left: 5px; 127 | } 128 | 129 | .property-delete { 130 | display: block; 131 | } 132 | 133 | .template { 134 | display: none; 135 | } 136 | 137 | 138 | .qualifier, .ref-hash, .ref-prop { 139 | width: 300px; 140 | } 141 | -------------------------------------------------------------------------------- /wikibase/README.md: -------------------------------------------------------------------------------- 1 | # Wikibase 2 | 3 | This directory contains work related to setting up Wikibase and automating interactions with Wikidata and Wikibase. 4 | 5 | Note on 2023-02-09: The files in this directory were from early experiments using pywikibot prior to 2019-05-02. Since that time, work has been focused on development of [VanderBot](http://vanderbi.lt/vanderbot) and related scripts for interacting with the Wikidata API and Wikibase APIs in general. This material has been left here for historical reference. For more current information, see [vanderbot subdirectory of this one](https://github.com/HeardLibrary/linked-data/tree/master/wikibase/vanderbot). 
6 | 7 | | file | description | 8 | |------|-------------| 9 | | load-fac-wikibase.py | A Python script that uses the pywikibot module to load data about Vanderbilt faculty into a Wikibase instance | 10 | | vu-faculty.json | names, academic rank, and college affiliation of faculty at Vanderbilt taken from the official Registry | 11 | | interaction-diagram.png and .pptx | a diagram showing the ways that interactions between Wikidata and a Wikibase implementation might be mediated by humans and bots | 12 | 13 | ## Web pages related to this topic 14 | 15 | [Installing and running Wikibase](https://heardlibrary.github.io/digital-scholarship/lod/install/#using-docker-compose-to-create-an-instance-of-wikibase-on-your-local-computer) 16 | 17 | [The Wikibase/Wikidata data model](https://heardlibrary.github.io/digital-scholarship/lod/wikibase/) 18 | 19 | [Building a bot to interact with Wikibase](https://heardlibrary.github.io/digital-scholarship/host/wikidata/bot/) 20 | 21 | ---- 22 | Revised 2023-02-09 23 | -------------------------------------------------------------------------------- /wikibase/interaction-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeardLibrary/linked-data/9b0c7c70de9061e126f68da1f570846afa39537e/wikibase/interaction-diagram.png -------------------------------------------------------------------------------- /wikibase/interaction-diagram.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeardLibrary/linked-data/9b0c7c70de9061e126f68da1f570846afa39537e/wikibase/interaction-diagram.pptx -------------------------------------------------------------------------------- /wikibase/load-fac-wikibase.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import json 4 | from pywikibot import family 5 | import pywikibot, 
json, csv, sys 6 | 7 | # Log in to the Wikibase instance 8 | site = pywikibot.Site('ldwg', 'ldwg') 9 | site.login() 10 | 11 | repo = site.data_repository() 12 | 13 | # read in the faculty scrape JSON file 14 | with open('vu-faculty.json', 'rt', encoding='utf-8') as fileObject: 15 | jsonString = fileObject.read() 16 | data = json.loads(jsonString) 17 | 18 | for fac in data: 19 | 20 | #fac = data[0] 21 | #if 1==1: 22 | # print(fac) 23 | # don't add if name includes "(Deceased)" 24 | if '(Deceased)' not in fac['name']: 25 | # remove "(On Leave)" from name 26 | if '(On Leave)' in fac['name']: 27 | fac['name'] = fac['name'].replace('(On Leave)','') 28 | # check for surnames followed by "II", "III", etc. since they don't have commas 29 | testName = fac['name'].split(' ') 30 | if testName[1].strip() == "II": 31 | splitName = fac['name'].split('II') 32 | name = splitName[1].strip() + ' ' + splitName[0].strip() + ' II' 33 | elif testName[1].strip() == "III": 34 | splitName = fac['name'].split('III') 35 | name = splitName[1].strip() + ' ' + splitName[0].strip() + ' III' 36 | elif testName[1].strip() == "IV": 37 | splitName = fac['name'].split('IV') 38 | name = splitName[1].strip() + ' ' + splitName[0].strip() + ' IV' 39 | else: 40 | splitName = fac['name'].split(',') 41 | name = splitName[1].strip() + ' ' + splitName[0].strip() 42 | 43 | some_labels = {"en": name} 44 | new_item = pywikibot.ItemPage(repo) 45 | new_item.editLabels(labels=some_labels, summary="Setting labels") 46 | 47 | claim = pywikibot.Claim(repo, u'P4') # employer 48 | target = pywikibot.ItemPage(repo, u"Q3") # Vanderbilt University 49 | claim.setTarget(target) 50 | new_item.addClaim(claim, summary=u'Adding employer claim') 51 | 52 | claim = pywikibot.Claim(repo, u'P6') # instance of 53 | target = pywikibot.ItemPage(repo, u"Q5") # human 54 | claim.setTarget(target) 55 | new_item.addClaim(claim, summary=u'Adding type claim') 56 | 57 | #P39 position held 58 | #P101 field of work 59 | #P106 occupation 60 | 
#P463 member of 61 | #Looks like we could use P803 (professorship) as the property for the rank. 62 | #P937 is "work location" 63 | #P108 is "employer" 64 | 65 | print(new_item.getID(), name) 66 | -------------------------------------------------------------------------------- /wikibase/vanderbot/README.md: -------------------------------------------------------------------------------- 1 | # Editing generic Wikibases using VanderBot and related scripts 2 | 3 | This directory contains work related to automating interactions with any wikibase using the [VanderBot](http://vanderbi.lt/vanderbot) and related scripts for interacting with the Wikidata API and wikibase APIs in general. 4 | 5 | This material was developed to support a presentation to the [LD4 Wikibase Working Hour](https://www.wikidata.org/wiki/Wikidata:WikiProject_LD4_Wikidata_Affinity_Group/Wikibase_Working_Hours) on 2023-02-13. The primary source of information is [a web page](https://heardlibrary.github.io/digital-scholarship/lod/wikibase/load/), which provides details on running the software, screenshots, etc. 
6 | 7 | ## Related scripts 8 | 9 | In order to use the files in this directory, several scripts are required: 10 | 11 | | script/documentation link | filename/link | description | 12 | |--------|---------------|-------------| 13 | | [VanderBot](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/README.md) | [vanderbot.py](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/vanderbot.py) | uploads source data from a CSV file to a wikibase API, using the `csv-metadata.json` description file to interpret the CSV | 14 | | [VanderDeleteBot](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/vanderdeletebot.md) | [vanderdeletebot.py](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/vanderdeletebot.py) | uses a Wikimedia API to delete claims (statements) or references based on their unique IDs | 15 | | [VanderPropertyBot](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/vanderpropertybot.md) | [vanderpropertybot.py](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/vanderpropertybot.py) | uses the API of a non-Wikimedia Foundation wikibase to create new properties | 16 | | [ConvertConfigToMetadataSchema](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/convert-config.md) | [convert_config_to_metadata_schema.py](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/convert_config_to_metadata_schema.py) | generates CSV column headers and W3C standard CSV description file used by VanderBot from a YAML mapping configuration file | 17 | | [AcquireWikidataMetadata](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/acquire_wikidata.md) | [acquire_wikidata_metadata.py](https://github.com/HeardLibrary/linked-data/blob/master/vanderbot/acquire_wikidata_metadata.py) | uses SPARQL to download existing data from Wikidata or other wikibases | 18 | 19 | ## Files in this directory 20 | 21 | Descriptions of files in this directory and subdirectories 22 | 23 | | file | description | 24 | |------|-------------| 25 | | `config.yaml` (default filename) | YAML configuration 
file that maps CSV columns to custom wikibase properties. This particular file describes the `statues.csv` file used in the webpage examples | 26 | | `config_wikidata.yaml` | configuration file that maps the same column headers as `config.yaml`, but to corresponding Wikidata P IDs. Used to download existing data. | 27 | | `csv-metadata.json` | metadata description file in standard format described by the W3C [Generating RDF from Tabular Data on the Web](https://www.w3.org/TR/csv2rdf/) Recommendation and used by VanderBot to interpret the CSV containing source data to upload. This one was generated from `config.yaml` by the `convert_config_to_metadata_schema.py` script. | 28 | | `hstatues.csv` | CSV column headers generated from the `config.yaml` configuration file | 29 | | `qids.csv` | table containing the Q IDs of some famous statues whose metadata can be downloaded from Wikidata using the `acquire_wikidata_metadata.py` script based on the properties in the `config_wikidata.yaml` file. | 30 | | `statues.csv` | CSV data file for statues data showing all of the identifiers generated by the custom wikibase API after the data were uploaded | 31 | | `statues_added_ready.csv` | CSV data file for statues data ready for upload to the custom wikibase after adding the data downloaded from Wikidata | 32 | | `statues_downloaded.csv` | CSV data file resulting from the Wikidata download by the `acquire_wikidata_metadata.py` script based on the `config_wikidata.yaml` configuration data and the list of Q IDs in `qids.csv` | 33 | | `statues_raw.csv` | Statue of Liberty metadata added manually to the headers generated in `hstatues.csv` by the `convert_config_to_metadata_schema.py` script, ready to be written to the custom wikibase API. 
| 34 | | [elements](https://github.com/HeardLibrary/linked-data/tree/master/wikibase/vanderbot/elements) | directory containing data that can be used for a test upload of chemical elements to a custom wikibase | 35 | | [states](https://github.com/HeardLibrary/linked-data/tree/master/wikibase/vanderbot/states) | directory containing data that can be used for a test upload of state capitals and states to a custom wikibase | 36 | 37 | ---- 38 | Revised 2023-02-09 39 | -------------------------------------------------------------------------------- /wikibase/vanderbot/config.yaml: -------------------------------------------------------------------------------- 1 | data_path: "" 2 | item_pattern_file: graph_pattern.txt 3 | item_source_csv: "" 4 | outfiles: 5 | - output_file_name: statues.csv 6 | label_description_language_list: 7 | - en 8 | - fr 9 | manage_descriptions: true 10 | # Note: if no columns to ignore, you must omit the "ignore" key or provide an empty list value: [] 11 | ignore: [] 12 | prop_list: 13 | - variable: instance_of 14 | value_type: item 15 | pid: P1 16 | qual: [] 17 | ref: [] 18 | - variable: artist 19 | value_type: item 20 | pid: P2 21 | qual: 22 | - variable: series_ordinal 23 | value_type: string 24 | pid: P3 25 | ref: 26 | - variable: reference_url 27 | value_type: uri 28 | pid: P4 29 | - variable: retrieved 30 | value_type: date 31 | pid: P8 32 | - variable: height 33 | value_type: quantity 34 | pid: P5 35 | qual: [] 36 | ref: [] 37 | - variable: gps 38 | value_type: globecoordinate 39 | pid: P6 40 | qual: [] 41 | ref: [] 42 | - variable: title_en 43 | value_type: monolingualtext 44 | language: en 45 | pid: P7 46 | qual: [] 47 | ref: [] 48 | -------------------------------------------------------------------------------- /wikibase/vanderbot/config_wikidata.yaml: -------------------------------------------------------------------------------- 1 | data_path: "" 2 | item_pattern_file: "" 3 | item_source_csv: qids.csv 4 | outfiles: 5 | - 
output_file_name: statues_downloaded.csv 6 | label_description_language_list: 7 | - en 8 | - fr 9 | manage_descriptions: true 10 | # Note: if no columns to ignore, you must omit the "ignore" key or provide an empty list value: [] 11 | ignore: [] 12 | prop_list: 13 | - variable: instance_of 14 | value_type: item 15 | pid: P31 16 | qual: [] 17 | ref: [] 18 | - variable: artist 19 | value_type: item 20 | pid: P170 # creator 21 | qual: 22 | - variable: series_ordinal 23 | value_type: string 24 | pid: P1545 25 | ref: 26 | - variable: reference_url 27 | value_type: uri 28 | pid: P854 29 | - variable: retrieved 30 | value_type: date 31 | pid: P813 32 | - variable: height 33 | value_type: quantity 34 | pid: P2048 35 | qual: [] 36 | ref: [] 37 | - variable: gps 38 | value_type: globecoordinate 39 | pid: P625 # coordinate location 40 | qual: [] 41 | ref: [] 42 | - variable: title_en 43 | value_type: monolingualtext 44 | language: en 45 | pid: P1476 46 | qual: [] 47 | ref: [] 48 | -------------------------------------------------------------------------------- /wikibase/vanderbot/elements/README.md: -------------------------------------------------------------------------------- 1 | Elements data from 2 | 3 | Data assumed to be CC0 4 | 5 | Retrieved: 2023-02-08 6 | -------------------------------------------------------------------------------- /wikibase/vanderbot/elements/config.yaml: -------------------------------------------------------------------------------- 1 | data_path: "" 2 | item_pattern_file: graph_pattern.txt 3 | item_source_csv: "" 4 | outfiles: 5 | - output_file_name: elements.csv 6 | label_description_language_list: 7 | - en 8 | manage_descriptions: true 9 | # Note: if no columns to ignore, you must omit the "ignore" key or provide an empty list value: [] 10 | ignore: [] 11 | prop_list: 12 | - variable: instance_of 13 | value_type: item 14 | pid: P1 15 | qual: [] 16 | ref: [] 17 | - variable: atomic number 18 | value_type: string 19 | pid: P15 20 | qual: [] 21 | 
ref: [] 22 | - variable: atomic mass 23 | value_type: string 24 | pid: P16 25 | qual: [] 26 | ref: [] 27 | - variable: abbreviation 28 | value_type: string 29 | pid: P10 30 | qual: [] 31 | ref: 32 | - variable: reference_url 33 | value_type: uri 34 | pid: P4 35 | - variable: retrieved 36 | value_type: date 37 | pid: P8 38 | - variable: discovery_date 39 | value_type: date 40 | pid: P13 41 | qual: [] 42 | ref: 43 | - variable: reference_url 44 | value_type: uri 45 | pid: P4 46 | - variable: retrieved 47 | value_type: date 48 | pid: P8 49 | - variable: phase 50 | value_type: item 51 | pid: P14 52 | qual: [] 53 | ref: 54 | - variable: reference_url 55 | value_type: uri 56 | pid: P4 57 | - variable: retrieved 58 | value_type: date 59 | pid: P8 60 | -------------------------------------------------------------------------------- /wikibase/vanderbot/hstatues.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,label_fr,description_en,description_fr,instance_of_uuid,instance_of,artist_uuid,artist,artist_series_ordinal,artist_ref1_hash,artist_ref1_reference_url,artist_ref1_retrieved_nodeId,artist_ref1_retrieved_val,artist_ref1_retrieved_prec,height_uuid,height_nodeId,height_val,height_unit,gps_uuid,gps_nodeId,gps_val,gps_long,gps_prec,title_en_uuid,title_en 2 | -------------------------------------------------------------------------------- /wikibase/vanderbot/qids.csv: -------------------------------------------------------------------------------- 1 | qid,label_en 2 | Q179900,David 3 | Q151952,Venus de Milo 4 | Q3595955,Lion Capital of Asoka 5 | Q28223155,Golden Rhinoceros of Mapungubwe 6 | -------------------------------------------------------------------------------- /wikibase/vanderbot/states/README.md: -------------------------------------------------------------------------------- 1 | States data from 2 | 3 | Available under an MIT license 4 | 5 | Retrieved: 2023-02-08 6 | 
-------------------------------------------------------------------------------- /wikibase/vanderbot/states/config.yaml: -------------------------------------------------------------------------------- 1 | data_path: "" 2 | item_pattern_file: graph_pattern.txt 3 | item_source_csv: "" 4 | outfiles: 5 | - output_file_name: states.csv 6 | label_description_language_list: 7 | - en 8 | manage_descriptions: true 9 | # Note: if no columns to ignore, you must omit the "ignore" key or provide an empty list value: [] 10 | ignore: [] 11 | prop_list: 12 | - variable: instance_of 13 | value_type: item 14 | pid: P1 15 | qual: [] 16 | ref: [] 17 | - variable: abbreviation 18 | value_type: string 19 | pid: P10 20 | qual: [] 21 | ref: 22 | - variable: reference_url 23 | value_type: uri 24 | pid: P4 25 | - variable: retrieved 26 | value_type: date 27 | pid: P8 28 | - variable: website 29 | value_type: uri 30 | pid: P9 31 | qual: [] 32 | ref: 33 | - variable: reference_url 34 | value_type: uri 35 | pid: P4 36 | - variable: retrieved 37 | value_type: date 38 | pid: P8 39 | - variable: inception 40 | value_type: date 41 | pid: P11 42 | qual: [] 43 | ref: 44 | - variable: reference_url 45 | value_type: uri 46 | pid: P4 47 | - variable: retrieved 48 | value_type: date 49 | pid: P8 50 | - variable: capital 51 | value_type: item 52 | pid: P12 53 | qual: [] 54 | ref: 55 | - variable: reference_url 56 | value_type: uri 57 | pid: P4 58 | - variable: retrieved 59 | value_type: date 60 | pid: P8 61 | -------------------------------------------------------------------------------- /wikibase/vanderbot/states/config_capital.yaml: -------------------------------------------------------------------------------- 1 | data_path: "" 2 | item_pattern_file: graph_pattern.txt 3 | item_source_csv: "" 4 | outfiles: 5 | - output_file_name: capitals.csv 6 | label_description_language_list: 7 | - en 8 | manage_descriptions: true 9 | # Note: if no columns to ignore, you must omit the "ignore" key or provide an 
empty list value: [] 10 | ignore: [] 11 | prop_list: 12 | - variable: instance_of 13 | value_type: item 14 | pid: P1 15 | qual: [] 16 | ref: [] 17 | - variable: website 18 | value_type: uri 19 | pid: P9 20 | qual: [] 21 | ref: 22 | - variable: reference_url 23 | value_type: uri 24 | pid: P4 25 | - variable: retrieved 26 | value_type: date 27 | pid: P8 28 | -------------------------------------------------------------------------------- /wikibase/vanderbot/statues.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,label_fr,description_en,description_fr,instance_of_uuid,instance_of,artist_uuid,artist,artist_series_ordinal,artist_ref1_hash,artist_ref1_reference_url,artist_ref1_retrieved_nodeId,artist_ref1_retrieved_val,artist_ref1_retrieved_prec,height_uuid,height_nodeId,height_val,height_unit,gps_uuid,gps_nodeId,gps_val,gps_long,gps_prec,title_en_uuid,title_en 2 | Q6,Statue of Liberty,La Liberté éclairant le monde,colossal sculpture in New York harbor,monument de la ville de New York,FD4E60B7-F166-47E9-A036-7CF8039AFA73,Q3,67790D58-E327-43F3-AC96-2885853E2F1C,Q1,1,8727faeff82286cd6b2868837d2db63e6f16ba54,https://www.nps.gov/stli/learn/historyculture/people.htm,1c826145-91fc-49c8-9068-ea9e689a7597,2022-02-07T00:00:00Z,11,2838EA6C-6D82-4330-9137-0AE65F25A5FE,b7043533-3599-48d3-add1-4efd88e03bb3,46,Q4,DF1432EB-60E5-4847-A8A0-2F05864EDBF1,ac03132b-db2b-4fd4-acec-0e88b7025aac,40.689167,-74.044444,0.0001,12977A40-F0E1-47A6-8930-63EF5DC76B2D,Liberty Enlightening the World 3 | Q8,Venus de Milo,Vénus de Milo,Ancient Greek marble statue of a woman,sculpture grecque de la fin de l'époque 
hellénistique,58A9CAD9-F06A-46B4-8274-222447A54CF6,Q3,DC36B268-F837-410E-93D2-607F45433DD7,_:1846801f6aaaa4abb2000797f7d3dc21,,,,,,,D486B6E4-8D47-4DF9-843D-5B24D408A1A2,9f2c91d5-29cc-4f3e-9efc-cda43a85ba05,202,Q12,DEE91BE1-575F-4E05-8CFE-FE8BE007EAA2,8f694aff-d0d0-437f-9e34-b5bc748733c6,48.859944681901,2.3372878730152,0.013654266337667,1B7B8F52-40A7-4BC6-B522-6205EE141883,Venus de Milo (Aphrodite of Milos) 4 | Q9,David,David,sculpture by Michelangelo,statue de Michel-Ange,E9253E8E-8D8C-4401-AF6E-25685C67DD92,Q3,2B6154C6-C5AC-42AF-96BA-B45066553FE3,Q7,,be91ac7fbc90be8621cf01ca24ae00dc43690fdd,https://www.galleriaaccademiafirenze.it/opere/david/,,,,5E0F4EB1-CC85-4C06-A237-B9DC8E25EA53,5ac5ba6f-7a8d-4e04-86af-42c5cde06654,517,Q12,305FF2EC-3226-4267-A05A-D9DEC3A08254,eb96b641-543b-4e25-8dd2-bce7bbabfb84,43.776702777778,11.25945,2.7777777777778E-06,F1CA18A0-DA98-4ADB-AEB7-AE80728BD492,David 5 | Q10,Golden Rhinoceros of Mapungubwe,,medieval artifact made from wood which is covered in thin sheets of gold from the Kingdom of Mapungubwe,,4F3414EE-2D9B-42C6-BB8A-BEF528C54317,Q3,,,,,,,,,,,,,,,,,,, 6 | Q11,Lion Capital of Asoka,Chapiteau aux lions d'Ashoka,"capital of a column of Mauryan emperor Ashoka in Sarnath, Uttar Pradesh, India",emblème national de l'Inde,B5FDCB2D-2040-48F6-936C-9B0A5E75628E,Q3,0B6B8471-870A-49CA-B6C1-E0A7A82E5870,_:2cd1b162bfddbb3d1c905fc36dd145d2,,e6cdf45c9eb4a44d13385e934d57e663e2a49f4d,https://beckchris.wordpress.com/visual-arts/best-works-of-art-of-all-time-the-critics-picks/,,,,,,,,9F9BFD9A-1253-40B1-8A2E-99A8B5BF532F,23f264d3-0422-4e89-92c8-4b2300a2cca0,25.3811,83.0214,0.0001,742FCC25-B792-4FBA-84BF-74752C8C4D2C,Lion Capital of Ashoka 7 | -------------------------------------------------------------------------------- /wikibase/vanderbot/statues_added_ready.csv: -------------------------------------------------------------------------------- 1 | 
qid,label_en,label_fr,description_en,description_fr,instance_of_uuid,instance_of,artist_uuid,artist,artist_series_ordinal,artist_ref1_hash,artist_ref1_reference_url,artist_ref1_retrieved_nodeId,artist_ref1_retrieved_val,artist_ref1_retrieved_prec,height_uuid,height_nodeId,height_val,height_unit,gps_uuid,gps_nodeId,gps_val,gps_long,gps_prec,title_en_uuid,title_en 2 | Q6,Statue of Liberty,La Liberté éclairant le monde,colossal sculpture in New York harbor,monument de la ville de New York,FD4E60B7-F166-47E9-A036-7CF8039AFA73,Q3,67790D58-E327-43F3-AC96-2885853E2F1C,Q1,1,8727faeff82286cd6b2868837d2db63e6f16ba54,https://www.nps.gov/stli/learn/historyculture/people.htm,1c826145-91fc-49c8-9068-ea9e689a7597,2022-02-07T00:00:00Z,11,2838EA6C-6D82-4330-9137-0AE65F25A5FE,b7043533-3599-48d3-add1-4efd88e03bb3,46,Q4,DF1432EB-60E5-4847-A8A0-2F05864EDBF1,ac03132b-db2b-4fd4-acec-0e88b7025aac,40.689167,-74.044444,0.0001,12977A40-F0E1-47A6-8930-63EF5DC76B2D,Liberty Enlightening the World 3 | ,Venus de Milo,Vénus de Milo,Ancient Greek marble statue of a woman,sculpture grecque de la fin de l'époque hellénistique,,Q3,,_:1846801f6aaaa4abb2000797f7d3dc21,,,,,,,,,202,Q12,,,48.859944681901,2.3372878730152,0.013654266337667,,Venus de Milo (Aphrodite of Milos) 4 | ,David,David,sculpture by Michelangelo,statue de Michel-Ange,,Q3,,Q7,,,https://www.galleriaaccademiafirenze.it/opere/david/,,,,,,517,Q12,,,43.776702777778,11.25945,2.7777777777778E-06,,David 5 | ,Golden Rhinoceros of Mapungubwe,,medieval artifact made from wood which is covered in thin sheets of gold from the Kingdom of Mapungubwe,,,Q3,,,,,,,,,,,,,,,,,,, 6 | ,Lion Capital of Asoka,Chapiteau aux lions d'Ashoka,"capital of a column of Mauryan emperor Ashoka in Sarnath, Uttar Pradesh, India",emblème national de l'Inde,,Q3,,_:2cd1b162bfddbb3d1c905fc36dd145d2,,,https://beckchris.wordpress.com/visual-arts/best-works-of-art-of-all-time-the-critics-picks/,,,,,,,,,,25.3811,83.0214,0.0001,,Lion Capital of Ashoka 7 | 
-------------------------------------------------------------------------------- /wikibase/vanderbot/statues_downloaded.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,label_fr,description_en,description_fr,instance_of_uuid,instance_of,artist_uuid,artist,artist_series_ordinal,artist_ref1_hash,artist_ref1_reference_url,artist_ref1_retrieved_nodeId,artist_ref1_retrieved_val,artist_ref1_retrieved_prec,height_uuid,height_nodeId,height_val,height_unit,gps_uuid,gps_nodeId,gps_val,gps_long,gps_prec,title_en_uuid,title_en 2 | Q151952,Venus de Milo,Vénus de Milo,Ancient Greek marble statue of a woman,sculpture grecque de la fin de l'époque hellénistique,F35889F5-EA74-45B1-888B-38C335798FCE,Q860861,623A6CC8-DDD9-423C-84F6-C23DC2D5E9C6,_:1846801f6aaaa4abb2000797f7d3dc21,,,,,,,565E0DDB-8047-4B8C-9886-3526488AB435,877b688237e6de9ff02baa2b0b8ed761,202,Q174728,7ff5b82c-e946-44e8-bd36-c6b99c43b7fc,8861681ceab0da7990c0222fa4fc883a,48.859944681901,2.3372878730152,0.013654266337667,DA498897-246C-4FE3-B1CC-3A6D2AC5EF83,Venus de Milo (Aphrodite of Milos) 3 | Q179900,David,David,sculpture by Michelangelo,statue de Michel-Ange,785D46F7-8C10-478B-89F7-F1663BFEB828,Q179700,6F4A53EC-F468-4541-8670-8914803F336F,Q5592,,0eb9c02098ec870344e3b06efe7ddfa744770318,https://www.galleriaaccademiafirenze.it/opere/david/,,,,FBEAE122-A5E7-4C45-A605-55EBC1090778,4292b787ec2c703984497d42b7797908,517,Q174728,8e5895d4-3c67-4500-a8c5-1b644afd4811,f103b49f6173d027a36bbb0bc9999133,43.776702777778,11.25945,2.7777777777778E-06,8AA863AA-203F-4AAC-9183-1A79F98AEF89,David 4 | Q28223155,Golden Rhinoceros of Mapungubwe,,medieval artifact made from wood which is covered in thin sheets of gold from the Kingdom of Mapungubwe,,ED2DCC55-28D5-4D63-818E-A87AB8FD2F70,Q220659,,,,,,,,,,,,,,,,,,, 5 | Q3595955,Lion Capital of Asoka,Chapiteau aux lions d'Ashoka,"capital of a column of Mauryan emperor Ashoka in Sarnath, Uttar Pradesh, India",emblème national de 
l'Inde,a57f5082-46e9-7bf0-2972-b0b8fd1a6ab9,Q193893,6FAE1C5B-225F-4E3F-8EB3-FC1F8655B6A2,_:2cd1b162bfddbb3d1c905fc36dd145d2,,78239087ae2e2c39e31073b11e867ea534a2ffb0,https://beckchris.wordpress.com/visual-arts/best-works-of-art-of-all-time-the-critics-picks/,,,,,,,,f2b3e1d7-a1d1-4690-ae11-ef0f58a18cf1,e72afa61c7a1cbadab4d386135248992,25.3811,83.0214,0.0001,9F92E23F-28D0-467A-9449-75A52A7EF1A9,Lion Capital of Ashoka 6 | -------------------------------------------------------------------------------- /wikibase/vanderbot/statues_raw.csv: -------------------------------------------------------------------------------- 1 | qid,label_en,label_fr,description_en,description_fr,instance_of_uuid,instance_of,artist_uuid,artist,artist_series_ordinal,artist_ref1_hash,artist_ref1_reference_url,artist_ref1_retrieved_nodeId,artist_ref1_retrieved_val,artist_ref1_retrieved_prec,height_uuid,height_nodeId,height_val,height_unit,gps_uuid,gps_nodeId,gps_val,gps_long,gps_prec,title_en_uuid,title_en 2 | ,Statue of Liberty,La Liberté éclairant le monde,colossal sculpture in New York harbor,monument de la ville de New York,,Q3,,Q1,1,,https://www.nps.gov/stli/learn/historyculture/people.htm,,2022-02-07,,,,46,Q4,,,40.689167,-74.044444,0.0001,,Liberty Enlightening the World 3 | --------------------------------------------------------------------------------