├── .gitignore
├── code
    ├── zotero.py
    ├── oadoi.py
    ├── wikidata.py
    ├── doaj.py
    ├── orcid.py
    └── pyApiToolkit.py
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
 1 | venv
 2 | *.pyc
 3 | includes
 4 | .project
 5 | *.pyo
 6 | *~
 7 | *.db
 8 | staticfiles
 9 | .env
10 | *.sublime-project
11 | *.sublime-workspace
12 | data/
13 | notes.md
14 | include.py
15 | 


--------------------------------------------------------------------------------
/code/zotero.py:
--------------------------------------------------------------------------------
 1 | #!/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """
 5 | https://github.com/urschrei/pyzotero
 6 | https://www.zotero.org/support/dev/client_coding/javascript_api
 7 | http://pyzotero.readthedocs.io/en/latest/
 8 | """
 9 | 
10 | import pyApiToolkit as at
11 | import os
12 | from pyzotero import zotero
13 | import include
14 | 
15 | __author__ = "Stefan Kasberger"
16 | __copyright__ = "Copyright 2017"
17 | __license__ = "MIT"
18 | __version__ = "0.1"
19 | __maintainer__ = "Stefan Kasberger"
20 | __email__ = "mail@stefankasberger.at"
21 | __status__ = "Development" # 'Development', 'Production' or 'Prototype'
22 | 
23 | ###    GLOBAL   ###
24 | 
# Delay in seconds; not referenced anywhere in the visible part of this script.
DELAY_TIME = 5 # in seconds
# Timestamp string, computed once when this module is loaded.
TS = at.get_timestring()
# Alternative: pin a fixed timestamp instead of the current time.
# TS = '2015-10-28-14-59'
28 | 
29 | ###    FUNCTIONS   ###
30 | 
31 | ###    MAIN   ###
32 | 
if __name__ == "__main__":
	# Script entry point: print the data of the first five top-level items
	# of the configured Zotero library.
	startTime = at.start_timer()

	# Credentials are read from the local include.py (see README).
	config = include.data['zotero']
	at.setup_environment()

	# zotero API
	library_type = 'user'
	#library_type = 'group'
	zot = zotero.Zotero(config['zoteroID'], library_type, config['apiKey'])
	items = zot.top(limit=5)

	for item in items:
		print(item['data'])

	at.stop_timer(startTime)
51 | 


--------------------------------------------------------------------------------
/code/oadoi.py:
--------------------------------------------------------------------------------
 1 | #!/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """
 5 | Documentation: https://oadoi.org/api
 6 | """
 7 | 
 8 | import pyApiToolkit as at
 9 | import os
10 | import include
11 | 
12 | __author__ = "Stefan Kasberger"
13 | __copyright__ = "Copyright 2017"
14 | __license__ = "MIT"
15 | __version__ = "0.1"
16 | __maintainer__ = "Stefan Kasberger"
17 | __email__ = "mail@stefankasberger.at"
18 | __status__ = "Development" # 'Development', 'Production' or 'Prototype'
19 | 
20 | ###    GLOBAL   ###
21 | 
# Delay in seconds; not referenced anywhere in the visible part of this script.
DELAY_TIME = 5 # in seconds
# Timestamp string, computed once when this module is loaded.
TS = at.get_timestring()
# Alternative: pin a fixed timestamp instead of the current time.
# TS = '2015-10-28-14-59'
25 | 
26 | ###    FUNCTIONS   ###
27 | 
def request_dois(dois, base_url='https://api.oadoi.org/', email=None):
	"""Query the oaDOI API once per DOI and collect the decoded responses.

	Args:
		dois: iterable of DOI strings.
		base_url: API root the DOI is appended to.
		email: address sent as the `email` query parameter. When omitted,
			the value from include.py (data['oadoi']['email']) is used.

	Returns:
		dict mapping each DOI to whatever at.request_query() returned for it.
	"""
	if email is None:
		# Read the config module directly instead of relying on the `config`
		# global, which only exists when this file is run as a script.
		email = include.data['oadoi']['email']

	return {doi: at.request_query(base_url + doi + '?email=' + email) for doi in dois}
34 | 
def save_to_files(data, rootFolder):
	"""Write every value of `data` to its own numbered JSON file.

	Args:
		data: mapping whose values are stored; the keys are not used in
			the file names, only the running position is.
		rootFolder: project root; files go to <rootFolder>/data/raw/json/.
	"""
	for position, key in enumerate(data):
		target = rootFolder+'/data/raw/json/oadoi_'+str(position)+'.json'
		at.save_to_json(data[key], target)
40 | 
41 | ###    MAIN   ###
42 | 
if __name__ == "__main__":
	# Script entry point: look up every DOI listed in data/raw/csv/oadoi.csv
	# and store each response as a JSON file.
	startTime = at.start_timer()

	rootFolder = at.get_root_folder()
	# NOTE: `baseUrl` and `config` are module-level names; keep them stable,
	# helper functions in this file may look them up as globals.
	baseUrl = 'https://api.oadoi.org/'
	config = include.data['oadoi']
	at.setup_environment()

	# oadoi API
	df = at.open_csv(rootFolder+'/data/raw/csv/oadoi.csv')
	data = request_dois(df['doi'])
	save_to_files(data, rootFolder)

	at.stop_timer(startTime)
59 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python Open Science API toolkit
 2 | Python 3 scripts to access, create, distribute and publish open research data or data about open science works.
 3 | 
 4 | **Requirements**
 5 | - Pandas
 6 | 
 7 | 
 8 | **include.py**
 9 | 
10 | This is a config file in which you store your personal information in dictionaries. See the specific wrappers below for details on how to use it. This is the main part of the toolkit, in which the functionality used by the scripts is developed.
11 | 
12 | ## pyApiToolkit.py
13 | 
14 | Basic functionalities, which are used by the other scripts listed further below.
15 | 
16 | ## API Wrapper
17 | 
18 | ### oadoi.py
19 | 
20 | Python wrapper to access the [oadoi.org API](https://oadoi.org/api).
21 | 
22 | **include.py**
23 | 
24 | ```
25 | data = {
26 | 	'oadoi': {
27 | 		'email': 'EMAIL'
28 | 	}
29 | }
30 | ```
31 | 
32 | ### wikidata.py
33 | Python wrapper to access the [wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) API via the [wikidataintegrator](https://github.com/SuLab/WikidataIntegrator) module (must be installed to work).
34 | 
35 | You have to have a wikidata account for this.
36 | 
37 | **include.py**
38 | 
39 | ```
40 | data = {
41 | 	'wikidata': {
42 | 		'user': 'USERNAME',
43 | 		'password': 'PASSWORD'
44 | 	}
45 | }
46 | ```
47 | 
48 | ### orcid.py
49 | Python wrapper to access the [ORCID](https://orcid.org/) API.
50 | 
51 | ### zotero.py
52 | 
53 | Wrapper for the [Zotero API](https://www.zotero.org/support/dev/client_coding/javascript_api). It uses the [pyZotero](https://github.com/urschrei/pyzotero) Python module, which must be installed for the script to work.
54 | 
55 | **include.py**
56 | 
57 | ```
58 | data = {
59 | 	'zotero': {
60 | 		'apiKey': 'API_KEY',
61 | 		'zoteroID': 'ZOTEROID'
62 | 	}
63 | }
64 | ```
65 | 
66 | ### doaj.py
67 | Wrapper to access the [Directory of Open Access Journals (DOAJ) API](https://doaj.org/api/v1/docs).
68 | 
69 | 


--------------------------------------------------------------------------------
/code/wikidata.py:
--------------------------------------------------------------------------------
 1 | #!/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """
 5 | """
 6 | 
 7 | import pyApiToolkit as at
 8 | import os
 9 | from wikidataintegrator import wdi_core, wdi_login
10 | import include
11 | 
12 | __author__ = "Stefan Kasberger"
13 | __copyright__ = "Copyright 2017"
14 | __license__ = "MIT"
15 | __version__ = "0.1"
16 | __maintainer__ = "Stefan Kasberger"
17 | __email__ = "mail@stefankasberger.at"
18 | __status__ = "Development" # 'Development', 'Production' or 'Prototype'
19 | 
20 | ###    GLOBAL   ###
21 | 
# Delay in seconds; not referenced anywhere in the visible part of this script.
DELAY_TIME = 5 # in seconds
# Timestamp string, computed once when this module is loaded.
TS = at.get_timestring()
# Alternative: pin a fixed timestamp instead of the current time.
# TS = '2015-10-28-14-59'
25 | 
26 | ###    FUNCTIONS   ###
27 | 
def login(user, password):
	"""Create a Wikidata login object for the given account.

	Args:
		user: Wikidata user name.
		password: password of that account.

	Returns:
		The wdi_login.WDLogin instance. It used to be created and thrown
		away; returning it lets callers pass it on to write operations.
	"""
	return wdi_login.WDLogin(user=user, pwd=password)
30 | 
def query_item(itemID):
	"""Build a WDItemEngine for an existing item.

	Args:
		itemID: passed on as `wd_item_id`.

	Returns:
		The wdi_core.WDItemEngine instance.
	"""
	engine = wdi_core.WDItemEngine(wd_item_id=itemID)
	return engine
33 | 
def get_data(results):
	"""Return the JSON representation of a queried item.

	Args:
		results: object offering get_wd_json_representation(), e.g. the
			value returned by query_item().

	Returns:
		Whatever results.get_wd_json_representation() returns.
	"""
	representation = results.get_wd_json_representation()
	return representation
36 | 
def write_item(data=None, login_instance=None, item_name='<your_item_name>', domain='genes'):
	"""Search for an item by name, then edit it or create it.

	The previous version referenced the undefined names `entrez_gene_id` and
	`login_instance` and therefore always failed with NameError; both are
	parameters now.

	Args:
		data: list of statements handed to WDItemEngine as `data`
			(defaults to an empty list).
		login_instance: login object used for writing, e.g. a
			wdi_login.WDLogin instance.
		item_name: name of the item to search for.
		domain: passed on to WDItemEngine as `domain`.

	Raises:
		ValueError: if no login object is given.
	"""
	if login_instance is None:
		raise ValueError('write_item() needs a login object (wdi_login.WDLogin)')

	statements = [] if data is None else list(data)
	# Search for and then edit/create new item
	wd_item = wdi_core.WDItemEngine(item_name=item_name, domain=domain, data=statements)
	wd_item.write(login_instance)
41 | 
def save_to_files(data, rootFolder):
	"""Write every entry of `data` to a JSON file named after its key.

	Args:
		data: mapping of item id (string) to the data to store.
		rootFolder: project root; files go to <rootFolder>/data/raw/json/.
	"""
	for item_id in data:
		target = rootFolder+'/data/raw/json/wikidata_'+item_id+'.json'
		at.save_to_json(data[item_id], target)
45 | 
46 | ###    MAIN   ###
47 | 
if __name__ == "__main__":
	# Script entry point: fetch every item listed in the `item` column of
	# data/raw/csv/wikidata.csv and store each one as a JSON file.
	startTime = at.start_timer()

	rootFolder = at.get_root_folder()
	config = include.data['wikidata']
	at.setup_environment()

	# wikidata API
	login(config['user'], config['password'])

	df = at.open_csv(rootFolder+'/data/raw/csv/wikidata.csv')
	data = {item: get_data(query_item(item)) for item in df['item']}

	save_to_files(data, rootFolder)

	# Optional SQLite storage, currently disabled:
	#at.create_sqlite3_db(rootFolder+'/data/sqlite3/wikidata.db')

	at.stop_timer(startTime)
70 | 


--------------------------------------------------------------------------------
/code/doaj.py:
--------------------------------------------------------------------------------
 1 | #!/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """
 5 | Documentation: https://doaj.org/api/v1/docs
 6 | """
 7 | 
 8 | import pyApiToolkit as at
 9 | import os
10 | 
11 | __author__ = "Stefan Kasberger"
12 | __copyright__ = "Copyright 2017"
13 | __license__ = "MIT"
14 | __version__ = "0.1"
15 | __maintainer__ = "Stefan Kasberger"
16 | __email__ = "mail@stefankasberger.at"
17 | __status__ = "Development" # 'Development', 'Production' or 'Prototype'
18 | 
19 | ###    GLOBAL   ###
20 | 
# Delay in seconds; not referenced anywhere in the visible part of this script.
DELAY_TIME = 5 # in seconds
# Timestamp string, computed once when this module is loaded.
TS = at.get_timestring()
# Root of the DOAJ API (v1); every request helper below appends its path to it.
BASE_URL = 'http://doaj.org/api/v1/'
# Alternative: pin a fixed timestamp instead of the current time.
# TS = '2015-10-28-14-59'
25 | 
26 | ###    FUNCTIONS   ###
27 | 
def search_journals(query):
	"""Run a journal search.

	Args:
		query: search string, appended unchanged to the request URL.

	Returns:
		The decoded response of at.request_query().
	"""
	url = BASE_URL+'search/journals/'+query
	return at.request_query(url)
30 | 
def search_articles(query):
	"""Run an article search.

	Args:
		query: search string, appended unchanged to the request URL.

	Returns:
		The decoded response of at.request_query().
	"""
	url = BASE_URL+'search/articles/'+query
	return at.request_query(url)
33 | 
def retrieve_article(articleId):
	"""Fetch a single article record.

	Args:
		articleId: article identifier, appended unchanged to the request URL.

	Returns:
		The decoded response of at.request_query().
	"""
	url = BASE_URL+'articles/'+articleId
	return at.request_query(url)
36 | 
def retrieve_journal_by_id(journalId):
	"""Fetch a single journal record.

	Args:
		journalId: journal identifier, appended unchanged to the request URL.

	Returns:
		The decoded response of at.request_query().
	"""
	url = BASE_URL+'journals/'+journalId
	return at.request_query(url)
39 | 
40 | ###    MAIN   ###
41 | 
if __name__ == "__main__":
	# Script entry point: run one example request per endpoint and store
	# each response as a JSON file below <root>/data/raw/json/.
	startTime = at.start_timer()

	rootFolder = at.get_root_folder()
	at.setup_environment()

	# search articles
	#df = at.open_csv(rootFolder+'/data/raw/csv/doaj_searcharticles.csv')
	articleQuery = 'libya'
	dataSA = search_articles(articleQuery)
	at.save_to_json(dataSA, rootFolder+'/data/raw/json/doaj_searcharticles_'+articleQuery+'.json')

	# retrieve article
	#df = at.open_csv(rootFolder+'/data/raw/csv/doaj_articles.csv')
	articleId = '000011857dbc42afb0f1a8c7e35ab46f'
	dataRA = retrieve_article(articleId)
	at.save_to_json(dataRA, rootFolder+'/data/raw/json/doaj_retrievearticle_'+articleId+'.json')

	# search journals
	#df = at.open_csv(rootFolder+'/data/raw/csv/doaj_searchjournals.csv')
	journalQuery = 'geography'
	dataSJ = search_journals(journalQuery)
	at.save_to_json(dataSJ, rootFolder+'/data/raw/json/doaj_searchjournals_'+journalQuery+'.json')

	# retrieve journal by ID (disabled)
	#df = at.open_csv(rootFolder+'/data/raw/csv/doaj_journalID.csv')
	#journalId = '2503-250X'
	#dataRJ = retrieve_journal_by_id(journalId)
	#at.save_to_json(dataRJ, rootFolder+'/data/raw/json/doaj_retrievejournal_'+journalId+'.json')

	at.stop_timer(startTime)
74 | 


--------------------------------------------------------------------------------
/code/orcid.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import requests	
 5 | import pyApiToolkit as at
 6 | 
 7 | __author__ = "Stefan Kasberger"
 8 | __copyright__ = "Copyright 2017"
 9 | __license__ = "MIT"
10 | __version__ = "0.1"
11 | __maintainer__ = "Stefan Kasberger"
12 | __email__ = "mail@stefankasberger.at"
13 | __status__ = "Development" # 'Development', 'Production' or 'Prototype'
14 | 
15 | ###    GLOBAL   ###
16 | 
# Host of the ORCID public API; the get_* helpers below build their URLs from it.
BASE_URL = 'http://pub.orcid.org/'
# API version path segment, inserted between BASE_URL and the ORCID iD.
API_VERSION = 'v1.1/'
19 | 
# make request to the ORCID API
def request_api(query, datatype):
	"""Send a GET request and return the response body as text.

	Args:
		query: complete URL to request.
		datatype: 'json' or 'html' select the matching Accept header; any
			other value requests ORCID XML.

	Returns:
		The body of the response (resp.text).

	Raises:
		requests.HTTPError: if the response status is not 200. The previous
			code raised an undefined `ApiError`, i.e. a NameError.
	"""
	if datatype == 'json':
		acceptkey = 'application/orcid+json' # header: "Accept: application/orcid+json"
	elif datatype == 'html':
		acceptkey = 'text/html' # header: "Accept: text/html"
	else:
		acceptkey = 'application/orcid+xml' # header: "Accept: application/orcid+xml"

	headers = {'Accept': acceptkey}

	resp = requests.get(query, headers=headers)

	if resp.status_code != 200:
		# This means something went wrong.
		raise requests.HTTPError(
			'GET {} returned status {}'.format(query, resp.status_code),
			response=resp)

	return resp.text
40 | 	
# get the record stored directly under the ORCID iD
def get_orcidId(orcidId, datatype):
	"""
	Requests the resource located directly under the given ORCID iD.
	According to the original notes this yields the fields marked "Public"
	in the bio portion of the record (limited-access data as well when an
	access token and the Member API are used).
	Example: http://pub.orcid.org/v1.1/0000-0001-7857-2795

	Args:
		orcidId: ORCID iD of the scholar.
		datatype: response format, handed on to request_api().

	Returns:
		The response body as returned by request_api().
	"""
	url = BASE_URL+API_VERSION+orcidId
	return request_api(url, datatype)
50 | 
# get the "works" of an ORCID record
def get_works(orcidId, datatype):
	"""
	Requests the "orcid-works" resource of the given ORCID iD. According to
	the original notes these are the "works" research activities marked
	"Public" in the record (limited-access works as well when an access
	token and the Member API are used).
	Example: http://pub.orcid.org/v1.1/0000-0001-7857-2795/orcid-works

	Args:
		orcidId: ORCID iD of the scholar.
		datatype: response format, handed on to request_api().

	Returns:
		The response body as returned by request_api().
	"""
	url = BASE_URL+API_VERSION+orcidId+'/orcid-works'
	return request_api(url, datatype)
61 | 
# get the profile of an ORCID record
def get_profile(orcidId, datatype):
	"""
	Requests the "orcid-profile" resource of the given ORCID iD.
	NOTE(review): the original description (public fields of the bio
	portion) was identical to the one of get_orcidId(); presumably this
	endpoint returns the complete public profile -- confirm against the
	ORCID API documentation.
	Example: http://pub.orcid.org/v1.1/0000-0001-7857-2795/orcid-profile

	Args:
		orcidId: ORCID iD of the scholar.
		datatype: response format, handed on to request_api().

	Returns:
		The response body as returned by request_api().
	"""
	url = BASE_URL+API_VERSION+orcidId+'/orcid-profile'
	return request_api(url, datatype)
71 | 
def request_ids(orcidIds, datatype=None):
	"""Fetch the profile of every given ORCID iD.

	Args:
		orcidIds: iterable of ORCID iD strings.
		datatype: response format handed on to get_profile(). When omitted,
			the module-level `datatype` is used if the script defined one,
			otherwise 'xml'.

	Returns:
		A new dict mapping each ORCID iD to the text get_profile() returned.
	"""
	if datatype is None:
		# Keep the script behaviour (global set under __main__) without
		# failing with NameError when this module is imported.
		datatype = globals().get('datatype', 'xml')

	return {orcid_id: get_profile(orcid_id, datatype) for orcid_id in orcidIds}
76 | 
def save_to_file(data, datatype, rootFolder=None):
	"""Store every fetched record in its own XML file.

	Args:
		data: mapping of ORCID iD to the response text.
		datatype: format of the stored responses; only 'xml' is written,
			any other value results in no files.
		rootFolder: project root; files go to <rootFolder>/data/raw/xml/.
			Defaults to at.get_root_folder(), the value the script itself
			uses, so the function no longer needs a `rootFolder` global.
	"""
	if datatype != 'xml':
		return

	if rootFolder is None:
		rootFolder = at.get_root_folder()

	for orcid_id in data:
		at.save_to_file(data[orcid_id], rootFolder+'/data/raw/xml/orcid_'+orcid_id+'.xml')
81 | 
# main
# Script entry point: fetch the profile of every ORCID iD listed in
# data/raw/csv/orcid.csv and store the responses as files.
if __name__ == '__main__':
	startTime = at.start_timer()
	# NOTE: the names assigned here are module-level globals. Functions in
	# this file that do not receive them as arguments look them up by name,
	# so keep the names and define them before the calls below.
	data = {}
	rootFolder = at.get_root_folder()
	datatype = 'xml'
	# Not used by the visible code; the request helpers use BASE_URL instead.
	baseUrl = 'http://pub.orcid.org/'
	at.setup_environment()

	df = at.open_csv(rootFolder+'/data/raw/csv/orcid.csv')

	# request one profile per entry of the 'orcid_id' column
	data = request_ids(df['orcid_id'])

	save_to_file(data, datatype)

	at.stop_timer(startTime)
99 | 


--------------------------------------------------------------------------------
/code/pyApiToolkit.py:
--------------------------------------------------------------------------------
  1 | #!/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | """
  6 | 
  7 | import requests	
  8 | import json
  9 | import csv
 10 | import os
 11 | import sys
 12 | import sqlite3
 13 | import pandas as pd
 14 | from datetime import datetime
 15 | #import xml.etree.ElementTree as ET
 16 | #from sqlalchemy import Column, ForeignKey, Integer, String
 17 | #from sqlalchemy.exc import SQLAlchemyError
 18 | #from sqlalchemy.ext.declarative import declarative_base
 19 | #from sqlalchemy.orm import relationship
 20 | #from sqlalchemy import create_engine
 21 | 
 22 | 
 23 | def get_root_folder():
 24 | 	return os.path.dirname(os.getcwd())
 25 | 
 26 | def setup_environment():
 27 | 	"""Sets up the folder structure and working environment.
 28 | 	"""
 29 | 	rootFolder = get_root_folder()
 30 | 	folderRawJSON = rootFolder + '/data/raw/json/'
 31 | 	folderRawXML = rootFolder + '/data/raw/xml/'
 32 | 	folderRawCSV = rootFolder + '/data/raw/csv/'
 33 | 	folderSQLite3 = rootFolder + '/data/sqlite3/'
 34 | 	if not os.path.exists(folderRawCSV):
 35 | 		os.makedirs(folderRawCSV)
 36 | 	if not os.path.exists(folderRawJSON):
 37 | 		os.makedirs(folderRawJSON)
 38 | 	if not os.path.exists(folderRawXML):
 39 | 		os.makedirs(folderRawXML)
 40 | 	if not os.path.exists(folderSQLite3):
 41 | 		os.makedirs(folderSQLite3)
 42 | 
 43 | def get_timestring():
 44 | 	return datetime.now().strftime('%Y-%m-%d-%H-%M')
 45 | 
 46 | def save_to_file(data, filename):
 47 | 	"""Saves file on specified place on harddrive.
 48 | 	
 49 | 	Args:
 50 | 		data: string to save.
 51 | 		filename: string of the filepath.
 52 | 	"""
 53 | 	try:
 54 | 		f = open(filename, 'w')
 55 | 		text_file = open(filename, "w")
 56 | 		text_file.write(data)
 57 | 		text_file.close()
 58 | 	except:
 59 | 		print('Error writing', filename)
 60 | 		return False
 61 | 
 62 | 
 63 | def save_to_json(data, filename):
 64 | 	try:
 65 | 		data = json.dumps(data, indent=2, sort_keys=True)
 66 | 	except:
 67 | 		print('Error opening', filename)
 68 | 		return None
 69 | 	save_to_file(data, filename)
 70 | 
 71 | def read_file(filename):
 72 | 	"""Reads file and returns the text.
 73 | 	
 74 | 	Args:
 75 | 		filename: name of the file
 76 | 	
 77 | 	Returns:
 78 | 		string: content of file as string
 79 | 	"""
 80 | 	f = open(filename, 'w')
 81 | 	string = f.read()
 82 | 
 83 | 	return string
 84 | 
 85 | def create_sqlite3_db(filename):
 86 | 	return sqlite3.connect(filename)
 87 | 
 88 | def execute_sqlite3_query(query):
 89 | 	c = conn.cursor()
 90 | 	c.execute(query)
 91 | 
 92 | def commit_sqlite3(conn):
 93 | 	conn.commit()
 94 | 
 95 | def close_sqlite3_conn(conn):
 96 | 	conn.close()
 97 | 
 98 | def start_timer():
 99 | 	startTime = datetime.now()
100 | 	print('start:', startTime)
101 | 	return startTime	
102 | 
def stop_timer(startTime):
	"""Print the time elapsed since startTime.

	Args:
		startTime: datetime as returned by start_timer().
	"""
	elapsed = datetime.now() - startTime
	print('runtime:', elapsed)
105 | 
def request_query(query):
	"""Send a GET request and return the decoded JSON response.

	Args:
		query: complete URL to request.

	Returns:
		The response body parsed as JSON (resp.json()).

	Raises:
		requests.HTTPError: if the response status is not 200. The previous
			code raised an undefined `ApiError`, i.e. a NameError.
	"""
	resp = requests.get(query)

	if resp.status_code != 200:
		# This means something went wrong.
		raise requests.HTTPError(
			'GET {} returned status {}'.format(query, resp.status_code),
			response=resp)

	return resp.json()
114 | 
def open_csv(filename):
	"""Read a CSV file into a pandas DataFrame.

	Args:
		filename: path of the CSV file.

	Returns:
		pandas.DataFrame with the file content (pd.read_csv defaults).
	"""
	return pd.read_csv(filename)
120 | 
121 | # sqlalchemy http://pythoncentral.io/introductory-tutorial-python-sqlalchemy/
122 | # http://docs.sqlalchemy.org/en/rel_1_1/
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 


--------------------------------------------------------------------------------