├── requirements.txt ├── pyepicollect ├── _version.py ├── __init__.py ├── batch.py ├── auth.py └── api.py ├── tests ├── __init__.py ├── test_auth.py └── test_api.py ├── .gitignore ├── README.md └── setup.py /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.21.0 2 | -------------------------------------------------------------------------------- /pyepicollect/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = '5.1.1' -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ Testing package """ 4 | 5 | -------------------------------------------------------------------------------- /pyepicollect/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Python API for reading EpiCollect 5 data (https://five.epicollect.net/) """ 3 | from . import auth, api 4 | from .auth import Auth -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDEA IDE 2 | .idea/ 3 | 4 | # IPython Notebooks checkpoint 5 | **/.ipynb_checkpoints 6 | 7 | # Distribution / packaging 8 | build/ 9 | dist/ 10 | sdist/ 11 | wheels/ 12 | pyepicollect.egg-info 13 | 14 | # Virtual Environment 15 | env/ 16 | 17 | # Cache 18 | pyepicollect/.pytest_cache/ 19 | .pytest_cache 20 | .cache 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyEpiCollect 2 | 3 | ## Read EpiCollect 5 data from python 4 | 5 | * **python code**: Rodrigo E. 
Principe (fitoprincipe82 at gmail) 6 | * **EpiCollect expert**: Pablo Masera (pablomasera83 at gmail) 7 | 8 | ### Description 9 | 10 | This is a Python wrapper around the EpiCollect 5 API available at https://developers.epicollect.net/ 11 | 12 | ### Install 13 | 14 | > pip install pyepicollect 15 | 16 | ### Use 17 | 18 | See example in binder. 19 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/fitoprincipe/pyepicollect/master) 20 | 21 | ### Unit Test 22 | 23 | 1. Make a virtual environment: (see https://docs.python-guide.org/dev/virtualenvs/#lower-level-virtualenv) 24 | > virtualenv env --python=python3 25 | 26 | Make sure to name the environment `env` so it's ignored by git. Do not do: 27 | > virtualenv venv --python=python3 28 | 29 | 2. Install the requirements 30 | > pip install -r requirements.txt 31 | 32 | 3. Run the tests 33 | > python -m pytest -v 34 | -------------------------------------------------------------------------------- /tests/test_auth.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ Test auth module """ 4 | import pytest 5 | import pyepicollect as pyep 6 | 7 | TEST_CLIENT_ID = 715 8 | TEST_CLIENT_SECRET = '7qmNC9xmvQiLxfzN6xW0B3KfvrVyBt5JWVO8chFi' 9 | TEST_NAME = 'Proyecto_API_Python' 10 | TEST_SLUG = 'proyecto-api-python' 11 | 12 | 13 | def test_request_token(): 14 | token = pyep.auth.request_token( 15 | TEST_CLIENT_ID, TEST_CLIENT_SECRET) 16 | 17 | assert type(token) is dict 18 | 19 | 20 | def test_Auth_access_token(): 21 | auth = pyep.Auth(TEST_CLIENT_ID, TEST_CLIENT_SECRET) 22 | 23 | access_token = auth.access_token 24 | assert type(access_token) is str 25 | 26 | 27 | def test_Auth_update(): 28 | auth = pyep.Auth(TEST_CLIENT_ID, TEST_CLIENT_SECRET) 29 | auth.update() 30 | request_time = auth.request_time 31 | 32 | from datetime import timedelta 33 | delta = timedelta(1) # 1 day 34 | 35 | auth.request_time = request_time - delta 36 | 37 | access_token
= auth.access_token 38 | assert type(access_token) is str -------------------------------------------------------------------------------- /pyepicollect/batch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ Batch processes for Epi Collect """ 4 | from . import api 5 | import os 6 | 7 | 8 | def download_media(slug, name, file_type, file_format=None, path=None, 9 | token=None, stream=True): 10 | """ Download Media data 11 | 12 | :param name: the name for the file 13 | :type name: str 14 | :param file_type: The type of media. One of 'photo', 'audio', 'video' 15 | :type file_type: str 16 | :param file_format: The file_format of the media. Depends on the type. 17 | See url 18 | :type file_format: str 19 | :param path: the path to download the file 20 | :type path: str 21 | :param stream: stream the data to download 22 | :type stream: bool 23 | """ 24 | response = api.get_media( 25 | slug=slug, 26 | name=name, 27 | file_type=file_type, 28 | file_format=file_format, 29 | token=token, 30 | stream=stream) 31 | 32 | if not path: 33 | path = os.getcwd() 34 | 35 | name = os.path.join(path, name) 36 | 37 | with open(name, "wb") as handle: 38 | for data in response.iter_content(): 39 | handle.write(data) 40 | 41 | return handle -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | from setuptools import setup, find_packages 5 | 6 | here = os.path.dirname(os.path.abspath(__file__)) 7 | 8 | # Utility function to read the README file. 9 | # Used for the long_description. It's nice, because now 1) we have a top level 10 | # README file and 2) it's easier to type in the README file than to put a raw 11 | # string in below ... 
12 | def read(fname): 13 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 14 | 15 | version_ns = {} 16 | with open(os.path.join(here, 'pyepicollect', '_version.py')) as f: 17 | exec(f.read(), {}, version_ns) 18 | 19 | # the setup 20 | setup( 21 | name='pyepicollect', 22 | version=version_ns['__version__'], 23 | description='Read EpiCollect 5 data from python', 24 | long_description=read('README.md'), 25 | url='', 26 | author='Rodrigo E. Principe', 27 | author_email='fitoprincipe82@gmail.com', 28 | license='', 29 | keywords='epicollect epicollect5 database', 30 | packages=find_packages(exclude=('docs', 'js')), 31 | include_package_data=True, 32 | install_requires=['requests'], 33 | extras_require={ 34 | 'dev': [], 35 | 'docs': [], 36 | 'testing': ['pytest>=4.2.1'], 37 | }, 38 | classifiers=['Programming Language :: Python :: 3', 39 | 'Programming Language :: Python :: 3.6'], 40 | ) -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ Test api module """ 4 | import pytest 5 | import pyepicollect as pyep 6 | 7 | TEST_CLIENT_ID = 715 8 | TEST_CLIENT_SECRET = '7qmNC9xmvQiLxfzN6xW0B3KfvrVyBt5JWVO8chFi' 9 | TEST_NAME = 'Proyecto_API_Python' 10 | TEST_SLUG = 'proyecto-api-python' 11 | 12 | 13 | def test_search_project(): 14 | result = pyep.api.search_project(TEST_NAME) 15 | ref = result['data'][0]['project']['ref'] 16 | 17 | assert ref == 'd5b2da82934f4761aec0d4ba3de61313' 18 | 19 | 20 | def test_get_project(): 21 | token = pyep.auth.request_token( 22 | TEST_CLIENT_ID, TEST_CLIENT_SECRET) 23 | token = token['access_token'] 24 | result = pyep.api.get_project(TEST_SLUG, token) 25 | 26 | ref = result['data']['project']['ref'] 27 | 28 | assert ref == 'd5b2da82934f4761aec0d4ba3de61313' 29 | 30 | 31 | def test_get_entries(): 32 | token = pyep.auth.request_token( 33 | TEST_CLIENT_ID, 
TEST_CLIENT_SECRET) 34 | token = token['access_token'] 35 | result = pyep.api.get_entries(TEST_SLUG, token) 36 | 37 | expected = ['meta', 'links', 'data'] 38 | keys = list(result.keys()) 39 | 40 | assert keys[0] in expected and keys[1] in expected and keys[2] in expected 41 | 42 | 43 | def test_get_media(): 44 | token = pyep.auth.request_token( 45 | TEST_CLIENT_ID, TEST_CLIENT_SECRET) 46 | token = token['access_token'] 47 | result = pyep.api.get_media(TEST_SLUG, 'logo.jpg', 'photo', 48 | file_format='project_mobile_logo', token=token) 49 | 50 | headers = result.headers 51 | 52 | assert headers['Content-Type'] == 'image/jpeg' 53 | 54 | 55 | def test_get_media_simple(): 56 | token = pyep.auth.request_token( 57 | TEST_CLIENT_ID, TEST_CLIENT_SECRET) 58 | token = token['access_token'] 59 | file_name = '3fefdeb2-1080-4bbc-88f4-57b8653b0082_1549981593.jpg' 60 | result = pyep.api.get_media(TEST_SLUG, file_name, token=token) 61 | 62 | headers = result.headers 63 | 64 | assert headers['Content-Type'] == 'image/jpeg' 65 | -------------------------------------------------------------------------------- /pyepicollect/auth.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Authentication module for EpiCollect 5 """ 3 | import requests 4 | from datetime import datetime 5 | 6 | TOKEN_REQ_URL = 'https://five.epicollect.net/api/oauth/token' 7 | REQ_URL = 'https://five.epicollect.net/api' 8 | 9 | 10 | def request_token(client_id, client_secret): 11 | """ Get token. 
def request_token(client_id, client_secret):
    """ Get token. Each token is valid for 2 hours

    Sends a ``client_credentials`` grant request to ``TOKEN_REQ_URL``.

    :param client_id: the client id sent as ``client_id``
    :param client_secret: the client secret sent as ``client_secret``
    :return: the decoded JSON body of the response. ``Auth`` reads the
        keys 'access_token' and 'expires_in' from it
    :rtype: dict
    """
    response = requests.post(TOKEN_REQ_URL, data={
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret
    })

    return response.json()


class Auth(object):
    """ Hold the client credentials and hand out a valid access token

    The token is requested lazily: nothing is sent until ``update`` is
    called or ``access_token`` is read for the first time.
    """

    def __init__(self, client_id, client_secret):
        """ Auth object to hold authentication info and validate token """
        self.client_id = client_id
        self.client_secret = client_secret
        # Payload returned by the last call to request_token (None until
        # the first update)
        self.token = None
        # Local datetime at which the current token was requested
        self.request_time = None
        self._initialized = False

    def update(self):
        """ Request token and update object """
        token = request_token(self.client_id, self.client_secret)
        self.request_time = datetime.now()
        self._initialized = True
        self.token = token

    @property
    def expires_in(self):
        """ Get seconds for the token to expire

        :return: None if no token was requested yet, otherwise the
            remaining lifetime in seconds, never below 0
        """
        if not self._initialized:
            return None

        elapsed = (datetime.now() - self.request_time).total_seconds()

        # A payload without 'expires_in' (presumably an error response)
        # counts as already expired, so the next read of access_token
        # requests a new token instead of raising KeyError here forever.
        lifetime = self.token.get('expires_in', 0)

        return max(lifetime - elapsed, 0)

    def has_expired(self):
        """ Check if token has expired based on the requested time

        :return: True if no token was requested yet or its lifetime is over
        :rtype: bool
        """
        return not self._initialized or self.expires_in <= 0

    @property
    def access_token(self):
        """ This property will always return the access token

        A new token is requested first if the current one is missing or
        expired.

        :raises KeyError: if the token payload has no 'access_token' key
        """
        if self.has_expired():
            self.update()

        return self.token['access_token']
from . import auth
import requests

MEDIA_TYPES = ['photo', 'audio', 'video']
EXT_PHOTO = ['gif', 'ico', 'jpeg', 'jpg', 'svg', 'tiff', 'tif', 'webp']
EXT_AUDIO = ['aac', 'mid', 'midi', 'ogg', 'wav', 'weba', '3gp', '3g2']
EXT_VIDEO = ['avi', 'mpeg', 'mpg', 'ogv', 'webm', '3gp', '3g2']

# Value sent as `format` for each media type when the caller gives none
_DEFAULT_FORMATS = {'photo': 'entry_original',
                    'audio': 'audio',
                    'video': 'video'}


def _auth_headers(token):
    """ Return the bearer-token headers, or None when there is no token """
    if not token:
        return None
    return {'Authorization': 'Bearer ' + token}


def _guess_file_type(name):
    """ Infer the media type ('photo', 'audio' or 'video') from a file name

    Only the text after the last dot is used, compared case-insensitively.

    :raises ExtensionError: if the name has no extension or the extension
        is not listed in EXT_PHOTO, EXT_AUDIO or EXT_VIDEO
    """
    _, dot, ext = name.rpartition('.')
    if not dot or not ext:
        raise ExtensionError

    ext = ext.lower()
    # Order matters: '3gp' and '3g2' are listed for both audio and video
    # and keep resolving to 'audio'.
    candidates = (('photo', EXT_PHOTO),
                  ('audio', EXT_AUDIO),
                  ('video', EXT_VIDEO))
    for file_type, extensions in candidates:
        if ext in extensions:
            return file_type

    raise ExtensionError(
        "Cannot infer the media type from extension '{}', please provide"
        " a file_type with parameter file_type".format(ext))


def search_project(name):
    """ Search a Project based on it's name

    :param name: the project name, appended to the `projects` URL
    :return: the decoded JSON body of the response
    """
    url = '{}/projects/{}'.format(auth.REQ_URL, name)

    return requests.get(url).json()


def get_project(slug, token=None):
    """ Get a Project based on it's slug

    https://epicollect5.gitbooks.io/epicollect5-api/project/export-project.html

    :param slug: The slugified project name
    :param token: access token. No Authorization header is sent without it
    :return: the decoded JSON body of the response
    """
    url = '{}/export/project/{}'.format(auth.REQ_URL, slug)
    response = requests.get(url, headers=_auth_headers(token))

    return response.json()


def get_entries(slug, token=None, **kwargs):
    """ Get Entries. Extra params can be found on the web:

    https://epicollect5.gitbooks.io/epicollect5-api/entries.html

    :param slug: The slugified project name
    :param token: access token. No Authorization header is sent without it
    :param kwargs: extra arguments, sent as query parameters. See URL
    :return: the decoded JSON body of the response
    """
    url = '{}/export/entries/{}'.format(auth.REQ_URL, slug)
    response = requests.get(url, headers=_auth_headers(token), params=kwargs)

    return response.json()


def get_branch_entries(slug, branch, token=None, **kwargs):
    """ Get the branch entries for a particular Branch in a Form for a Project

    https://epicollect5.gitbooks.io/epicollect5-api/get-branch-entries.html

    :param slug: The slugified project name
    :param branch: The ref of a branch input in a form
    :param token: access token
    :param kwargs: extra arguments. See URL
    :return: the result of get_entries called with `branch_ref=branch`
    """
    return get_entries(slug, token, branch_ref=branch, **kwargs)


def get_media(slug, name, file_type=None, file_format=None, token=None,
              stream=True):
    """ Get Media data

    https://epicollect5.gitbooks.io/epicollect5-api/media/get-media.html

    To get the binary content of the fetched data do:

    ```python
    media = api.get_media(**kwargs)
    binary_data = media.content
    ```

    :param slug: The slugified project name
    :type slug: str
    :param name: the name of the file to download
    :type name: str
    :param file_type: The type of media. One of 'photo', 'audio', 'video'.
        If not given it is inferred from the extension of `name`
    :type file_type: str
    :param file_format: The format of the media. Depends on the type. See
        url. Defaults to 'entry_original' for photos, 'audio' for audio
        and 'video' for video
    :type file_format: str
    :param token: access token. No Authorization header is sent without it
    :param stream: passed to `requests.get` as its `stream` argument
    :type stream: bool
    :return: a response object (see http://docs.python-requests.org/en/latest/user/quickstart/#response-content)
    :rtype: requests.models.Response
    :raises ValueError: if `file_type` is given but not in MEDIA_TYPES
    :raises ExtensionError: if `file_type` is not given and cannot be
        inferred from `name`
    """
    url = '{}/export/media/{}'.format(auth.REQ_URL, slug)

    if file_type and file_type not in MEDIA_TYPES:
        raise ValueError(
            'file_type parameter must be one of {}'.format(MEDIA_TYPES))

    if not file_type:
        file_type = _guess_file_type(name)

    if not file_format:
        file_format = _DEFAULT_FORMATS[file_type]

    params = {'type': file_type, 'format': file_format, 'name': name}

    return requests.get(
        url,
        headers=_auth_headers(token),
        params=params,
        stream=stream
    )
# Custom Exceptions
class ExtensionError(Exception):
    """ Raised when the media type cannot be inferred from a file name

    :param message: text of the error. When None, a default message asking
        for the `file_type` parameter is used
    """

    def __init__(self, message=None):
        if message is None:
            # Worded for both a missing and an unrecognised extension
            message = 'The media type could not be inferred from the'\
                      ' extension of the name, please provide a file_type'\
                      ' with parameter file_type'
        super(ExtensionError, self).__init__(message)