├── docs ├── _config.yml ├── api.rst ├── index.rst ├── Makefile ├── usage.rst └── conf.py ├── setup.cfg ├── requirements.txt ├── examples ├── README.md ├── client_side_filtering.py └── multi_processing.py ├── LICENSE.md ├── .gitignore ├── .circleci └── config.yml ├── setup.py ├── tests └── test___init__.py ├── README.md └── enverus_developer_api └── __init__.py /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [metadata] 5 | license_file = LICENSE.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.5.1,<3 2 | unicodecsv==0.14.1 3 | 4 | pandas~=1.5.3 5 | setuptools~=65.3.0 6 | urllib3~=1.26.14 -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | ### enverus-developer-api Examples 2 | 3 | Basic example workflows are provided as a reference here. 4 | 5 | If you'd like to see a usage example, please [open an issue](https://github.com/enverus-ea/enverus-developer-api/issues/new). 6 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | 2 | API documentation 3 | ================= 4 | 5 | DirectAccessV1 6 | -------------- 7 | 8 | .. autoclass:: directaccess.DirectAccessV1 9 | :members: query, to_csv 10 | :special-members: 11 | 12 | DirectAccessV2 13 | -------------- 14 | 15 | .. 
autoclass:: directaccess.DirectAccessV2 16 | :members: get_access_token, ddl, docs, count, in_, query, to_csv, to_dataframe 17 | :special-members: 18 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. direct-access-py documentation master file, created by 2 | sphinx-quickstart on Sun Jan 19 07:51:00 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to direct-access-py's documentation! 7 | ============================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | usage 14 | api 15 | 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Cole Howard 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .idea/ 7 | requirements-dev.txt 8 | *.csv 9 | *.sql 10 | scripts/ 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *,cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # IPython Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | *.iml 97 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | deploy: 4 | docker: 5 | - image: 
circleci/python:3.6 6 | steps: 7 | - checkout 8 | 9 | - restore_cache: 10 | key: v1-dependency-cache-{{ checksum "setup.py" }} 11 | 12 | - run: 13 | name: install python dependencies 14 | command: | 15 | python3 -m venv venv 16 | . venv/bin/activate 17 | python3 -m pip install -r requirements.txt 18 | python3 -m pip install twine 19 | 20 | - save_cache: 21 | key: v1-dependency-cache-{{ checksum "setup.py" }} 22 | paths: 23 | - "venv" 24 | 25 | - run: 26 | name: verify git tag vs. version 27 | command: | 28 | python3 -m venv venv 29 | . venv/bin/activate 30 | python setup.py verify 31 | 32 | - run: 33 | name: init .pypirc 34 | command: | 35 | echo -e "[pypi]" >> ~/.pypirc 36 | echo -e "username = $PYPI_USERNAME" >> ~/.pypirc 37 | echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc 38 | 39 | - run: 40 | name: create packages 41 | command: | 42 | python3 setup.py sdist bdist_wheel 43 | 44 | - run: 45 | name: upload to pypi 46 | command: | 47 | . venv/bin/activate 48 | twine upload dist/* 49 | 50 | workflows: 51 | version: 2 52 | build_and_deploy: 53 | jobs: 54 | - deploy: 55 | filters: 56 | tags: 57 | only: /[0-9]+(\.[0-9]+)*/ 58 | branches: 59 | ignore: /.*/ 60 | -------------------------------------------------------------------------------- /examples/client_side_filtering.py: -------------------------------------------------------------------------------- 1 | """ 2 | client_side_filtering.py 3 | 4 | This example demonstrates using client-side filtering to query on columns that 5 | aren't filterable via the API. While there's no speed up using 6 | this method, we're able to query the API responses down in a memory-efficient way 7 | and without loading unneeded records into our workflow. 8 | 9 | Consider this the equivalent of a full table scan in a database. 10 | 11 | In the sample below, we're requesting all records in Texas and without DeletedDates in batches of 10k. 
12 | Then, we're filtering the responses down to those records that have had their 13 | production allocated using Drillinginfo's production allocation algorithm and 14 | where LowerPerf values exist and are greater than or equal to 2000 and UpperPerf 15 | values exist and are less than or equal to 3000. 16 | """ 17 | import os 18 | try: # Use the memory-efficient ifilter function available in itertools for Python 2 19 | from itertools import ifilter as filter 20 | except ImportError: # The built in filter function returns a generator in Python 3 21 | pass 22 | 23 | from enverus_developer_api import DirectAccessV2 24 | 25 | # Initialize our Direct Access object 26 | d2 = DirectAccessV2( 27 | client_id=os.getenv('DIRECTACCESS_CLIENT_ID'), 28 | client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET') 29 | ) 30 | 31 | # Build the API query 32 | query = d2.query('producing-entities', pagesize=10000, deleteddate='eq(null)', state='TX') 33 | 34 | # Build the client-side filter 35 | rows = filter(lambda x: 36 | x['AllocPlus'] == 'Y' 37 | and x['LowerPerf'] is not None 38 | and x['LowerPerf'] >= 2000 39 | and x['UpperPerf'] is not None 40 | and x['UpperPerf'] <= 3000, 41 | query) 42 | 43 | # Execute the query and filter the responses 44 | # Note that there will be periods of apparent inactivity while records we don't need are tossed 45 | for row in rows: 46 | print(row) 47 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import sys 5 | from setuptools import setup, find_packages 6 | from setuptools.command.install import install 7 | 8 | VERSION = '3.2.0' 9 | 10 | 11 | class VerifyVersionCommand(install): 12 | description = 'verify that git tag matches VERSION prior to publishing to pypi' 13 | 14 | def run(self): 15 | tag = os.getenv('CIRCLE_TAG') 16 | 17 | if tag != VERSION: 18 | 
info = 'Git tag: {0} does not match the version of this app: {1}'.format( 19 | tag, VERSION 20 | ) 21 | sys.exit(info) 22 | 23 | 24 | def read(fname): 25 | with open(fname) as fp: 26 | content = fp.read() 27 | return content 28 | 29 | 30 | pandas = [ 31 | 'pandas>=0.24.0' 32 | ] 33 | 34 | setup( 35 | name='enverus-developer-api', 36 | version=VERSION, 37 | description='Enverus Developer API Python Client', 38 | long_description=read('README.md'), 39 | long_description_content_type='text/markdown', 40 | author='Direct Access', 41 | author_email='directaccess@enverus.com', 42 | url='https://github.com/enverus-ea/enverus-developer-api', 43 | license='MIT', 44 | keywords=['enverus', 'drillinginfo', 'directaccess', 'oil', 'gas'], 45 | packages=find_packages(exclude=('test*', )), 46 | package_dir={'enverus_developer_api': 'enverus_developer_api'}, 47 | install_requires=[ 48 | 'requests>=2.5.1,<3', 49 | 'unicodecsv==0.14.1', 50 | 'urllib3>=1.26.14', 51 | ], 52 | extras_require={'pandas': pandas}, 53 | cmdclass={ 54 | 'verify': VerifyVersionCommand, 55 | }, 56 | classifiers=[ 57 | 'Development Status :: 5 - Production/Stable', 58 | 'Intended Audience :: Developers', 59 | 'Intended Audience :: End Users/Desktop', 60 | 'License :: OSI Approved :: MIT License', 61 | 'Operating System :: OS Independent', 62 | 'Programming Language :: Python :: 2', 63 | 'Programming Language :: Python :: 3' 64 | ] 65 | ) 66 | -------------------------------------------------------------------------------- /examples/multi_processing.py: -------------------------------------------------------------------------------- 1 | """ 2 | multi_processing.py 3 | 4 | This example demonstrates concurrent loading of Drillinginfo datasets via Python's multiprocessing module. 5 | 6 | The DirectAccessV2 class accepts an optional access_token keyword argument (beginning in version 1.2.0). 7 | When provided, an initial authentication request will not be made. 
We still provide our API Key, Client ID 8 | and Client Secret to the class so that the access token can be refreshed if needed. 9 | 10 | In the sample below, we simultaneously write three CSVs from the producing-entities, well-rollups and permits 11 | API endpoints. This results in much faster loading time than when done sequentially. 12 | """ 13 | import os 14 | import csv 15 | from multiprocessing import Process 16 | 17 | from enverus_developer_api import DirectAccessV2 18 | 19 | # Retrieve our access token 20 | ACCESS_TOKEN = DirectAccessV2( 21 | client_id=os.getenv('DIRECTACCESS_CLIENT_ID'), 22 | client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET') 23 | ).access_token 24 | 25 | 26 | def load(endpoint, **options): 27 | """ 28 | A generic load function that will be called by each of the three processes. 29 | 30 | :param endpoint: the Direct Access API endpoint 31 | :param options: the query parameters to provide on the endpoint 32 | :return: 33 | """ 34 | # Create a DirectAccessV2 client within the function, providing it our already existing access token 35 | # and thus avoiding unnecessary authentication calls 36 | client = DirectAccessV2( 37 | client_id=os.getenv('DIRECTACCESS_CLIENT_ID'), 38 | client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET'), 39 | access_token=ACCESS_TOKEN 40 | ) 41 | 42 | count = None 43 | with open(endpoint + '.csv', mode='w') as f: 44 | writer = csv.writer(f) 45 | for i, row in enumerate(client.query(endpoint, **options), start=1): 46 | count = i 47 | if count == 1: 48 | writer.writerow(row.keys()) 49 | writer.writerow(row.values()) 50 | 51 | if count % options.get('pagesize', 100000) == 0: 52 | print('Wrote {} records for {}'.format(count, endpoint)) 53 | 54 | print('Completed writing {}. 
Final count: {}'.format(endpoint, count)) 55 | return 56 | 57 | 58 | def main(): 59 | procs = list() 60 | well_rollups_process = Process( 61 | target=load, 62 | kwargs=dict( 63 | endpoint='well-rollups', 64 | pagesize=10000, 65 | deleteddate='eq(null)' 66 | ) 67 | ) 68 | procs.append(well_rollups_process) 69 | 70 | producing_entity_process = Process( 71 | target=load, 72 | kwargs=dict( 73 | endpoint='producing-entities', 74 | pagesize=100000, 75 | deleteddate='eq(null)' 76 | ) 77 | ) 78 | procs.append(producing_entity_process) 79 | 80 | permits_process = Process( 81 | target=load, 82 | kwargs=dict( 83 | endpoint='permits', 84 | pagesize=100000, 85 | deleteddate='eq(null)' 86 | ) 87 | ) 88 | procs.append(permits_process) 89 | 90 | [x.start() for x in procs] 91 | [x.join() for x in procs] 92 | return 93 | 94 | 95 | if __name__ == '__main__': 96 | main() 97 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | 2 | Usage Guide 3 | =========== 4 | This module is a thin wrapper around Enverus' Developer API (formerly known as Direct Access). 5 | It handles authentication and token management, pagination and network-related 6 | error handling/retries. It also provides a simple, convenient method to write 7 | results to CSV. 8 | 9 | ``direct-access-py`` is built and tested on Python 3.6 but should work on Python 2.7 and up. 10 | 11 | 12 | Installation 13 | ############ 14 | 15 | The easiest way to install ``direct-access-py`` is from the `Python Package Index 16 | `_ using ``pip``: 17 | 18 | .. code-block:: bash 19 | 20 | $ pip install directaccess 21 | 22 | To install it manually, simply download the repository from Github: 23 | 24 | .. 
code-block:: bash 25 | 26 | $ git clone https://github.com/wchatx/direct-access-py.git 27 | $ cd directaccess/ 28 | $ python setup.py install 29 | 30 | Notes 31 | ##### 32 | The ``directaccess`` module only supports the JSON format from the API. The ``query`` method 33 | returns a generator of API responses as dictionaries. 34 | 35 | Version 2 of the API uses "soft deletes". Records marked as deleted will have a populated 36 | ``DeletedDate`` field. If these records are not important for your workflow, you should always 37 | provide ``deleteddate='null'`` as a keyword argument to the V2 ``query`` method 38 | 39 | It is also important to note that your API credentials should be treated like any other password. 40 | Take care to not check them into public code repositories or expose them outside of your organization. 41 | 42 | If you find a problem with this module, have a feature request or just need a little help getting started, 43 | please `open an issue `_! If you're having 44 | trouble with the Enverus Drillinginfo Developer API, you should `contact support 45 | `_. 46 | 47 | Quick Start 48 | ########### 49 | 50 | Direct Access Version 1 51 | *********************** 52 | For version 1 of the API, create an instance of the DirectAccessV1 class and provide it your API key 53 | 54 | .. code-block:: python 55 | 56 | from directaccess import DirectAccessV1 57 | 58 | d1 = DirectAccessV1(api_key='your-api-key') 59 | 60 | .. warning:: 61 | 62 | Direct Access Version 1 will reach the end of its life in July, 2020. 63 | Please upgrade your application as Version 1 will be inaccessible after that date. 64 | A future version of this module will drop support for Version 1. 65 | 66 | Provide the query method the dataset as the first argument and any query parameters as keyword arguments. 67 | See valid dataset names and query params in the Direct Access documentation. 68 | The query method returns a generator of API responses as dicts. 69 | 70 | .. 
code-block:: python 71 | 72 | for row in d1.query('legal-leases', county_parish='Reeves', state_province='TX'): 73 | print(row) 74 | 75 | Direct Access Version 2 76 | *********************** 77 | For version 2 of the API, create an instance of the DirectAccessV2 class, providing it your API key, client id and client secret. 78 | The returned access token will be available as an attribute on the instance (``d2.access_token``) and the Authorization 79 | header is set automatically 80 | 81 | .. code-block:: python 82 | 83 | from directaccess import DirectAccessV2 84 | 85 | d2 = DirectAccessV2( 86 | api_key='your-api-key', 87 | client_id='your-client-id', 88 | client_secret='your-client-secret', 89 | ) 90 | 91 | 92 | Like with the V1 class, provide the query method the dataset and query params. All query parameters must match the valid 93 | parameters found in the Direct Access documentation and be passed as keyword arguments. 94 | 95 | .. code-block:: python 96 | 97 | for row in d2.query('well-origins', county='REEVES', pagesize=10000): 98 | print(row) 99 | 100 | 101 | Version 2 Concepts 102 | ################## 103 | 104 | Filter Functions 105 | **************** 106 | Direct Access version 2 supports filter functions. These can be passed as strings on the keyword arguments. 107 | 108 | Some common filters are greater than (``gt()``), less than (``lt()``), ``null``, not null (``not(null)``) and 109 | between (``btw()``). 110 | See the Direct Access documentation for a list of all available filters. 111 | 112 | .. 
code-block:: python 113 | 114 | # Get well records updated after 2018-08-01 and without deleted dates 115 | for row in d2.query('well-origins', updateddate='gt(2018-08-01)', deleteddate='null'): 116 | print(row) 117 | 118 | # Get permit records with approved dates between 2018-03-01 and 2018-06-01 119 | for row in d2.query('permits', approveddate='btw(2018-03-01,2018-06-01)'): 120 | print(row) 121 | 122 | Fields keyword 123 | ************** 124 | You can use the ``fields`` keyword to limit the returned fields in your queries. 125 | This has the benefit of limiting the API responses to only those fields needed for your 126 | workflow and will significantly improve the speed of your queries. 127 | 128 | .. code-block:: python 129 | 130 | for row in d2.query('rigs', fields='DrillType,LeaseName,PermitDepth'): 131 | print(row) 132 | 133 | Escaping 134 | ******** 135 | When making requests containing certain characters like commas, use a backslash to escape them. 136 | 137 | .. code-block:: python 138 | 139 | # Escaping the comma before LLC 140 | for row in d2.query('producing-entities', curropername='PERCUSSION PETROLEUM OPERATING\, LLC'): 141 | print(row) 142 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'directaccess' 23 | copyright = '2021' 24 | author = 'directaccess' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.viewcode', 44 | 'sphinx.ext.githubpages', 45 | ] 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | # The suffix(es) of source filenames. 51 | # You can specify multiple suffix as a list of string: 52 | # 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = '.rst' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # The language for content autogenerated by Sphinx. Refer to documentation 60 | # for a list of supported languages. 61 | # 62 | # This is also used if you do content translation via gettext catalogs. 63 | # Usually you set "language" from the command line for these cases. 64 | language = None 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path. 69 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 70 | 71 | # The name of the Pygments (syntax highlighting) style to use. 
72 | pygments_style = None 73 | 74 | 75 | # -- Options for HTML output ------------------------------------------------- 76 | 77 | # The theme to use for HTML and HTML Help pages. See the documentation for 78 | # a list of builtin themes. 79 | # 80 | html_theme = 'alabaster' 81 | 82 | # Theme options are theme-specific and customize the look and feel of a theme 83 | # further. For a list of options available for each theme, see the 84 | # documentation. 85 | # 86 | # html_theme_options = {} 87 | 88 | # Add any paths that contain custom static files (such as style sheets) here, 89 | # relative to this directory. They are copied after the builtin static files, 90 | # so a file named "default.css" will overwrite the builtin "default.css". 91 | html_static_path = ['_static'] 92 | 93 | # Custom sidebar templates, must be a dictionary that maps document names 94 | # to template names. 95 | # 96 | # The default sidebars (for documents that don't match any pattern) are 97 | # defined by theme itself. Builtin themes are using these templates by 98 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 99 | # 'searchbox.html']``. 100 | # 101 | # html_sidebars = {} 102 | 103 | 104 | # -- Options for HTMLHelp output --------------------------------------------- 105 | 106 | # Output file base name for HTML help builder. 107 | htmlhelp_basename = 'direct-access-pydoc' 108 | 109 | 110 | # -- Options for LaTeX output ------------------------------------------------ 111 | 112 | latex_elements = { 113 | # The paper size ('letterpaper' or 'a4paper'). 114 | # 115 | # 'papersize': 'letterpaper', 116 | 117 | # The font size ('10pt', '11pt' or '12pt'). 118 | # 119 | # 'pointsize': '10pt', 120 | 121 | # Additional stuff for the LaTeX preamble. 122 | # 123 | # 'preamble': '', 124 | 125 | # Latex figure (float) alignment 126 | # 127 | # 'figure_align': 'htbp', 128 | } 129 | 130 | # Grouping the document tree into LaTeX files. 
List of tuples 131 | # (source start file, target name, title, 132 | # author, documentclass [howto, manual, or own class]). 133 | latex_documents = [ 134 | (master_doc, 'direct-access-py.tex', 'direct-access-py Documentation', 135 | 'Direct Access', 'manual'), 136 | ] 137 | 138 | 139 | # -- Options for manual page output ------------------------------------------ 140 | 141 | # One entry per manual page. List of tuples 142 | # (source start file, name, description, authors, manual section). 143 | man_pages = [ 144 | (master_doc, 'direct-access-py', 'direct-access-py Documentation', 145 | [author], 1) 146 | ] 147 | 148 | 149 | # -- Options for Texinfo output ---------------------------------------------- 150 | 151 | # Grouping the document tree into Texinfo files. List of tuples 152 | # (source start file, target name, title, author, 153 | # dir menu entry, description, category) 154 | texinfo_documents = [ 155 | (master_doc, 'direct-access-py', 'direct-access-py Documentation', 156 | author, 'direct-access-py', 'One line description of project.', 157 | 'Miscellaneous'), 158 | ] 159 | 160 | 161 | # -- Options for Epub output ------------------------------------------------- 162 | 163 | # Bibliographic Dublin Core info. 164 | epub_title = project 165 | 166 | # The unique identifier of the text. This can be a ISBN number 167 | # or the project homepage. 168 | # 169 | # epub_identifier = '' 170 | 171 | # A unique identification for the text. 172 | # 173 | # epub_uid = '' 174 | 175 | # A list of files that should not be packed into the epub file. 
176 | epub_exclude_files = ['search.html'] 177 | 178 | 179 | # -- Extension configuration ------------------------------------------------- 180 | -------------------------------------------------------------------------------- /tests/test___init__.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import unittest 3 | from unittest import TestCase 4 | import os 5 | import logging 6 | from tempfile import TemporaryFile, mkdtemp 7 | from pandas.api.types import is_datetime64_ns_dtype, is_float_dtype, is_int64_dtype, is_object_dtype 8 | from multiprocessing import Process 9 | 10 | from enverus_developer_api import ( 11 | DeveloperAPIv3, 12 | DirectAccessV2, 13 | DADatasetException, 14 | DAQueryException, 15 | DAAuthException 16 | ) 17 | 18 | LOG_LEVEL = logging.DEBUG 19 | if os.environ.get("GITHUB_SHA"): 20 | LOG_LEVEL = logging.ERROR 21 | 22 | 23 | def set_token_v2(): 24 | if not os.environ.get("DIRECTACCESS_TOKEN"): 25 | os.environ["DIRECTACCESS_TOKEN"] = DirectAccessV2( 26 | client_id=os.environ.get("DIRECTACCESS_CLIENT_ID"), 27 | client_secret=os.environ.get("DIRECTACCESS_CLIENT_SECRET"), 28 | api_key=os.environ.get("DIRECTACCESS_API_KEY"), 29 | ).access_token 30 | return 31 | 32 | 33 | def set_token_v3(): 34 | if not os.environ.get("DIRECTACCESSV3_TOKEN"): 35 | os.environ["DIRECTACCESSV3_TOKEN"] = DeveloperAPIv3( 36 | secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"), 37 | # url="https://api.dev.enverus.com/" 38 | ).access_token 39 | return 40 | 41 | 42 | def create_developerapi_v3(): 43 | return DeveloperAPIv3( 44 | secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"), 45 | access_token=os.environ.get("DIRECTACCESSV3_TOKEN"), 46 | # url="https://api.dev.enverus.com/", 47 | retries=5, 48 | backoff_factor=10, 49 | log_level=LOG_LEVEL 50 | ) 51 | 52 | 53 | def create_directaccess_v2(): 54 | return DirectAccessV2( 55 | api_key=os.environ.get("DIRECTACCESS_API_KEY"), 56 | 
client_id=os.environ.get("DIRECTACCESS_CLIENT_ID"), 57 | client_secret=os.environ.get("DIRECTACCESS_CLIENT_SECRET"), 58 | access_token=os.environ.get("DIRECTACCESS_TOKEN"), 59 | retries=5, 60 | backoff_factor=10, 61 | log_level=LOG_LEVEL 62 | ) 63 | 64 | 65 | def proc_query(dataset): 66 | v3 = create_developerapi_v3() 67 | resp = v3.query(dataset, deleteddate="null") 68 | next(resp) 69 | TestCase.assertTrue(resp) 70 | return 71 | 72 | 73 | class TestEnverusDeveloperAPI(TestCase): 74 | @classmethod 75 | def setUpClass(cls) -> None: 76 | set_token_v3() 77 | cls.v3 = create_developerapi_v3() 78 | 79 | set_token_v2() 80 | cls.v2 = create_directaccess_v2() 81 | 82 | def test_missing_secret_key_v3(self): 83 | with self.assertRaises(DAAuthException): 84 | DeveloperAPIv3(secret_key=None, log_level=LOG_LEVEL) 85 | 86 | def test_query_v3(self): 87 | query = self.v3.query("casings", pagesize=10, deleteddate="null") 88 | records = list() 89 | for i, row in enumerate(query, start=1): 90 | # print(row) 91 | records.append(row) 92 | if i % 30 == 0: 93 | break 94 | self.assertTrue(len(records) > 0, "test_query_v3 records list empty") 95 | 96 | def test_query_v3_omit_header_next_link(self): 97 | query = self.v3.query("casings", pagesize=10, deleteddate="null", _headers={'X-Omit-Header-Next-Links': 'true'}) 98 | records = list() 99 | for i, row in enumerate(query, start=1): 100 | records.append(row) 101 | if i % 30 == 0: 102 | break 103 | self.assertTrue(len(records) > 0, "test_query_v3 records list empty") 104 | 105 | def test_is_omit_header_next_link(self): 106 | is_omit_next_link = self.v3.is_omit_header_next_link(_headers={'X-Omit-Header-Next-Links': 'true'}) 107 | self.assertTrue(is_omit_next_link, "test_is_omit_header_next_link should contain omit header") 108 | 109 | is_omit_next_link = self.v3.is_omit_header_next_link(_headers={'Omit-Header-Next-Links': 'true'}) 110 | self.assertTrue(not is_omit_next_link, "test_is_omit_header_next_link should not contain omit header") 111 | 
112 | def test_parse_links(self): 113 | links = self.v3.parse_links( 114 | {"next": "; rel='next'"}) 115 | self.assertTrue(links["next"]["url"], "/economics?action=next&next_page=WellID+%3C+840600005436298&pagesize=50") 116 | 117 | def test_docs_v3(self): 118 | docs = self.v3.docs("casings") 119 | self.assertTrue(docs) 120 | self.assertIsInstance(docs, list) 121 | 122 | def test_ddl_v3(self): 123 | ddl = self.v3.ddl("casings", database="pg") 124 | with TemporaryFile(mode="w+") as f: 125 | f.write(ddl) 126 | f.seek(0) 127 | for line in f: 128 | self.assertTrue(line.startswith("CREATE TABLE casings")) 129 | break 130 | 131 | def test_ddl_invalid_db_v3(self): 132 | with self.assertRaises(DAQueryException): 133 | self.v3.ddl("casings", database="invalid") 134 | 135 | def test_count_v3(self): 136 | count = self.v3.count("wells", updateddate="ge(2021-05-01)", StateProvince="in(TX,LA,WY)") 137 | self.assertIsNotNone(count) 138 | self.assertIsInstance(count, int) 139 | 140 | def test_count_invalid_dataset_v3(self): 141 | with self.assertRaises(DADatasetException): 142 | self.v3.count("invalid") 143 | 144 | def test_token_refresh_v3(self): 145 | v3 = DeveloperAPIv3( 146 | secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"), 147 | access_token="invalid", 148 | # url="https://api.dev.enverus.com/", 149 | retries=5, 150 | backoff_factor=10, 151 | log_level=LOG_LEVEL 152 | ) 153 | 154 | invalid_token = v3.access_token 155 | count = v3.count("rigs", deleteddate="null") 156 | query = v3.query("rigs", pagesize=10000, deleteddate="null") 157 | self.assertTrue(len([x for x in query]) == count) 158 | self.assertTrue(invalid_token != v3.access_token) 159 | 160 | def test_csv_v3(self): 161 | tempdir = mkdtemp() 162 | path = os.path.join(tempdir, "rigs.csv") 163 | 164 | dataset = "rigs" 165 | options = dict(pagesize=10000, deleteddate="null") 166 | 167 | count = self.v3.count(dataset, **options) 168 | query = self.v3.query(dataset, **options) 169 | self.v3.to_csv(query, path, 
                       log_progress=True, delimiter=",", quoting=csv.QUOTE_MINIMAL)

        with open(path, mode="r") as f:
            reader = csv.reader(f)
            row_count = len([x for x in reader])
        # to_csv writes a header row, hence count + 1
        self.assertTrue(row_count == (count + 1))

    def test_dataframe_v3(self):
        df = self.v3.to_dataframe("rigs", pagesize=1000, deleteddate="null")

        # Check index is set to API endpoint "primary keys"
        self.assertListEqual(df.index.names, ["CompletionID", "WellID"])

        # Check object dtypes
        self.assertTrue(is_object_dtype(df.API_UWI))
        self.assertTrue(is_object_dtype(df.ActiveStatus))

        # Check datetime64 dtypes
        self.assertTrue(is_datetime64_ns_dtype(df.DeletedDate))
        self.assertTrue(is_datetime64_ns_dtype(df.SpudDate))
        self.assertTrue(is_datetime64_ns_dtype(df.UpdatedDate))

        # Check Int64 dtypes
        self.assertTrue(is_int64_dtype(df.RatedWaterDepth))
        self.assertTrue(is_int64_dtype(df.RatedHP))

        # Check float dtypes
        self.assertTrue(is_float_dtype(df.RigLatitudeWGS84))
        self.assertTrue(is_float_dtype(df.RigLongitudeWGS84))

    def test_multiple_processes_v3(self):
        # Launch two child processes, one for rigs and one for casings

        procs = [
            Process(target=proc_query, kwargs=dict(dataset="rigs")),
            Process(target=proc_query, kwargs=dict(dataset="casings"))
        ]

        [x.start() for x in procs]
        [x.join() for x in procs]
        # NOTE(review): child exit codes are not checked, so a failure inside
        # proc_query would not fail this test — consider asserting
        # x.exitcode == 0 for each process after join.

    def test_enter_exit(self):
        # Context-manager protocol: session is live inside the block and
        # cleared by __exit__.
        with DeveloperAPIv3(secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"),
                            access_token=os.environ.get("DIRECTACCESSV3_TOKEN")) as api:
            self.assertIsInstance(api, DeveloperAPIv3)
            self.assertIsNotNone(api.session)

        self.assertIsNone(api.session)

    # ******************** DirectAccessV2 Test Cases **********************

    def test_missing_client_id_v2(self):
        with self.assertRaises(DAAuthException):
DirectAccessV2(client_id=None, 223 | client_secret=os.environ.get("DIRECTACCESS_CLIENT_SECRET"), 224 | log_level=LOG_LEVEL) 225 | 226 | def test_missing_client_secret_v2(self): 227 | with self.assertRaises(DAAuthException): 228 | DirectAccessV2(client_id=os.environ.get("DIRECTACCESS_CLIENT_ID"), 229 | client_secret=None, 230 | log_level=LOG_LEVEL) 231 | 232 | def test_query_v2(self): 233 | query = self.v2.query("rigs", pagesize=10, deleteddate="null") 234 | records = list() 235 | for i, row in enumerate(query, start=1): 236 | # print(row) 237 | records.append(row) 238 | if i % 30 == 0: 239 | break 240 | self.assertTrue(len(records) > 0, "test_query_v2 records list empty") 241 | 242 | 243 | if __name__ == '__main__': 244 | unittest.main() 245 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # enverus-developer-api 2 | 3 | [![PyPI version](https://badge.fury.io/py/enverus-developer-api.svg)](https://badge.fury.io/py/enverus-developer-api) 4 | 5 | A thin wrapper around Enverus' Developer API. Handles authentication and token management, pagination and 6 | network-related error handling/retries. 7 | 8 | This module is built and tested on Python 3.9 but should work on Python 2.7 and up. 9 | 10 | 11 | ## Install 12 | ```commandline 13 | pip install enverus-developer-api 14 | ``` 15 | 16 | ## Clients 17 | 18 | ### Developer API - Version 3 19 | DirectAccess has been rebranded as DeveloperAPI. For version 3 of the API, create an instance of the DeveloperAPIv3 class, providing it your secret_key (not the same as the v2 api_key). 20 | The returned access token will be available as an attribute on the instance (v3.access_token) and the Authorization 21 | header is set automatically. 
```python
from enverus_developer_api import DeveloperAPIv3

v3 = DeveloperAPIv3(secret_key='')
```
Your secret_key can be generated, retrieved and revoked at https://app.enverus.com/provisioning/directaccess

The Developer API Version 3 endpoint documentation can be found at https://app.enverus.com/direct/#/api/explorer/v3/gettingStarted

### Direct Access - Version 2
For version 2 of the API, create an instance of the DirectAccessV2 class, providing it your API key, client id and client secret.
The returned access token will be available as an attribute on the instance (d2.access_token) and the Authorization
header is set automatically
```python
from enverus_developer_api import DirectAccessV2

d2 = DirectAccessV2(
    client_id='',
    client_secret='',
)
```
The Direct Access Version 2 endpoint documentation can be found at https://app.enverus.com/direct/#/api/explorer/v2/gettingStarted

## Usage

The functionality outlined below exists for **both** DeveloperAPIv3 and DirectAccessV2 clients.

Only 1 instance of the client needs to be created to perform all your queries. It can execute multiple simultaneous requests if needed,
and will automatically refresh the access_token for the Authorization header if expired.
An access_token is valid for 8 hours, and there is a rate limit on the number of access_tokens that can be requested per minute,
which is why we recommend creating and reusing a single DeveloperAPIv3 client instance for all of your querying.

Provide the query method the dataset and query params. All query parameters must match the valid
Request Parameters found in the Developer API documentation for a given dataset and be passed as keyword arguments.
56 | 57 | ```python 58 | for row in v3.query('wells', county='REEVES', deleteddate='null'): 59 | print(row) 60 | ``` 61 | 62 | ### Filter functions 63 | Developer API supports filter functions. These can be passed as strings on the keyword arguments. 64 | 65 | Some common filters are greater than (`gt()`), less than (`lt()`), null, not null (`not(null)`) and between (`btw()`). 66 | See the Developer API documentation for a list of all available filters. 67 | 68 | ```python 69 | # Get well records updated after 2018-08-01 and without deleted dates 70 | for row in v3.query('wells', updateddate='gt(2018-08-01)', deleteddate='null'): 71 | print(row) 72 | 73 | # Get permit records with approved dates between 2018-03-01 and 2018-06-01 74 | for row in v3.query('rigs', spuddate='btw(2018-03-01,2018-06-01)'): 75 | print(row) 76 | ``` 77 | 78 | You can use the `fields` keyword to limit the returned fields in your request. 79 | 80 | ```python 81 | for row in v3.query('rigs', fields='PermitApprovedDate,LeaseName,RigName_Number,MD_FT'): 82 | print(row) 83 | ``` 84 | 85 | ### Escaping 86 | When making requests containing certain characters like commas, use a backslash to escape them. 87 | 88 | ```python 89 | # Escaping the comma before LLC 90 | for row in v3.query('rigs', envoperator='PERCUSSION PETROLEUM OPERATING\, LLC'): 91 | print(row) 92 | ``` 93 | 94 | ### Network request handling 95 | This module exposes functionality in python-requests for modifying network requests handling, namely: 96 | * retries and backoff 97 | * network proxies 98 | * ssl verification 99 | 100 | #### Retries and backoff 101 | Specify the number of retry attempts in `retries` and the backoff factor in `backoff_factor`. 
See the urllib3 102 | [Retry](https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.Retry) utility API for more info 103 | ```python 104 | from enverus_developer_api import DeveloperAPIv3 105 | 106 | v3 = DeveloperAPIv3( 107 | secret_key='', 108 | retries=5, 109 | backoff_factor=1 110 | ) 111 | ``` 112 | 113 | You can specify a network proxy by passing a dictionary with the host and port of your proxy to `proxies`. See the 114 | [proxies](https://requests.readthedocs.io/en/master/user/advanced/#proxies) section of the python-requests documentation 115 | for more info. 116 | ```python 117 | from enverus_developer_api import DeveloperAPIv3 118 | 119 | v3 = DeveloperAPIv3( 120 | secret_key='', 121 | proxies={'https': 'http://10.10.1.10:1080'} 122 | ) 123 | ``` 124 | 125 | Finally, if you're in an environment that provides its own SSL certificates that might not be in your trusted store, 126 | you can choose to ignore SSL verification altogether. This is typically not a good idea and you should seek to resolve 127 | certificate errors instead of ignore them. 128 | ```python 129 | from enverus_developer_api import DeveloperAPIv3 130 | 131 | v3 = DeveloperAPIv3( 132 | secret_key='', 133 | verify=False 134 | ) 135 | ``` 136 | 137 | ## Functions 138 | 139 | ### docs 140 | Returns a sample response for a given dataset 141 | ```python 142 | docs = v3.docs("casings") 143 | ``` 144 | 145 | ### ddl 146 | Returns a CREATE TABLE DDL statement for a given dataset. 
Must specify either 147 | "mssql" for MS SQL Server or "pg" for PostgreSQL as the database argument 148 | ```python 149 | from tempfile import TemporaryFile 150 | 151 | ddl = v3.ddl("casings", database="pg") 152 | with TemporaryFile(mode="w+") as f: 153 | f.write(ddl) 154 | f.seek(0) 155 | for line in f: 156 | print(line, end='') 157 | ``` 158 | 159 | ### count 160 | Returns the count of records for a given dataset and query options in the 161 | X-QUERY-RECORD-COUNT response header value 162 | ```python 163 | count = v3.count("rigs", deleteddate="null") 164 | ``` 165 | 166 | ### query 167 | Accepts a dataset name, request headers and a variable number of keyword arguments that correspond to the fields specified 168 | in the ‘Request Parameters’ section for each dataset in the Developer API documentation. 169 | 170 | This method only supports the JSON output provided by the API and yields dicts for each record 171 | ```python 172 | for row in v3.query("rigs", pagesize=1000, deleteddate="null"): 173 | print(row) 174 | ``` 175 | ##### X-Omit-Header-Next-Links header 176 | Omit the Next Link in the Response Header section, add the Next Link to the JSON Response Body. 177 | ```python 178 | for row in v3.query("rigs", pagesize=1000, deleteddate="null", _headers={'X-Omit-Header-Next-Links': 'true'}): 179 | print(row) 180 | ``` 181 | 182 | ### to_csv 183 | Write query results to CSV. Optional keyword arguments are provided to the csv writer object, 184 | allowing control over delimiters, quoting, etc. 
The default is comma-separated with csv.QUOTE_MINIMAL
```python
import csv, os
from tempfile import mkdtemp

tempdir = mkdtemp()
path = os.path.join(tempdir, "rigs.csv")

dataset = "rigs"
options = dict(pagesize=10000, deleteddate="null")

query = v3.query(dataset, **options)
v3.to_csv(query, path, log_progress=True, delimiter=",", quoting=csv.QUOTE_MINIMAL)

with open(path, mode="r") as f:
    reader = csv.reader(f)
```

### to_dataframe
Write query results to a pandas Dataframe with properly set dtypes and index columns.

This works by requesting the DDL for a given dataset and manipulating the text to build a list of dtypes, date columns and the index column(s).
It then makes a query request for the dataset to ensure we know the exact fields to expect,
(ie, if fields was a provided query parameter and the result will have fewer fields than the DDL).

For endpoints with composite primary keys, a pandas MultiIndex is created.

Query results are written to a temporary CSV file and then read into the dataframe. The CSV is removed afterwards.

Pandas version 0.24.0 or higher is required for use of the Int64 dtype allowing integers with NaN values.
It is not possible to coerce missing values for columns of dtype bool and so these are set to dtype object.
215 | 216 | You will need to have pandas installed to use the to_dataframe function 217 | ```python 218 | pip install pandas 219 | ``` 220 | 221 | Create a pandas dataframe from a dataset query 222 | ```python 223 | df = v3.to_dataframe("rigs", pagesize=10000, deleteddate="null") 224 | ``` 225 | 226 | Create a Texas rigs dataframe, replacing the state abbreviation with the complete name 227 | and removing commas from Operator names 228 | ```python 229 | df = v3.to_dataframe( 230 | dataset="rigs", 231 | deleteddate="null", 232 | pagesize=100000, 233 | stateprovince="TX", 234 | converters={ 235 | "StateProvince": lambda x: "TEXAS", 236 | "ENVOperator": lambda x: x.replace(",", "") 237 | } 238 | ) 239 | df.head(10) 240 | ``` 241 | Reset the index of the DataFrame, and use the default one instead. [reset_index()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html) 242 | ```python 243 | df = v3.to_dataframe(dataset, pagesize=10000, ENVBasin="SACRAMENTO") 244 | df.reset_index(inplace=True) 245 | df.head(10) 246 | ``` -------------------------------------------------------------------------------- /enverus_developer_api/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import time 4 | import json 5 | import base64 6 | import logging 7 | from uuid import uuid4 8 | from math import floor 9 | from shutil import rmtree 10 | from tempfile import mkdtemp 11 | from collections import OrderedDict 12 | 13 | import requests 14 | import unicodecsv as csv 15 | from requests.adapters import HTTPAdapter 16 | from urllib3.util.retry import Retry 17 | from requests.utils import parse_header_links 18 | 19 | 20 | class DAAuthException(Exception): 21 | pass 22 | 23 | 24 | class DAQueryException(Exception): 25 | pass 26 | 27 | 28 | class DADatasetException(Exception): 29 | pass 30 | 31 | 32 | def _chunks(iterable, n): 33 | """ 34 | Return iterables with n members from an input iterable 35 
| From: http://stackoverflow.com/a/8290508 36 | :param iterable: the iterable to chunk up 37 | :param n: max number of items in chunked list 38 | """ 39 | l = len(iterable) 40 | for ndx in range(0, l, n): 41 | yield iterable[ndx: min(ndx + n, l)] 42 | 43 | 44 | class BaseAPI(object): 45 | 46 | def __init__(self, url, retries, backoff_factor, **kwargs): 47 | self.url = url 48 | self.retries = retries 49 | self.backoff_factor = backoff_factor 50 | 51 | if kwargs.get("logger"): 52 | self.logger = kwargs.pop("logger").getChild("directaccess") 53 | else: 54 | logging.basicConfig( 55 | level=kwargs.pop("log_level", logging.INFO), 56 | format="%(asctime)s %(name)s %(levelname)-8s %(message)s", 57 | datefmt="%a, %d %b %Y %H:%M:%S", 58 | ) 59 | self.logger = logging.getLogger("directaccess") 60 | 61 | self.session = requests.Session() 62 | self.session.verify = kwargs.pop("verify", True) 63 | self.session.proxies = kwargs.pop("proxies", {}) 64 | self.session.headers["User-Agent"] = "enverus-developer-api" 65 | 66 | self._status_forcelist = [500, 502, 503, 504] 67 | retries = Retry( 68 | total=self.retries, 69 | backoff_factor=self.backoff_factor, 70 | allowed_methods=frozenset(["GET", "POST", "HEAD"]), 71 | status_forcelist=self._status_forcelist, 72 | ) 73 | self.session.mount("https://", HTTPAdapter(max_retries=retries)) 74 | 75 | def __enter__(self): 76 | return self 77 | 78 | def __exit__(self, exc_type, exc_val, exc_tb): 79 | if self.session: 80 | self.session.close() 81 | self.session = None 82 | 83 | def get_access_token(self): 84 | raise NotImplementedError 85 | 86 | def to_csv(self, query, path, log_progress=True, **kwargs): 87 | """ 88 | Write query results to CSV. Optional keyword arguments are 89 | provided to the csv writer object, allowing control over 90 | delimiters, quoting, etc. 
The default is comma-separated 91 | with csv.QUOTE_MINIMAL 92 | 93 | :: 94 | 95 | d2 = DirectAccessV2(client_id, client_secret) 96 | query = d2.query('rigs', deleteddate='null', pagesize=1500) 97 | # Write tab-separated file 98 | d2.to_csv(query, '/path/to/rigs.csv', delimiter='\\t') 99 | 100 | :param query: DirectAccessV1 or DirectAccessV2 query object 101 | :param path: relative or absolute filesystem path for created CSV 102 | :type path: str 103 | :param log_progress: whether to log progress. if True, log a message with current written count 104 | :type log_progress: bool 105 | :return: the newly created CSV file path 106 | """ 107 | with open(path, mode="wb") as f: 108 | writer = csv.writer(f, **kwargs) 109 | count = None 110 | for i, row in enumerate(query, start=1): 111 | row = OrderedDict(sorted(row.items(), key=lambda t: t[0])) 112 | count = i 113 | if count == 1: 114 | writer.writerow(row.keys()) 115 | writer.writerow(row.values()) 116 | 117 | if log_progress and i % 100000 == 0: 118 | self.logger.info( 119 | "Wrote {count} records to file {path}".format( 120 | count=count, path=path 121 | ) 122 | ) 123 | self.logger.info( 124 | "Completed writing CSV file to {path}. Final count {count}".format( 125 | path=path, count=count 126 | ) 127 | ) 128 | return path 129 | 130 | def _check_response(self, response, *args, **kwargs): 131 | """ 132 | Check responses for errors. 133 | 134 | If the API returns 400, there was a problem with the provided parameters. Raise DAQueryException. 135 | If the API returns 400 and request was to /tokens endpoint, likely bad credentials. Raise DAAuthException. 136 | If the API returns 401, refresh access token if found and resend request. 137 | If the API returns 403 and request was to /tokens endpoint, sleep for 60 seconds and try again. 138 | If the API returns 404, an invalid dataset name was provided. Raise DADatasetException. 139 | 140 | 5xx errors are handled by the session's Retry configuration. 
Debug logging returns retries remaining. 141 | 142 | :param response: a requests Response object 143 | :type response: requests.Response 144 | :param args: 145 | :param kwargs: 146 | :return: 147 | """ 148 | 149 | if not response.ok: 150 | self.logger.debug("Response status code: " + str(response.status_code)) 151 | self.logger.debug("Response text: " + response.text) 152 | if response.status_code == 400: 153 | if "tokens" in response.url: 154 | raise DAAuthException( 155 | "Error getting token. Code: {} Message: {}".format( 156 | response.status_code, response.text 157 | ) 158 | ) 159 | raise DAQueryException(response.text) 160 | if response.status_code == 401: 161 | self.logger.warning("Access token expired. Acquiring a new one...") 162 | self.get_access_token() 163 | request = response.request 164 | request.headers["Authorization"] = self.session.headers["Authorization"] 165 | return self.session.send(request) 166 | if response.status_code == 403 and "tokens" in response.url: 167 | self.logger.warning("Throttled token request. Waiting 60 seconds...") 168 | self.retries -= 1 169 | self.logger.debug("Retries remaining: {}".format(self.retries)) 170 | time.sleep(60) 171 | request = response.request 172 | return self.session.send(request) 173 | if response.status_code == 404: 174 | raise DADatasetException("Invalid dataset name provided") 175 | if response.status_code in self._status_forcelist: 176 | self.logger.debug("Retries remaining: {}".format(self.retries)) 177 | 178 | def ddl(self, dataset, database): 179 | """ 180 | Get DDL statement for dataset. Must provide exactly one of mssql or pg for database argument. 181 | mssql is Microsoft SQL Server, pg is PostgreSQL 182 | 183 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 184 | :param database: one of mssql or pg. 
185 | :return: a DDL statement from the Developer API service as str 186 | """ 187 | ddl_url = os.path.join(self.url, dataset) 188 | self.logger.debug("Retrieving DDL for dataset: " + dataset) 189 | response = self.session.get(ddl_url, params=dict(ddl=database)) 190 | return response.text 191 | 192 | def docs(self, dataset): 193 | """ 194 | Get docs for dataset 195 | 196 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 197 | :return: docs response for dataset as list[dict] or None if ?docs is not supported on the dataset 198 | """ 199 | docs_url = os.path.join(self.url, dataset) 200 | self.logger.debug("Retrieving docs for dataset: " + dataset) 201 | response = self.session.get(docs_url, params=dict(docs=True)) 202 | if response.status_code == 501: 203 | self.logger.warning( 204 | "docs and example params are not yet supported on dataset {dataset}".format( 205 | dataset=dataset 206 | ) 207 | ) 208 | return 209 | return response.json() 210 | 211 | def count(self, dataset, **options): 212 | """ 213 | Get the count of records given a dataset and query options 214 | 215 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 216 | :param options: query parameters as keyword arguments 217 | :return: record count as int 218 | """ 219 | head_url = os.path.join(self.url, dataset) 220 | response = self.session.head(head_url, params=options) 221 | count = response.headers.get("X-Query-Record-Count") 222 | return int(count) 223 | 224 | @staticmethod 225 | def in_(items): 226 | """ 227 | Helper method for providing values to the API's `in()` filter function. 228 | 229 | The API currently supports GET requests to dataset endpoints. When providing a large list of values to the API's 230 | `in()` filter function, it's necessary to chunk up the values to avoid URLs larger than 2048 characters. 
The 231 | `query` method of this class handles the chunking transparently; this helper method simply stringifies 232 | the input items into the correct syntax. 233 | 234 | :: 235 | 236 | d2 = DirectAccessV2(client_id, client_secret) 237 | # Query well-origins 238 | well_origins_query = d2.query( 239 | dataset='well-origins', 240 | deleteddate='null', 241 | pagesize=100000 242 | ) 243 | # Get all UIDs for well-origins 244 | uid_parent_ids = [x['UID'] for x in well_origins_query] 245 | # Provide the UIDs to wellbores endpoint 246 | wellbores_query = d2.query( 247 | dataset='wellbores', 248 | deleteddate='null', 249 | pagesize=100000, 250 | uidparent=d2.in_(uid_parent_ids) 251 | ) 252 | 253 | :param items: list or generator of values to provide to in() filter function 254 | :type items: list 255 | :return: str to provide to DirectAccessV2 `query` method 256 | """ 257 | if not isinstance(items, list): 258 | raise TypeError( 259 | "Argument provided was not a list. Type provided: {}".format( 260 | type(items) 261 | ) 262 | ) 263 | return "in({})".format(",".join([str(x) for x in items])) 264 | 265 | def to_dataframe(self, dataset, converters=None, log_progress=True, **options): 266 | """ 267 | Write query results to a pandas Dataframe with properly set dtypes and index columns. 268 | 269 | This works by requesting the DDL for `dataset` and manipulating the text to build a list of dtypes, date columns 270 | and the index column(s). It then makes a query request for `dataset` to ensure we know the exact fields 271 | to expect, (ie, if `fields` was a provided query parameter and the result will have fewer fields than the DDL). 272 | 273 | For endpoints with composite primary keys, a pandas MultiIndex is created. 274 | 275 | This method is potentially fragile. The API's `docs` feature is preferable but not yet available on all 276 | endpoints. 277 | 278 | Query results are written to a temporary CSV file and then read into the dataframe. The CSV is removed 279 | afterwards. 
280 | 281 | pandas version 0.24.0 or higher is required for use of the Int64 dtype allowing integers with NaN values. It is 282 | not possible to coerce missing values for columns of dtype bool and so these are set to `object` dtype. 283 | 284 | :: 285 | 286 | d2 = DirectAccessV2(client_id, client_secret) 287 | # Create a Texas permits dataframe, removing commas from Survey names and replacing the state 288 | # abbreviation with the complete name. 289 | df = d2.to_dataframe( 290 | dataset='permits', 291 | deleteddate='null', 292 | pagesize=100000, 293 | stateprovince='TX', 294 | converters={ 295 | 'StateProvince': lambda x: 'TEXAS', 296 | 'Survey': lambda x: x.replace(',', '') 297 | } 298 | ) 299 | df.head(10) 300 | 301 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 302 | :type dataset: str 303 | :param converters: Dict of functions for converting values in certain columns. 304 | Keys can either be integers or column labels. 305 | :type converters: dict 306 | :param log_progress: whether to log progress. if True, log a message with current written count 307 | :type log_progress: bool 308 | :param options: query parameters as keyword arguments 309 | :return: pandas dataframe 310 | """ 311 | try: 312 | import pandas 313 | except ImportError: 314 | raise Exception( 315 | "pandas not installed. 
This method requires pandas >= 0.24.0" 316 | ) 317 | 318 | ddl = self.ddl(dataset, database="mssql") 319 | 320 | try: 321 | index_col = re.findall(r"PRIMARY KEY \(([a-z0-9_,]*)\)", ddl)[0].split(",") 322 | except IndexError: 323 | index_col = None 324 | 325 | self.logger.debug("index_col: {}".format(index_col)) 326 | ddl = { 327 | x.split(" ")[0]: x.split(" ")[1][:-1] 328 | for x in ddl.split("\n")[1:] 329 | if x and "CONSTRAINT" not in x 330 | } 331 | 332 | pagesize = options.pop("pagesize") if "pagesize" in options else None 333 | try: 334 | filter_ = OrderedDict( 335 | sorted( 336 | next(self.query(dataset, pagesize=1, **options)).items(), 337 | key=lambda x: x[0], 338 | ) 339 | ).keys() 340 | self.logger.debug( 341 | "Fields retrieved from query response: {}".format( 342 | json.dumps(list(filter_), indent=2, default=str) 343 | ) 344 | ) 345 | except StopIteration: 346 | raise Exception("No results returned from query") 347 | 348 | self.links = None 349 | 350 | if pagesize: 351 | options["pagesize"] = pagesize 352 | 353 | try: 354 | index_col = [ 355 | x for x in filter_ if x.upper() in [y.upper() for y in index_col] 356 | ] 357 | if index_col and len(index_col) == 1: 358 | index_col = index_col[0] 359 | except (IndexError, TypeError) as e: 360 | self.logger.warning("Could not discover index col(s): {}".format(e)) 361 | index_col = None 362 | self.logger.debug("index_col: {}".format(index_col)) 363 | 364 | date_cols = [k for k, v in ddl.items() if v.startswith("DATE") and k in filter_] 365 | self.logger.debug("date columns:\n{}".format(json.dumps(date_cols, indent=2))) 366 | 367 | for k, v in ddl.items(): 368 | if k in filter_: 369 | if v.startswith("VARCHAR"): 370 | ddl[k] = "VARCHAR" 371 | elif v.startswith("DOUBL"): 372 | ddl[k] = "DOUBLE" 373 | 374 | dtypes_mapping = { 375 | "TEXT": "object", 376 | "NUMERIC": "float64", 377 | "REAL": "float64", 378 | "DOUBLE": "float64", 379 | "DATETIME": "object", 380 | "SMALLINT": "Int64", 381 | "INT": "Int64", 382 | 
"INTEGER": "Int64", 383 | "BIGINT": "Int64", 384 | "VARCHAR": "object", 385 | "DATE": "object" 386 | } 387 | dtypes = {k: dtypes_mapping[v] for k, v in ddl.items() if k in filter_} 388 | self.logger.debug("dtypes:\n{}".format(json.dumps(dtypes, indent=2))) 389 | 390 | t = mkdtemp() 391 | self.logger.debug("Created temporary directory: " + t) 392 | 393 | query = self.query(dataset, **options) 394 | 395 | try: 396 | chunks = pandas.read_csv( 397 | filepath_or_buffer=self.to_csv( 398 | query, 399 | os.path.join(t, "{}.csv".format(uuid4().hex)), 400 | delimiter="|", 401 | log_progress=log_progress, 402 | ), 403 | sep="|", 404 | dtype=dtypes, 405 | index_col=index_col, 406 | parse_dates=date_cols, 407 | chunksize=options.get("pagesize", 100000), 408 | converters=converters, 409 | ) 410 | df = pandas.concat(chunks) 411 | return df 412 | finally: 413 | rmtree(t) 414 | self.logger.debug("Removed temporary directory") 415 | 416 | def query(self, dataset, **options): 417 | """ 418 | Query Developer API dataset 419 | 420 | Accepts a dataset name and a variable number of keyword arguments that correspond to the fields specified in 421 | the 'Request Parameters' section for each dataset in the Developer API documentation. 422 | 423 | This method only supports the JSON output provided by the API and yields dicts for each record. 424 | 425 | :param dataset: a valid dataset name. 
See the Developer API documentation for valid values 426 | :param options: query parameters as keyword arguments 427 | :return: query response as generator 428 | """ 429 | query_url = os.path.join(self.url, dataset) 430 | 431 | query_chunks = None 432 | for field, v in options.items(): 433 | if "in(" in str(v) and len(str(v)) > 1950: 434 | values = re.split(r"in\((.*?)\)", options[field])[1].split(",") 435 | chunksize = int(floor(1950 / len(max(values)))) 436 | query_chunks = (field, [x for x in _chunks(values, chunksize)]) 437 | 438 | paging = options.pop("paging") if "paging" in options else "true" 439 | 440 | while True: 441 | if self.links: 442 | response = self.session.get(self.url[:-1] + self.links["next"]["url"]) 443 | else: 444 | if query_chunks and query_chunks[1]: 445 | options[query_chunks[0]] = self.in_(query_chunks[1].pop(0)) 446 | 447 | response = self.session.get(query_url, params=options) 448 | 449 | if not response.ok: 450 | raise DAQueryException( 451 | "Non-200 response: {} {}".format(response.status_code, response.text) 452 | ) 453 | 454 | records = response.json() 455 | if isinstance(records, dict): 456 | records = [records] 457 | 458 | if not len(records): 459 | self.links = None 460 | 461 | if query_chunks and query_chunks[1]: 462 | continue 463 | 464 | break 465 | 466 | if "next" in response.links: 467 | self.links = response.links 468 | 469 | for record in records: 470 | yield record 471 | 472 | if self.links is None or paging.lower() == "false": 473 | break 474 | 475 | 476 | class DirectAccessV2(BaseAPI): 477 | """Client for Enverus' Developer API Version 2""" 478 | 479 | url = "https://di-api.drillinginfo.com/v2/direct-access/" 480 | 481 | def __init__( 482 | self, 483 | client_id, 484 | client_secret, 485 | retries=5, 486 | backoff_factor=1, 487 | links=None, 488 | access_token=None, 489 | **kwargs 490 | ): 491 | """ 492 | Enverus' Developer API Version 2 client 493 | 494 | API documentation and credentials can be found at: 
https://app.enverus.com/direct/#/api/explorer/v2/gettingStarted 495 | 496 | :param client_id: client id credential. 497 | :type client_id: str 498 | :param client_secret: client secret credential. 499 | :type client_secret: str 500 | :param retries: the number of attempts when retrying failed requests with status codes of 500, 502, 503 or 504 501 | :type retries: int 502 | :param backoff_factor: the factor to use when exponentially backing off prior to retrying a failed request 503 | :type backoff_factor: int 504 | :param links: a dictionary of prev and next links as provided by the python-requests Session. 505 | See https://requests.readthedocs.io/en/master/user/advanced/#link-headers 506 | :type dict 507 | :param access_token: an optional, pregenerated access token. If provided, the class instance will not 508 | automatically try to request a new access token. 509 | :type: access_token: str 510 | :param kwargs: 511 | """ 512 | super(DirectAccessV2, self).__init__(self.url, retries, backoff_factor, **kwargs) 513 | self.client_id = client_id 514 | self.client_secret = client_secret 515 | self.links = links 516 | self.access_token = access_token 517 | self.session.hooks["response"].append(self._check_response) 518 | 519 | if self.access_token: 520 | self.session.headers["Authorization"] = "bearer {}".format(self.access_token) 521 | else: 522 | self.access_token = self.get_access_token()["access_token"] 523 | 524 | def get_access_token(self): 525 | """ 526 | Get an access token from /tokens endpoint. Automatically sets the Authorization header on the class instance's 527 | session. 
Raises DAAuthException on error 528 | 529 | :return: token response as dict 530 | """ 531 | if not self.client_id or not self.client_secret: 532 | raise DAAuthException( 533 | "CLIENT_ID and CLIENT_SECRET are required to generate an access token" 534 | ) 535 | 536 | token_url = os.path.join(self.url, "tokens") 537 | 538 | self.session.headers["Authorization"] = "Basic {}".format( 539 | base64.b64encode( 540 | ":".join([self.client_id, self.client_secret]).encode() 541 | ).decode() 542 | ) 543 | self.session.headers["Content-Type"] = "application/x-www-form-urlencoded" 544 | 545 | payload = {"grant_type": "client_credentials"} 546 | response = self.session.post(token_url, params=payload) 547 | self.logger.debug("Token response: " + json.dumps(response.json(), indent=2)) 548 | self.access_token = response.json()["access_token"] 549 | self.session.headers["Authorization"] = "bearer {}".format(self.access_token) 550 | return response.json() 551 | 552 | 553 | class DeveloperAPIv3(BaseAPI): 554 | """Client for Enverus' Developer API Version 3""" 555 | 556 | url = "https://api.enverus.com/v3/direct-access/" 557 | 558 | def __init__( 559 | self, 560 | secret_key, 561 | retries=5, 562 | backoff_factor=1, 563 | links=None, 564 | access_token=None, 565 | **kwargs 566 | ): 567 | """ 568 | Enverus' Developer API Version 3 client 569 | 570 | API documentation and credentials can be found at: https://app.enverus.com/direct/#/api/explorer/v3/gettingStarted 571 | 572 | :param secret_key: api key credential. 573 | :type secret_key: str 574 | :param retries: the number of attempts when retrying failed requests with status codes of 500, 502, 503 or 504 575 | :type retries: int 576 | :param backoff_factor: the factor to use when exponentially backing off prior to retrying a failed request 577 | :type backoff_factor: int 578 | :param links: a dictionary of prev and next links as provided by the python-requests Session. 
579 | See https://requests.readthedocs.io/en/master/user/advanced/#link-headers 580 | :type dict 581 | :param access_token: an optional, pregenerated access token. If provided, the class instance will not 582 | automatically try to request a new access token. 583 | :type: access_token: str 584 | :param kwargs: 585 | """ 586 | super(DeveloperAPIv3, self).__init__(self.url, retries, backoff_factor, **kwargs) 587 | self.secret_key = secret_key 588 | self.links = links 589 | self.access_token = access_token 590 | self.session.hooks["response"].append(self._check_response) 591 | 592 | if self.access_token: 593 | self.session.headers["Authorization"] = "bearer {}".format(self.access_token) 594 | else: 595 | self.access_token = self.get_access_token()["token"] 596 | 597 | def get_access_token(self): 598 | """ 599 | Get an access token from /tokens endpoint. Automatically sets the Authorization header on the class instance's 600 | session. Raises DAAuthException on error 601 | 602 | :return: token response as dict 603 | """ 604 | 605 | if not self.secret_key: 606 | raise DAAuthException( 607 | "SECRET_KEY is required to generate an access token" 608 | ) 609 | 610 | token_url = os.path.join(self.url, "tokens") 611 | 612 | self.session.headers["Content-Type"] = "application/json" 613 | 614 | response = self.session.post(token_url, json={"secretKey": self.secret_key}) 615 | self.logger.debug("Token response: " + json.dumps(response.json(), indent=2)) 616 | 617 | self.access_token = response.json()["token"] 618 | self.session.headers["Authorization"] = "bearer {}".format(self.access_token) 619 | 620 | return response.json() 621 | 622 | def is_omit_header_next_link(self, **options): 623 | if "_headers" in options: 624 | for (k, v) in options.get("_headers").items(): 625 | self.session.headers[k] = v 626 | if k.lower() == "x-omit-header-next-links" and v.lower() == "true": 627 | return True 628 | 629 | return False 630 | 631 | @staticmethod 632 | def parse_links(links_obj): 633 
| result = {} 634 | if links_obj["next"]: 635 | links = parse_header_links(links_obj["next"]) 636 | 637 | for link in links: 638 | key = link.get("rel") or link.get("url") 639 | result[key] = link 640 | 641 | return result 642 | 643 | def query(self, dataset, **options): 644 | """ 645 | Query Developer API dataset 646 | 647 | Accepts a dataset name and a variable number of keyword arguments that correspond to the fields specified in 648 | the 'Request Parameters' section for each dataset in the Developer API documentation. 649 | 650 | This method only supports the JSON output provided by the API and yields dicts for each record. 651 | 652 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 653 | :param options: query parameters as keyword arguments, _headers dict - request headers. 654 | :return: query response as generator 655 | """ 656 | request_headers = None 657 | omit_header_next_link = False 658 | if "_headers" in options: 659 | omit_header_next_link = self.is_omit_header_next_link(**options) 660 | request_headers = options.pop("_headers") 661 | 662 | query_url = os.path.join(self.url, dataset) 663 | 664 | query_chunks = None 665 | for field, v in options.items(): 666 | if "in(" in str(v) and len(str(v)) > 1950: 667 | values = re.split(r"in\((.*?)\)", options[field])[1].split(",") 668 | chunksize = int(floor(1950 / len(max(values)))) 669 | query_chunks = (field, [x for x in _chunks(values, chunksize)]) 670 | 671 | paging = options.pop("paging") if "paging" in options else "true" 672 | 673 | while True: 674 | if self.links: 675 | response = self.session.get(self.url[:-1] + self.links["next"]["url"], headers=request_headers) 676 | else: 677 | if query_chunks and query_chunks[1]: 678 | options[query_chunks[0]] = self.in_(query_chunks[1].pop(0)) 679 | 680 | response = self.session.get(query_url, params=options, headers=request_headers) 681 | 682 | if not response.ok: 683 | raise DAQueryException( 684 | "Non-200 response: 
{} {}".format(response.status_code, response.text) 685 | ) 686 | 687 | records = response.json() 688 | if omit_header_next_link and records["links"]: 689 | self.links = self.parse_links(records["links"]) 690 | records = records["data"] 691 | 692 | if isinstance(records, dict): 693 | records = [records] 694 | 695 | if not len(records): 696 | self.links = None 697 | 698 | if query_chunks and query_chunks[1]: 699 | continue 700 | 701 | break 702 | 703 | if not omit_header_next_link and "next" in response.links: 704 | self.links = response.links 705 | 706 | for record in records: 707 | yield record 708 | 709 | if self.links is None or paging.lower() == "false": 710 | break 711 | --------------------------------------------------------------------------------