├── docs
├── _config.yml
├── api.rst
├── index.rst
├── Makefile
├── usage.rst
└── conf.py
├── setup.cfg
├── requirements.txt
├── examples
├── README.md
├── client_side_filtering.py
└── multi_processing.py
├── LICENSE.md
├── .gitignore
├── .circleci
└── config.yml
├── setup.py
├── tests
└── test___init__.py
├── README.md
└── enverus_developer_api
└── __init__.py
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1
3 |
4 | [metadata]
5 | license_file = LICENSE.md
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests>=2.5.1,<3
2 | unicodecsv==0.14.1
3 |
4 | pandas~=1.5.3
5 | setuptools~=65.3.0
6 | urllib3~=1.26.14
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | ### enverus-developer-api Examples
2 |
3 | Basic example workflows are provided as a reference here.
4 |
5 | If you'd like to see a usage example, please [open an issue](https://github.com/enverus-ea/enverus-developer-api/issues/new).
6 |
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 |
2 | API documentation
3 | =================
4 |
5 | DirectAccessV1
6 | --------------
7 |
8 | .. autoclass:: directaccess.DirectAccessV1
9 | :members: query, to_csv
10 | :special-members:
11 |
12 | DirectAccessV2
13 | --------------
14 |
15 | .. autoclass:: directaccess.DirectAccessV2
16 | :members: get_access_token, ddl, docs, count, in_, query, to_csv, to_dataframe
17 | :special-members:
18 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. direct-access-py documentation master file, created by
2 | sphinx-quickstart on Sun Jan 19 07:51:00 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to direct-access-py's documentation!
7 | ============================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 | usage
14 | api
15 |
16 |
17 |
18 | Indices and tables
19 | ==================
20 |
21 | * :ref:`genindex`
22 | * :ref:`modindex`
23 | * :ref:`search`
24 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = .
8 | BUILDDIR = _build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Cole Howard
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | .idea/
7 | requirements-dev.txt
8 | *.csv
9 | *.sql
10 | scripts/
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Distribution / packaging
16 | .Python
17 | env/
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *,cover
52 | .hypothesis/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # IPython Notebook
76 | .ipynb_checkpoints
77 |
78 | # pyenv
79 | .python-version
80 |
81 | # celery beat schedule file
82 | celerybeat-schedule
83 |
84 | # dotenv
85 | .env
86 |
87 | # virtualenv
88 | venv/
89 | ENV/
90 |
91 | # Spyder project settings
92 | .spyderproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 | *.iml
97 |
--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | jobs:
3 | deploy:
4 | docker:
5 | - image: circleci/python:3.6
6 | steps:
7 | - checkout
8 |
9 | - restore_cache:
10 | key: v1-dependency-cache-{{ checksum "setup.py" }}
11 |
12 | - run:
13 | name: install python dependencies
14 | command: |
15 | python3 -m venv venv
16 | . venv/bin/activate
17 | python3 -m pip install -r requirements.txt
18 | python3 -m pip install twine
19 |
20 | - save_cache:
21 | key: v1-dependency-cache-{{ checksum "setup.py" }}
22 | paths:
23 | - "venv"
24 |
25 | - run:
26 | name: verify git tag vs. version
27 | command: |
28 | python3 -m venv venv
29 | . venv/bin/activate
30 | python setup.py verify
31 |
32 | - run:
33 | name: init .pypirc
34 | command: |
35 | echo -e "[pypi]" >> ~/.pypirc
36 | echo -e "username = $PYPI_USERNAME" >> ~/.pypirc
37 | echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc
38 |
39 | - run:
40 | name: create packages
41 | command: |
42 | python3 setup.py sdist bdist_wheel
43 |
44 | - run:
45 | name: upload to pypi
46 | command: |
47 | . venv/bin/activate
48 | twine upload dist/*
49 |
50 | workflows:
51 | version: 2
52 | build_and_deploy:
53 | jobs:
54 | - deploy:
55 | filters:
56 | tags:
57 | only: /[0-9]+(\.[0-9]+)*/
58 | branches:
59 | ignore: /.*/
60 |
--------------------------------------------------------------------------------
/examples/client_side_filtering.py:
--------------------------------------------------------------------------------
"""
client_side_filtering.py

This example demonstrates using client-side filtering to query on columns that
aren't filterable via the API. While there's no speed up using
this method, we're able to query the API responses down in a memory-efficient way
and without loading unneeded records into our workflow.

Consider this the equivalent of a full table scan in a database.

In the sample below, we're requesting all records in Texas and without DeletedDates in batches of 10k.
Then, we're filtering the responses down to those records that have had their
production allocated using Drillinginfo's production allocation algorithm and
where LowerPerf values exist and are greater than or equal to 2000 and UpperPerf
values exist and are less than or equal to 3000.
"""
import os
try: # Use the memory-efficient ifilter function available in itertools for Python 2
    from itertools import ifilter as filter
except ImportError: # The built in filter function returns a generator in Python 3
    pass

from enverus_developer_api import DirectAccessV2

# Initialize our Direct Access object.
# Credentials come from the environment so they are never hard-coded here.
d2 = DirectAccessV2(
    client_id=os.getenv('DIRECTACCESS_CLIENT_ID'),
    client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET')
)

# Build the API query.
# query() returns a lazy generator; no API requests are made until iteration begins.
query = d2.query('producing-entities', pagesize=10000, deleteddate='eq(null)', state='TX')

# Build the client-side filter.
# NOTE(review): assumes every record carries the AllocPlus/LowerPerf/UpperPerf
# keys -- a record missing one would raise KeyError here; confirm against the
# producing-entities endpoint documentation.
rows = filter(lambda x:
              x['AllocPlus'] == 'Y'
              and x['LowerPerf'] is not None
              and x['LowerPerf'] >= 2000
              and x['UpperPerf'] is not None
              and x['UpperPerf'] <= 3000,
              query)

# Execute the query and filter the responses
# Note that there will be periods of apparent inactivity while records we don't need are tossed
for row in rows:
    print(row)
47 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import os
4 | import sys
5 | from setuptools import setup, find_packages
6 | from setuptools.command.install import install
7 |
8 | VERSION = '3.2.0'
9 |
10 |
class VerifyVersionCommand(install):
    """Custom ``python setup.py verify`` command used by CI.

    Aborts the build when the git tag that triggered the CI release does not
    match the hard-coded ``VERSION``, preventing a mis-tagged upload to PyPI.
    """
    description = 'verify that git tag matches VERSION prior to publishing to pypi'

    def run(self):
        # CIRCLE_TAG is set by CircleCI when a build is triggered by a git tag.
        tag = os.getenv('CIRCLE_TAG')

        if tag != VERSION:
            info = 'Git tag: {0} does not match the version of this app: {1}'.format(
                tag, VERSION
            )
            # Exit with a non-zero status and an explanatory message so CI fails.
            # NOTE(review): install.run() is intentionally never called -- this
            # command only verifies, it does not install; confirm that is intended.
            sys.exit(info)
22 |
23 |
def read(fname):
    """Return the full contents of text file *fname* as a string.

    Used to load README.md as the package's long description.

    :param fname: path of the file to read
    :return: the file's contents, decoded as UTF-8
    """
    # io.open accepts an explicit encoding on both Python 2 and 3, so the
    # README is decoded as UTF-8 regardless of the platform's locale default
    # (plain open() with no encoding can fail on non-UTF-8 locales).
    import io
    with io.open(fname, encoding='utf-8') as fp:
        content = fp.read()
    return content
28 |
29 |
# Optional extra: `pip install enverus-developer-api[pandas]` pulls in pandas
# for dataframe-returning convenience methods.
pandas = [
    'pandas>=0.24.0'
]

setup(
    name='enverus-developer-api',
    version=VERSION,
    description='Enverus Developer API Python Client',
    # The README doubles as the PyPI long description.
    long_description=read('README.md'),
    long_description_content_type='text/markdown',
    author='Direct Access',
    author_email='directaccess@enverus.com',
    url='https://github.com/enverus-ea/enverus-developer-api',
    license='MIT',
    keywords=['enverus', 'drillinginfo', 'directaccess', 'oil', 'gas'],
    packages=find_packages(exclude=('test*', )),
    package_dir={'enverus_developer_api': 'enverus_developer_api'},
    install_requires=[
        'requests>=2.5.1,<3',
        'unicodecsv==0.14.1',
        'urllib3>=1.26.14',
    ],
    extras_require={'pandas': pandas},
    # `python setup.py verify` fails the CI build when the git tag and
    # VERSION disagree (see VerifyVersionCommand above).
    cmdclass={
        'verify': VerifyVersionCommand,
    },
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Intended Audience :: End Users/Desktop',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 3'
    ]
)
66 |
--------------------------------------------------------------------------------
/examples/multi_processing.py:
--------------------------------------------------------------------------------
1 | """
2 | multi_processing.py
3 |
4 | This example demonstrates concurrent loading of Drillinginfo datasets via Python's multiprocessing module.
5 |
6 | The DirectAccessV2 class accepts an optional access_token keyword argument (beginning in version 1.2.0).
7 | When provided, an initial authentication request will not be made. We still provide our API Key, Client ID
8 | and Client Secret to the class so that the access token can be refreshed if needed.
9 |
10 | In the sample below, we simultaneously write three CSVs from the producing-entities, well-rollups and permits
11 | API endpoints. This results in much faster loading time than when done sequentially.
12 | """
13 | import os
14 | import csv
15 | from multiprocessing import Process
16 |
17 | from enverus_developer_api import DirectAccessV2
18 |
# Retrieve our access token.
# Authenticates exactly once up front; the token string is then shared with
# every worker via the DirectAccessV2 access_token keyword argument.
# NOTE(review): under the 'spawn' start method this module is re-imported in
# each child, which would re-run this authentication call -- confirm the
# intended start method is 'fork'.
ACCESS_TOKEN = DirectAccessV2(
    client_id=os.getenv('DIRECTACCESS_CLIENT_ID'),
    client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET')
).access_token
24 |
25 |
def load(endpoint, **options):
    """
    A generic load function that will be called by each of the three processes.

    Streams every record from *endpoint* and writes them to '<endpoint>.csv'
    in the current working directory, logging progress once per page.

    :param endpoint: the Direct Access API endpoint
    :param options: the query parameters to provide on the endpoint
    :return: None
    """
    # Create a DirectAccessV2 client within the function, providing it our already existing access token
    # and thus avoiding unnecessary authentication calls
    client = DirectAccessV2(
        client_id=os.getenv('DIRECTACCESS_CLIENT_ID'),
        client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET'),
        access_token=ACCESS_TOKEN
    )

    # Start at 0 (not None) so the final log line is meaningful even when the
    # endpoint returns no records.
    count = 0
    # newline='' is required by the csv module to avoid blank lines between
    # rows on Windows.
    with open(endpoint + '.csv', mode='w', newline='') as f:
        writer = csv.writer(f)
        for count, row in enumerate(client.query(endpoint, **options), start=1):
            if count == 1:
                # First record: emit the header row from the record's keys.
                writer.writerow(row.keys())
            writer.writerow(row.values())

            # Periodic progress logging, once per page of results.
            if count % options.get('pagesize', 100000) == 0:
                print('Wrote {} records for {}'.format(count, endpoint))

    print('Completed writing {}. Final count: {}'.format(endpoint, count))
    return
56 |
57 |
def main():
    """Spawn one worker process per endpoint and wait for all to finish."""
    # (endpoint, pagesize) pairs; deleted records are excluded for every job.
    jobs = [
        ('well-rollups', 10000),
        ('producing-entities', 100000),
        ('permits', 100000),
    ]
    procs = [
        Process(
            target=load,
            kwargs=dict(
                endpoint=endpoint,
                pagesize=pagesize,
                deleteddate='eq(null)'
            )
        )
        for endpoint, pagesize in jobs
    ]

    # Plain loops instead of side-effect list comprehensions: start every
    # worker, then block until each one has exited.
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()
    return
93 |
94 |
95 | if __name__ == '__main__':
96 | main()
97 |
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 |
2 | Usage Guide
3 | ===========
4 | This module is a thin wrapper around Enverus' Developer API (formerly known as Direct Access).
5 | It handles authentication and token management, pagination and network-related
6 | error handling/retries. It also provides a simple, convenient method to write
7 | results to CSV.
8 |
9 | ``direct-access-py`` is built and tested on Python 3.6 but should work on Python 2.7 and up.
10 |
11 |
12 | Installation
13 | ############
14 |
The easiest way to install ``direct-access-py`` is from the `Python Package Index
<https://pypi.org/project/directaccess/>`_ using ``pip``:
17 |
18 | .. code-block:: bash
19 |
20 | $ pip install directaccess
21 |
22 | To install it manually, simply download the repository from Github:
23 |
24 | .. code-block:: bash
25 |
    $ git clone https://github.com/enverus-ea/enverus-developer-api.git
    $ cd enverus-developer-api/
28 | $ python setup.py install
29 |
30 | Notes
31 | #####
32 | The ``directaccess`` module only supports the JSON format from the API. The ``query`` method
33 | returns a generator of API responses as dictionaries.
34 |
35 | Version 2 of the API uses "soft deletes". Records marked as deleted will have a populated
36 | ``DeletedDate`` field. If these records are not important for your workflow, you should always
37 | provide ``deleteddate='null'`` as a keyword argument to the V2 ``query`` method
38 |
39 | It is also important to note that your API credentials should be treated like any other password.
40 | Take care to not check them into public code repositories or expose them outside of your organization.
41 |
42 | If you find a problem with this module, have a feature request or just need a little help getting started,
please `open an issue <https://github.com/enverus-ea/enverus-developer-api/issues/new>`_! If you're having
trouble with the Enverus Drillinginfo Developer API, you should `contact support
<https://www.enverus.com/contact/>`_.
46 |
47 | Quick Start
48 | ###########
49 |
50 | Direct Access Version 1
51 | ***********************
52 | For version 1 of the API, create an instance of the DirectAccessV1 class and provide it your API key
53 |
54 | .. code-block:: python
55 |
56 | from directaccess import DirectAccessV1
57 |
58 | d1 = DirectAccessV1(api_key='your-api-key')
59 |
60 | .. warning::
61 |
62 | Direct Access Version 1 will reach the end of its life in July, 2020.
63 | Please upgrade your application as Version 1 will be inaccessible after that date.
64 | A future version of this module will drop support for Version 1.
65 |
66 | Provide the query method the dataset as the first argument and any query parameters as keyword arguments.
67 | See valid dataset names and query params in the Direct Access documentation.
68 | The query method returns a generator of API responses as dicts.
69 |
70 | .. code-block:: python
71 |
72 | for row in d1.query('legal-leases', county_parish='Reeves', state_province='TX'):
73 | print(row)
74 |
75 | Direct Access Version 2
76 | ***********************
77 | For version 2 of the API, create an instance of the DirectAccessV2 class, providing it your API key, client id and client secret.
78 | The returned access token will be available as an attribute on the instance (``d2.access_token``) and the Authorization
79 | header is set automatically
80 |
81 | .. code-block:: python
82 |
83 | from directaccess import DirectAccessV2
84 |
85 | d2 = DirectAccessV2(
86 | api_key='your-api-key',
87 | client_id='your-client-id',
88 | client_secret='your-client-secret',
89 | )
90 |
91 |
92 | Like with the V1 class, provide the query method the dataset and query params. All query parameters must match the valid
93 | parameters found in the Direct Access documentation and be passed as keyword arguments.
94 |
95 | .. code-block:: python
96 |
97 | for row in d2.query('well-origins', county='REEVES', pagesize=10000):
98 | print(row)
99 |
100 |
101 | Version 2 Concepts
102 | ##################
103 |
104 | Filter Functions
105 | ****************
106 | Direct Access version 2 supports filter functions. These can be passed as strings on the keyword arguments.
107 |
108 | Some common filters are greater than (``gt()``), less than (``lt()``), ``null``, not null (``not(null)``) and
109 | between (``btw()``).
110 | See the Direct Access documentation for a list of all available filters.
111 |
112 | .. code-block:: python
113 |
114 | # Get well records updated after 2018-08-01 and without deleted dates
115 | for row in d2.query('well-origins', updateddate='gt(2018-08-01)', deleteddate='null'):
116 | print(row)
117 |
118 | # Get permit records with approved dates between 2018-03-01 and 2018-06-01
119 | for row in d2.query('permits', approveddate='btw(2018-03-01,2018-06-01)'):
120 | print(row)
121 |
122 | Fields keyword
123 | **************
124 | You can use the ``fields`` keyword to limit the returned fields in your queries.
125 | This has the benefit of limiting the API responses to only those fields needed for your
126 | workflow and will significantly improve the speed of your queries.
127 |
128 | .. code-block:: python
129 |
130 | for row in d2.query('rigs', fields='DrillType,LeaseName,PermitDepth'):
131 | print(row)
132 |
133 | Escaping
134 | ********
135 | When making requests containing certain characters like commas, use a backslash to escape them.
136 |
137 | .. code-block:: python
138 |
139 | # Escaping the comma before LLC
140 | for row in d2.query('producing-entities', curropername='PERCUSSION PETROLEUM OPERATING\, LLC'):
141 | print(row)
142 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
# Repo root on sys.path so autodoc can import the package being documented.
sys.path.insert(0, os.path.abspath('../'))


# -- Project information -----------------------------------------------------

project = 'directaccess'
copyright = '2021'
author = 'directaccess'

# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = ''


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
# NOTE(review): Sphinx >= 5 warns on language = None; 'en' is the modern
# equivalent -- confirm the Sphinx version in use before changing.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'direct-access-pydoc'


# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'direct-access-py.tex', 'direct-access-py Documentation',
     'Direct Access', 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'direct-access-py', 'direct-access-py Documentation',
     [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'direct-access-py', 'direct-access-py Documentation',
     author, 'direct-access-py', 'One line description of project.',
     'Miscellaneous'),
]


# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']


# -- Extension configuration -------------------------------------------------
180 |
--------------------------------------------------------------------------------
/tests/test___init__.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import unittest
3 | from unittest import TestCase
4 | import os
5 | import logging
6 | from tempfile import TemporaryFile, mkdtemp
7 | from pandas.api.types import is_datetime64_ns_dtype, is_float_dtype, is_int64_dtype, is_object_dtype
8 | from multiprocessing import Process
9 |
10 | from enverus_developer_api import (
11 | DeveloperAPIv3,
12 | DirectAccessV2,
13 | DADatasetException,
14 | DAQueryException,
15 | DAAuthException
16 | )
17 |
18 | LOG_LEVEL = logging.DEBUG
19 | if os.environ.get("GITHUB_SHA"):
20 | LOG_LEVEL = logging.ERROR
21 |
22 |
def set_token_v2():
    """Cache a Direct Access v2 access token in the DIRECTACCESS_TOKEN env var.

    Authenticates at most once per test session: if the variable is already
    set (e.g. by an earlier call or by CI), this is a no-op.
    """
    if not os.environ.get("DIRECTACCESS_TOKEN"):
        os.environ["DIRECTACCESS_TOKEN"] = DirectAccessV2(
            client_id=os.environ.get("DIRECTACCESS_CLIENT_ID"),
            client_secret=os.environ.get("DIRECTACCESS_CLIENT_SECRET"),
            api_key=os.environ.get("DIRECTACCESS_API_KEY"),
        ).access_token
    return
31 |
32 |
def set_token_v3():
    """Cache a Developer API v3 access token in the DIRECTACCESSV3_TOKEN env var.

    Authenticates at most once per test session; subsequent calls reuse the
    token already stored in the environment.
    """
    if not os.environ.get("DIRECTACCESSV3_TOKEN"):
        os.environ["DIRECTACCESSV3_TOKEN"] = DeveloperAPIv3(
            secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"),
            # url="https://api.dev.enverus.com/"
        ).access_token
    return
40 |
41 |
def create_developerapi_v3():
    """Build a DeveloperAPIv3 client configured for the test suite."""
    settings = {
        "secret_key": os.environ.get("DIRECTACCESSV3_API_KEY"),
        "access_token": os.environ.get("DIRECTACCESSV3_TOKEN"),
        # "url": "https://api.dev.enverus.com/",
        "retries": 5,
        "backoff_factor": 10,
        "log_level": LOG_LEVEL,
    }
    return DeveloperAPIv3(**settings)
51 |
52 |
def create_directaccess_v2():
    """Build a DirectAccessV2 client configured for the test suite."""
    settings = {
        "api_key": os.environ.get("DIRECTACCESS_API_KEY"),
        "client_id": os.environ.get("DIRECTACCESS_CLIENT_ID"),
        "client_secret": os.environ.get("DIRECTACCESS_CLIENT_SECRET"),
        "access_token": os.environ.get("DIRECTACCESS_TOKEN"),
        "retries": 5,
        "backoff_factor": 10,
        "log_level": LOG_LEVEL,
    }
    return DirectAccessV2(**settings)
63 |
64 |
def proc_query(dataset):
    """Worker body for multiprocessing smoke tests.

    Queries *dataset* and pulls a single record to prove the query executes
    end-to-end inside a child process.

    :param dataset: Developer API v3 dataset name, e.g. "rigs"
    """
    v3 = create_developerapi_v3()
    resp = v3.query(dataset, deleteddate="null")
    # Raises StopIteration (failing the worker) if the dataset is empty.
    next(resp)
    # Bug fix: the original `TestCase.assertTrue(resp)` called the unbound
    # method with `resp` as `self`, raising TypeError at runtime. A plain
    # assert is correct here since unittest's TestCase machinery is not
    # available in a worker process.
    assert resp is not None
    return
71 |
72 |
73 | class TestEnverusDeveloperAPI(TestCase):
74 | @classmethod
75 | def setUpClass(cls) -> None:
76 | set_token_v3()
77 | cls.v3 = create_developerapi_v3()
78 |
79 | set_token_v2()
80 | cls.v2 = create_directaccess_v2()
81 |
82 | def test_missing_secret_key_v3(self):
83 | with self.assertRaises(DAAuthException):
84 | DeveloperAPIv3(secret_key=None, log_level=LOG_LEVEL)
85 |
86 | def test_query_v3(self):
87 | query = self.v3.query("casings", pagesize=10, deleteddate="null")
88 | records = list()
89 | for i, row in enumerate(query, start=1):
90 | # print(row)
91 | records.append(row)
92 | if i % 30 == 0:
93 | break
94 | self.assertTrue(len(records) > 0, "test_query_v3 records list empty")
95 |
96 | def test_query_v3_omit_header_next_link(self):
97 | query = self.v3.query("casings", pagesize=10, deleteddate="null", _headers={'X-Omit-Header-Next-Links': 'true'})
98 | records = list()
99 | for i, row in enumerate(query, start=1):
100 | records.append(row)
101 | if i % 30 == 0:
102 | break
103 | self.assertTrue(len(records) > 0, "test_query_v3 records list empty")
104 |
105 | def test_is_omit_header_next_link(self):
106 | is_omit_next_link = self.v3.is_omit_header_next_link(_headers={'X-Omit-Header-Next-Links': 'true'})
107 | self.assertTrue(is_omit_next_link, "test_is_omit_header_next_link should contain omit header")
108 |
109 | is_omit_next_link = self.v3.is_omit_header_next_link(_headers={'Omit-Header-Next-Links': 'true'})
110 | self.assertTrue(not is_omit_next_link, "test_is_omit_header_next_link should not contain omit header")
111 |
112 | def test_parse_links(self):
113 | links = self.v3.parse_links(
114 | {"next": "; rel='next'"})
115 | self.assertTrue(links["next"]["url"], "/economics?action=next&next_page=WellID+%3C+840600005436298&pagesize=50")
116 |
117 | def test_docs_v3(self):
118 | docs = self.v3.docs("casings")
119 | self.assertTrue(docs)
120 | self.assertIsInstance(docs, list)
121 |
122 | def test_ddl_v3(self):
123 | ddl = self.v3.ddl("casings", database="pg")
124 | with TemporaryFile(mode="w+") as f:
125 | f.write(ddl)
126 | f.seek(0)
127 | for line in f:
128 | self.assertTrue(line.startswith("CREATE TABLE casings"))
129 | break
130 |
131 | def test_ddl_invalid_db_v3(self):
132 | with self.assertRaises(DAQueryException):
133 | self.v3.ddl("casings", database="invalid")
134 |
135 | def test_count_v3(self):
136 | count = self.v3.count("wells", updateddate="ge(2021-05-01)", StateProvince="in(TX,LA,WY)")
137 | self.assertIsNotNone(count)
138 | self.assertIsInstance(count, int)
139 |
140 | def test_count_invalid_dataset_v3(self):
141 | with self.assertRaises(DADatasetException):
142 | self.v3.count("invalid")
143 |
144 | def test_token_refresh_v3(self):
145 | v3 = DeveloperAPIv3(
146 | secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"),
147 | access_token="invalid",
148 | # url="https://api.dev.enverus.com/",
149 | retries=5,
150 | backoff_factor=10,
151 | log_level=LOG_LEVEL
152 | )
153 |
154 | invalid_token = v3.access_token
155 | count = v3.count("rigs", deleteddate="null")
156 | query = v3.query("rigs", pagesize=10000, deleteddate="null")
157 | self.assertTrue(len([x for x in query]) == count)
158 | self.assertTrue(invalid_token != v3.access_token)
159 |
160 | def test_csv_v3(self):
161 | tempdir = mkdtemp()
162 | path = os.path.join(tempdir, "rigs.csv")
163 |
164 | dataset = "rigs"
165 | options = dict(pagesize=10000, deleteddate="null")
166 |
167 | count = self.v3.count(dataset, **options)
168 | query = self.v3.query(dataset, **options)
169 | self.v3.to_csv(query, path, log_progress=True, delimiter=",", quoting=csv.QUOTE_MINIMAL)
170 |
171 | with open(path, mode="r") as f:
172 | reader = csv.reader(f)
173 | row_count = len([x for x in reader])
174 | self.assertTrue(row_count == (count + 1))
175 |
176 | def test_dataframe_v3(self):
177 | df = self.v3.to_dataframe("rigs", pagesize=1000, deleteddate="null")
178 |
179 | # Check index is set to API endpoint "primary keys"
180 | self.assertListEqual(df.index.names, ["CompletionID", "WellID"])
181 |
182 | # Check object dtypes
183 | self.assertTrue(is_object_dtype(df.API_UWI))
184 | self.assertTrue(is_object_dtype(df.ActiveStatus))
185 |
186 | # Check datetime64 dtypes
187 | self.assertTrue(is_datetime64_ns_dtype(df.DeletedDate))
188 | self.assertTrue(is_datetime64_ns_dtype(df.SpudDate))
189 | self.assertTrue(is_datetime64_ns_dtype(df.UpdatedDate))
190 |
191 | # Check Int64 dtypes
192 | self.assertTrue(is_int64_dtype(df.RatedWaterDepth))
193 | self.assertTrue(is_int64_dtype(df.RatedHP))
194 |
195 | # Check float dtypes
196 | self.assertTrue(is_float_dtype(df.RigLatitudeWGS84))
197 | self.assertTrue(is_float_dtype(df.RigLongitudeWGS84))
198 |
199 | def test_multiple_processes_v3(self):
200 | # Launch two child processes, one for rigs and one for casings
201 |
202 | procs = [
203 | Process(target=proc_query, kwargs=dict(dataset="rigs")),
204 | Process(target=proc_query, kwargs=dict(dataset="casings"))
205 | ]
206 |
207 | [x.start() for x in procs]
208 | [x.join() for x in procs]
209 |
210 | def test_enter_exit(self):
211 | with DeveloperAPIv3(secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"),
212 | access_token=os.environ.get("DIRECTACCESSV3_TOKEN")) as api:
213 | self.assertIsInstance(api, DeveloperAPIv3)
214 | self.assertIsNotNone(api.session)
215 |
216 | self.assertIsNone(api.session)
217 |
218 | # ******************** DirectAccessV2 Test Cases **********************
219 |
220 | def test_missing_client_id_v2(self):
221 | with self.assertRaises(DAAuthException):
222 | DirectAccessV2(client_id=None,
223 | client_secret=os.environ.get("DIRECTACCESS_CLIENT_SECRET"),
224 | log_level=LOG_LEVEL)
225 |
226 | def test_missing_client_secret_v2(self):
227 | with self.assertRaises(DAAuthException):
228 | DirectAccessV2(client_id=os.environ.get("DIRECTACCESS_CLIENT_ID"),
229 | client_secret=None,
230 | log_level=LOG_LEVEL)
231 |
232 | def test_query_v2(self):
233 | query = self.v2.query("rigs", pagesize=10, deleteddate="null")
234 | records = list()
235 | for i, row in enumerate(query, start=1):
236 | # print(row)
237 | records.append(row)
238 | if i % 30 == 0:
239 | break
240 | self.assertTrue(len(records) > 0, "test_query_v2 records list empty")
241 |
242 |
# Run the whole suite via unittest's CLI when this module is executed directly.
if __name__ == '__main__':
    unittest.main()
245 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # enverus-developer-api
2 |
3 | [](https://badge.fury.io/py/enverus-developer-api)
4 |
5 | A thin wrapper around Enverus' Developer API. Handles authentication and token management, pagination and
6 | network-related error handling/retries.
7 |
8 | This module is built and tested on Python 3.9 but should work on Python 2.7 and up.
9 |
10 |
11 | ## Install
12 | ```commandline
13 | pip install enverus-developer-api
14 | ```
15 |
16 | ## Clients
17 |
18 | ### Developer API - Version 3
19 | DirectAccess has been rebranded as DeveloperAPI. For version 3 of the API, create an instance of the DeveloperAPIv3 class, providing it your secret_key (not the same as the v2 api_key).
20 | The returned access token will be available as an attribute on the instance (v3.access_token) and the Authorization
21 | header is set automatically.
22 | ```python
23 | from enverus_developer_api import DeveloperAPIv3
24 |
25 | v3 = DeveloperAPIv3(secret_key='')
26 | ```
27 | Your secret_key can be generated, retrieved and revoked at https://app.enverus.com/provisioning/directaccess
28 |
29 | The Developer API Version 3 endpoint documentation can be found at https://app.enverus.com/direct/#/api/explorer/v3/gettingStarted
30 |
31 | ### Direct Access - Version 2
32 | For version 2 of the API, create an instance of the DirectAccessV2 class, providing it your API key, client id and client secret.
33 | The returned access token will be available as an attribute on the instance (d2.access_token) and the Authorization
34 | header is set automatically
35 | ```python
36 | from enverus_developer_api import DirectAccessV2
37 |
38 | d2 = DirectAccessV2(
39 | client_id='',
40 | client_secret='',
41 | )
42 | ```
43 | The Direct Access Version 2 endpoint documentation can be found at https://app.enverus.com/direct/#/api/explorer/v2/gettingStarted
44 |
45 | ## Usage
46 |
47 | The functionality outlined below exists for **both** DeveloperAPIv3 and DirectAccessV2 clients.
48 |
49 | Only 1 instance of the client needs to be created to perform all your queries. It can execute multiple simultaneous requests if needed,
50 | and will automatically refresh the access_token for the Authorization header if expired.
51 | An access_token is valid for 8 hours, and there is rate limit on the number of access_tokens that can be requested per minute
52 | which is why we recommend creating and reusing a single DeveloperAPIv3 client instance for all of your querying.
53 |
54 | Provide the query method the dataset and query params. All query parameters must match the valid
55 | Request Parameters found in the Developer API documentation for a given dataset and be passed as keyword arguments.
56 |
57 | ```python
58 | for row in v3.query('wells', county='REEVES', deleteddate='null'):
59 | print(row)
60 | ```
61 |
62 | ### Filter functions
63 | Developer API supports filter functions. These can be passed as strings on the keyword arguments.
64 |
65 | Some common filters are greater than (`gt()`), less than (`lt()`), null, not null (`not(null)`) and between (`btw()`).
66 | See the Developer API documentation for a list of all available filters.
67 |
68 | ```python
69 | # Get well records updated after 2018-08-01 and without deleted dates
70 | for row in v3.query('wells', updateddate='gt(2018-08-01)', deleteddate='null'):
71 | print(row)
72 |
# Get rig records with spud dates between 2018-03-01 and 2018-06-01
74 | for row in v3.query('rigs', spuddate='btw(2018-03-01,2018-06-01)'):
75 | print(row)
76 | ```
77 |
78 | You can use the `fields` keyword to limit the returned fields in your request.
79 |
80 | ```python
81 | for row in v3.query('rigs', fields='PermitApprovedDate,LeaseName,RigName_Number,MD_FT'):
82 | print(row)
83 | ```
84 |
85 | ### Escaping
86 | When making requests containing certain characters like commas, use a backslash to escape them.
87 |
88 | ```python
89 | # Escaping the comma before LLC
90 | for row in v3.query('rigs', envoperator='PERCUSSION PETROLEUM OPERATING\, LLC'):
91 | print(row)
92 | ```
93 |
94 | ### Network request handling
95 | This module exposes functionality in python-requests for modifying network requests handling, namely:
96 | * retries and backoff
97 | * network proxies
98 | * ssl verification
99 |
100 | #### Retries and backoff
101 | Specify the number of retry attempts in `retries` and the backoff factor in `backoff_factor`. See the urllib3
102 | [Retry](https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.Retry) utility API for more info
103 | ```python
104 | from enverus_developer_api import DeveloperAPIv3
105 |
106 | v3 = DeveloperAPIv3(
107 | secret_key='',
108 | retries=5,
109 | backoff_factor=1
110 | )
111 | ```
112 |
113 | You can specify a network proxy by passing a dictionary with the host and port of your proxy to `proxies`. See the
114 | [proxies](https://requests.readthedocs.io/en/master/user/advanced/#proxies) section of the python-requests documentation
115 | for more info.
116 | ```python
117 | from enverus_developer_api import DeveloperAPIv3
118 |
119 | v3 = DeveloperAPIv3(
120 | secret_key='',
121 | proxies={'https': 'http://10.10.1.10:1080'}
122 | )
123 | ```
124 |
125 | Finally, if you're in an environment that provides its own SSL certificates that might not be in your trusted store,
126 | you can choose to ignore SSL verification altogether. This is typically not a good idea and you should seek to resolve
127 | certificate errors instead of ignore them.
128 | ```python
129 | from enverus_developer_api import DeveloperAPIv3
130 |
131 | v3 = DeveloperAPIv3(
132 | secret_key='',
133 | verify=False
134 | )
135 | ```
136 |
137 | ## Functions
138 |
139 | ### docs
140 | Returns a sample response for a given dataset
141 | ```python
142 | docs = v3.docs("casings")
143 | ```
144 |
145 | ### ddl
146 | Returns a CREATE TABLE DDL statement for a given dataset. Must specify either
147 | "mssql" for MS SQL Server or "pg" for PostgreSQL as the database argument
148 | ```python
149 | from tempfile import TemporaryFile
150 |
151 | ddl = v3.ddl("casings", database="pg")
152 | with TemporaryFile(mode="w+") as f:
153 | f.write(ddl)
154 | f.seek(0)
155 | for line in f:
156 | print(line, end='')
157 | ```
158 |
159 | ### count
160 | Returns the count of records for a given dataset and query options in the
161 | X-QUERY-RECORD-COUNT response header value
162 | ```python
163 | count = v3.count("rigs", deleteddate="null")
164 | ```
165 |
166 | ### query
167 | Accepts a dataset name, request headers and a variable number of keyword arguments that correspond to the fields specified
168 | in the ‘Request Parameters’ section for each dataset in the Developer API documentation.
169 |
170 | This method only supports the JSON output provided by the API and yields dicts for each record
171 | ```python
172 | for row in v3.query("rigs", pagesize=1000, deleteddate="null"):
173 | print(row)
174 | ```
175 | ##### X-Omit-Header-Next-Links header
176 | Omit the Next Link in the Response Header section, add the Next Link to the JSON Response Body.
177 | ```python
178 | for row in v3.query("rigs", pagesize=1000, deleteddate="null", _headers={'X-Omit-Header-Next-Links': 'true'}):
179 | print(row)
180 | ```
181 |
182 | ### to_csv
183 | Write query results to CSV. Optional keyword arguments are provided to the csv writer object,
184 | allowing control over delimiters, quoting, etc. The default is comma-separated with csv.QUOTE_MINIMAL
185 | ```python
186 | import csv, os
187 | from tempfile import mkdtemp
188 |
189 | tempdir = mkdtemp()
190 | path = os.path.join(tempdir, "rigs.csv")
191 |
192 | dataset = "rigs"
193 | options = dict(pagesize=10000, deleteddate="null")
194 |
query = v3.query(dataset, **options)
v3.to_csv(query, path, log_progress=True, delimiter=",", quoting=csv.QUOTE_MINIMAL)
197 |
198 | with open(path, mode="r") as f:
199 | reader = csv.reader(f)
200 | ```
201 |
202 | ### to_dataframe
203 | Write query results to a pandas Dataframe with properly set dtypes and index columns.
204 |
205 | This works by requesting the DDL for a given dataset and manipulating the text to build a list of dtypes, date columns and the index column(s).
206 | It then makes a query request for the dataset to ensure we know the exact fields to expect,
207 | (ie, if fields was a provided query parameter and the result will have fewer fields than the DDL).
208 |
209 | For endpoints with composite primary keys, a pandas MultiIndex is created.
210 |
211 | Query results are written to a temporary CSV file and then read into the dataframe. The CSV is removed afterwards.
212 |
213 | Pandas version 0.24.0 or higher is required for use of the Int64 dtype allowing integers with NaN values.
214 | It is not possible to coerce missing values for columns of dtype bool and so these are set to dtype object.
215 |
216 | You will need to have pandas installed to use the to_dataframe function
```commandline
218 | pip install pandas
219 | ```
220 |
221 | Create a pandas dataframe from a dataset query
222 | ```python
223 | df = v3.to_dataframe("rigs", pagesize=10000, deleteddate="null")
224 | ```
225 |
226 | Create a Texas rigs dataframe, replacing the state abbreviation with the complete name
227 | and removing commas from Operator names
228 | ```python
229 | df = v3.to_dataframe(
230 | dataset="rigs",
231 | deleteddate="null",
232 | pagesize=100000,
233 | stateprovince="TX",
234 | converters={
235 | "StateProvince": lambda x: "TEXAS",
236 | "ENVOperator": lambda x: x.replace(",", "")
237 | }
238 | )
239 | df.head(10)
240 | ```
241 | Reset the index of the DataFrame, and use the default one instead. [reset_index()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html)
242 | ```python
243 | df = v3.to_dataframe(dataset, pagesize=10000, ENVBasin="SACRAMENTO")
244 | df.reset_index(inplace=True)
245 | df.head(10)
246 | ```
--------------------------------------------------------------------------------
/enverus_developer_api/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import time
4 | import json
5 | import base64
6 | import logging
7 | from uuid import uuid4
8 | from math import floor
9 | from shutil import rmtree
10 | from tempfile import mkdtemp
11 | from collections import OrderedDict
12 |
13 | import requests
14 | import unicodecsv as csv
15 | from requests.adapters import HTTPAdapter
16 | from urllib3.util.retry import Retry
17 | from requests.utils import parse_header_links
18 |
19 |
class DAAuthException(Exception):
    """Raised when authentication fails: missing credentials or a token-endpoint error."""
    pass
22 |
23 |
class DAQueryException(Exception):
    """Raised when the API rejects a query, e.g. an HTTP 400 for bad parameters."""
    pass
26 |
27 |
class DADatasetException(Exception):
    """Raised when an invalid dataset name is provided (API responds HTTP 404)."""
    pass
30 |
31 |
32 | def _chunks(iterable, n):
33 | """
34 | Return iterables with n members from an input iterable
35 | From: http://stackoverflow.com/a/8290508
36 | :param iterable: the iterable to chunk up
37 | :param n: max number of items in chunked list
38 | """
39 | l = len(iterable)
40 | for ndx in range(0, l, n):
41 | yield iterable[ndx: min(ndx + n, l)]
42 |
43 |
class BaseAPI(object):
    """
    Shared functionality for the Developer API clients.

    Handles session construction (retries, proxies, SSL verification,
    logging), error handling of responses, and the dataset helpers
    (ddl, docs, count, query, to_csv, to_dataframe). Subclasses implement
    get_access_token and set ``self.links`` (used for pagination in query).
    """

    def __init__(self, url, retries, backoff_factor, **kwargs):
        """
        :param url: base URL of the Developer API service
        :type url: str
        :param retries: the number of attempts when retrying failed requests with status codes of 500, 502, 503 or 504
        :type retries: int
        :param backoff_factor: the factor to use when exponentially backing off prior to retrying a failed request
        :type backoff_factor: int
        :param kwargs: optional ``logger``, ``log_level``, ``verify`` and ``proxies``
        """
        self.url = url
        self.retries = retries
        self.backoff_factor = backoff_factor

        if kwargs.get("logger"):
            self.logger = kwargs.pop("logger").getChild("directaccess")
        else:
            logging.basicConfig(
                level=kwargs.pop("log_level", logging.INFO),
                format="%(asctime)s %(name)s %(levelname)-8s %(message)s",
                datefmt="%a, %d %b %Y %H:%M:%S",
            )
            self.logger = logging.getLogger("directaccess")

        self.session = requests.Session()
        self.session.verify = kwargs.pop("verify", True)
        self.session.proxies = kwargs.pop("proxies", {})
        self.session.headers["User-Agent"] = "enverus-developer-api"

        self._status_forcelist = [500, 502, 503, 504]
        # Distinct local name: previously this reused ``retries``, shadowing
        # the integer attribute set above within this scope.
        retry_config = Retry(
            total=self.retries,
            backoff_factor=self.backoff_factor,
            allowed_methods=frozenset(["GET", "POST", "HEAD"]),
            status_forcelist=self._status_forcelist,
        )
        self.session.mount("https://", HTTPAdapter(max_retries=retry_config))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close and clear the session so a subsequent use fails loudly.
        if self.session:
            self.session.close()
            self.session = None

    def get_access_token(self):
        # Subclasses must implement token acquisition for their API version.
        raise NotImplementedError

    def to_csv(self, query, path, log_progress=True, **kwargs):
        """
        Write query results to CSV. Optional keyword arguments are
        provided to the csv writer object, allowing control over
        delimiters, quoting, etc. The default is comma-separated
        with csv.QUOTE_MINIMAL

        ::

            d2 = DirectAccessV2(client_id, client_secret)
            query = d2.query('rigs', deleteddate='null', pagesize=1500)
            # Write tab-separated file
            d2.to_csv(query, '/path/to/rigs.csv', delimiter='\\t')

        :param query: DirectAccessV1 or DirectAccessV2 query object
        :param path: relative or absolute filesystem path for created CSV
        :type path: str
        :param log_progress: whether to log progress. if True, log a message with current written count
        :type log_progress: bool
        :return: the newly created CSV file path
        """
        with open(path, mode="wb") as f:
            writer = csv.writer(f, **kwargs)
            count = None
            for i, row in enumerate(query, start=1):
                # Sort keys so the header and values line up deterministically.
                row = OrderedDict(sorted(row.items(), key=lambda t: t[0]))
                count = i
                if count == 1:
                    writer.writerow(row.keys())
                writer.writerow(row.values())

                if log_progress and i % 100000 == 0:
                    self.logger.info(
                        "Wrote {count} records to file {path}".format(
                            count=count, path=path
                        )
                    )
        self.logger.info(
            "Completed writing CSV file to {path}. Final count {count}".format(
                path=path, count=count
            )
        )
        return path

    def _check_response(self, response, *args, **kwargs):
        """
        Check responses for errors.

        If the API returns 400, there was a problem with the provided parameters. Raise DAQueryException.
        If the API returns 400 and request was to /tokens endpoint, likely bad credentials. Raise DAAuthException.
        If the API returns 401, refresh access token if found and resend request.
        If the API returns 403 and request was to /tokens endpoint, sleep for 60 seconds and try again.
        If the API returns 404, an invalid dataset name was provided. Raise DADatasetException.

        5xx errors are handled by the session's Retry configuration. Debug logging returns retries remaining.

        :param response: a requests Response object
        :type response: requests.Response
        :param args:
        :param kwargs:
        :return:
        """

        if not response.ok:
            self.logger.debug("Response status code: " + str(response.status_code))
            self.logger.debug("Response text: " + response.text)
            if response.status_code == 400:
                if "tokens" in response.url:
                    raise DAAuthException(
                        "Error getting token. Code: {} Message: {}".format(
                            response.status_code, response.text
                        )
                    )
                raise DAQueryException(response.text)
            if response.status_code == 401:
                self.logger.warning("Access token expired. Acquiring a new one...")
                self.get_access_token()
                # Re-send the original request with the refreshed token.
                request = response.request
                request.headers["Authorization"] = self.session.headers["Authorization"]
                return self.session.send(request)
            if response.status_code == 403 and "tokens" in response.url:
                self.logger.warning("Throttled token request. Waiting 60 seconds...")
                self.retries -= 1
                self.logger.debug("Retries remaining: {}".format(self.retries))
                time.sleep(60)
                request = response.request
                return self.session.send(request)
            if response.status_code == 404:
                raise DADatasetException("Invalid dataset name provided")
            if response.status_code in self._status_forcelist:
                self.logger.debug("Retries remaining: {}".format(self.retries))

    def ddl(self, dataset, database):
        """
        Get DDL statement for dataset. Must provide exactly one of mssql or pg for database argument.
        mssql is Microsoft SQL Server, pg is PostgreSQL

        :param dataset: a valid dataset name. See the Developer API documentation for valid values
        :param database: one of mssql or pg.
        :return: a DDL statement from the Developer API service as str
        """
        ddl_url = os.path.join(self.url, dataset)
        self.logger.debug("Retrieving DDL for dataset: " + dataset)
        response = self.session.get(ddl_url, params=dict(ddl=database))
        return response.text

    def docs(self, dataset):
        """
        Get docs for dataset

        :param dataset: a valid dataset name. See the Developer API documentation for valid values
        :return: docs response for dataset as list[dict] or None if ?docs is not supported on the dataset
        """
        docs_url = os.path.join(self.url, dataset)
        self.logger.debug("Retrieving docs for dataset: " + dataset)
        response = self.session.get(docs_url, params=dict(docs=True))
        if response.status_code == 501:
            self.logger.warning(
                "docs and example params are not yet supported on dataset {dataset}".format(
                    dataset=dataset
                )
            )
            return
        return response.json()

    def count(self, dataset, **options):
        """
        Get the count of records given a dataset and query options

        :param dataset: a valid dataset name. See the Developer API documentation for valid values
        :param options: query parameters as keyword arguments
        :return: record count as int
        """
        head_url = os.path.join(self.url, dataset)
        response = self.session.head(head_url, params=options)
        # NOTE(review): if the X-Query-Record-Count header is absent this
        # raises TypeError -- assumes the API always returns it; confirm.
        count = response.headers.get("X-Query-Record-Count")
        return int(count)

    @staticmethod
    def in_(items):
        """
        Helper method for providing values to the API's `in()` filter function.

        The API currently supports GET requests to dataset endpoints. When providing a large list of values to the API's
        `in()` filter function, it's necessary to chunk up the values to avoid URLs larger than 2048 characters. The
        `query` method of this class handles the chunking transparently; this helper method simply stringifies
        the input items into the correct syntax.

        ::

            d2 = DirectAccessV2(client_id, client_secret)
            # Query well-origins
            well_origins_query = d2.query(
                dataset='well-origins',
                deleteddate='null',
                pagesize=100000
            )
            # Get all UIDs for well-origins
            uid_parent_ids = [x['UID'] for x in well_origins_query]
            # Provide the UIDs to wellbores endpoint
            wellbores_query = d2.query(
                dataset='wellbores',
                deleteddate='null',
                pagesize=100000,
                uidparent=d2.in_(uid_parent_ids)
            )

        :param items: list or generator of values to provide to in() filter function
        :type items: list
        :return: str to provide to DirectAccessV2 `query` method
        """
        if not isinstance(items, list):
            raise TypeError(
                "Argument provided was not a list. Type provided: {}".format(
                    type(items)
                )
            )
        return "in({})".format(",".join([str(x) for x in items]))

    def to_dataframe(self, dataset, converters=None, log_progress=True, **options):
        """
        Write query results to a pandas Dataframe with properly set dtypes and index columns.

        This works by requesting the DDL for `dataset` and manipulating the text to build a list of dtypes, date columns
        and the index column(s). It then makes a query request for `dataset` to ensure we know the exact fields
        to expect, (ie, if `fields` was a provided query parameter and the result will have fewer fields than the DDL).

        For endpoints with composite primary keys, a pandas MultiIndex is created.

        This method is potentially fragile. The API's `docs` feature is preferable but not yet available on all
        endpoints.

        Query results are written to a temporary CSV file and then read into the dataframe. The CSV is removed
        afterwards.

        pandas version 0.24.0 or higher is required for use of the Int64 dtype allowing integers with NaN values. It is
        not possible to coerce missing values for columns of dtype bool and so these are set to `object` dtype.

        ::

            d2 = DirectAccessV2(client_id, client_secret)
            # Create a Texas permits dataframe, removing commas from Survey names and replacing the state
            # abbreviation with the complete name.
            df = d2.to_dataframe(
                dataset='permits',
                deleteddate='null',
                pagesize=100000,
                stateprovince='TX',
                converters={
                    'StateProvince': lambda x: 'TEXAS',
                    'Survey': lambda x: x.replace(',', '')
                }
            )
            df.head(10)

        :param dataset: a valid dataset name. See the Developer API documentation for valid values
        :type dataset: str
        :param converters: Dict of functions for converting values in certain columns.
            Keys can either be integers or column labels.
        :type converters: dict
        :param log_progress: whether to log progress. if True, log a message with current written count
        :type log_progress: bool
        :param options: query parameters as keyword arguments
        :return: pandas dataframe
        """
        try:
            import pandas
        except ImportError:
            raise Exception(
                "pandas not installed. This method requires pandas >= 0.24.0"
            )

        ddl = self.ddl(dataset, database="mssql")

        try:
            index_col = re.findall(r"PRIMARY KEY \(([a-z0-9_,]*)\)", ddl)[0].split(",")
        except IndexError:
            index_col = None

        self.logger.debug("index_col: {}".format(index_col))
        # Map column name -> SQL type, dropping the trailing comma of each line.
        ddl = {
            x.split(" ")[0]: x.split(" ")[1][:-1]
            for x in ddl.split("\n")[1:]
            if x and "CONSTRAINT" not in x
        }

        # Probe with pagesize=1 to discover the exact fields in the response,
        # then restore the caller's pagesize for the real query below.
        pagesize = options.pop("pagesize") if "pagesize" in options else None
        try:
            filter_ = OrderedDict(
                sorted(
                    next(self.query(dataset, pagesize=1, **options)).items(),
                    key=lambda x: x[0],
                )
            ).keys()
            self.logger.debug(
                "Fields retrieved from query response: {}".format(
                    json.dumps(list(filter_), indent=2, default=str)
                )
            )
        except StopIteration:
            raise Exception("No results returned from query")

        # Discard pagination state left over from the probe query.
        self.links = None

        if pagesize:
            options["pagesize"] = pagesize

        try:
            index_col = [
                x for x in filter_ if x.upper() in [y.upper() for y in index_col]
            ]
            if index_col and len(index_col) == 1:
                index_col = index_col[0]
        except (IndexError, TypeError) as e:
            self.logger.warning("Could not discover index col(s): {}".format(e))
            index_col = None
        self.logger.debug("index_col: {}".format(index_col))

        date_cols = [k for k, v in ddl.items() if v.startswith("DATE") and k in filter_]
        self.logger.debug("date columns:\n{}".format(json.dumps(date_cols, indent=2)))

        # Normalize parameterized SQL types (e.g. VARCHAR(50)) to bare names.
        for k, v in ddl.items():
            if k in filter_:
                if v.startswith("VARCHAR"):
                    ddl[k] = "VARCHAR"
                elif v.startswith("DOUBL"):
                    ddl[k] = "DOUBLE"

        dtypes_mapping = {
            "TEXT": "object",
            "NUMERIC": "float64",
            "REAL": "float64",
            "DOUBLE": "float64",
            "DATETIME": "object",
            "SMALLINT": "Int64",
            "INT": "Int64",
            "INTEGER": "Int64",
            "BIGINT": "Int64",
            "VARCHAR": "object",
            "DATE": "object"
        }
        dtypes = {k: dtypes_mapping[v] for k, v in ddl.items() if k in filter_}
        self.logger.debug("dtypes:\n{}".format(json.dumps(dtypes, indent=2)))

        t = mkdtemp()
        self.logger.debug("Created temporary directory: " + t)

        query = self.query(dataset, **options)

        try:
            chunks = pandas.read_csv(
                filepath_or_buffer=self.to_csv(
                    query,
                    os.path.join(t, "{}.csv".format(uuid4().hex)),
                    delimiter="|",
                    log_progress=log_progress,
                ),
                sep="|",
                dtype=dtypes,
                index_col=index_col,
                parse_dates=date_cols,
                chunksize=options.get("pagesize", 100000),
                converters=converters,
            )
            df = pandas.concat(chunks)
            return df
        finally:
            rmtree(t)
            self.logger.debug("Removed temporary directory")

    def query(self, dataset, **options):
        """
        Query Developer API dataset

        Accepts a dataset name and a variable number of keyword arguments that correspond to the fields specified in
        the 'Request Parameters' section for each dataset in the Developer API documentation.

        This method only supports the JSON output provided by the API and yields dicts for each record.

        The special ``paging`` option (default "true") disables following of
        next links when set to "false" (or False).

        :param dataset: a valid dataset name. See the Developer API documentation for valid values
        :param options: query parameters as keyword arguments
        :return: query response as generator
        """
        query_url = os.path.join(self.url, dataset)

        # Large in() filters are split so each request URL stays under the
        # ~2048 character limit; chunks are replayed one per request.
        query_chunks = None
        for field, v in options.items():
            if "in(" in str(v) and len(str(v)) > 1950:
                values = re.split(r"in\((.*?)\)", options[field])[1].split(",")
                # Size chunks by the *longest* value. The previous
                # len(max(values)) took the lexicographically greatest
                # value, which is not necessarily the longest and could
                # produce oversized URLs.
                chunksize = 1950 // max(len(x) for x in values)
                query_chunks = (field, [x for x in _chunks(values, chunksize)])

        # str() coercion lets callers pass paging=False as well as "false".
        paging = str(options.pop("paging")) if "paging" in options else "true"

        while True:
            if self.links:
                response = self.session.get(self.url[:-1] + self.links["next"]["url"])
            else:
                if query_chunks and query_chunks[1]:
                    options[query_chunks[0]] = self.in_(query_chunks[1].pop(0))

                response = self.session.get(query_url, params=options)

            if not response.ok:
                raise DAQueryException(
                    "Non-200 response: {} {}".format(response.status_code, response.text)
                )

            records = response.json()
            if isinstance(records, dict):
                records = [records]

            if not len(records):
                # Exhausted this page chain; move on to the next in() chunk
                # if any remain, otherwise we are done.
                self.links = None

                if query_chunks and query_chunks[1]:
                    continue

                break

            if "next" in response.links:
                self.links = response.links

            for record in records:
                yield record

            if self.links is None or paging.lower() == "false":
                break
474 |
475 |
class DirectAccessV2(BaseAPI):
    """Client for Enverus' Developer API Version 2"""

    url = "https://di-api.drillinginfo.com/v2/direct-access/"

    def __init__(
        self,
        client_id,
        client_secret,
        retries=5,
        backoff_factor=1,
        links=None,
        access_token=None,
        **kwargs
    ):
        """
        Enverus' Developer API Version 2 client

        API documentation and credentials can be found at: https://app.enverus.com/direct/#/api/explorer/v2/gettingStarted

        :param client_id: client id credential.
        :type client_id: str
        :param client_secret: client secret credential.
        :type client_secret: str
        :param retries: the number of attempts when retrying failed requests with status codes of 500, 502, 503 or 504
        :type retries: int
        :param backoff_factor: the factor to use when exponentially backing off prior to retrying a failed request
        :type backoff_factor: int
        :param links: a dictionary of prev and next links as provided by the python-requests Session.
            See https://requests.readthedocs.io/en/master/user/advanced/#link-headers
        :type links: dict
        :param access_token: an optional, pregenerated access token. If provided, the class instance will not
            automatically try to request a new access token.
        :type access_token: str
        :param kwargs: forwarded to BaseAPI (logger, log_level, verify, proxies)
        """
        super(DirectAccessV2, self).__init__(self.url, retries, backoff_factor, **kwargs)
        self.client_id = client_id
        self.client_secret = client_secret
        self.links = links
        self.access_token = access_token
        # Install the error-handling hook before any request is made so the
        # initial token request below also benefits from it.
        self.session.hooks["response"].append(self._check_response)

        if not self.access_token:
            self.access_token = self.get_access_token()["access_token"]
        else:
            self.session.headers["Authorization"] = "bearer {}".format(self.access_token)

    def get_access_token(self):
        """
        Get an access token from /tokens endpoint. Automatically sets the Authorization header on the class instance's
        session. Raises DAAuthException on error

        :return: token response as dict
        """
        if not (self.client_id and self.client_secret):
            raise DAAuthException(
                "CLIENT_ID and CLIENT_SECRET are required to generate an access token"
            )

        token_url = os.path.join(self.url, "tokens")

        basic_credentials = base64.b64encode(
            ":".join([self.client_id, self.client_secret]).encode()
        ).decode()
        self.session.headers["Authorization"] = "Basic {}".format(basic_credentials)
        self.session.headers["Content-Type"] = "application/x-www-form-urlencoded"

        response = self.session.post(token_url, params={"grant_type": "client_credentials"})
        payload = response.json()
        self.logger.debug("Token response: " + json.dumps(payload, indent=2))
        self.access_token = payload["access_token"]
        self.session.headers["Authorization"] = "bearer {}".format(self.access_token)
        return payload
551 |
552 |
class DeveloperAPIv3(BaseAPI):
    """Client for Enverus' Developer API Version 3"""

    url = "https://api.enverus.com/v3/direct-access/"

    def __init__(
        self,
        secret_key,
        retries=5,
        backoff_factor=1,
        links=None,
        access_token=None,
        **kwargs
    ):
        """
        Enverus' Developer API Version 3 client

        API documentation and credentials can be found at: https://app.enverus.com/direct/#/api/explorer/v3/gettingStarted

        :param secret_key: api key credential.
        :type secret_key: str
        :param retries: the number of attempts when retrying failed requests with status codes of 500, 502, 503 or 504
        :type retries: int
        :param backoff_factor: the factor to use when exponentially backing off prior to retrying a failed request
        :type backoff_factor: int
        :param links: a dictionary of prev and next links as provided by the python-requests Session.
            See https://requests.readthedocs.io/en/master/user/advanced/#link-headers
        :type links: dict
        :param access_token: an optional, pregenerated access token. If provided, the class instance will not
            automatically try to request a new access token.
        :type access_token: str
        :param kwargs: additional keyword arguments forwarded to :class:`BaseAPI`
        """
        super(DeveloperAPIv3, self).__init__(self.url, retries, backoff_factor, **kwargs)
        self.secret_key = secret_key
        self.links = links
        self.access_token = access_token
        # Response hook (defined on BaseAPI) inspects every response, e.g. to
        # react to auth failures before callers see them.
        self.session.hooks["response"].append(self._check_response)

        if self.access_token:
            self.session.headers["Authorization"] = "bearer {}".format(self.access_token)
        else:
            # No pregenerated token: fetch one now (also sets the session header).
            self.access_token = self.get_access_token()["token"]

    def get_access_token(self):
        """
        Get an access token from /tokens endpoint. Automatically sets the Authorization header on the class instance's
        session. Raises DAAuthException on error

        :return: token response as dict
        """
        if not self.secret_key:
            raise DAAuthException(
                "SECRET_KEY is required to generate an access token"
            )

        token_url = os.path.join(self.url, "tokens")

        self.session.headers["Content-Type"] = "application/json"

        response = self.session.post(token_url, json={"secretKey": self.secret_key})

        # Fail with the documented exception type instead of a KeyError when
        # the token endpoint rejects the secret key.
        if not response.ok:
            raise DAAuthException(
                "Error getting token. Code: {} Message: {}".format(
                    response.status_code, response.text
                )
            )

        token_response = response.json()  # parse once instead of three times
        self.logger.debug("Token response: " + json.dumps(token_response, indent=2))

        self.access_token = token_response["token"]
        self.session.headers["Authorization"] = "bearer {}".format(self.access_token)

        return token_response

    def is_omit_header_next_link(self, **options):
        """
        Apply any caller-supplied request headers in ``options["_headers"]`` to the
        session and report whether pagination links were requested in the response
        body instead of the Link header (``X-Omit-Header-Next-Links: true``).

        :param options: query options possibly containing a ``_headers`` dict
        :return: True if the X-Omit-Header-Next-Links header is set to "true"
        :rtype: bool
        """
        omit = False
        if "_headers" in options:
            for (k, v) in options.get("_headers").items():
                # Side effect kept from the original implementation: headers are
                # installed on the session while scanning them.
                self.session.headers[k] = v
                if k.lower() == "x-omit-header-next-links" and v.lower() == "true":
                    omit = True

        return omit

    @staticmethod
    def parse_links(links_obj):
        """
        Parse the body-level ``links`` object returned when header next-links are
        omitted, into the same ``{rel: {...}}`` shape python-requests produces for
        the Link header.

        :param links_obj: dict containing a "next" Link-header-formatted string
        :return: dict keyed by link rel (or url when rel is absent)
        """
        result = {}
        # .get avoids a KeyError when the API returns no "next" link at all.
        if links_obj.get("next"):
            links = parse_header_links(links_obj["next"])

            for link in links:
                key = link.get("rel") or link.get("url")
                result[key] = link

        return result

    def query(self, dataset, **options):
        """
        Query Developer API dataset

        Accepts a dataset name and a variable number of keyword arguments that correspond to the fields specified in
        the 'Request Parameters' section for each dataset in the Developer API documentation.

        This method only supports the JSON output provided by the API and yields dicts for each record.

        :param dataset: a valid dataset name. See the Developer API documentation for valid values
        :param options: query parameters as keyword arguments, _headers dict - request headers.
        :return: query response as generator
        """
        request_headers = None
        omit_header_next_link = False
        if "_headers" in options:
            omit_header_next_link = self.is_omit_header_next_link(**options)
            request_headers = options.pop("_headers")

        query_url = os.path.join(self.url, dataset)

        # Break very long in(...) filters into multiple requests so each URL
        # stays under the ~1950 character limit.
        query_chunks = None
        for field, v in options.items():
            if "in(" in str(v) and len(str(v)) > 1950:
                values = re.split(r"in\((.*?)\)", options[field])[1].split(",")
                # Bug fix: the longest value's length must bound the chunk size.
                # max(values) alone is the *lexicographically* greatest string,
                # whose length is unrelated to the maximum length.
                longest = len(max(values, key=len))
                chunksize = int(floor(1950 / longest))
                query_chunks = (field, [x for x in _chunks(values, chunksize)])

        paging = options.pop("paging") if "paging" in options else "true"

        while True:
            if self.links:
                # Follow a previously captured "next" link (path is relative,
                # so strip the trailing slash from the base url).
                response = self.session.get(self.url[:-1] + self.links["next"]["url"], headers=request_headers)
            else:
                if query_chunks and query_chunks[1]:
                    # Substitute the next chunk of the oversized in(...) filter.
                    options[query_chunks[0]] = self.in_(query_chunks[1].pop(0))

                response = self.session.get(query_url, params=options, headers=request_headers)

            if not response.ok:
                raise DAQueryException(
                    "Non-200 response: {} {}".format(response.status_code, response.text)
                )

            records = response.json()
            # When header next-links are omitted, the payload is an envelope of
            # {"links": ..., "data": [...]} rather than a bare record list.
            if omit_header_next_link and records["links"]:
                self.links = self.parse_links(records["links"])
                records = records["data"]

            if isinstance(records, dict):
                records = [records]

            if not len(records):
                # Page exhausted: reset pagination, then either move on to the
                # next in(...) chunk or finish entirely.
                self.links = None

                if query_chunks and query_chunks[1]:
                    continue

                break

            if not omit_header_next_link and "next" in response.links:
                self.links = response.links

            for record in records:
                yield record

            if self.links is None or paging.lower() == "false":
                break
711 |
--------------------------------------------------------------------------------