├── docs ├── _config.yml ├── api.rst ├── index.rst ├── Makefile ├── usage.rst └── conf.py ├── setup.cfg ├── requirements.txt ├── examples ├── README.md ├── client_side_filtering.py └── multi_processing.py ├── LICENSE.md ├── .gitignore ├── .circleci └── config.yml ├── setup.py ├── tests └── test___init__.py ├── README.md └── enverus_developer_api └── __init__.py /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [metadata] 5 | license_file = LICENSE.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.5.1,<3 2 | unicodecsv==0.14.1 3 | 4 | pandas~=1.5.3 5 | setuptools~=65.3.0 6 | urllib3~=1.26.14 -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | ### enverus-developer-api Examples 2 | 3 | Basic example workflows are provided as a reference here. 4 | 5 | If you'd like to see a usage example, please [open an issue](https://github.com/enverus-ea/enverus-developer-api/issues/new). 6 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | 2 | API documentation 3 | ================= 4 | 5 | DirectAccessV1 6 | -------------- 7 | 8 | .. autoclass:: directaccess.DirectAccessV1 9 | :members: query, to_csv 10 | :special-members: 11 | 12 | DirectAccessV2 13 | -------------- 14 | 15 | .. 
autoclass:: directaccess.DirectAccessV2 16 | :members: get_access_token, ddl, docs, count, in_, query, to_csv, to_dataframe 17 | :special-members: 18 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. direct-access-py documentation master file, created by 2 | sphinx-quickstart on Sun Jan 19 07:51:00 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to direct-access-py's documentation! 7 | ============================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | usage 14 | api 15 | 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Cole Howard 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .idea/ 7 | requirements-dev.txt 8 | *.csv 9 | *.sql 10 | scripts/ 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *,cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # IPython Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | *.iml 97 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | deploy: 4 | docker: 5 | - image: 
circleci/python:3.6 6 | steps: 7 | - checkout 8 | 9 | - restore_cache: 10 | key: v1-dependency-cache-{{ checksum "setup.py" }} 11 | 12 | - run: 13 | name: install python dependencies 14 | command: | 15 | python3 -m venv venv 16 | . venv/bin/activate 17 | python3 -m pip install -r requirements.txt 18 | python3 -m pip install twine 19 | 20 | - save_cache: 21 | key: v1-dependency-cache-{{ checksum "setup.py" }} 22 | paths: 23 | - "venv" 24 | 25 | - run: 26 | name: verify git tag vs. version 27 | command: | 28 | python3 -m venv venv 29 | . venv/bin/activate 30 | python setup.py verify 31 | 32 | - run: 33 | name: init .pypirc 34 | command: | 35 | echo -e "[pypi]" >> ~/.pypirc 36 | echo -e "username = $PYPI_USERNAME" >> ~/.pypirc 37 | echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc 38 | 39 | - run: 40 | name: create packages 41 | command: | 42 | python3 setup.py sdist bdist_wheel 43 | 44 | - run: 45 | name: upload to pypi 46 | command: | 47 | . venv/bin/activate 48 | twine upload dist/* 49 | 50 | workflows: 51 | version: 2 52 | build_and_deploy: 53 | jobs: 54 | - deploy: 55 | filters: 56 | tags: 57 | only: /[0-9]+(\.[0-9]+)*/ 58 | branches: 59 | ignore: /.*/ 60 | -------------------------------------------------------------------------------- /examples/client_side_filtering.py: -------------------------------------------------------------------------------- 1 | """ 2 | client_side_filtering.py 3 | 4 | This example demonstrates using client-side filtering to query on columns that 5 | aren't filterable via the API. While there's no speed up using 6 | this method, we're able to query the API responses down in a memory-efficient way 7 | and without loading unneeded records into our workflow. 8 | 9 | Consider this the equivalent of a full table scan in a database. 10 | 11 | In the sample below, we're requesting all records in Texas and without DeletedDates in batches of 10k. 
12 | Then, we're filtering the responses down to those records that have had their 13 | production allocated using Drillinginfo's production allocation algorithm and 14 | where LowerPerf values exist and are greater than or equal to 2000 and UpperPerf 15 | values exist and are less than or equal to 3000. 16 | """ 17 | import os 18 | try: # Use the memory-efficient ifilter function available in itertools for Python 2 19 | from itertools import ifilter as filter 20 | except ImportError: # The built in filter function returns a generator in Python 3 21 | pass 22 | 23 | from enverus_developer_api import DirectAccessV2 24 | 25 | # Initialize our Direct Access object 26 | d2 = DirectAccessV2( 27 | client_id=os.getenv('DIRECTACCESS_CLIENT_ID'), 28 | client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET') 29 | ) 30 | 31 | # Build the API query 32 | query = d2.query('producing-entities', pagesize=10000, deleteddate='eq(null)', state='TX') 33 | 34 | # Build the client-side filter 35 | rows = filter(lambda x: 36 | x['AllocPlus'] == 'Y' 37 | and x['LowerPerf'] is not None 38 | and x['LowerPerf'] >= 2000 39 | and x['UpperPerf'] is not None 40 | and x['UpperPerf'] <= 3000, 41 | query) 42 | 43 | # Execute the query and filter the responses 44 | # Note that there will be periods of apparent inactivity while records we don't need are tossed 45 | for row in rows: 46 | print(row) 47 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import sys 5 | from setuptools import setup, find_packages 6 | from setuptools.command.install import install 7 | 8 | VERSION = '3.2.0' 9 | 10 | 11 | class VerifyVersionCommand(install): 12 | description = 'verify that git tag matches VERSION prior to publishing to pypi' 13 | 14 | def run(self): 15 | tag = os.getenv('CIRCLE_TAG') 16 | 17 | if tag != VERSION: 18 | 
info = 'Git tag: {0} does not match the version of this app: {1}'.format( 19 | tag, VERSION 20 | ) 21 | sys.exit(info) 22 | 23 | 24 | def read(fname): 25 | with open(fname) as fp: 26 | content = fp.read() 27 | return content 28 | 29 | 30 | pandas = [ 31 | 'pandas>=0.24.0' 32 | ] 33 | 34 | setup( 35 | name='enverus-developer-api', 36 | version=VERSION, 37 | description='Enverus Developer API Python Client', 38 | long_description=read('README.md'), 39 | long_description_content_type='text/markdown', 40 | author='Direct Access', 41 | author_email='directaccess@enverus.com', 42 | url='https://github.com/enverus-ea/enverus-developer-api', 43 | license='MIT', 44 | keywords=['enverus', 'drillinginfo', 'directaccess', 'oil', 'gas'], 45 | packages=find_packages(exclude=('test*', )), 46 | package_dir={'enverus_developer_api': 'enverus_developer_api'}, 47 | install_requires=[ 48 | 'requests>=2.5.1,<3', 49 | 'unicodecsv==0.14.1', 50 | 'urllib3>=1.26.14', 51 | ], 52 | extras_require={'pandas': pandas}, 53 | cmdclass={ 54 | 'verify': VerifyVersionCommand, 55 | }, 56 | classifiers=[ 57 | 'Development Status :: 5 - Production/Stable', 58 | 'Intended Audience :: Developers', 59 | 'Intended Audience :: End Users/Desktop', 60 | 'License :: OSI Approved :: MIT License', 61 | 'Operating System :: OS Independent', 62 | 'Programming Language :: Python :: 2', 63 | 'Programming Language :: Python :: 3' 64 | ] 65 | ) 66 | -------------------------------------------------------------------------------- /examples/multi_processing.py: -------------------------------------------------------------------------------- 1 | """ 2 | multi_processing.py 3 | 4 | This example demonstrates concurrent loading of Drillinginfo datasets via Python's multiprocessing module. 5 | 6 | The DirectAccessV2 class accepts an optional access_token keyword argument (beginning in version 1.2.0). 7 | When provided, an initial authentication request will not be made. 
We still provide our API Key, Client ID 8 | and Client Secret to the class so that the access token can be refreshed if needed. 9 | 10 | In the sample below, we simultaneously write three CSVs from the producing-entities, well-rollups and permits 11 | API endpoints. This results in much faster loading time than when done sequentially. 12 | """ 13 | import os 14 | import csv 15 | from multiprocessing import Process 16 | 17 | from enverus_developer_api import DirectAccessV2 18 | 19 | # Retrieve our access token 20 | ACCESS_TOKEN = DirectAccessV2( 21 | client_id=os.getenv('DIRECTACCESS_CLIENT_ID'), 22 | client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET') 23 | ).access_token 24 | 25 | 26 | def load(endpoint, **options): 27 | """ 28 | A generic load function that will be called by each of the three processes. 29 | 30 | :param endpoint: the Direct Access API endpoint 31 | :param options: the query parameters to provide on the endpoint 32 | :return: 33 | """ 34 | # Create a DirectAccessV2 client within the function, providing it our already existing access token 35 | # and thus avoiding unnecessary authentication calls 36 | client = DirectAccessV2( 37 | client_id=os.getenv('DIRECTACCESS_CLIENT_ID'), 38 | client_secret=os.getenv('DIRECTACCESS_CLIENT_SECRET'), 39 | access_token=ACCESS_TOKEN 40 | ) 41 | 42 | count = None 43 | with open(endpoint + '.csv', mode='w') as f: 44 | writer = csv.writer(f) 45 | for i, row in enumerate(client.query(endpoint, **options), start=1): 46 | count = i 47 | if count == 1: 48 | writer.writerow(row.keys()) 49 | writer.writerow(row.values()) 50 | 51 | if count % options.get('pagesize', 100000) == 0: 52 | print('Wrote {} records for {}'.format(count, endpoint)) 53 | 54 | print('Completed writing {}. 
Final count: {}'.format(endpoint, count)) 55 | return 56 | 57 | 58 | def main(): 59 | procs = list() 60 | well_rollups_process = Process( 61 | target=load, 62 | kwargs=dict( 63 | endpoint='well-rollups', 64 | pagesize=10000, 65 | deleteddate='eq(null)' 66 | ) 67 | ) 68 | procs.append(well_rollups_process) 69 | 70 | producing_entity_process = Process( 71 | target=load, 72 | kwargs=dict( 73 | endpoint='producing-entities', 74 | pagesize=100000, 75 | deleteddate='eq(null)' 76 | ) 77 | ) 78 | procs.append(producing_entity_process) 79 | 80 | permits_process = Process( 81 | target=load, 82 | kwargs=dict( 83 | endpoint='permits', 84 | pagesize=100000, 85 | deleteddate='eq(null)' 86 | ) 87 | ) 88 | procs.append(permits_process) 89 | 90 | [x.start() for x in procs] 91 | [x.join() for x in procs] 92 | return 93 | 94 | 95 | if __name__ == '__main__': 96 | main() 97 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | 2 | Usage Guide 3 | =========== 4 | This module is a thin wrapper around Enverus' Developer API (formerly known as Direct Access). 5 | It handles authentication and token management, pagination and network-related 6 | error handling/retries. It also provides a simple, convenient method to write 7 | results to CSV. 8 | 9 | ``direct-access-py`` is built and tested on Python 3.6 but should work on Python 2.7 and up. 10 | 11 | 12 | Installation 13 | ############ 14 | 15 | The easiest way to install ``direct-access-py`` is from the `Python Package Index 16 | `_ using ``pip``: 17 | 18 | .. code-block:: bash 19 | 20 | $ pip install directaccess 21 | 22 | To install it manually, simply download the repository from Github: 23 | 24 | .. 
code-block:: bash 25 | 26 | $ git clone https://github.com/wchatx/direct-access-py.git 27 | $ cd directaccess/ 28 | $ python setup.py install 29 | 30 | Notes 31 | ##### 32 | The ``directaccess`` module only supports the JSON format from the API. The ``query`` method 33 | returns a generator of API responses as dictionaries. 34 | 35 | Version 2 of the API uses "soft deletes". Records marked as deleted will have a populated 36 | ``DeletedDate`` field. If these records are not important for your workflow, you should always 37 | provide ``deleteddate='null'`` as a keyword argument to the V2 ``query`` method 38 | 39 | It is also important to note that your API credentials should be treated like any other password. 40 | Take care to not check them into public code repositories or expose them outside of your organization. 41 | 42 | If you find a problem with this module, have a feature request or just need a little help getting started, 43 | please `open an issue `_! If you're having 44 | trouble with the Enverus Drillinginfo Developer API, you should `contact support 45 | `_. 46 | 47 | Quick Start 48 | ########### 49 | 50 | Direct Access Version 1 51 | *********************** 52 | For version 1 of the API, create an instance of the DirectAccessV1 class and provide it your API key 53 | 54 | .. code-block:: python 55 | 56 | from directaccess import DirectAccessV1 57 | 58 | d1 = DirectAccessV1(api_key='your-api-key') 59 | 60 | .. warning:: 61 | 62 | Direct Access Version 1 will reach the end of its life in July, 2020. 63 | Please upgrade your application as Version 1 will be inaccessible after that date. 64 | A future version of this module will drop support for Version 1. 65 | 66 | Provide the query method the dataset as the first argument and any query parameters as keyword arguments. 67 | See valid dataset names and query params in the Direct Access documentation. 68 | The query method returns a generator of API responses as dicts. 69 | 70 | .. 
code-block:: python 71 | 72 | for row in d1.query('legal-leases', county_parish='Reeves', state_province='TX'): 73 | print(row) 74 | 75 | Direct Access Version 2 76 | *********************** 77 | For version 2 of the API, create an instance of the DirectAccessV2 class, providing it your API key, client id and client secret. 78 | The returned access token will be available as an attribute on the instance (``d2.access_token``) and the Authorization 79 | header is set automatically 80 | 81 | .. code-block:: python 82 | 83 | from directaccess import DirectAccessV2 84 | 85 | d2 = DirectAccessV2( 86 | api_key='your-api-key', 87 | client_id='your-client-id', 88 | client_secret='your-client-secret', 89 | ) 90 | 91 | 92 | Like with the V1 class, provide the query method the dataset and query params. All query parameters must match the valid 93 | parameters found in the Direct Access documentation and be passed as keyword arguments. 94 | 95 | .. code-block:: python 96 | 97 | for row in d2.query('well-origins', county='REEVES', pagesize=10000): 98 | print(row) 99 | 100 | 101 | Version 2 Concepts 102 | ################## 103 | 104 | Filter Functions 105 | **************** 106 | Direct Access version 2 supports filter functions. These can be passed as strings on the keyword arguments. 107 | 108 | Some common filters are greater than (``gt()``), less than (``lt()``), ``null``, not null (``not(null)``) and 109 | between (``btw()``). 110 | See the Direct Access documentation for a list of all available filters. 111 | 112 | .. 
code-block:: python 113 | 114 | # Get well records updated after 2018-08-01 and without deleted dates 115 | for row in d2.query('well-origins', updateddate='gt(2018-08-01)', deleteddate='null'): 116 | print(row) 117 | 118 | # Get permit records with approved dates between 2018-03-01 and 2018-06-01 119 | for row in d2.query('permits', approveddate='btw(2018-03-01,2018-06-01)'): 120 | print(row) 121 | 122 | Fields keyword 123 | ************** 124 | You can use the ``fields`` keyword to limit the returned fields in your queries. 125 | This has the benefit of limiting the API responses to only those fields needed for your 126 | workflow and will significantly improve the speed of your queries. 127 | 128 | .. code-block:: python 129 | 130 | for row in d2.query('rigs', fields='DrillType,LeaseName,PermitDepth'): 131 | print(row) 132 | 133 | Escaping 134 | ******** 135 | When making requests containing certain characters like commas, use a backslash to escape them. 136 | 137 | .. code-block:: python 138 | 139 | # Escaping the comma before LLC 140 | for row in d2.query('producing-entities', curropername='PERCUSSION PETROLEUM OPERATING\, LLC'): 141 | print(row) 142 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'directaccess' 23 | copyright = '2021' 24 | author = 'directaccess' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.viewcode', 44 | 'sphinx.ext.githubpages', 45 | ] 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | # The suffix(es) of source filenames. 51 | # You can specify multiple suffix as a list of string: 52 | # 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = '.rst' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # The language for content autogenerated by Sphinx. Refer to documentation 60 | # for a list of supported languages. 61 | # 62 | # This is also used if you do content translation via gettext catalogs. 63 | # Usually you set "language" from the command line for these cases. 64 | language = None 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path. 69 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 70 | 71 | # The name of the Pygments (syntax highlighting) style to use. 
72 | pygments_style = None 73 | 74 | 75 | # -- Options for HTML output ------------------------------------------------- 76 | 77 | # The theme to use for HTML and HTML Help pages. See the documentation for 78 | # a list of builtin themes. 79 | # 80 | html_theme = 'alabaster' 81 | 82 | # Theme options are theme-specific and customize the look and feel of a theme 83 | # further. For a list of options available for each theme, see the 84 | # documentation. 85 | # 86 | # html_theme_options = {} 87 | 88 | # Add any paths that contain custom static files (such as style sheets) here, 89 | # relative to this directory. They are copied after the builtin static files, 90 | # so a file named "default.css" will overwrite the builtin "default.css". 91 | html_static_path = ['_static'] 92 | 93 | # Custom sidebar templates, must be a dictionary that maps document names 94 | # to template names. 95 | # 96 | # The default sidebars (for documents that don't match any pattern) are 97 | # defined by theme itself. Builtin themes are using these templates by 98 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 99 | # 'searchbox.html']``. 100 | # 101 | # html_sidebars = {} 102 | 103 | 104 | # -- Options for HTMLHelp output --------------------------------------------- 105 | 106 | # Output file base name for HTML help builder. 107 | htmlhelp_basename = 'direct-access-pydoc' 108 | 109 | 110 | # -- Options for LaTeX output ------------------------------------------------ 111 | 112 | latex_elements = { 113 | # The paper size ('letterpaper' or 'a4paper'). 114 | # 115 | # 'papersize': 'letterpaper', 116 | 117 | # The font size ('10pt', '11pt' or '12pt'). 118 | # 119 | # 'pointsize': '10pt', 120 | 121 | # Additional stuff for the LaTeX preamble. 122 | # 123 | # 'preamble': '', 124 | 125 | # Latex figure (float) alignment 126 | # 127 | # 'figure_align': 'htbp', 128 | } 129 | 130 | # Grouping the document tree into LaTeX files. 
List of tuples 131 | # (source start file, target name, title, 132 | # author, documentclass [howto, manual, or own class]). 133 | latex_documents = [ 134 | (master_doc, 'direct-access-py.tex', 'direct-access-py Documentation', 135 | 'Direct Access', 'manual'), 136 | ] 137 | 138 | 139 | # -- Options for manual page output ------------------------------------------ 140 | 141 | # One entry per manual page. List of tuples 142 | # (source start file, name, description, authors, manual section). 143 | man_pages = [ 144 | (master_doc, 'direct-access-py', 'direct-access-py Documentation', 145 | [author], 1) 146 | ] 147 | 148 | 149 | # -- Options for Texinfo output ---------------------------------------------- 150 | 151 | # Grouping the document tree into Texinfo files. List of tuples 152 | # (source start file, target name, title, author, 153 | # dir menu entry, description, category) 154 | texinfo_documents = [ 155 | (master_doc, 'direct-access-py', 'direct-access-py Documentation', 156 | author, 'direct-access-py', 'One line description of project.', 157 | 'Miscellaneous'), 158 | ] 159 | 160 | 161 | # -- Options for Epub output ------------------------------------------------- 162 | 163 | # Bibliographic Dublin Core info. 164 | epub_title = project 165 | 166 | # The unique identifier of the text. This can be a ISBN number 167 | # or the project homepage. 168 | # 169 | # epub_identifier = '' 170 | 171 | # A unique identification for the text. 172 | # 173 | # epub_uid = '' 174 | 175 | # A list of files that should not be packed into the epub file. 
176 | epub_exclude_files = ['search.html'] 177 | 178 | 179 | # -- Extension configuration ------------------------------------------------- 180 | -------------------------------------------------------------------------------- /tests/test___init__.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import unittest 3 | from unittest import TestCase 4 | import os 5 | import logging 6 | from tempfile import TemporaryFile, mkdtemp 7 | from pandas.api.types import is_datetime64_ns_dtype, is_float_dtype, is_int64_dtype, is_object_dtype 8 | from multiprocessing import Process 9 | 10 | from enverus_developer_api import ( 11 | DeveloperAPIv3, 12 | DirectAccessV2, 13 | DADatasetException, 14 | DAQueryException, 15 | DAAuthException 16 | ) 17 | 18 | LOG_LEVEL = logging.DEBUG 19 | if os.environ.get("GITHUB_SHA"): 20 | LOG_LEVEL = logging.ERROR 21 | 22 | 23 | def set_token_v2(): 24 | if not os.environ.get("DIRECTACCESS_TOKEN"): 25 | os.environ["DIRECTACCESS_TOKEN"] = DirectAccessV2( 26 | client_id=os.environ.get("DIRECTACCESS_CLIENT_ID"), 27 | client_secret=os.environ.get("DIRECTACCESS_CLIENT_SECRET"), 28 | api_key=os.environ.get("DIRECTACCESS_API_KEY"), 29 | ).access_token 30 | return 31 | 32 | 33 | def set_token_v3(): 34 | if not os.environ.get("DIRECTACCESSV3_TOKEN"): 35 | os.environ["DIRECTACCESSV3_TOKEN"] = DeveloperAPIv3( 36 | secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"), 37 | # url="https://api.dev.enverus.com/" 38 | ).access_token 39 | return 40 | 41 | 42 | def create_developerapi_v3(): 43 | return DeveloperAPIv3( 44 | secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"), 45 | access_token=os.environ.get("DIRECTACCESSV3_TOKEN"), 46 | # url="https://api.dev.enverus.com/", 47 | retries=5, 48 | backoff_factor=10, 49 | log_level=LOG_LEVEL 50 | ) 51 | 52 | 53 | def create_directaccess_v2(): 54 | return DirectAccessV2( 55 | api_key=os.environ.get("DIRECTACCESS_API_KEY"), 56 | 
client_id=os.environ.get("DIRECTACCESS_CLIENT_ID"), 57 | client_secret=os.environ.get("DIRECTACCESS_CLIENT_SECRET"), 58 | access_token=os.environ.get("DIRECTACCESS_TOKEN"), 59 | retries=5, 60 | backoff_factor=10, 61 | log_level=LOG_LEVEL 62 | ) 63 | 64 | 65 | def proc_query(dataset): 66 | v3 = create_developerapi_v3() 67 | resp = v3.query(dataset, deleteddate="null") 68 | next(resp) 69 | TestCase.assertTrue(resp) 70 | return 71 | 72 | 73 | class TestEnverusDeveloperAPI(TestCase): 74 | @classmethod 75 | def setUpClass(cls) -> None: 76 | set_token_v3() 77 | cls.v3 = create_developerapi_v3() 78 | 79 | set_token_v2() 80 | cls.v2 = create_directaccess_v2() 81 | 82 | def test_missing_secret_key_v3(self): 83 | with self.assertRaises(DAAuthException): 84 | DeveloperAPIv3(secret_key=None, log_level=LOG_LEVEL) 85 | 86 | def test_query_v3(self): 87 | query = self.v3.query("casings", pagesize=10, deleteddate="null") 88 | records = list() 89 | for i, row in enumerate(query, start=1): 90 | # print(row) 91 | records.append(row) 92 | if i % 30 == 0: 93 | break 94 | self.assertTrue(len(records) > 0, "test_query_v3 records list empty") 95 | 96 | def test_query_v3_omit_header_next_link(self): 97 | query = self.v3.query("casings", pagesize=10, deleteddate="null", _headers={'X-Omit-Header-Next-Links': 'true'}) 98 | records = list() 99 | for i, row in enumerate(query, start=1): 100 | records.append(row) 101 | if i % 30 == 0: 102 | break 103 | self.assertTrue(len(records) > 0, "test_query_v3 records list empty") 104 | 105 | def test_is_omit_header_next_link(self): 106 | is_omit_next_link = self.v3.is_omit_header_next_link(_headers={'X-Omit-Header-Next-Links': 'true'}) 107 | self.assertTrue(is_omit_next_link, "test_is_omit_header_next_link should contain omit header") 108 | 109 | is_omit_next_link = self.v3.is_omit_header_next_link(_headers={'Omit-Header-Next-Links': 'true'}) 110 | self.assertTrue(not is_omit_next_link, "test_is_omit_header_next_link should not contain omit header") 111 | 
112 | def test_parse_links(self): 113 | links = self.v3.parse_links( 114 | {"next": "; rel='next'"}) 115 | self.assertTrue(links["next"]["url"], "/economics?action=next&next_page=WellID+%3C+840600005436298&pagesize=50") 116 | 117 | def test_docs_v3(self): 118 | docs = self.v3.docs("casings") 119 | self.assertTrue(docs) 120 | self.assertIsInstance(docs, list) 121 | 122 | def test_ddl_v3(self): 123 | ddl = self.v3.ddl("casings", database="pg") 124 | with TemporaryFile(mode="w+") as f: 125 | f.write(ddl) 126 | f.seek(0) 127 | for line in f: 128 | self.assertTrue(line.startswith("CREATE TABLE casings")) 129 | break 130 | 131 | def test_ddl_invalid_db_v3(self): 132 | with self.assertRaises(DAQueryException): 133 | self.v3.ddl("casings", database="invalid") 134 | 135 | def test_count_v3(self): 136 | count = self.v3.count("wells", updateddate="ge(2021-05-01)", StateProvince="in(TX,LA,WY)") 137 | self.assertIsNotNone(count) 138 | self.assertIsInstance(count, int) 139 | 140 | def test_count_invalid_dataset_v3(self): 141 | with self.assertRaises(DADatasetException): 142 | self.v3.count("invalid") 143 | 144 | def test_token_refresh_v3(self): 145 | v3 = DeveloperAPIv3( 146 | secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"), 147 | access_token="invalid", 148 | # url="https://api.dev.enverus.com/", 149 | retries=5, 150 | backoff_factor=10, 151 | log_level=LOG_LEVEL 152 | ) 153 | 154 | invalid_token = v3.access_token 155 | count = v3.count("rigs", deleteddate="null") 156 | query = v3.query("rigs", pagesize=10000, deleteddate="null") 157 | self.assertTrue(len([x for x in query]) == count) 158 | self.assertTrue(invalid_token != v3.access_token) 159 | 160 | def test_csv_v3(self): 161 | tempdir = mkdtemp() 162 | path = os.path.join(tempdir, "rigs.csv") 163 | 164 | dataset = "rigs" 165 | options = dict(pagesize=10000, deleteddate="null") 166 | 167 | count = self.v3.count(dataset, **options) 168 | query = self.v3.query(dataset, **options) 169 | self.v3.to_csv(query, path, 
                       log_progress=True, delimiter=",", quoting=csv.QUOTE_MINIMAL)

        with open(path, mode="r") as f:
            reader = csv.reader(f)
            row_count = len([x for x in reader])
        # to_csv writes a header row, hence count + 1
        self.assertTrue(row_count == (count + 1))

    def test_dataframe_v3(self):
        df = self.v3.to_dataframe("rigs", pagesize=1000, deleteddate="null")

        # Check index is set to API endpoint "primary keys"
        self.assertListEqual(df.index.names, ["CompletionID", "WellID"])

        # Check object dtypes
        self.assertTrue(is_object_dtype(df.API_UWI))
        self.assertTrue(is_object_dtype(df.ActiveStatus))

        # Check datetime64 dtypes
        self.assertTrue(is_datetime64_ns_dtype(df.DeletedDate))
        self.assertTrue(is_datetime64_ns_dtype(df.SpudDate))
        self.assertTrue(is_datetime64_ns_dtype(df.UpdatedDate))

        # Check Int64 dtypes
        self.assertTrue(is_int64_dtype(df.RatedWaterDepth))
        self.assertTrue(is_int64_dtype(df.RatedHP))

        # Check float dtypes
        self.assertTrue(is_float_dtype(df.RigLatitudeWGS84))
        self.assertTrue(is_float_dtype(df.RigLongitudeWGS84))

    def test_multiple_processes_v3(self):
        # Launch two child processes, one for rigs and one for casings

        procs = [
            Process(target=proc_query, kwargs=dict(dataset="rigs")),
            Process(target=proc_query, kwargs=dict(dataset="casings"))
        ]

        [x.start() for x in procs]
        [x.join() for x in procs]
        # NOTE(review): child exit codes are not checked, so a failure inside
        # proc_query would not fail this test — consider asserting
        # x.exitcode == 0 for each process after join.

    def test_enter_exit(self):
        # Context-manager protocol: session is live inside the block and
        # cleared by __exit__.
        with DeveloperAPIv3(secret_key=os.environ.get("DIRECTACCESSV3_API_KEY"),
                            access_token=os.environ.get("DIRECTACCESSV3_TOKEN")) as api:
            self.assertIsInstance(api, DeveloperAPIv3)
            self.assertIsNotNone(api.session)

        self.assertIsNone(api.session)

    # ******************** DirectAccessV2 Test Cases **********************

    def test_missing_client_id_v2(self):
        with self.assertRaises(DAAuthException):
DirectAccessV2(client_id=None, 223 | client_secret=os.environ.get("DIRECTACCESS_CLIENT_SECRET"), 224 | log_level=LOG_LEVEL) 225 | 226 | def test_missing_client_secret_v2(self): 227 | with self.assertRaises(DAAuthException): 228 | DirectAccessV2(client_id=os.environ.get("DIRECTACCESS_CLIENT_ID"), 229 | client_secret=None, 230 | log_level=LOG_LEVEL) 231 | 232 | def test_query_v2(self): 233 | query = self.v2.query("rigs", pagesize=10, deleteddate="null") 234 | records = list() 235 | for i, row in enumerate(query, start=1): 236 | # print(row) 237 | records.append(row) 238 | if i % 30 == 0: 239 | break 240 | self.assertTrue(len(records) > 0, "test_query_v2 records list empty") 241 | 242 | 243 | if __name__ == '__main__': 244 | unittest.main() 245 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # enverus-developer-api 2 | 3 | [![PyPI version](https://badge.fury.io/py/enverus-developer-api.svg)](https://badge.fury.io/py/enverus-developer-api) 4 | 5 | A thin wrapper around Enverus' Developer API. Handles authentication and token management, pagination and 6 | network-related error handling/retries. 7 | 8 | This module is built and tested on Python 3.9 but should work on Python 2.7 and up. 9 | 10 | 11 | ## Install 12 | ```commandline 13 | pip install enverus-developer-api 14 | ``` 15 | 16 | ## Clients 17 | 18 | ### Developer API - Version 3 19 | DirectAccess has been rebranded as DeveloperAPI. For version 3 of the API, create an instance of the DeveloperAPIv3 class, providing it your secret_key (not the same as the v2 api_key). 20 | The returned access token will be available as an attribute on the instance (v3.access_token) and the Authorization 21 | header is set automatically. 
```python
from enverus_developer_api import DeveloperAPIv3

v3 = DeveloperAPIv3(secret_key='')
```
Your secret_key can be generated, retrieved and revoked at https://app.enverus.com/provisioning/directaccess

The Developer API Version 3 endpoint documentation can be found at https://app.enverus.com/direct/#/api/explorer/v3/gettingStarted

### Direct Access - Version 2
For version 2 of the API, create an instance of the DirectAccessV2 class, providing it your API key, client id and client secret.
The returned access token will be available as an attribute on the instance (d2.access_token) and the Authorization
header is set automatically
```python
from enverus_developer_api import DirectAccessV2

d2 = DirectAccessV2(
    client_id='',
    client_secret='',
)
```
The Direct Access Version 2 endpoint documentation can be found at https://app.enverus.com/direct/#/api/explorer/v2/gettingStarted

## Usage

The functionality outlined below exists for **both** DeveloperAPIv3 and DirectAccessV2 clients.

Only 1 instance of the client needs to be created to perform all your queries. It can execute multiple simultaneous requests if needed,
and will automatically refresh the access_token for the Authorization header if expired.
An access_token is valid for 8 hours, and there is a rate limit on the number of access_tokens that can be requested per minute,
which is why we recommend creating and reusing a single DeveloperAPIv3 client instance for all of your querying.

Provide the query method the dataset and query params. All query parameters must match the valid
Request Parameters found in the Developer API documentation for a given dataset and be passed as keyword arguments.
56 | 57 | ```python 58 | for row in v3.query('wells', county='REEVES', deleteddate='null'): 59 | print(row) 60 | ``` 61 | 62 | ### Filter functions 63 | Developer API supports filter functions. These can be passed as strings on the keyword arguments. 64 | 65 | Some common filters are greater than (`gt()`), less than (`lt()`), null, not null (`not(null)`) and between (`btw()`). 66 | See the Developer API documentation for a list of all available filters. 67 | 68 | ```python 69 | # Get well records updated after 2018-08-01 and without deleted dates 70 | for row in v3.query('wells', updateddate='gt(2018-08-01)', deleteddate='null'): 71 | print(row) 72 | 73 | # Get permit records with approved dates between 2018-03-01 and 2018-06-01 74 | for row in v3.query('rigs', spuddate='btw(2018-03-01,2018-06-01)'): 75 | print(row) 76 | ``` 77 | 78 | You can use the `fields` keyword to limit the returned fields in your request. 79 | 80 | ```python 81 | for row in v3.query('rigs', fields='PermitApprovedDate,LeaseName,RigName_Number,MD_FT'): 82 | print(row) 83 | ``` 84 | 85 | ### Escaping 86 | When making requests containing certain characters like commas, use a backslash to escape them. 87 | 88 | ```python 89 | # Escaping the comma before LLC 90 | for row in v3.query('rigs', envoperator='PERCUSSION PETROLEUM OPERATING\, LLC'): 91 | print(row) 92 | ``` 93 | 94 | ### Network request handling 95 | This module exposes functionality in python-requests for modifying network requests handling, namely: 96 | * retries and backoff 97 | * network proxies 98 | * ssl verification 99 | 100 | #### Retries and backoff 101 | Specify the number of retry attempts in `retries` and the backoff factor in `backoff_factor`. 
See the urllib3 102 | [Retry](https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.Retry) utility API for more info 103 | ```python 104 | from enverus_developer_api import DeveloperAPIv3 105 | 106 | v3 = DeveloperAPIv3( 107 | secret_key='', 108 | retries=5, 109 | backoff_factor=1 110 | ) 111 | ``` 112 | 113 | You can specify a network proxy by passing a dictionary with the host and port of your proxy to `proxies`. See the 114 | [proxies](https://requests.readthedocs.io/en/master/user/advanced/#proxies) section of the python-requests documentation 115 | for more info. 116 | ```python 117 | from enverus_developer_api import DeveloperAPIv3 118 | 119 | v3 = DeveloperAPIv3( 120 | secret_key='', 121 | proxies={'https': 'http://10.10.1.10:1080'} 122 | ) 123 | ``` 124 | 125 | Finally, if you're in an environment that provides its own SSL certificates that might not be in your trusted store, 126 | you can choose to ignore SSL verification altogether. This is typically not a good idea and you should seek to resolve 127 | certificate errors instead of ignore them. 128 | ```python 129 | from enverus_developer_api import DeveloperAPIv3 130 | 131 | v3 = DeveloperAPIv3( 132 | secret_key='', 133 | verify=False 134 | ) 135 | ``` 136 | 137 | ## Functions 138 | 139 | ### docs 140 | Returns a sample response for a given dataset 141 | ```python 142 | docs = v3.docs("casings") 143 | ``` 144 | 145 | ### ddl 146 | Returns a CREATE TABLE DDL statement for a given dataset. 
Must specify either 147 | "mssql" for MS SQL Server or "pg" for PostgreSQL as the database argument 148 | ```python 149 | from tempfile import TemporaryFile 150 | 151 | ddl = v3.ddl("casings", database="pg") 152 | with TemporaryFile(mode="w+") as f: 153 | f.write(ddl) 154 | f.seek(0) 155 | for line in f: 156 | print(line, end='') 157 | ``` 158 | 159 | ### count 160 | Returns the count of records for a given dataset and query options in the 161 | X-QUERY-RECORD-COUNT response header value 162 | ```python 163 | count = v3.count("rigs", deleteddate="null") 164 | ``` 165 | 166 | ### query 167 | Accepts a dataset name, request headers and a variable number of keyword arguments that correspond to the fields specified 168 | in the ‘Request Parameters’ section for each dataset in the Developer API documentation. 169 | 170 | This method only supports the JSON output provided by the API and yields dicts for each record 171 | ```python 172 | for row in v3.query("rigs", pagesize=1000, deleteddate="null"): 173 | print(row) 174 | ``` 175 | ##### X-Omit-Header-Next-Links header 176 | Omit the Next Link in the Response Header section, add the Next Link to the JSON Response Body. 177 | ```python 178 | for row in v3.query("rigs", pagesize=1000, deleteddate="null", _headers={'X-Omit-Header-Next-Links': 'true'}): 179 | print(row) 180 | ``` 181 | 182 | ### to_csv 183 | Write query results to CSV. Optional keyword arguments are provided to the csv writer object, 184 | allowing control over delimiters, quoting, etc. 
The default is comma-separated with csv.QUOTE_MINIMAL
```python
import csv, os
from tempfile import mkdtemp

tempdir = mkdtemp()
path = os.path.join(tempdir, "rigs.csv")

dataset = "rigs"
options = dict(pagesize=10000, deleteddate="null")

query = v3.query(dataset, **options)
v3.to_csv(query, path, log_progress=True, delimiter=",", quoting=csv.QUOTE_MINIMAL)

with open(path, mode="r") as f:
    reader = csv.reader(f)
```

### to_dataframe
Write query results to a pandas Dataframe with properly set dtypes and index columns.

This works by requesting the DDL for a given dataset and manipulating the text to build a list of dtypes, date columns and the index column(s).
It then makes a query request for the dataset to ensure we know the exact fields to expect,
(ie, if fields was a provided query parameter and the result will have fewer fields than the DDL).

For endpoints with composite primary keys, a pandas MultiIndex is created.

Query results are written to a temporary CSV file and then read into the dataframe. The CSV is removed afterwards.

Pandas version 0.24.0 or higher is required for use of the Int64 dtype allowing integers with NaN values.
It is not possible to coerce missing values for columns of dtype bool and so these are set to dtype object.
215 | 216 | You will need to have pandas installed to use the to_dataframe function 217 | ```python 218 | pip install pandas 219 | ``` 220 | 221 | Create a pandas dataframe from a dataset query 222 | ```python 223 | df = v3.to_dataframe("rigs", pagesize=10000, deleteddate="null") 224 | ``` 225 | 226 | Create a Texas rigs dataframe, replacing the state abbreviation with the complete name 227 | and removing commas from Operator names 228 | ```python 229 | df = v3.to_dataframe( 230 | dataset="rigs", 231 | deleteddate="null", 232 | pagesize=100000, 233 | stateprovince="TX", 234 | converters={ 235 | "StateProvince": lambda x: "TEXAS", 236 | "ENVOperator": lambda x: x.replace(",", "") 237 | } 238 | ) 239 | df.head(10) 240 | ``` 241 | Reset the index of the DataFrame, and use the default one instead. [reset_index()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html) 242 | ```python 243 | df = v3.to_dataframe(dataset, pagesize=10000, ENVBasin="SACRAMENTO") 244 | df.reset_index(inplace=True) 245 | df.head(10) 246 | ``` -------------------------------------------------------------------------------- /enverus_developer_api/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import time 4 | import json 5 | import base64 6 | import logging 7 | from uuid import uuid4 8 | from math import floor 9 | from shutil import rmtree 10 | from tempfile import mkdtemp 11 | from collections import OrderedDict 12 | 13 | import requests 14 | import unicodecsv as csv 15 | from requests.adapters import HTTPAdapter 16 | from urllib3.util.retry import Retry 17 | from requests.utils import parse_header_links 18 | 19 | 20 | class DAAuthException(Exception): 21 | pass 22 | 23 | 24 | class DAQueryException(Exception): 25 | pass 26 | 27 | 28 | class DADatasetException(Exception): 29 | pass 30 | 31 | 32 | def _chunks(iterable, n): 33 | """ 34 | Return iterables with n members from an input iterable 35 
| From: http://stackoverflow.com/a/8290508 36 | :param iterable: the iterable to chunk up 37 | :param n: max number of items in chunked list 38 | """ 39 | l = len(iterable) 40 | for ndx in range(0, l, n): 41 | yield iterable[ndx: min(ndx + n, l)] 42 | 43 | 44 | class BaseAPI(object): 45 | 46 | def __init__(self, url, retries, backoff_factor, **kwargs): 47 | self.url = url 48 | self.retries = retries 49 | self.backoff_factor = backoff_factor 50 | 51 | if kwargs.get("logger"): 52 | self.logger = kwargs.pop("logger").getChild("directaccess") 53 | else: 54 | logging.basicConfig( 55 | level=kwargs.pop("log_level", logging.INFO), 56 | format="%(asctime)s %(name)s %(levelname)-8s %(message)s", 57 | datefmt="%a, %d %b %Y %H:%M:%S", 58 | ) 59 | self.logger = logging.getLogger("directaccess") 60 | 61 | self.session = requests.Session() 62 | self.session.verify = kwargs.pop("verify", True) 63 | self.session.proxies = kwargs.pop("proxies", {}) 64 | self.session.headers["User-Agent"] = "enverus-developer-api" 65 | 66 | self._status_forcelist = [500, 502, 503, 504] 67 | retries = Retry( 68 | total=self.retries, 69 | backoff_factor=self.backoff_factor, 70 | allowed_methods=frozenset(["GET", "POST", "HEAD"]), 71 | status_forcelist=self._status_forcelist, 72 | ) 73 | self.session.mount("https://", HTTPAdapter(max_retries=retries)) 74 | 75 | def __enter__(self): 76 | return self 77 | 78 | def __exit__(self, exc_type, exc_val, exc_tb): 79 | if self.session: 80 | self.session.close() 81 | self.session = None 82 | 83 | def get_access_token(self): 84 | raise NotImplementedError 85 | 86 | def to_csv(self, query, path, log_progress=True, **kwargs): 87 | """ 88 | Write query results to CSV. Optional keyword arguments are 89 | provided to the csv writer object, allowing control over 90 | delimiters, quoting, etc. 
The default is comma-separated 91 | with csv.QUOTE_MINIMAL 92 | 93 | :: 94 | 95 | d2 = DirectAccessV2(client_id, client_secret) 96 | query = d2.query('rigs', deleteddate='null', pagesize=1500) 97 | # Write tab-separated file 98 | d2.to_csv(query, '/path/to/rigs.csv', delimiter='\\t') 99 | 100 | :param query: DirectAccessV1 or DirectAccessV2 query object 101 | :param path: relative or absolute filesystem path for created CSV 102 | :type path: str 103 | :param log_progress: whether to log progress. if True, log a message with current written count 104 | :type log_progress: bool 105 | :return: the newly created CSV file path 106 | """ 107 | with open(path, mode="wb") as f: 108 | writer = csv.writer(f, **kwargs) 109 | count = None 110 | for i, row in enumerate(query, start=1): 111 | row = OrderedDict(sorted(row.items(), key=lambda t: t[0])) 112 | count = i 113 | if count == 1: 114 | writer.writerow(row.keys()) 115 | writer.writerow(row.values()) 116 | 117 | if log_progress and i % 100000 == 0: 118 | self.logger.info( 119 | "Wrote {count} records to file {path}".format( 120 | count=count, path=path 121 | ) 122 | ) 123 | self.logger.info( 124 | "Completed writing CSV file to {path}. Final count {count}".format( 125 | path=path, count=count 126 | ) 127 | ) 128 | return path 129 | 130 | def _check_response(self, response, *args, **kwargs): 131 | """ 132 | Check responses for errors. 133 | 134 | If the API returns 400, there was a problem with the provided parameters. Raise DAQueryException. 135 | If the API returns 400 and request was to /tokens endpoint, likely bad credentials. Raise DAAuthException. 136 | If the API returns 401, refresh access token if found and resend request. 137 | If the API returns 403 and request was to /tokens endpoint, sleep for 60 seconds and try again. 138 | If the API returns 404, an invalid dataset name was provided. Raise DADatasetException. 139 | 140 | 5xx errors are handled by the session's Retry configuration. 
Debug logging returns retries remaining. 141 | 142 | :param response: a requests Response object 143 | :type response: requests.Response 144 | :param args: 145 | :param kwargs: 146 | :return: 147 | """ 148 | 149 | if not response.ok: 150 | self.logger.debug("Response status code: " + str(response.status_code)) 151 | self.logger.debug("Response text: " + response.text) 152 | if response.status_code == 400: 153 | if "tokens" in response.url: 154 | raise DAAuthException( 155 | "Error getting token. Code: {} Message: {}".format( 156 | response.status_code, response.text 157 | ) 158 | ) 159 | raise DAQueryException(response.text) 160 | if response.status_code == 401: 161 | self.logger.warning("Access token expired. Acquiring a new one...") 162 | self.get_access_token() 163 | request = response.request 164 | request.headers["Authorization"] = self.session.headers["Authorization"] 165 | return self.session.send(request) 166 | if response.status_code == 403 and "tokens" in response.url: 167 | self.logger.warning("Throttled token request. Waiting 60 seconds...") 168 | self.retries -= 1 169 | self.logger.debug("Retries remaining: {}".format(self.retries)) 170 | time.sleep(60) 171 | request = response.request 172 | return self.session.send(request) 173 | if response.status_code == 404: 174 | raise DADatasetException("Invalid dataset name provided") 175 | if response.status_code in self._status_forcelist: 176 | self.logger.debug("Retries remaining: {}".format(self.retries)) 177 | 178 | def ddl(self, dataset, database): 179 | """ 180 | Get DDL statement for dataset. Must provide exactly one of mssql or pg for database argument. 181 | mssql is Microsoft SQL Server, pg is PostgreSQL 182 | 183 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 184 | :param database: one of mssql or pg. 
185 | :return: a DDL statement from the Developer API service as str 186 | """ 187 | ddl_url = os.path.join(self.url, dataset) 188 | self.logger.debug("Retrieving DDL for dataset: " + dataset) 189 | response = self.session.get(ddl_url, params=dict(ddl=database)) 190 | return response.text 191 | 192 | def docs(self, dataset): 193 | """ 194 | Get docs for dataset 195 | 196 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 197 | :return: docs response for dataset as list[dict] or None if ?docs is not supported on the dataset 198 | """ 199 | docs_url = os.path.join(self.url, dataset) 200 | self.logger.debug("Retrieving docs for dataset: " + dataset) 201 | response = self.session.get(docs_url, params=dict(docs=True)) 202 | if response.status_code == 501: 203 | self.logger.warning( 204 | "docs and example params are not yet supported on dataset {dataset}".format( 205 | dataset=dataset 206 | ) 207 | ) 208 | return 209 | return response.json() 210 | 211 | def count(self, dataset, **options): 212 | """ 213 | Get the count of records given a dataset and query options 214 | 215 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 216 | :param options: query parameters as keyword arguments 217 | :return: record count as int 218 | """ 219 | head_url = os.path.join(self.url, dataset) 220 | response = self.session.head(head_url, params=options) 221 | count = response.headers.get("X-Query-Record-Count") 222 | return int(count) 223 | 224 | @staticmethod 225 | def in_(items): 226 | """ 227 | Helper method for providing values to the API's `in()` filter function. 228 | 229 | The API currently supports GET requests to dataset endpoints. When providing a large list of values to the API's 230 | `in()` filter function, it's necessary to chunk up the values to avoid URLs larger than 2048 characters. 
The 231 | `query` method of this class handles the chunking transparently; this helper method simply stringifies 232 | the input items into the correct syntax. 233 | 234 | :: 235 | 236 | d2 = DirectAccessV2(client_id, client_secret) 237 | # Query well-origins 238 | well_origins_query = d2.query( 239 | dataset='well-origins', 240 | deleteddate='null', 241 | pagesize=100000 242 | ) 243 | # Get all UIDs for well-origins 244 | uid_parent_ids = [x['UID'] for x in well_origins_query] 245 | # Provide the UIDs to wellbores endpoint 246 | wellbores_query = d2.query( 247 | dataset='wellbores', 248 | deleteddate='null', 249 | pagesize=100000, 250 | uidparent=d2.in_(uid_parent_ids) 251 | ) 252 | 253 | :param items: list or generator of values to provide to in() filter function 254 | :type items: list 255 | :return: str to provide to DirectAccessV2 `query` method 256 | """ 257 | if not isinstance(items, list): 258 | raise TypeError( 259 | "Argument provided was not a list. Type provided: {}".format( 260 | type(items) 261 | ) 262 | ) 263 | return "in({})".format(",".join([str(x) for x in items])) 264 | 265 | def to_dataframe(self, dataset, converters=None, log_progress=True, **options): 266 | """ 267 | Write query results to a pandas Dataframe with properly set dtypes and index columns. 268 | 269 | This works by requesting the DDL for `dataset` and manipulating the text to build a list of dtypes, date columns 270 | and the index column(s). It then makes a query request for `dataset` to ensure we know the exact fields 271 | to expect, (ie, if `fields` was a provided query parameter and the result will have fewer fields than the DDL). 272 | 273 | For endpoints with composite primary keys, a pandas MultiIndex is created. 274 | 275 | This method is potentially fragile. The API's `docs` feature is preferable but not yet available on all 276 | endpoints. 277 | 278 | Query results are written to a temporary CSV file and then read into the dataframe. The CSV is removed 279 | afterwards. 
280 | 281 | pandas version 0.24.0 or higher is required for use of the Int64 dtype allowing integers with NaN values. It is 282 | not possible to coerce missing values for columns of dtype bool and so these are set to `object` dtype. 283 | 284 | :: 285 | 286 | d2 = DirectAccessV2(client_id, client_secret) 287 | # Create a Texas permits dataframe, removing commas from Survey names and replacing the state 288 | # abbreviation with the complete name. 289 | df = d2.to_dataframe( 290 | dataset='permits', 291 | deleteddate='null', 292 | pagesize=100000, 293 | stateprovince='TX', 294 | converters={ 295 | 'StateProvince': lambda x: 'TEXAS', 296 | 'Survey': lambda x: x.replace(',', '') 297 | } 298 | ) 299 | df.head(10) 300 | 301 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 302 | :type dataset: str 303 | :param converters: Dict of functions for converting values in certain columns. 304 | Keys can either be integers or column labels. 305 | :type converters: dict 306 | :param log_progress: whether to log progress. if True, log a message with current written count 307 | :type log_progress: bool 308 | :param options: query parameters as keyword arguments 309 | :return: pandas dataframe 310 | """ 311 | try: 312 | import pandas 313 | except ImportError: 314 | raise Exception( 315 | "pandas not installed. 
This method requires pandas >= 0.24.0" 316 | ) 317 | 318 | ddl = self.ddl(dataset, database="mssql") 319 | 320 | try: 321 | index_col = re.findall(r"PRIMARY KEY \(([a-z0-9_,]*)\)", ddl)[0].split(",") 322 | except IndexError: 323 | index_col = None 324 | 325 | self.logger.debug("index_col: {}".format(index_col)) 326 | ddl = { 327 | x.split(" ")[0]: x.split(" ")[1][:-1] 328 | for x in ddl.split("\n")[1:] 329 | if x and "CONSTRAINT" not in x 330 | } 331 | 332 | pagesize = options.pop("pagesize") if "pagesize" in options else None 333 | try: 334 | filter_ = OrderedDict( 335 | sorted( 336 | next(self.query(dataset, pagesize=1, **options)).items(), 337 | key=lambda x: x[0], 338 | ) 339 | ).keys() 340 | self.logger.debug( 341 | "Fields retrieved from query response: {}".format( 342 | json.dumps(list(filter_), indent=2, default=str) 343 | ) 344 | ) 345 | except StopIteration: 346 | raise Exception("No results returned from query") 347 | 348 | self.links = None 349 | 350 | if pagesize: 351 | options["pagesize"] = pagesize 352 | 353 | try: 354 | index_col = [ 355 | x for x in filter_ if x.upper() in [y.upper() for y in index_col] 356 | ] 357 | if index_col and len(index_col) == 1: 358 | index_col = index_col[0] 359 | except (IndexError, TypeError) as e: 360 | self.logger.warning("Could not discover index col(s): {}".format(e)) 361 | index_col = None 362 | self.logger.debug("index_col: {}".format(index_col)) 363 | 364 | date_cols = [k for k, v in ddl.items() if v.startswith("DATE") and k in filter_] 365 | self.logger.debug("date columns:\n{}".format(json.dumps(date_cols, indent=2))) 366 | 367 | for k, v in ddl.items(): 368 | if k in filter_: 369 | if v.startswith("VARCHAR"): 370 | ddl[k] = "VARCHAR" 371 | elif v.startswith("DOUBL"): 372 | ddl[k] = "DOUBLE" 373 | 374 | dtypes_mapping = { 375 | "TEXT": "object", 376 | "NUMERIC": "float64", 377 | "REAL": "float64", 378 | "DOUBLE": "float64", 379 | "DATETIME": "object", 380 | "SMALLINT": "Int64", 381 | "INT": "Int64", 382 | 
"INTEGER": "Int64", 383 | "BIGINT": "Int64", 384 | "VARCHAR": "object", 385 | "DATE": "object" 386 | } 387 | dtypes = {k: dtypes_mapping[v] for k, v in ddl.items() if k in filter_} 388 | self.logger.debug("dtypes:\n{}".format(json.dumps(dtypes, indent=2))) 389 | 390 | t = mkdtemp() 391 | self.logger.debug("Created temporary directory: " + t) 392 | 393 | query = self.query(dataset, **options) 394 | 395 | try: 396 | chunks = pandas.read_csv( 397 | filepath_or_buffer=self.to_csv( 398 | query, 399 | os.path.join(t, "{}.csv".format(uuid4().hex)), 400 | delimiter="|", 401 | log_progress=log_progress, 402 | ), 403 | sep="|", 404 | dtype=dtypes, 405 | index_col=index_col, 406 | parse_dates=date_cols, 407 | chunksize=options.get("pagesize", 100000), 408 | converters=converters, 409 | ) 410 | df = pandas.concat(chunks) 411 | return df 412 | finally: 413 | rmtree(t) 414 | self.logger.debug("Removed temporary directory") 415 | 416 | def query(self, dataset, **options): 417 | """ 418 | Query Developer API dataset 419 | 420 | Accepts a dataset name and a variable number of keyword arguments that correspond to the fields specified in 421 | the 'Request Parameters' section for each dataset in the Developer API documentation. 422 | 423 | This method only supports the JSON output provided by the API and yields dicts for each record. 424 | 425 | :param dataset: a valid dataset name. 
See the Developer API documentation for valid values 426 | :param options: query parameters as keyword arguments 427 | :return: query response as generator 428 | """ 429 | query_url = os.path.join(self.url, dataset) 430 | 431 | query_chunks = None 432 | for field, v in options.items(): 433 | if "in(" in str(v) and len(str(v)) > 1950: 434 | values = re.split(r"in\((.*?)\)", options[field])[1].split(",") 435 | chunksize = int(floor(1950 / len(max(values)))) 436 | query_chunks = (field, [x for x in _chunks(values, chunksize)]) 437 | 438 | paging = options.pop("paging") if "paging" in options else "true" 439 | 440 | while True: 441 | if self.links: 442 | response = self.session.get(self.url[:-1] + self.links["next"]["url"]) 443 | else: 444 | if query_chunks and query_chunks[1]: 445 | options[query_chunks[0]] = self.in_(query_chunks[1].pop(0)) 446 | 447 | response = self.session.get(query_url, params=options) 448 | 449 | if not response.ok: 450 | raise DAQueryException( 451 | "Non-200 response: {} {}".format(response.status_code, response.text) 452 | ) 453 | 454 | records = response.json() 455 | if isinstance(records, dict): 456 | records = [records] 457 | 458 | if not len(records): 459 | self.links = None 460 | 461 | if query_chunks and query_chunks[1]: 462 | continue 463 | 464 | break 465 | 466 | if "next" in response.links: 467 | self.links = response.links 468 | 469 | for record in records: 470 | yield record 471 | 472 | if self.links is None or paging.lower() == "false": 473 | break 474 | 475 | 476 | class DirectAccessV2(BaseAPI): 477 | """Client for Enverus' Developer API Version 2""" 478 | 479 | url = "https://di-api.drillinginfo.com/v2/direct-access/" 480 | 481 | def __init__( 482 | self, 483 | client_id, 484 | client_secret, 485 | retries=5, 486 | backoff_factor=1, 487 | links=None, 488 | access_token=None, 489 | **kwargs 490 | ): 491 | """ 492 | Enverus' Developer API Version 2 client 493 | 494 | API documentation and credentials can be found at: 
https://app.enverus.com/direct/#/api/explorer/v2/gettingStarted 495 | 496 | :param client_id: client id credential. 497 | :type client_id: str 498 | :param client_secret: client secret credential. 499 | :type client_secret: str 500 | :param retries: the number of attempts when retrying failed requests with status codes of 500, 502, 503 or 504 501 | :type retries: int 502 | :param backoff_factor: the factor to use when exponentially backing off prior to retrying a failed request 503 | :type backoff_factor: int 504 | :param links: a dictionary of prev and next links as provided by the python-requests Session. 505 | See https://requests.readthedocs.io/en/master/user/advanced/#link-headers 506 | :type dict 507 | :param access_token: an optional, pregenerated access token. If provided, the class instance will not 508 | automatically try to request a new access token. 509 | :type: access_token: str 510 | :param kwargs: 511 | """ 512 | super(DirectAccessV2, self).__init__(self.url, retries, backoff_factor, **kwargs) 513 | self.client_id = client_id 514 | self.client_secret = client_secret 515 | self.links = links 516 | self.access_token = access_token 517 | self.session.hooks["response"].append(self._check_response) 518 | 519 | if self.access_token: 520 | self.session.headers["Authorization"] = "bearer {}".format(self.access_token) 521 | else: 522 | self.access_token = self.get_access_token()["access_token"] 523 | 524 | def get_access_token(self): 525 | """ 526 | Get an access token from /tokens endpoint. Automatically sets the Authorization header on the class instance's 527 | session. 
Raises DAAuthException on error 528 | 529 | :return: token response as dict 530 | """ 531 | if not self.client_id or not self.client_secret: 532 | raise DAAuthException( 533 | "CLIENT_ID and CLIENT_SECRET are required to generate an access token" 534 | ) 535 | 536 | token_url = os.path.join(self.url, "tokens") 537 | 538 | self.session.headers["Authorization"] = "Basic {}".format( 539 | base64.b64encode( 540 | ":".join([self.client_id, self.client_secret]).encode() 541 | ).decode() 542 | ) 543 | self.session.headers["Content-Type"] = "application/x-www-form-urlencoded" 544 | 545 | payload = {"grant_type": "client_credentials"} 546 | response = self.session.post(token_url, params=payload) 547 | self.logger.debug("Token response: " + json.dumps(response.json(), indent=2)) 548 | self.access_token = response.json()["access_token"] 549 | self.session.headers["Authorization"] = "bearer {}".format(self.access_token) 550 | return response.json() 551 | 552 | 553 | class DeveloperAPIv3(BaseAPI): 554 | """Client for Enverus' Developer API Version 3""" 555 | 556 | url = "https://api.enverus.com/v3/direct-access/" 557 | 558 | def __init__( 559 | self, 560 | secret_key, 561 | retries=5, 562 | backoff_factor=1, 563 | links=None, 564 | access_token=None, 565 | **kwargs 566 | ): 567 | """ 568 | Enverus' Developer API Version 3 client 569 | 570 | API documentation and credentials can be found at: https://app.enverus.com/direct/#/api/explorer/v3/gettingStarted 571 | 572 | :param secret_key: api key credential. 573 | :type secret_key: str 574 | :param retries: the number of attempts when retrying failed requests with status codes of 500, 502, 503 or 504 575 | :type retries: int 576 | :param backoff_factor: the factor to use when exponentially backing off prior to retrying a failed request 577 | :type backoff_factor: int 578 | :param links: a dictionary of prev and next links as provided by the python-requests Session. 
579 | See https://requests.readthedocs.io/en/master/user/advanced/#link-headers 580 | :type dict 581 | :param access_token: an optional, pregenerated access token. If provided, the class instance will not 582 | automatically try to request a new access token. 583 | :type: access_token: str 584 | :param kwargs: 585 | """ 586 | super(DeveloperAPIv3, self).__init__(self.url, retries, backoff_factor, **kwargs) 587 | self.secret_key = secret_key 588 | self.links = links 589 | self.access_token = access_token 590 | self.session.hooks["response"].append(self._check_response) 591 | 592 | if self.access_token: 593 | self.session.headers["Authorization"] = "bearer {}".format(self.access_token) 594 | else: 595 | self.access_token = self.get_access_token()["token"] 596 | 597 | def get_access_token(self): 598 | """ 599 | Get an access token from /tokens endpoint. Automatically sets the Authorization header on the class instance's 600 | session. Raises DAAuthException on error 601 | 602 | :return: token response as dict 603 | """ 604 | 605 | if not self.secret_key: 606 | raise DAAuthException( 607 | "SECRET_KEY is required to generate an access token" 608 | ) 609 | 610 | token_url = os.path.join(self.url, "tokens") 611 | 612 | self.session.headers["Content-Type"] = "application/json" 613 | 614 | response = self.session.post(token_url, json={"secretKey": self.secret_key}) 615 | self.logger.debug("Token response: " + json.dumps(response.json(), indent=2)) 616 | 617 | self.access_token = response.json()["token"] 618 | self.session.headers["Authorization"] = "bearer {}".format(self.access_token) 619 | 620 | return response.json() 621 | 622 | def is_omit_header_next_link(self, **options): 623 | if "_headers" in options: 624 | for (k, v) in options.get("_headers").items(): 625 | self.session.headers[k] = v 626 | if k.lower() == "x-omit-header-next-links" and v.lower() == "true": 627 | return True 628 | 629 | return False 630 | 631 | @staticmethod 632 | def parse_links(links_obj): 633 
| result = {} 634 | if links_obj["next"]: 635 | links = parse_header_links(links_obj["next"]) 636 | 637 | for link in links: 638 | key = link.get("rel") or link.get("url") 639 | result[key] = link 640 | 641 | return result 642 | 643 | def query(self, dataset, **options): 644 | """ 645 | Query Developer API dataset 646 | 647 | Accepts a dataset name and a variable number of keyword arguments that correspond to the fields specified in 648 | the 'Request Parameters' section for each dataset in the Developer API documentation. 649 | 650 | This method only supports the JSON output provided by the API and yields dicts for each record. 651 | 652 | :param dataset: a valid dataset name. See the Developer API documentation for valid values 653 | :param options: query parameters as keyword arguments, _headers dict - request headers. 654 | :return: query response as generator 655 | """ 656 | request_headers = None 657 | omit_header_next_link = False 658 | if "_headers" in options: 659 | omit_header_next_link = self.is_omit_header_next_link(**options) 660 | request_headers = options.pop("_headers") 661 | 662 | query_url = os.path.join(self.url, dataset) 663 | 664 | query_chunks = None 665 | for field, v in options.items(): 666 | if "in(" in str(v) and len(str(v)) > 1950: 667 | values = re.split(r"in\((.*?)\)", options[field])[1].split(",") 668 | chunksize = int(floor(1950 / len(max(values)))) 669 | query_chunks = (field, [x for x in _chunks(values, chunksize)]) 670 | 671 | paging = options.pop("paging") if "paging" in options else "true" 672 | 673 | while True: 674 | if self.links: 675 | response = self.session.get(self.url[:-1] + self.links["next"]["url"], headers=request_headers) 676 | else: 677 | if query_chunks and query_chunks[1]: 678 | options[query_chunks[0]] = self.in_(query_chunks[1].pop(0)) 679 | 680 | response = self.session.get(query_url, params=options, headers=request_headers) 681 | 682 | if not response.ok: 683 | raise DAQueryException( 684 | "Non-200 response: 
{} {}".format(response.status_code, response.text) 685 | ) 686 | 687 | records = response.json() 688 | if omit_header_next_link and records["links"]: 689 | self.links = self.parse_links(records["links"]) 690 | records = records["data"] 691 | 692 | if isinstance(records, dict): 693 | records = [records] 694 | 695 | if not len(records): 696 | self.links = None 697 | 698 | if query_chunks and query_chunks[1]: 699 | continue 700 | 701 | break 702 | 703 | if not omit_header_next_link and "next" in response.links: 704 | self.links = response.links 705 | 706 | for record in records: 707 | yield record 708 | 709 | if self.links is None or paging.lower() == "false": 710 | break 711 | --------------------------------------------------------------------------------