├── .gitattributes
├── .github
    └── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── feature_request.md
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── conda-recipe
    └── meta.yaml
├── datamine
    ├── __init__.py
    ├── io.py
    ├── loaders
    │   ├── 1qbit.py
    │   ├── __init__.py
    │   ├── base.py
    │   ├── block.py
    │   ├── cryptocurrency.py
    │   ├── eod.py
    │   ├── eris.py
    │   ├── fx.py
    │   ├── govpx.py
    │   ├── liqtool.py
    │   ├── orbitalinsight.py
    │   ├── rsmetrics.py
    │   ├── sofr.py
    │   ├── sofrsr.py
    │   ├── telluslabs.py
    │   ├── tick.py
    │   └── voi.py
    └── utils.py
├── docs
    └── CME Query API's - EOD_Block_Tick_BBO - Google Docs.pdf
├── examples
    ├── Load Datamine Data Locally Example.ipynb
    └── images
    │   ├── BitcoinEndofDayValue.png
    │   └── BitcoinRTIndexValue.png
├── setup.cfg
└── setup.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | datamine/_version.py export-subst
2 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Browser [e.g. chrome, safari]
29 |  - Version [e.g. 22]
30 | 
31 | **Smartphone (please complete the following information):**
32 |  - Device: [e.g. iPhone6]
33 |  - OS: [e.g. iOS8.1]
34 |  - Browser [e.g. stock browser, safari]
35 |  - Version [e.g. 22]
36 | 
37 | **Additional context**
38 | Add any other context about the problem here.
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | examples/data/
 2 | 
 3 | # Byte-compiled / optimized / DLL files
 4 | __pycache__/
 5 | *.py[cod]
 6 | 
 7 | # C extensions
 8 | *.so
 9 | 
10 | # Distribution / packaging
11 | .Python
12 | env/
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | 
28 | # PyInstaller
29 | #  Usually these files are written by a python script from a template
30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *,cover
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | 
55 | # Sphinx documentation
56 | docs/_build/
57 | 
58 | # PyBuilder
59 | target/
60 | .ipynb_checkpoints/
61 | node_modules/
62 | screenshots/
63 | *.xunit.xml
64 | 
65 | # npm
66 | package-lock.json
67 | 
68 | data
69 | settings.json
70 | 
71 | # datamine logs
72 | datamine.log
73 | 
74 | # Spyder Project Files
75 | .spyproject
76 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: generic
 2 | sudo: false
 3 | 
 4 | branches:
 5 |   only:
 6 |   - master
 7 |   - "/^\\d+\\.\\d+.*$/"
 8 | 
 9 | env:
10 | os:
11 | - linux
12 | 
13 | install:
14 | - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
15 | - bash miniconda.sh -b -p $HOME/miniconda
16 | - source $HOME/miniconda/bin/activate
17 | - conda config --set always_yes yes --set changeps1 no --set auto_update_conda no
18 | - conda install conda conda-build conda-verify anaconda-client
19 | - conda info -a
20 | - printenv | sort
21 | 
22 | script:
23 | - conda build conda-recipe
24 | - conda build --test $HOME/miniconda/conda-bld/*/*.tar.bz2
25 | # We're not uploading these, but we can at least ensure that they build.
26 | - python setup.py sdist
27 | - python setup.py bdist_wheel
28 | 
29 | deploy:
30 | - provider: script
31 |   skip_cleanup: true
32 |   on:
33 |     branch: master
34 |     tags: true
35 |   script:
36 |   - anaconda --verbose --token $ANACONDA_TOKEN upload --user CME_Group $HOME/miniconda/conda-bld/*/*.tar.bz2
37 |     --force
38 | - provider: script
39 |   skip_cleanup: true
40 |   on:
41 |     branch: master
42 |     tags: false
43 |   script:
44 |   - anaconda --verbose --token $ANACONDA_TOKEN upload --user CME_Group --label
45 |     main $HOME/miniconda/conda-bld/*/*.tar.bz2 --force
46 | - provider: pypi
47 |   skip_cleanup: true
48 |   on:
49 |     branch: master
50 |     tags: false
51 |   username: "__token__"
52 |   password: $PYPI_TOKEN
53 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2018, CME Group
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # CME Datamine
  2 | [![Build Status](https://travis-ci.org/CMEGroup/datamine_python.svg?branch=master)](https://travis-ci.org/CMEGroup/datamine_python)
  3 | # Overview
  4 | 
  5 | CME Datamine is offered via a self-service cloud solution, where you can access more than
  6 | 450 terabytes of historical data almost instantaneously, using some of the most flexible
  7 | data delivery methods available. Extensively back-test strategies using real benchmark
  8 | markets that date back as far as the 1970s, to help you gauge profitability and risk.
  9 | 
 10 | This python package will support your rapid analysis by supplying a basic framework for
 11 | direct iteration with CME Datamine cloud system to accomplish the following tasks.
 12 | 
 13 | 1. Load your data item catalog which you have subscribed
 14 | 2. Download your data items to your local machines from the cloud
 15 | 3. Specific data items automatically structured into a Pandas dataframe from
 16 | your local copy.  This includes correct typing and other generic routines to support
 17 | your analysis needs.
 18 | 4. Examples of working with this data in Pandas via a collection of Ipyhon Notebook files.
 19 | 
 20 | # Installation
 21 | 
 22 | ## Conda
 23 | 
 24 | The easiest way to install this package is to do so in a
 25 | Python environment created with [Anaconda](https://www.anaconda.com/distribution/)
 26 | or its minimalist alternative [Miniconda](https://docs.conda.io/en/latest/miniconda.html).
 27 | Once this environment is installed and activated, simply run this command:
 28 | ```
 29 | conda install -c cme_group datamine
 30 | ```
 31 | 
 32 | ## PyPi
 33 | 
 34 | Installation from [PyPi](https://pypi.org/project/datamine/)
 35 | ```
 36 | pip install datamine
 37 | ```
 38 | 
 39 | ## From source
 40 | 
 41 | To install from source, clone this repository and execute
 42 | ```
 43 | pip install .
 44 | ```
 45 | If you wish to install the package in writable mode for development, do
 46 | ```
 47 | pip install -e .
 48 | ```
 49 | 
 50 | # Example usage
 51 | 
 52 | The following sections quickly outline some of the simple methods to access
 53 | CME Datamine data. For interactive use, we recommend the use of a
 54 | [Jupyter](https://jupyter.org) notebook or the
 55 | [JupyterLab](https://jupyterlab.readthedocs.io/en/latest) platform.
 56 | 
 57 | ## Load My Data Catalog Items
 58 | 
 59 | ```buildoutcfg
 60 | myDatamine = dm.DatamineCon(username='YOUR_CME_APP_ID', password='YOUR_CME_APP_PASSWORD', path='./data/')
 61 | #Get My Datamine Data Catalog
 62 | myDatamine.get_catalog(limit=1000)
 63 | # Review one of the data catalog items as supplied in dict format.  
 64 | myDatamine.data_catalog.popitem()
 65 | 
 66 | 
 67 | ```
 68 | 
 69 | ## Download Specific Data Products
 70 | You can request specific data products.  Current data products supported are as follows.
 71 | When requesting your data, you must specify the _dataset_ tag or leave it blank will request
 72 | all items in your catalog.  
 73 | 
 74 | ### CME Data Products
 75 | 
 76 | |  Data Set Name                | Data Type     | _dataset_ Tag  |
 77 | |---                            |---            |---|
 78 | |  CME Time and Sales           | Price         | TICK     |
 79 | |  CME  Market Depth MBO        | Price         | MBO  |
 80 | |  CME CF Crypto Currency       | Index         | CRYPTOCURRENCY  |
 81 | |  BrokerTec Top of Book       | Price         | NEXBROKERTECTOB  |
 82 | |  BrokerTec Depth of Book     | Price         | NEXBROKERTECDOB  |
 83 | |  BrokerTec Full Book         | Price         | NEXBROKERTECFOB  |
 84 | |  Eris PAI                     | Market Analytics | ERIS  |
 85 | |  SSTL INT Settlements         | Price         | STL  |
 86 | 
 87 | 
 88 | 
 89 | ### Third Party Data
 90 | 
 91 | |  Data Set Name                | Data Type     | _dataset_ Tag  |
 92 | |---                            |---            |---|
 93 | |  TellusLabs                   | Alternative - Ags             | TELLUSLABS  |
 94 | |  Orbital Insight              | Alternative - Energy          | ORBITALINSIGHT  |
 95 | |  Bantix Technologies          | Market Analytics - Options    | BANTIX  |
 96 | |  RS Metrics                   | Alternative - Metals          | RSMETRICS  |
 97 | |  1 Qbit                    | Market Analytics           | 1QBIT  |
 98 | 
 99 | 
100 | A complete list of data products can be reviewed on [CME Datamine]([https://datamine.cmegroup.com/#t=p&p=cme.dataHome)
101 | 
102 | 
103 | Example request for specific Data Sets using the _dataset_ tag.
104 | ```buildoutcfg
105 | myDatamine.get_catalog(dataset='CRYPTOCURRENCY', limit=1000)
106 | myDatamine.get_catalog(dataset='TICK', limit=1000)
107 | myDatamine.get_catalog(dataset='TELLUSLABS', limit=1000)
108 | myDatamine.get_catalog(dataset='RSMETRICS', limit=1000)
109 | ```
110 | 
111 | ## Use Bitcoin Information in Analysis
112 | The following example can be found in the [Load Datamine Data Locally Example Notebook](https://github.com/CMEGroup/datamine_python/blob/master/examples/Load%20Datamine%20Data%20Locally%20Example.ipynb)
113 | ```buildoutcfg
114 | myDatamine.get_catalog(dataset='CRYPTOCURRENCY', limit=1000)
115 | myDatamine.crypto_load()
116 | 
117 | #plot second interval index values for Bitcoin
118 | indexValue = myDatamine.crypto_DF.loc[myDatamine.crypto_DF['symbol'] =='BRTI','mdEntryPx'].plot(figsize=[15,5]);
119 | plt.title('Historical Bitcoin Intraday Reference Rate')
120 | plt.xlabel('Date')
121 | plt.ylabel('USD/BTC')
122 | plt.style.use('fivethirtyeight')
123 | plt.show()
124 | 
125 | ```
126 | ![Bitcoin RT Index Plot Example](https://github.com/CMEGroup/datamine_python/blob/master/examples/images/BitcoinRTIndexValue.png "Bitcoin Logo")
127 | 
128 | 
129 | ## Questions and Comments?
130 | Please use the Issues feature.
131 | 
132 | 
133 | ## Notice
134 | The information herein has been complied by CME Group for general informational and education purposes only and does not constitute trading advice or the solicitation of purchases or sale of futures, options, or swaps. The views in this video reflect solely those of the author and not necessarily those of CME Group or its affiliated institutions. All examples discussed are hypothetical situations, used for explanation purposes only, and should not be considered investment advice of the results of actual market experience. Although every attempt has been made to ensure the accuracy of the information herein, CME Group and its affiliates assume no responsibility for any errors or omissions. All data is sourced by CME Group unless otherwise stated. All matters pertaining to rules and specification herein are made subject to and are superseded by official CME, CBOT, NYMEX, and COMEX rules. Current rules should be consulted in all cases concerning contact specifications.
135 | 
136 | CME Group, the Globe Logo, CME, Globex, E-Mini, CME Direct, CME Datamine and Chicago Mercantile Exchange are trademarks of Chicago Mercantile Exchange Inc.  CBOT is a trademark of the Board of Trade of the City of Chicago, Inc.  NYMEX is a trademark of New York Mercantile Exchange, Inc.  COMEX is a trademark of Commodity Exchange, Inc. All other trademarks are the property of their respective owners.
137 | 


--------------------------------------------------------------------------------
/conda-recipe/meta.yaml:
--------------------------------------------------------------------------------
 1 | {% set data = load_setup_py_data() %}
 2 | 
 3 | package:
 4 |   name: datamine
 5 |   version: {{ data.get('version') }}
 6 | 
 7 | source:
 8 |   path: ../
 9 | 
10 | build:
11 |   number: 0
12 |   noarch: python
13 |   script:
14 |     - {{ PYTHON }} -m pip install . --no-deps --ignore-installed --no-cache-dir
15 | 
16 | requirements:
17 |   host:
18 |     - python {{ python }}
19 |     - pip
20 |   run:
21 |     - python
22 |     - requests
23 |     - urllib3
24 |     - pandas
25 |     - tqdm
26 |     - futures-compat
27 | 
28 | test:
29 |   imports:
30 |     - datamine
31 |     - datamine.io
32 |     - datamine.utils
33 |     - datamine.loaders
34 | 
35 | about:
36 |   home: https://github.com/CMEGroup/datamine_python
37 |   license: BSD 3-Clause
38 |   license_file: LICENSE
39 |   summary: 'Python connector for the CME DataMine service.'
40 | 


--------------------------------------------------------------------------------
/datamine/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/datamine/io.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Simple Python client for CME Group Datamine
  3 | 
  4 | https://datamine.cmegroup.com
  5 | 
  6 | .. moduleauthor:: Aaron Walters <aaron.walters@cmegroup.com>
  7 | 
  8 | """
  9 | 
 10 | import requests
 11 | import urllib3
 12 | import cgi
 13 | import os
 14 | import sys
 15 | from datetime import datetime
 16 | import logging
 17 | 
 18 | # Generate logger
 19 | logging.basicConfig(filename='datamine.log', filemode='w', format='%(levelname)s - %(asctime)s - %(message)s', level=logging.ERROR)
 20 | 
 21 | from .utils import tqdm_execute_tasks, MAX_WORKERS, logger
 22 | from .loaders import Loader
 23 | 
 24 | DEFAULT_URL = 'https://datamine.cmegroup.com/cme/api/v1'
 25 | NO_LIMIT = sys.maxsize
 26 | TIMEOUTS = (3.05, 60)
 27 | PAGE_SIZE = 1000
 28 | CHUNK_SIZE = 1024
 29 | 
 30 | 
 31 | def _url_params(url):
 32 |     parts = url.split('?', 1)
 33 |     if len(parts) == 1:
 34 |         return parts[0], None
 35 |     return parts[0], dict(map(lambda x: x.split('=', 1), parts[1].split('&')))
 36 | 
 37 | class RequestError(RuntimeError):
 38 |     pass
 39 | 
 40 | 
 41 | class DatamineCon(object):
 42 |     """
 43 |         This class operates with CME Datamine to retrieve your data catalog,
 44 |         download specific data onto a specified path, load the data from
 45 |         that path, and finally structure data
 46 | 
 47 |         Example usage::
 48 | 
 49 |         import datamine_io
 50 | 
 51 |         datamine = datamine_io.Datamine(user='CHANGE_ME', passwd='CHANGE_ME')
 52 |         datamine.getCatalog()
 53 |         datamine.loadCrypto()  #for crypto data sets
 54 | 
 55 |         datamine.debug = True # turn on debug logging
 56 |     """
 57 |     
 58 |     debug = False 
 59 | 
 60 |     def __init__(self, path='./', username=None, password=None,
 61 |                  url=DEFAULT_URL, threads=MAX_WORKERS):
 62 |         """creates the variables associated with the class
 63 | 
 64 |         :type path: string
 65 |         :param path: The URL path where you would files save on your local environment
 66 | 
 67 |         :type user: string
 68 |         :param user: CME Group Login User Name.  See https://www.cmegroup.com/market-data/datamine-api.html
 69 | 
 70 |         :type password: string
 71 |         :param password: CME Group APP Password for Datamine Services
 72 | 
 73 |         :type url: string
 74 |         :param url: The primary URL for CME Datamine API.
 75 | 
 76 |         :type url: int
 77 |         :param url: The number of threads for downloading files.
 78 |         """
 79 |         self.url = url
 80 | 
 81 |         # Leverage basic request/urllib3 functionality as much as possible:
 82 |         # Persistent sessions, connection pooling, retry management
 83 |         self.session = requests.Session()
 84 |         self.session.auth = requests.auth.HTTPBasicAuth(username, password)
 85 |         retry = urllib3.util.Retry(read=3, backoff_factor=2, status_forcelist=[400])
 86 |         adapter = requests.adapters.HTTPAdapter(max_retries=retry)
 87 |         self.session.mount('', adapter)
 88 | 
 89 |         self.path = path
 90 |         self.data_catalog = {}
 91 |         self._dataset = None
 92 |         self._limit = -1
 93 |         self.threads = threads
 94 | 
 95 |     def _call_api(self, endpoint, params, stream=False):
 96 |         url = self.url + '/' + endpoint
 97 |         param_str = '&'.join('{}={}'.format(*p) for p in params.items())
 98 |         logger.debug('_call_api: {}'.format(param_str))
 99 |         return self.session.get(url, timeout=TIMEOUTS, params=params, stream=stream)
100 | 
101 |     def download_file(self, fid):
102 |         """Download a single file denoted by the given FID.
103 | 
104 |            :type fid: string
105 |            :param fid: The FID of the file to be retrieved.
106 |         """
107 | 
108 |         if fid not in self.data_catalog:
109 |             raise RequestError('FID not found in the catalog: {}'.format(fid))
110 |         record = self.data_catalog[fid]
111 |         supplied_url, params = _url_params(record['url'])
112 |         assert supplied_url == self.url + '/download'
113 |         response = self._call_api('download', params, stream=True)
114 |         try:
115 |             # The filename is embedded in the Content-Disposition header
116 |             header = response.headers.get('content-disposition', '')
117 |             try:
118 |                 filename = cgi.parse_header(header)[1]['filename']
119 |             except Exception:
120 |                 filename = 'error.txt'
121 |                 print ('''File Handling Area, looking for Content-Disposition Header and Lacks a 'header'...''')
122 |                 print('Expected a "filename" entry in the Content-Disposition header found:\n  {}'.format(header))
123 |                 print('See log file for further detail.')
124 |                 logging.error(str(record['dataset']) + ' ' + str(supplied_url) + ' ' + ' ' + str(params) + ' ' + ('Expected a "filename" entry in the Content-Disposition header found:\n  {}'.format(header)))
125 |                 pass
126 |                              
127 |             dest_path = os.path.join(self.path, record['dataset'])
128 |             if not os.path.exists(dest_path):
129 |                 try:
130 |                     os.makedirs(dest_path)
131 |                 except:
132 |                     pass
133 |             abs_path = os.path.join(dest_path, os.path.basename(filename))
134 |             with open(abs_path, 'wb') as target:
135 |                 try:
136 |                     for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
137 |                         if chunk:
138 |                             target.write(chunk)
139 |                             target.flush()
140 |                 except:
141 |                     pass
142 |         finally:
143 |             # It would be more convenient to use the context manager idiom,
144 |             # but avoiding it allows us to support older versions of requests.
145 |             response.close()
146 | 
147 |     def download_data(self, dataset=None):
148 |         """Download the entire catalog or a specific dataset to the local directory.
149 | 
150 |         :type dataset: string, or None
151 |         :param dataset: The specific CME Datamine dataset name as retreived from catalog.
152 |                         If None, the entire catalog is downloaded.
153 |         """
154 | 
155 |         fids = [fid for fid, record in self.data_catalog.items()
156 |                 if dataset is None or record['dataset'] == dataset]
157 |         description = 'downloading {} data'.format(dataset if dataset else 'all datasets')
158 |         tqdm_execute_tasks(self.download_file, fids, description, self.threads, mode='thread')
159 | 
160 |     def get_catalog(self, dataset=None, limit=None, refresh=False):
161 |         """Get the list of data files avaliable to you
162 |         This may take time depending upon how many items are currenty
163 |         have available to your login.  Items are retrieved in groups of 1000
164 |         per the standard call support.
165 | 
166 |         Parameters
167 |         ----------
168 |         :type dataset: string
169 |         :param dataset: The specific dataset items that you would like to retrieve.
170 | 
171 |         :type limit: integer
172 |         :param limit: Limits the amount of catalog items you would like to retrieve.
173 | 
174 |         :type refresh: bool
175 |         :param refresh: Set to True if you want to force a refresh of the local copy.
176 | 
177 |         Creates
178 |         -------
179 |         :creates: python.dictionary self.data_catalog -- containing custom data catalog available.
180 | 
181 |         Returns
182 |         -------
183 |         Returns None -- dictionary of the data catalog from Datamine
184 |         """
185 |         logger.info('get_catalog: retrieving {}, limit {}'.format(dataset if dataset else 'all datasets', limit))
186 | 
187 |         # No need to download more data if:
188 |         #   -- if the dataset matches, and the new limit is smaller
189 |         #   -- if the previous dataset was None, and there was no limit
190 |         if limit is None:
191 |             limit = NO_LIMIT
192 |         elif not isinstance(limit, int) or limit < 0:
193 |             raise RequestError('Invalid limit value: {!r}'.format(limit))
194 |         is_valid = (self._dataset == dataset and limit <= self._limit or
195 |                     self._dataset is None and self._limit == NO_LIMIT)
196 | 
197 |         if refresh or not is_valid:
198 |             if self._limit >= 0:
199 |                 reason = 'by request' if refresh else 'for new parameters'
200 |                 logger.debug('get_catalog: refreshing {}'.format(reason))
201 |             self.data_catalog = {}
202 |             self._dataset = None
203 |             self._limit = 0
204 |             is_valid = False
205 | 
206 |         if is_valid:
207 |             logger.info('get_catalog: requested data already downloaded')
208 |             return
209 | 
210 |         params = {}
211 |         duplicates = 0
212 |         nrecs = len(self.data_catalog)
213 |         if dataset:
214 |             params['dataset'] = dataset
215 |         while True:
216 |             params['limit'] = min(PAGE_SIZE, limit - nrecs)
217 |             if params['limit'] <= 0:
218 |                 logger.warning('get_catalog: {}-record limit reached'.format(limit))
219 |                 break
220 | 
221 |             resp = self._call_api('list', params)
222 |             if resp.text == '"Could not initiate UNO connection"':
223 |                 raise RequestError('Invalid username/password combination.')
224 |             try:
225 |                 response = resp.json()
226 |                 if response is None:
227 |                     logger.warning('get_catalog: empty record obtained, assuming end of data reached')
228 |                     limit = NO_LIMIT
229 |                     break
230 |                 files = response['files']
231 |                 next_url = response['paging']['next']
232 |             except (ValueError, TypeError):
233 |                 raise RequestError('Invalid JSON data:\n   URL: {}\n  Text: {}\n'.format(resp.url, resp.text))
234 | 
235 |             self.data_catalog.update((item['fid'], item) for item in files)
236 |             orecs, nrecs = nrecs, len(self.data_catalog)
237 |             duplicates += orecs + len(files) - nrecs
238 | 
239 |             if not next_url:
240 |                 logger.debug('get_catalog: end of data raeached')
241 |                 limit = NO_LIMIT
242 |                 break
243 |             _, params = _url_params(next_url)
244 | 
245 |         logger.info('get_catalog: {} records downloaded, {} duplicates, {} saved'.format(nrecs + duplicates, duplicates, nrecs))
246 |         self._limit = max(limit, len(self.data_catalog))
247 |         self._dataset = dataset
248 | 
249 |     def load_dataset(self, dataset, download=True, limit=None, dataset_args = {}):
250 |         """Load a dataset, optionally downloading files listed in the catalog.
251 |            Parameters
252 |            ----------
253 |            :param download: Attempt to download any data avaliable before loading data from local disk.
254 |            :type download: bool
255 | 
256 |            :param limit: Limit the number of files loaded to the given number.
257 |            :type limit: integer, or None
258 | 
259 |            Returns
260 |            -------
261 |            :returns: pandas.DataFrame
262 |         """
263 |         
264 |         if download:
265 |             self.download_data(dataset)
266 | 
267 |         path = os.path.join(self.path, dataset)
268 |         return Loader.by_name(dataset, dataset_args).load(path, limit=limit)
269 | 
270 |     '''
271 |     Script consists of "load" and "download" functions.
272 |     "download" functions only download files into local directory
273 |     "load" functions download files into local directory, and then read + structure into a pandas DataFrame
274 | 
275 |     Design pattern for _download family
276 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~    
277 |         Parameters
278 |         ----------
279 |         :param download: Attempt to download any data available before loading data from local disk.
280 |         :type download: bool.
281 | 
282 |         Creates
283 |         -------
284 |         :creates: None
285 | 
286 |         Returns
287 |         -------
288 |         :returns:  None
289 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
290 | 
291 |     Design pattern for _load family
292 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~    
293 |         Parameters
294 |         ----------
295 |         :param download: Attempt to download any data avaliable before loading data from local disk.
296 |         :type download: bool.
297 | 
298 |         Creates
299 |         -------
300 |         :creates: pandas.DataFrame object.datasetname_DF
301 | 
302 |         Returns
303 |         -------
304 |         :returns:  None
305 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
306 |     '''
307 | 
308 |     def block_load(self, download=True):
309 |         """
310 |         Data Set - Block Trades
311 |         File Path - /BLOCK
312 |         Function Type - Download & Load
313 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Block+Trades
314 |         """
315 |         self.block_DF = self.load_dataset('BLOCK')
316 |         
317 | 
318 |     def brokertech_tob_download(self, download=True):
319 |         """
320 |         Data Set - Nex BrokerTech Top of Book Data Sets
321 |         File Path - /NEXBROKERTECTOB
322 |         Function Type - Download Only
323 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/NEX+-+BrokerTec+Historical+Data
324 |         """
325 |         if download:
326 |             self.download_data('NEXBROKERTECTOB')
327 | 
328 |     def brokertech_dob_download(self, download=True):
329 |         """
330 |         Data Set - Nex BrokerTech Depth of Book Data Sets
331 |         File Path - /NEXBROKERTECDOB
332 |         Function Type - Download Only
333 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/NEX+-+BrokerTec+Historical+Data
334 |         """
335 |         if download:
336 |             self.download_data('NEXBROKERTECDOB')
337 | 
338 |     def brokertech_fob_download(self, download=True):
339 |         """
340 |         Data Set - Nex BrokerTech Full Book Data Sets
341 |         File Path - /NEXBROKERTECFOB
342 |         Function Type - Download Only
343 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/NEX+-+BrokerTec+Historical+Data
344 |         """
345 |         if download:
346 |             self.download_data('NEXBROKERTECFOB')
347 |         
348 |     def crypto_load(self, download=True):
349 |         """
350 |         Data Set - Crypto Data, Bitcoin & Etherium
351 |         File Path - /cryptocurrency
352 |         Function Type - Download & Load
353 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Liquidity+Tool+Dataset
354 |         """
355 |         self.crypto_DF = self.load_dataset('CRYPTOCURRENCY')
356 |     
357 |     def eod_load(self, download=True):
358 |         """
359 |         Data Set - End of Day Complete
360 |         File Path - /EOD
361 |         Function Type - Download & Load
362 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/End+of+Day
363 |         """
364 |         self.eod_DF = self.load_dataset('EOD', download=download)
365 | 
366 |     def voi_load(self, download=True):
367 |         """
368 |         Data Set - Volume and Open Interest
369 |         File Path - /VOI
370 |         Function Type - Download & Load
371 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Volume+and+Open+Interest
372 |         """
373 |         self.voi_DF = self.load_dataset('VOI', download=download)
374 | 
375 |     def eris_load(self, download=True):
376 |         """
377 |         Data Set - Eris PAI
378 |         File Path - /ERIS
379 |         Function Type - Download & Load
380 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Eris+PAI+Dataset
381 |         """
382 |         self.eris_DF = self.load_dataset('ERIS')
383 | 
384 |     def fx_load(self, download=True):
385 |         """
386 |         Data Set - FX Premium
387 |         File Path - /FX
388 |         Function Type - Download & Load
389 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Premium+FX+Feed+Historical+Data
390 |         Warning -- Files are large when uncompressed
391 |         """
392 |         self.fx_DF = self.load_dataset('FX', download=download)
393 |         
394 | #    def govpx_load(self, download=True, dataset_args = {}):
395 | #        """
396 | #        Data Set - GovPX
397 | #        File Path - /GOVPX
398 | #        Function Type - Download & Load
399 | #        Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/GovPX+Historical+Data#
400 | #        """
401 | #        self.govpx_DF = self.load_dataset(dataset = 'GOVPX', dataset_args = dataset_args, download=download)
402 | 
403 |     def govpx_download(self, download=True):
404 |         """
405 |         Data Set - GovPX
406 |         File Path - /GOVPX
407 |         Function Type - Download Only
408 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/GovPX+Historical+Data
409 |         """
410 |         if download:
411 |             self.download_data('GOVPX')
412 | 
413 |     def govpxeod_download(self, download=True):
414 |         """
415 |         Data Set - GovPX End of Day
416 |         File Path - /GOVPXEOD
417 |         Function Type - Download Only
418 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/GovPX+End+of+Day+Historical+Data
419 |         """
420 |         if download:
421 |             self.download_data('GOVPXEOD')
422 |             
423 |     def STL_download(self, download=True):
424 |         """
425 |         Data Set - STL INT Settlements
426 |         File Path - /STL
427 |         Function Type - Download Only
428 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/STL+INT+Settlements
429 |         """
430 |         if download:
431 |             self.download_data('STL')
432 | 
433 |     def liqtool_load(self, download=True):
434 |         """
435 |         Data Set - Liquidity Tool
436 |         File Path - /LIQTOOL
437 |         Function Type - Download & Load
438 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Liquidity+Tool+Dataset
439 |         """
440 |         self.liqtool_DF = self.load_dataset('LIQTOOL')        
441 | 
442 |     def MD_download(self, download=True):
443 |         """
444 |         Data Set - Market Depth FIX
445 |         File Path - /MD
446 |         Function Type - Download Only
447 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Market+Depth
448 |         """
449 |         if download:
450 |             self.download_data('MD')
451 |             
452 |     def RLC_download(self, download=True):
453 |         """
454 |         Data Set - Market Depth RLC
455 |         File Path - /RLC
456 |         Function Type - Download Only
457 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Market+Depth
458 |         """
459 |         if download:
460 |             self.download_data('RLC')
461 |             
462 |     def RLCSECDEF_download(self, download=True):
463 |         """
464 |         Data Set - SECDEF RLC
465 |         File Path - /RLCSECDEF
466 |         Function Type - Download Only
467 |         Help URL - 
468 |         """
469 |         if download:
470 |             self.download_data('RLCSECDEF')
471 | 
472 |     def MBO_download(self, download=True):
473 |         """
474 |         Data Set - MBO FIX
475 |         File Path - /MBO
476 |         Function Type - Download Only
477 |         Help URL - https://wiki.chicago.cme.com/confluence/display/EPICSANDBOX/MBO+FIX
478 |         """
479 |         if download:
480 |             self.download_data('MBO')
481 | 
482 |     def PCAP_download(self, download=True):
483 |         """
484 |         Data Set - Packet Capture (PCAP)
485 |         File Path - /PCAP
486 |         Function Type - Download Only
487 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Packet+Capture+Dataset
488 |         """
489 |         if download:
490 |             self.download_data('PCAP')
491 | 
492 |     def sofrois_load(self, download=True):
493 |         """
494 |         Data Set - SOFR OIS Index
495 |         File Path - /SOFR
496 |         Function Type - Download & Load
497 |         Help URL - https://www.cmegroup.com/market-data/faq-sofr-third-party-data.html
498 |         """
499 |         self.sofrois_DF = self.load_dataset('SOFR', download=download)
500 | 
501 |     def sofrstriprates_load(self, orient='long', download=True):
502 |         """
503 |         Data Set - SOFR Strip Rates
504 |         File Path - /SOFRSR
505 |         Function Type - Download & Load
506 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/SOFR+Strip+Rates
507 |         """
508 |         if orient == 'wide':
509 |             
510 |             self.sofrstriprates_DF = self.load_dataset('SOFRSR', 
511 |                                                 download=download).pivot(index='businessDate', 
512 |                                                         columns='Description', values='rate').sort_values('businessDate', ascending=False).reset_index()
513 | 
514 |             self.sofrstriprates_DF['businessDate'] = self.sofrstriprates_DF['businessDate'].dt.date                                          
515 |             self.sofrstriprates_DF.set_index('businessDate', inplace=True)
516 |         elif orient == 'long':
517 |             self.sofrstriprates_DF = self.load_dataset('SOFRSR', download=download)
518 |         else:
519 |             print("Incorrect orientation parameter. Defaulting to long.")
520 |             self.sofrstriprates_DF = self.load_dataset('SOFRSR', download=download)
521 | 
522 |     def SECDEF_download(self, download=True):
523 |         """
524 |         Data Set - Securities Definition (SECDEF)
525 |         File Path - /SECDEF
526 |         Function Type - Download Only
527 |         Help URL - Not Applicable
528 |         """
529 |         if download:
530 |             self.download_data('SECDEF')
531 | 
532 |     def time_sales_load(self, download=True):
533 |         """
534 |         Data Set - Time and Sales (TICK)
535 |         File Path - /TICK
536 |         Function Type - Download & Load
537 |         Help URL - 
538 |         """
539 |         self.time_sales_DF = self.load_dataset('TICK')
540 | 
541 |     def BBO_download(self, download=True):
542 |         """
543 |         Data Set - Top-of-Book (BBO)
544 |         File Path - /BBO
545 |         Function Type - Download Only
546 |         Help URL - https://www.cmegroup.com/confluence/display/EPICSANDBOX/Top+of+Book+-+BBO
547 |         """
548 |         if download:
549 |             self.download_data('BBO')
550 | 
551 |     def bantix_download(self, download=True):
552 |         """
553 |         Data Set - bantix
554 |         File Path - /BANTIX
555 |         Function Type - Download Only
556 |         Help URL - https://www.cmegroup.com/market-data/quikstrike-via-bantix-technologies.html
557 |         """
558 |         if download:
559 |             self.download_data('BANTIX')
560 | 
561 |     def JSE_download(self, download=True):
562 |         """
563 |         Data Set - Johannesburg Stock Exchange
564 |         File Path - /JSE
565 |         Function Type - Download Only
566 |         Help URL - 
567 |         """
568 |         if download:
569 |             self.download_data('JSE')
570 | 
571 |     def orbital_insights_load(self, download=True):
572 |         """
573 |         Data Set - Orbital Insights (https://orbitalinsight.com/)
574 |         File Path - /ORBITALINSIGHT
575 |         Function Type - Download & Load
576 |         Help URL - https://www.cmegroup.com/market-data/orbital-insight/faq.html
577 |         """
578 |         self.orbital_insights_DF = self.load_dataset('ORBITALINSIGHT')
579 | 
580 |     def rsmetrics_load(self, download=True):
581 |         """
582 |         Data Set - RS Metrics
583 |         File Path - /RSMETRICS
584 |         Function Type - Download & Load
585 |         Help URL - https://www.cmegroup.com/market-data/rs-metrics/faq-rs-metrics.html
586 |         """
587 |         self.rsmetrics_DF = self.load_dataset('RSMETRICS')
588 | 
589 |     def tellus_labs_load(self, download=True):
590 |         """
591 |         Data Set - Tellus Labs (https://telluslabs.com)
592 |         File Path - /TELLUSLABS
593 |         Function Type - Download & Load
594 |         Help URL - https://www.cmegroup.com/education/articles-and-reports/telluslabs-faq.html
595 |         """
596 |         self.tellus_labs_DF = self.load_dataset('TELLUSLABS')
597 | 
598 |     def oneqbit_load(self, download=True):
599 |         """
600 |         Data Set - 1Qbit
601 |         File Path - /1QBIT
602 |         Function Type - Download & Load
603 |         Help URL - https://www.cmegroup.com/market-data/faq-1qbit.html
604 |         """
605 |         self.oneqbit_DF = self.load_dataset('1QBIT')
606 | 


--------------------------------------------------------------------------------
/datamine/loaders/1qbit.py:
--------------------------------------------------------------------------------
  1 | from . import Loader
  2 | import pandas as pd
  3 | 
  4 | class OneQBitLoader(Loader):
  5 |     dataset = '1QBIT'
  6 |     fileglob = '1QBit_*.csv'
  7 | 
  8 |     columns = ['TRADEDATE', 'DATA_SOURCE', 'EODDESC', 'CHART_TITLE', 
  9 |                'YYYY', 'MM', 'DD', 'DATECODE_EXCEL', 'DATE_LABEL', 'F_PROD_CODE', 'O_PROD_CODE', 
 10 |                'PRICE_SETTLE_ACTIVE', 'PRICE_HIGH_ACTIVE', 'PRICE_LOW_ACTIVE', 'YYYY_ACTIVE', 'MM_ACTIVE', 'F_VOLUME_ACTIVE', 
 11 |                'PRICE_SETTLE_NEXT', 'PRICE_HIGH_NEXT', 'PRICE_LOW_NEXT', 'YYYY_NEXT', 'MM_NEXT', 'F_VOLUME_NEXT', 
 12 |                'F_VOLUME', 'IMPLIED_VOL', 'PUT_VOLUME', 'CALL_VOLUME', 'OPTIONS_VOLUME', 'PUT_OI', 'CALL_OI', 'O_OI', 
 13 |                'CURRENT_PRICE_MOST_ACTIVE', 'PREVIOUS_PRICE_MOST_ACTIVE', 'PRICE_PCT_CHG', 'EXCESS_RETURN_INDEX',
 14 |                'IMPLIED_VOL_ST', 'IMPLIED_VOL_LT', 'DAILY_VARIANCE', 'HISTORICAL_STD_ST', 'HISTORICAL_STD_LT', 'RATIO_STD_ST_LT', 
 15 |                'RATIO_STD_ST_TO_IMPLIED_VOL_CURRENT', 'RATIO_HIGH_LOW_PCT', 'HIGH_LOW_PCT_ST', 'HIGH_LOW_PCT_LT', 
 16 |                'RATIO_HIGH_LOW_ST_LT', 'PUT_VOLUME_ST', 'PUT_VOLUME_LT', 'RATIO_PUT_VOLUME_ST_LT', 'CALL_VOLUME_ST', 
 17 |                'CALL_VOLUME_LT', 'RATIO_CALL_VOLUME_ST_LT', 'RATIO_PUT_CALL_VOLUME_ST', 'RATIO_PUT_CALL_VOLUME_LT', 
 18 |                'PCT_DIF_PUT_CALL_ST_LT_RATIO', 'MOMENTUM_ST', 'MOMENTUM_LT', 'RATIO_MOMENTUM_ST_LT', 'RATIO_MOMENTUM_TO_STD_ST', 
 19 |                'RATIO_MOMENTUM_TO_STD_LT', 'PRICE_20D_MA', 'PRICE_60D_MA', 'PRICE_200D_MA', 'PCT_DIF_CURRENT_200D_PRICE', 
 20 |                'PCT_DIF_20D_200D_PRICE', 'PEAK_PRICE', 'PEAK_200D_PRICE', '20PCT_BELOW_PEAK_200D', '20PCT_ABOVE_60DMA', 
 21 |                '20PCT_BELOW_60DMA', 'MIX_PROB_20PCT_ABOVE_60DMA', 
 22 |                'MIX_PROB_20PCT_BELOW_60DMA', 'MIX_MEAN', 'MIX_MEDIAN', 'MIX_MODE_1', 'MIX_MODE_2', 
 23 |                'MIX_STD', 'MIX_STD_LT', 'MIX_SKEW', 'MIX_KURTOSIS', 'MIX_STATE', 'MIX_COMPLACENT', 'MIX_BALANCED', 'MIX_ANXIOUS', 
 24 |                'MIX_CONFLICTED', 'MIX_MODALITY', 'MIX_DISTANCE', 'MIX_INTENSITY', 'MIX_LOW_BIN', 'MIX_BIN_SIZE', 'MIX_BINS', 
 25 |                'MIX_BIN_NEG_100', 'MIX_BIN_NEG_99', 'MIX_BIN_NEG_98', 'MIX_BIN_NEG_97', 'MIX_BIN_NEG_96', 'MIX_BIN_NEG_95', 
 26 |                'MIX_BIN_NEG_94', 'MIX_BIN_NEG_93', 'MIX_BIN_NEG_92', 'MIX_BIN_NEG_91', 'MIX_BIN_NEG_90', 'MIX_BIN_NEG_89', 
 27 |                'MIX_BIN_NEG_88', 'MIX_BIN_NEG_87', 'MIX_BIN_NEG_86', 'MIX_BIN_NEG_85', 'MIX_BIN_NEG_84', 'MIX_BIN_NEG_83', 
 28 |                'MIX_BIN_NEG_82', 'MIX_BIN_NEG_81', 'MIX_BIN_NEG_80', 'MIX_BIN_NEG_79', 'MIX_BIN_NEG_78', 'MIX_BIN_NEG_77', 
 29 |                'MIX_BIN_NEG_76', 'MIX_BIN_NEG_75', 'MIX_BIN_NEG_74', 'MIX_BIN_NEG_73', 'MIX_BIN_NEG_72', 'MIX_BIN_NEG_71', 
 30 |                'MIX_BIN_NEG_70', 'MIX_BIN_NEG_69', 'MIX_BIN_NEG_68', 'MIX_BIN_NEG_67', 'MIX_BIN_NEG_66', 'MIX_BIN_NEG_65', 
 31 |                'MIX_BIN_NEG_64', 'MIX_BIN_NEG_63', 'MIX_BIN_NEG_62', 'MIX_BIN_NEG_61', 'MIX_BIN_NEG_60', 'MIX_BIN_NEG_59', 
 32 |                'MIX_BIN_NEG_58', 'MIX_BIN_NEG_57', 'MIX_BIN_NEG_56', 'MIX_BIN_NEG_55', 'MIX_BIN_NEG_54', 'MIX_BIN_NEG_53', 
 33 |                'MIX_BIN_NEG_52', 'MIX_BIN_NEG_51', 'MIX_BIN_NEG_50', 'MIX_BIN_NEG_49', 'MIX_BIN_NEG_48', 'MIX_BIN_NEG_47', 
 34 |                'MIX_BIN_NEG_46', 'MIX_BIN_NEG_45', 'MIX_BIN_NEG_44', 'MIX_BIN_NEG_43', 'MIX_BIN_NEG_42', 'MIX_BIN_NEG_41',
 35 |                'MIX_BIN_NEG_40', 'MIX_BIN_NEG_39', 'MIX_BIN_NEG_38', 'MIX_BIN_NEG_37', 'MIX_BIN_NEG_36', 'MIX_BIN_NEG_35',
 36 |                'MIX_BIN_NEG_34', 'MIX_BIN_NEG_33', 'MIX_BIN_NEG_32', 'MIX_BIN_NEG_31', 'MIX_BIN_NEG_30', 'MIX_BIN_NEG_29',
 37 |                'MIX_BIN_NEG_28', 'MIX_BIN_NEG_27', 'MIX_BIN_NEG_26', 'MIX_BIN_NEG_25', 'MIX_BIN_NEG_24', 'MIX_BIN_NEG_23',
 38 |                'MIX_BIN_NEG_22', 'MIX_BIN_NEG_21', 'MIX_BIN_NEG_20', 'MIX_BIN_NEG_19', 'MIX_BIN_NEG_18', 'MIX_BIN_NEG_17',
 39 |                'MIX_BIN_NEG_16', 'MIX_BIN_NEG_15', 'MIX_BIN_NEG_14', 'MIX_BIN_NEG_13', 'MIX_BIN_NEG_12', 'MIX_BIN_NEG_11', 
 40 |                'MIX_BIN_NEG_10', 'MIX_BIN_NEG_09', 'MIX_BIN_NEG_08', 'MIX_BIN_NEG_07', 'MIX_BIN_NEG_06', 'MIX_BIN_NEG_05',
 41 |                'MIX_BIN_NEG_04', 'MIX_BIN_NEG_03', 'MIX_BIN_NEG_02', 'MIX_BIN_NEG_01', 'MIX_BIN_POS_00', 'MIX_BIN_POS_01',
 42 |                'MIX_BIN_POS_02', 'MIX_BIN_POS_03', 'MIX_BIN_POS_04', 'MIX_BIN_POS_05', 'MIX_BIN_POS_06', 'MIX_BIN_POS_07',
 43 |                'MIX_BIN_POS_08', 'MIX_BIN_POS_09', 'MIX_BIN_POS_10', 'MIX_BIN_POS_11', 'MIX_BIN_POS_12', 'MIX_BIN_POS_13',
 44 |                'MIX_BIN_POS_14', 'MIX_BIN_POS_15', 'MIX_BIN_POS_16', 'MIX_BIN_POS_17', 'MIX_BIN_POS_18', 'MIX_BIN_POS_19',
 45 |                'MIX_BIN_POS_20', 'MIX_BIN_POS_21', 'MIX_BIN_POS_22', 'MIX_BIN_POS_23', 'MIX_BIN_POS_24', 'MIX_BIN_POS_25',
 46 |                'MIX_BIN_POS_26', 'MIX_BIN_POS_27', 'MIX_BIN_POS_28', 'MIX_BIN_POS_29', 'MIX_BIN_POS_30', 'MIX_BIN_POS_31',
 47 |                'MIX_BIN_POS_32', 'MIX_BIN_POS_33', 'MIX_BIN_POS_34', 'MIX_BIN_POS_35', 'MIX_BIN_POS_36', 'MIX_BIN_POS_37',
 48 |                'MIX_BIN_POS_38', 'MIX_BIN_POS_39', 'MIX_BIN_POS_40', 'MIX_BIN_POS_41', 'MIX_BIN_POS_42', 'MIX_BIN_POS_43',
 49 |                'MIX_BIN_POS_44', 'MIX_BIN_POS_45', 'MIX_BIN_POS_46', 'MIX_BIN_POS_47', 'MIX_BIN_POS_48', 'MIX_BIN_POS_49',
 50 |                'MIX_BIN_POS_50', 'MIX_BIN_POS_51', 'MIX_BIN_POS_52', 'MIX_BIN_POS_53', 'MIX_BIN_POS_54', 'MIX_BIN_POS_55',
 51 |                'MIX_BIN_POS_56', 'MIX_BIN_POS_57', 'MIX_BIN_POS_58', 'MIX_BIN_POS_59', 'MIX_BIN_POS_60', 'MIX_BIN_POS_61',
 52 |                'MIX_BIN_POS_62', 'MIX_BIN_POS_63', 'MIX_BIN_POS_64', 'MIX_BIN_POS_65', 'MIX_BIN_POS_66', 'MIX_BIN_POS_67',
 53 |                'MIX_BIN_POS_68', 'MIX_BIN_POS_69', 'MIX_BIN_POS_70', 'MIX_BIN_POS_71', 'MIX_BIN_POS_72', 'MIX_BIN_POS_73',
 54 |                'MIX_BIN_POS_74', 'MIX_BIN_POS_75', 'MIX_BIN_POS_76', 'MIX_BIN_POS_77', 'MIX_BIN_POS_78', 'MIX_BIN_POS_79',
 55 |                'MIX_BIN_POS_80', 'MIX_BIN_POS_81', 'MIX_BIN_POS_82', 'MIX_BIN_POS_83', 'MIX_BIN_POS_84', 'MIX_BIN_POS_85',
 56 |                'MIX_BIN_POS_86', 'MIX_BIN_POS_87', 'MIX_BIN_POS_88', 'MIX_BIN_POS_89', 'MIX_BIN_POS_90', 'MIX_BIN_POS_91',
 57 |                'MIX_BIN_POS_92', 'MIX_BIN_POS_93', 'MIX_BIN_POS_94', 'MIX_BIN_POS_95', 'MIX_BIN_POS_96', 'MIX_BIN_POS_97',
 58 |                'MIX_BIN_POS_98', 'MIX_BIN_POS_99', 'MIX_BIN_POS_100', 'MIX_BIN_POS_101', 'MIX_BIN_POS_102', 'MIX_BIN_POS_103',
 59 |                'MIX_BIN_POS_104', 'MIX_BIN_POS_105', 'MIX_BIN_POS_106', 'MIX_BIN_POS_107', 'MIX_BIN_POS_108', 'MIX_BIN_POS_109',
 60 |                'MIX_BIN_POS_110', 'MIX_BIN_POS_111', 'MIX_BIN_POS_112', 'MIX_BIN_POS_113', 'MIX_BIN_POS_114', 'MIX_BIN_POS_115',
 61 |                'MIX_BIN_POS_116', 'MIX_BIN_POS_117', 'MIX_BIN_POS_118', 'MIX_BIN_POS_119', 'MIX_BIN_POS_120', 'MIX_BIN_POS_121',
 62 |                'MIX_BIN_POS_122', 'MIX_BIN_POS_123', 'MIX_BIN_POS_124', 'MIX_BIN_POS_125', 'MIX_BIN_POS_126', 'MIX_BIN_POS_127',
 63 |                'MIX_BIN_POS_128', 'MIX_BIN_POS_129', 'MIX_BIN_POS_130', 'MIX_BIN_POS_131', 'MIX_BIN_POS_132', 'MIX_BIN_POS_133',
 64 |                'MIX_BIN_POS_134', 'MIX_BIN_POS_135', 'MIX_BIN_POS_136', 'MIX_BIN_POS_137', 'MIX_BIN_POS_138', 'MIX_BIN_POS_139',
 65 |                'MIX_BIN_POS_140', 'MIX_BIN_POS_141', 'MIX_BIN_POS_142', 'MIX_BIN_POS_143', 'MIX_BIN_POS_144', 'MIX_BIN_POS_145', 
 66 |                'MIX_BIN_POS_146', 'MIX_BIN_POS_147', 'MIX_BIN_POS_148', 'MIX_BIN_POS_149', 'MIX_BIN_POS_150', 'MIX_BIN_POS_151', 
 67 |                'MIX_BIN_POS_152', 'MIX_BIN_POS_153', 'MIX_BIN_POS_154', 'MIX_BIN_POS_155']
 68 | 
 69 |     dtypes = {'category': ('DATA_SOURCE', 'EODDESC', 'CHART_TITLE', 'F_PROD_CODE', 'O_PROD_CODE','MIX_STATE','MIX_MODALITY'),
 70 |               'int64': ('YYYY', 'MM', 'DD', 'YYYY_ACTIVE', 'MM_ACTIVE', 'F_VOLUME_ACTIVE', 
 71 |                         'YYYY_NEXT', 'MM_NEXT', 'F_VOLUME_NEXT', 'F_VOLUME', 'PUT_VOLUME', 'CALL_VOLUME', 
 72 |                         'OPTIONS_VOLUME', 'PUT_OI', 'CALL_OI','O_OI',
 73 |                         'MIX_COMPLACENT', 'MIX_BALANCED', 'MIX_ANXIOUS', 'MIX_CONFLICTED','MIX_DISTANCE'),
 74 | 
 75 |               'float': ('DATECODE_EXCEL','CURRENT_PRICE_MOST_ACTIVE', 'PREVIOUS_PRICE_MOST_ACTIVE', 'PRICE_PCT_CHG', 'EXCESS_RETURN_INDEX',
 76 |                         'IMPLIED_VOL_ST', 'IMPLIED_VOL_LT', 'DAILY_VARIANCE', 'HISTORICAL_STD_ST', 'HISTORICAL_STD_LT', 'RATIO_STD_ST_LT', 
 77 |                'RATIO_STD_ST_TO_IMPLIED_VOL_CURRENT', 'RATIO_HIGH_LOW_PCT', 'HIGH_LOW_PCT_ST', 'HIGH_LOW_PCT_LT', 
 78 |                'RATIO_HIGH_LOW_ST_LT', 'PUT_VOLUME_ST', 'PUT_VOLUME_LT', 'RATIO_PUT_VOLUME_ST_LT', 'CALL_VOLUME_ST', 
 79 |                'CALL_VOLUME_LT', 'RATIO_CALL_VOLUME_ST_LT', 'RATIO_PUT_CALL_VOLUME_ST', 'RATIO_PUT_CALL_VOLUME_LT', 
 80 |                'PCT_DIF_PUT_CALL_ST_LT_RATIO', 'MOMENTUM_ST', 'MOMENTUM_LT', 'RATIO_MOMENTUM_ST_LT', 'RATIO_MOMENTUM_TO_STD_ST', 
 81 |                'RATIO_MOMENTUM_TO_STD_LT', 'PRICE_20D_MA', 'PRICE_60D_MA', 'PRICE_200D_MA', 'PCT_DIF_CURRENT_200D_PRICE', 
 82 |                'PCT_DIF_20D_200D_PRICE', 'PEAK_PRICE', 'PEAK_200D_PRICE', '20PCT_BELOW_PEAK_200D', '20PCT_ABOVE_60DMA', 
 83 |                '20PCT_BELOW_60DMA', 'MIX_PROB_20PCT_ABOVE_60DMA', 
 84 |                'MIX_PROB_20PCT_BELOW_60DMA', 'MIX_MEAN', 'MIX_MEDIAN', 'MIX_MODE_1', 'MIX_MODE_2', 
 85 |                'MIX_STD', 'MIX_STD_LT', 'MIX_SKEW', 'MIX_KURTOSIS','MIX_INTENSITY', 'MIX_LOW_BIN', 'MIX_BIN_SIZE', 'MIX_BINS', 
 86 |                'MIX_BIN_NEG_100', 'MIX_BIN_NEG_99', 'MIX_BIN_NEG_98', 'MIX_BIN_NEG_97', 'MIX_BIN_NEG_96', 'MIX_BIN_NEG_95', 
 87 |                'MIX_BIN_NEG_94', 'MIX_BIN_NEG_93', 'MIX_BIN_NEG_92', 'MIX_BIN_NEG_91', 'MIX_BIN_NEG_90', 'MIX_BIN_NEG_89', 
 88 |                'MIX_BIN_NEG_88', 'MIX_BIN_NEG_87', 'MIX_BIN_NEG_86', 'MIX_BIN_NEG_85', 'MIX_BIN_NEG_84', 'MIX_BIN_NEG_83', 
 89 |                'MIX_BIN_NEG_82', 'MIX_BIN_NEG_81', 'MIX_BIN_NEG_80', 'MIX_BIN_NEG_79', 'MIX_BIN_NEG_78', 'MIX_BIN_NEG_77', 
 90 |                'MIX_BIN_NEG_76', 'MIX_BIN_NEG_75', 'MIX_BIN_NEG_74', 'MIX_BIN_NEG_73', 'MIX_BIN_NEG_72', 'MIX_BIN_NEG_71', 
 91 |                'MIX_BIN_NEG_70', 'MIX_BIN_NEG_69', 'MIX_BIN_NEG_68', 'MIX_BIN_NEG_67', 'MIX_BIN_NEG_66', 'MIX_BIN_NEG_65', 
 92 |                'MIX_BIN_NEG_64', 'MIX_BIN_NEG_63', 'MIX_BIN_NEG_62', 'MIX_BIN_NEG_61', 'MIX_BIN_NEG_60', 'MIX_BIN_NEG_59', 
 93 |                'MIX_BIN_NEG_58', 'MIX_BIN_NEG_57', 'MIX_BIN_NEG_56', 'MIX_BIN_NEG_55', 'MIX_BIN_NEG_54', 'MIX_BIN_NEG_53', 
 94 |                'MIX_BIN_NEG_52', 'MIX_BIN_NEG_51', 'MIX_BIN_NEG_50', 'MIX_BIN_NEG_49', 'MIX_BIN_NEG_48', 'MIX_BIN_NEG_47', 
 95 |                'MIX_BIN_NEG_46', 'MIX_BIN_NEG_45', 'MIX_BIN_NEG_44', 'MIX_BIN_NEG_43', 'MIX_BIN_NEG_42', 'MIX_BIN_NEG_41',
 96 |                'MIX_BIN_NEG_40', 'MIX_BIN_NEG_39', 'MIX_BIN_NEG_38', 'MIX_BIN_NEG_37', 'MIX_BIN_NEG_36', 'MIX_BIN_NEG_35',
 97 |                'MIX_BIN_NEG_34', 'MIX_BIN_NEG_33', 'MIX_BIN_NEG_32', 'MIX_BIN_NEG_31', 'MIX_BIN_NEG_30', 'MIX_BIN_NEG_29',
 98 |                'MIX_BIN_NEG_28', 'MIX_BIN_NEG_27', 'MIX_BIN_NEG_26', 'MIX_BIN_NEG_25', 'MIX_BIN_NEG_24', 'MIX_BIN_NEG_23',
 99 |                'MIX_BIN_NEG_22', 'MIX_BIN_NEG_21', 'MIX_BIN_NEG_20', 'MIX_BIN_NEG_19', 'MIX_BIN_NEG_18', 'MIX_BIN_NEG_17',
100 |                'MIX_BIN_NEG_16', 'MIX_BIN_NEG_15', 'MIX_BIN_NEG_14', 'MIX_BIN_NEG_13', 'MIX_BIN_NEG_12', 'MIX_BIN_NEG_11', 
101 |                'MIX_BIN_NEG_10', 'MIX_BIN_NEG_09', 'MIX_BIN_NEG_08', 'MIX_BIN_NEG_07', 'MIX_BIN_NEG_06', 'MIX_BIN_NEG_05',
102 |                'MIX_BIN_NEG_04', 'MIX_BIN_NEG_03', 'MIX_BIN_NEG_02', 'MIX_BIN_NEG_01', 'MIX_BIN_POS_00', 'MIX_BIN_POS_01',
103 |                'MIX_BIN_POS_02', 'MIX_BIN_POS_03', 'MIX_BIN_POS_04', 'MIX_BIN_POS_05', 'MIX_BIN_POS_06', 'MIX_BIN_POS_07',
104 |                'MIX_BIN_POS_08', 'MIX_BIN_POS_09', 'MIX_BIN_POS_10', 'MIX_BIN_POS_11', 'MIX_BIN_POS_12', 'MIX_BIN_POS_13',
105 |                'MIX_BIN_POS_14', 'MIX_BIN_POS_15', 'MIX_BIN_POS_16', 'MIX_BIN_POS_17', 'MIX_BIN_POS_18', 'MIX_BIN_POS_19',
106 |                'MIX_BIN_POS_20', 'MIX_BIN_POS_21', 'MIX_BIN_POS_22', 'MIX_BIN_POS_23', 'MIX_BIN_POS_24', 'MIX_BIN_POS_25',
107 |                'MIX_BIN_POS_26', 'MIX_BIN_POS_27', 'MIX_BIN_POS_28', 'MIX_BIN_POS_29', 'MIX_BIN_POS_30', 'MIX_BIN_POS_31',
108 |                'MIX_BIN_POS_32', 'MIX_BIN_POS_33', 'MIX_BIN_POS_34', 'MIX_BIN_POS_35', 'MIX_BIN_POS_36', 'MIX_BIN_POS_37',
109 |                'MIX_BIN_POS_38', 'MIX_BIN_POS_39', 'MIX_BIN_POS_40', 'MIX_BIN_POS_41', 'MIX_BIN_POS_42', 'MIX_BIN_POS_43',
110 |                'MIX_BIN_POS_44', 'MIX_BIN_POS_45', 'MIX_BIN_POS_46', 'MIX_BIN_POS_47', 'MIX_BIN_POS_48', 'MIX_BIN_POS_49',
111 |                'MIX_BIN_POS_50', 'MIX_BIN_POS_51', 'MIX_BIN_POS_52', 'MIX_BIN_POS_53', 'MIX_BIN_POS_54', 'MIX_BIN_POS_55',
112 |                'MIX_BIN_POS_56', 'MIX_BIN_POS_57', 'MIX_BIN_POS_58', 'MIX_BIN_POS_59', 'MIX_BIN_POS_60', 'MIX_BIN_POS_61',
113 |                'MIX_BIN_POS_62', 'MIX_BIN_POS_63', 'MIX_BIN_POS_64', 'MIX_BIN_POS_65', 'MIX_BIN_POS_66', 'MIX_BIN_POS_67',
114 |                'MIX_BIN_POS_68', 'MIX_BIN_POS_69', 'MIX_BIN_POS_70', 'MIX_BIN_POS_71', 'MIX_BIN_POS_72', 'MIX_BIN_POS_73',
115 |                'MIX_BIN_POS_74', 'MIX_BIN_POS_75', 'MIX_BIN_POS_76', 'MIX_BIN_POS_77', 'MIX_BIN_POS_78', 'MIX_BIN_POS_79',
116 |                'MIX_BIN_POS_80', 'MIX_BIN_POS_81', 'MIX_BIN_POS_82', 'MIX_BIN_POS_83', 'MIX_BIN_POS_84', 'MIX_BIN_POS_85',
117 |                'MIX_BIN_POS_86', 'MIX_BIN_POS_87', 'MIX_BIN_POS_88', 'MIX_BIN_POS_89', 'MIX_BIN_POS_90', 'MIX_BIN_POS_91',
118 |                'MIX_BIN_POS_92', 'MIX_BIN_POS_93', 'MIX_BIN_POS_94', 'MIX_BIN_POS_95', 'MIX_BIN_POS_96', 'MIX_BIN_POS_97',
119 |                'MIX_BIN_POS_98', 'MIX_BIN_POS_99', 'MIX_BIN_POS_100', 'MIX_BIN_POS_101', 'MIX_BIN_POS_102', 'MIX_BIN_POS_103',
120 |                'MIX_BIN_POS_104', 'MIX_BIN_POS_105', 'MIX_BIN_POS_106', 'MIX_BIN_POS_107', 'MIX_BIN_POS_108', 'MIX_BIN_POS_109',
121 |                'MIX_BIN_POS_110', 'MIX_BIN_POS_111', 'MIX_BIN_POS_112', 'MIX_BIN_POS_113', 'MIX_BIN_POS_114', 'MIX_BIN_POS_115',
122 |                'MIX_BIN_POS_116', 'MIX_BIN_POS_117', 'MIX_BIN_POS_118', 'MIX_BIN_POS_119', 'MIX_BIN_POS_120', 'MIX_BIN_POS_121',
123 |                'MIX_BIN_POS_122', 'MIX_BIN_POS_123', 'MIX_BIN_POS_124', 'MIX_BIN_POS_125', 'MIX_BIN_POS_126', 'MIX_BIN_POS_127',
124 |                'MIX_BIN_POS_128', 'MIX_BIN_POS_129', 'MIX_BIN_POS_130', 'MIX_BIN_POS_131', 'MIX_BIN_POS_132', 'MIX_BIN_POS_133',
125 |                'MIX_BIN_POS_134', 'MIX_BIN_POS_135', 'MIX_BIN_POS_136', 'MIX_BIN_POS_137', 'MIX_BIN_POS_138', 'MIX_BIN_POS_139',
126 |                'MIX_BIN_POS_140', 'MIX_BIN_POS_141', 'MIX_BIN_POS_142', 'MIX_BIN_POS_143', 'MIX_BIN_POS_144', 'MIX_BIN_POS_145', 
127 |                'MIX_BIN_POS_146', 'MIX_BIN_POS_147', 'MIX_BIN_POS_148', 'MIX_BIN_POS_149', 'MIX_BIN_POS_150', 'MIX_BIN_POS_151', 
128 |                'MIX_BIN_POS_152', 'MIX_BIN_POS_153', 'MIX_BIN_POS_154', 'MIX_BIN_POS_155'),
129 |               'date': ('DATE_LABEL'),
130 |              'date:%Y%m%d': ('TRADEDATE')}
131 | 
132 |     def _load(self, file):
133 |         df = pd.read_csv(file, skiprows = [1,2], low_memory=False)
134 |         return df
135 | 
136 | oneqbitloader = OneQBitLoader()
137 | 


--------------------------------------------------------------------------------
/datamine/loaders/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Loader
2 | 


--------------------------------------------------------------------------------
/datamine/loaders/base.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import os
  3 | import glob
  4 | import sys
  5 | 
  6 | from importlib import import_module
  7 | from importlib import reload
  8 | from ..utils import tqdm_execute_tasks, logger
  9 | 
 10 | __all__ = ['Loader']
 11 | 
 12 | 
 13 | class Loader(object):
 14 |     columns = None
 15 |     dtypes = None
 16 |     dataset = None
 17 |     dataset_args = None
 18 |     fileglob = '*.csv'
 19 |     index = None
 20 | 
 21 |     _by_name = None
 22 | 
 23 |     @classmethod
 24 |     def _load_datasets(cls):
 25 |         cls._by_name = {}
 26 |         pkg = __name__.rsplit('.', 1)[0]
 27 |         fpath, base = os.path.split(__file__)
 28 |         for fname in glob.glob(os.path.join(fpath, '*.py')):
 29 |             fname = os.path.basename(fname)
 30 |             if fname in (base, '__init__.py'):
 31 |                 continue
 32 |             module = import_module('.' + fname[:-3], pkg)
 33 |             for key, value in module.__dict__.items():
 34 |                 if isinstance(value, cls):
 35 |                     if not isinstance(value.dataset, str):
 36 |                         raise RuntimeError('Invalid Loader: dataset must be a string, not {}'.format(type(value.dataset)))
 37 |                     elif value.dataset in cls._by_name:
 38 |                         raise RuntimeError('Invalid Loader: duplicate loader for {} dataset'.format(value.dataset))
 39 |                     else:
 40 |                         cls._by_name[value.dataset] = value
 41 |                         # {'BLOCK' : <datamine.loaders.block.BlockLoader object at 0x0000026E3AE01DD8>}
 42 | 
 43 |     @classmethod
 44 |     def datasets(cls):
 45 |         if cls._by_name is None:
 46 |             cls._load_datasets()
 47 |         return list(cls._by_name.keys())
 48 |     
 49 |     @classmethod
 50 |     def by_name(cls, dataset, dataset_args = {}):
 51 |         cls.dataset_args = dataset_args
 52 |         if cls._by_name is None:
 53 |             cls._load_datasets()
 54 |         if dataset not in cls._by_name:
 55 |             raise RuntimeError('Dataset not found: {}'.format(dataset))
 56 |         return cls._by_name[dataset]
 57 |     
 58 |     def _set_dtypes(self, df):
 59 |         if self.dtypes is None:
 60 |             return
 61 |         
 62 |         column_check = []
 63 |         for k, v in self.dtypes.items():
 64 |             for value in v:
 65 |                 column_check.append(value)
 66 |         if self.columns is not None:
 67 |             if set(self.columns).difference(column_check):
 68 |                 print("Mismatched column names & dtypes. Mismatches:", set(self.columns).difference(column_check))
 69 |                 logger.error(("Mismatched column names & dtypes. Mismatches:", set(self.columns).difference(column_check)))
 70 |         for dtype, cols in self.dtypes.items():
 71 |             for col in ((cols,) if isinstance(cols, str) else cols):
 72 |                 if col in df:
 73 |                     if dtype.startswith('date'):
 74 |                         format = None if dtype == 'date' else dtype[5:]
 75 |                         df[col] = pd.to_datetime(df[col], format=format, utc=True, errors='ignore')
 76 |                     else:
 77 |                         df[col] = df[col].astype(dtype, errors='ignore')
 78 | 
 79 |     def _glob(self, path):
 80 |         return glob.glob(os.path.join(path, self.fileglob))
 81 | 
 82 |     def _load(self, filename):
 83 |         '''Return a raw, unprocessed dataframe.'''
 84 |         return pd.read_csv(filename, low_memory=False)
 85 | 
 86 |     def _load_single(self, filename):
 87 |         '''Use _load to read a dataframe from disk, then assign new column
 88 |            names and coerce the datatypes, as appropriate.'''
 89 |         df = self._load(filename)
 90 |         if self.columns is not None:
 91 |             df.columns = self.columns
 92 |         self._set_dtypes(df)
 93 |         if self.index is not None:
 94 |             df = df.set_index(self.index)
 95 |         return df
 96 | 
 97 |     def _finalize(self, df):
 98 |         return df
 99 | 
100 |     def load(self, filenames, limit=None, max_workers=None):
101 |         '''Load a composite dataframe by concatenating individual files.'''
102 |         if isinstance(filenames, str):
103 |             if os.path.isdir(filenames):
104 |                 filenames = self._glob(filenames)
105 |             elif '*' in filenames:
106 |                 filenames = glob.glob(filenames)
107 |             else:
108 |                 filenames = [filenames]
109 |         nframes = len(filenames)
110 |         if limit and nframes > limit:
111 |             logger.info('limiting to {}/{} files'.format(limit, nframes))
112 |             filenames = filenames[-limit:]
113 |             nframes = limit
114 |         if nframes == 0:
115 |             result = pd.DataFrame(columns=self.columns)
116 |             self._set_dtypes(result)
117 |         elif nframes == 1:
118 |             result = self._load_single(filenames[0])
119 |         else:
120 |             result = tqdm_execute_tasks(self._load_single, filenames,
121 |                                         'reading {} data'.format(self.dataset), max_workers)
122 |             logger.info('concatenating {} dataframes'.format(nframes))
123 |             result = pd.concat(result, ignore_index=self.index is None)
124 |             # Set the categorical columns again, because concatenation often
125 |             # results in a reversion to object dtype
126 |             cols = self.dtypes.get('category', ())
127 |             for col in ((cols,) if isinstance(cols, str) else cols):
128 |                 if col in result:
129 |                     result[col] = result[col].astype('category', errors='ignore')
130 |         return self._finalize(result)
131 | 


--------------------------------------------------------------------------------
/datamine/loaders/block.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | import datetime
 5 | import pytz
 6 | 
 7 | 
 8 | class BlockLoader(Loader):
 9 |     dataset = 'BLOCK'
10 |     fileglob = '*.csv.gz'
11 | 
12 |     # Column "Product Type 2" has an extra space after the name.
13 |     # columns = ['Trade Datetime', 'Reported Datetime',
14 |     #            'Contract Symbol', 'Product Code', 'Asset Class', 'Market Sector', 'Description ', 'Product Type ', 'Contract Year', 'Contract Month', 'Strike Price', 'Put/Call', 'Exchange Code', 'Trade Price', 'Trade Quantity', 'Trade Source', 'Spread Type', 'Spread Description',
15 |     #            'Contract Symbol 2', 'Product Code 2', 'Asset Class 2', 'Market Sector 2', 'Description 2', 'Product Type 2 ', 'Contract Year 2', 'Contract Month 2', 'Strike Price 2', 'Put/Call 2', 'Exchange Code 2','Trade Price 2', 'Trade Quantity 2',
16 |     #            'Contract Symbol 3', 'Product Code 3', 'Asset Class 3', 'Market Sector 3', 'Description 3', 'Product Type 3 ', 'Contract Year 3', 'Contract Month 3', 'Strike Price 3', 'Put/Call 3', 'Exchange Code 3',
17 |     #            'Contract Symbol 4', 'Product Code 4', 'Asset Class 4', 'Market Sector 4', 'Description 4', 'Product Type 4 ', 'Contract Year 4', 'Contract Month 4', 'Strike Price 4', 'Put/Call 4', 'Exchange Code 4']
18 | 
19 |     dtypes = {'category': ('Contract Symbol', 'Product Code', 'Asset Class', 'Market Sector', 'Description ', 'Product Type ', 'Put/Call', 'Exchange Code', 'Trade Source', 'Spread Type', 'Spread Description',
20 |                            'Contract Symbol 2', 'Product Code 2', 'Asset Class 2', 'Market Sector 2', 'Description 2', 'Product Type 2 ', 'Put/Call 2', 'Exchange Code 2',
21 |                            'Contract Symbol 3', 'Product Code 3', 'Asset Class 3', 'Market Sector 3', 'Description 3', 'Product Type 3 ', 'Put/Call 3', 'Exchange Code 3',
22 |                            'Contract Symbol 4', 'Product Code 4', 'Asset Class 4', 'Market Sector 4', 'Description 4', 'Product Type 4 ', 'Put/Call 4', 'Exchange Code 4'),
23 |               'int64': ('Contract Year', 'Contract Month',
24 |                         'Contract Year 2', 'Contract Month 2',
25 |                         'Contract Year 3', 'Contract Month 3',
26 |                         'Contract Year 4', 'Contract Month 4',),
27 |               'float': ('Strike Price', 'Trade Price', 'Trade Quantity',
28 |                         'Strike Price 2', 'Trade Price 2', 'Trade Quantity 2',
29 |                         'Strike Price 3',
30 |                         'Strike Price 4'),
31 |               'date': ()}
32 |     
33 |     def _load(self, file):
34 |         df = pd.read_csv(file, low_memory = False)
35 |         
36 |         df['Trade Datetime'] = df['Trade Date'].astype('str') + ' ' + df['Trade Time'].astype('str')
37 |         df['Reported Datetime'] = df['Trade Date'].astype('str') + ' ' + df['Reported Time'].astype('str')
38 |         
39 |         timezone = df['Trade Datetime'].str[-2:]
40 |         if timezone.unique()[0] == "ET":
41 |             sub_string = " ET"
42 |             timezone = pytz.timezone("US/Eastern") 
43 |         elif timezone.unique()[0] == "CT":
44 |             sub_string = " CT"
45 |             timezone = pytz.timezone("US/Central") 
46 |         else:
47 |             pass
48 |             
49 |         df['Trade Datetime'] = df['Trade Datetime'].str.replace(sub_string, "")
50 |         df['Reported Datetime'] = df['Reported Datetime'].str.replace(sub_string, "")
51 |         
52 |         df['Trade Datetime'] = df['Trade Datetime'].apply(datetime.datetime.strptime, args=('%Y%m%d %H:%M:%S',))
53 |         df['Trade Datetime'] = df['Trade Datetime'].apply(timezone.localize)
54 |         df['Reported Datetime'] = df['Reported Datetime'].apply(datetime.datetime.strptime, args=('%Y%m%d %H:%M',))
55 |         df['Reported Datetime'] = df['Reported Datetime'].apply(timezone.localize)
56 | 
57 |         df = df.drop(['Trade Date', 'Trade Time', 'Reported Time'], axis=1)
58 |         return(df)
59 |         
60 | 
61 | blockLoader = BlockLoader()
62 | 


--------------------------------------------------------------------------------
/datamine/loaders/cryptocurrency.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | import gzip
 5 | import json
 6 | 
 7 | class CryptocurrencyLoader(Loader):
 8 |     dataset = 'CRYPTOCURRENCY'
 9 |     fileglob = '*_btcIndexJson.gz'
10 |     index = 'mdEntryDateTime'
11 | 
12 |     dtypes = {'category': ('mdEntryCode', 'mdEntryType', 'mdUpdateAction',
13 |                            'symbol', 'openCloseSettlFlag'),
14 |               'int64': ('rptSeq',),
15 |               'float': ('netChgPrevDay', 'netPctChg', 'mdEntryPx'),
16 |               'date:%Y%m%d_%H:%M:%S.%f': 'mdEntryDateTime'}
17 | 
18 |     def _load(self, filename):
19 |         result = []
20 |         with gzip.open(filename, 'rt', encoding='utf-8') as f:
21 |             for line in f:
22 |                 line = json.loads(line)
23 |                 if 'mdEntries' in line:
24 |                     result.append(line['mdEntries'][0])
25 |         result = pd.DataFrame(result)
26 |         result['mdEntryDateTime'] = result['mdEntryDate'] + '_' + result['mdEntryTime']
27 |         return result.drop(['mdEntryDate', 'mdEntryType'], axis=1)
28 | 
29 | cryptocurrencyLoader = CryptocurrencyLoader()
30 | 


--------------------------------------------------------------------------------
/datamine/loaders/eod.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | import numpy as np
 5 | 
 6 | class EODLoader(Loader):
 7 |     dataset = 'EOD'
 8 |     fileglob = '*.gz'
 9 | 
10 |     columns = ['Trade Date','Exchange Code', 'Asset Class', 'Product Code', 'Clearing Code',
11 |        'Product Description', 'Product Type', 'Underlying Product Code',
12 |        'Put/Call', 'Strike Price', 'Contract Year', 'Contract Month',
13 |        'Contract Day', 'Settlement', 'Settlement Cabinet Indicator',
14 |        'Open Interest', 'Total Volume', 'Globex Volume', 'Floor Volume',
15 |        'PNT Volume', 'Block Volume', 'EFP Volume', 'EOO Volume', 'EFR Volume',
16 |        'EFS Volume', 'EFB Volume', 'EFM Volume', 'SUB Volume', 'OPNT Volume',
17 |        'TAS Volume', 'TAS Block Volume', 'TAM Singapore Volume',
18 |        'TAM Singapore Block Volume', 'TAM London Volume',
19 |        'TAM London Block Volume', 'Globex Open Price',
20 |        'Globex Open Price Bid/Ask Indicator',
21 |        'Globex Open Price Cabinet Indicator', 'Globex High Price',
22 |        'Globex High Price Bid/Ask Indicator',
23 |        'Globex High Price Cabinet Indicator', 'Globex Low Price',
24 |        'Globex Low Price Bid/Ask Indicator',
25 |        'Globex Low Price Cabinet Indicator', 'Globex Close Price',
26 |        'Globex Close Price Bid/Ask Indicator',
27 |        'Globex Close Price Cabinet Indicator', 'Floor Open Price',
28 |        'Floor Open Price Bid/Ask Indicator',
29 |        'Floor Open Price Cabinet Indicator', 'Floor Open Second Price',
30 |        'Floor Open Second Price Bid/Ask Indicator', 'Floor High Price',
31 |        'Floor High Price Bid/Ask Indicator',
32 |        'Floor High Price Cabinet Indicator', 'Floor Low Price',
33 |        'Floor Low Price Bid/Ask Indicator',
34 |        'Floor Low Price Cabinet Indicator', 'Floor Close Price',
35 |        'Floor Close Price Bid/Ask Indicator',
36 |        'Floor Close Price Cabinet Indicator', 'Floor Close Second Price',
37 |        'Floor Close Second Price Bid/Ask Indicator', 'Floor Post-Close Price',
38 |        'Floor Post-Close Price Bid/Ask Indicator',
39 |        'Floor Post-Close Second Price',
40 |        'Floor Post-Close Second Price Bid/Ask Indicator', 'Delta',
41 |        'Implied Volatility', 'Last Trade Date', 'TAM (Trade At Marker)']
42 | 
43 |     dtypes = {'category': ('Settlement Cabinet Indicator', 'Asset Class', 'Product Code', 'Clearing Code',
44 |                   'Product Description', 'Product Type', 'Underlying Product Code',
45 |                   'Put/Call', 'Strike Price', 'Contract Year', 'Contract Month',
46 |                   'Contract Day','Exchange Code','Globex Open Price Bid/Ask Indicator',
47 |                   'Globex Open Price Cabinet Indicator','Globex High Price Bid/Ask Indicator',
48 |                   'Globex High Price Cabinet Indicator','Globex Close Price Bid/Ask Indicator',
49 |                   'Globex Close Price Cabinet Indicator','Floor Open Price Bid/Ask Indicator',
50 |                   'Floor Open Price Cabinet Indicator','Globex Low Price Bid/Ask Indicator',
51 |                   'Globex Low Price Cabinet Indicator', 'Floor Open Second Price Bid/Ask Indicator',
52 |                   'Floor High Price Bid/Ask Indicator',
53 |                   'Floor High Price Cabinet Indicator','Floor Low Price Bid/Ask Indicator',
54 |                   'Floor Low Price Cabinet Indicator','Floor Close Price Bid/Ask Indicator',
55 |                   'Floor Close Price Cabinet Indicator','Floor Post-Close Price Bid/Ask Indicator',
56 |                   'Floor Post-Close Second Price Bid/Ask Indicator','Floor Close Second Price Bid/Ask Indicator', 
57 |                   ),
58 |             'int64': ('Open Interest', 'Total Volume', 'Globex Volume', 'Floor Volume',
59 |                   'PNT Volume', 'Block Volume', 'EFP Volume', 'EOO Volume', 'EFR Volume',
60 |                   'EFS Volume', 'EFB Volume', 'EFM Volume', 'SUB Volume', 'OPNT Volume',
61 |                   'TAS Volume', 'TAS Block Volume', 'TAM Singapore Volume',
62 |                   'TAM Singapore Block Volume', 'TAM London Volume',
63 |                   'TAM London Block Volume'),
64 |             'float': ('Settlement',
65 |                       'Globex Open Price',
66 |                   'Globex High Price',
67 |                   'Globex Low Price',
68 |                   'Globex Close Price',
69 |                   'Floor Open Price',
70 |                   'Floor Open Second Price',
71 |                   'Floor High Price',
72 |                   'Floor Low Price',
73 |                   'Floor Close Price',
74 |                   'Floor Close Second Price',
75 |                   'Floor Post-Close Price',
76 |                   'Floor Post-Close Second Price',
77 |                   'Delta',
78 |                   'Implied Volatility', 'TAM (Trade At Marker)'),
79 |               'date:%Y%m%d': ('Trade Date','Last Trade Date'),
80 |               }
81 | 
82 |     def _load(self, file):
83 |         df = pd.read_csv(file, skiprows=1, header=None, low_memory=False)
84 |         if len(df.columns) == 70:
85 |             df.insert(len(df.columns), "TAM (Trade At Marker)", float(np.nan))
86 |         return df
87 | 
88 | eodLoader = EODLoader()
89 | 


--------------------------------------------------------------------------------
/datamine/loaders/eris.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | import numpy as np
 5 | 
 6 | class ErisLoader(Loader):
 7 |     dataset = 'ERIS'
 8 |     fileglob = 'ERIS_*.csv'
 9 | 
10 |     columns = ['Symbol', 'FinalSettlementPrice', 'EvaluationDate', 'FirstTradeDate',
11 |                'ErisPAIDate', 'EffectiveDate', 'CashFlowAlignmentDate', 'MaturityDate', 'NPV (A)',
12 |                'FixedNPV', 'FloatingNPV', 'Coupon (%)', 'FairCoupon (%)', 'FixedPayment', 'FloatingPayment',
13 |                'NextFixedPaymentDate', 'NextFixedPaymentAmount', 'PreviousFixingDate', 'PreviousFixingRate',
14 |                'NextFloatingPaymentDate', 'NextFloatingPaymentAmount', 'NextFixingDate', 'PreviousSettlementDate',
15 |                'PreviousSettlementPrice', 'PreviousErisPAI', 'FedFundsDate', 'FedFundsRate (%)', 'AccrualDays',
16 |                'DailyIncrementalErisPAI', 'AccruedCoupons (B)', 'ErisPAI (C)', 'SettlementPrice (100+A+B-C)',
17 |                'RFQ NPV TickSize ($)', 'Nominal', 'ResetRateDescriptor', 'InterpolationFactor', 'HighTradePrice',
18 |                'LowTradePrice', 'LastTradePrice', 'DailyContractVolume', 'Tag55(T)', 'Tag65(T)', 'Tag55(T+1)',
19 |                'Tag65(T+1)', 'LastTradeDate', 'InitialSpeculatorMargin', 'SecondarySpeculatorMargin',
20 |                'InitialHedgerMargin', 'SecondaryHedgerMargin', 'ExchangeSymbol (EX005)', 'BloombergTicker',
21 |                'FirstFixingDate', 'Category', 'BenchmarkContractName', 'PV01', 'DV01', 'ShortName',
22 |                'EffectiveYearMonth', 'UnpaidFixedAccrualStartDate', 'UnpaidFixedAccrual', 'UnpaidFloatingAccrualStartDate', 'UnpaidFloatingAccrual', 'NetUnpaidFixedFloatingAccrual', 'NPV(A)lessNetUnpaidFixedFloatingAccrual', 'AccruedCoupons(B)plusNetUnpaidFixedFloatingAccrual']
23 | 
24 |     dtypes = {'category': ('Symbol', 'ResetRateDescriptor', 'ExchangeSymbol (EX005)', 'BloombergTicker', 'EffectiveYearMonth'),
25 |               'int64': ('AccrualDays', 'EffectiveYearMonth', 'Nominal'),
26 |               'float': ('FinalSettlementPrice', 'NPV (A)', 'FixedNPV', 'FloatingNPV', 'Coupon (%)',
27 |                         'FairCoupon (%)', 'FixedPayment', 'FloatingPayment', 'NextFixedPaymentAmount',
28 |                         'PreviousFixingRate', 'NextFloatingPaymentAmount', 'PreviousSettlementPrice',
29 |                         'PreviousErisPAI', 'FedFundsRate (%)', 'DailyIncrementalErisPAI', 'AccruedCoupons (B)',
30 |                         'ErisPAI (C)', 'SettlementPrice (100+A+B-C)', 'InterpolationFactor',
31 |                         'HighTradePrice', 'PV01', 'DV01',
32 |                         'UnpaidFixedAccrual','UnpaidFloatingAccrual','NetUnpaidFixedFloatingAccrual',
33 |                         'NPV(A)lessNetUnpaidFixedFloatingAccrual', 'AccruedCoupons(B)plusNetUnpaidFixedFloatingAccrual'),
34 |               'date:%m/%d/%Y': ('EvaluationDate', 'FirstTradeDate', 'ErisPAIDate',
35 |                                 'EffectiveDate', 'CashFlowAlignmentDate', 'MaturityDate',
36 |                                 'NextFixedPaymentDate', 'PreviousFixingDate', 'NextFloatingPaymentDate',
37 |                                 'NextFixingDate', 'PreviousSettlementDate',
38 |                                 'FedFundsDate', 'LastTradeDate', 'FirstFixingDate',
39 |                                 'UnpaidFixedAccrualStartDate', 'UnpaidFloatingAccrualStartDate')}
40 | 
41 |     def _load(self, file):
42 |         df = pd.read_csv(file, low_memory=False)
43 |         if len(df.columns) == 58:
44 |             col_adjustment = {'UnpaidFixedAccrualStartDate' : np.datetime64(), 'UnpaidFixedAccrual' : float(), 'UnpaidFloatingAccrualStartDate' : np.datetime64(), 'UnpaidFloatingAccrual' : float(), 'NetUnpaidFixedFloatingAccrual' : float(), 'NPV(A)lessNetUnpaidFixedFloatingAccrual' : float(), 'AccruedCoupons(B)plusNetUnpaidFixedFloatingAccrual' : float()}
45 |             for k, v in col_adjustment.items():
46 |                 df.insert(len(df.columns), k, v)
47 |         return df
48 | 
49 | erisLoader = ErisLoader()
50 | 


--------------------------------------------------------------------------------
/datamine/loaders/fx.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | 
 5 | class FXLoader(Loader):
 6 |     dataset = 'FX'
 7 |     fileglob = '*.gz'
 8 | 
 9 |     columns = ['Timestamp', 'Pair', 'Ask', 'Bid']
10 | 
11 |     dtypes = {'category': ('Pair',),
12 |               'int64': (),
13 |               'float': ('Ask','Bid'),
14 |               'date': ('Timestamp',),
15 |               }
16 | 
17 |     def _load(self, file):
18 |         df = pd.read_csv(file, skiprows=1, header=None, low_memory=False)
19 |         
20 |         return df
21 | 
22 | fxLoader = FXLoader()
23 | 


--------------------------------------------------------------------------------
/datamine/loaders/govpx.py:
--------------------------------------------------------------------------------
 1 | from datamine.loaders import Loader
 2 | 
 3 | import pandas as pd
 4 | 
 5 | class GOVPXLoader(Loader):
 6 |         
 7 |     dataset = 'GOVPX'
 8 |     
 9 |     govpx_us_treasury_cols = ['Timestamp','Producer','Record','Ask','AskType','AskYield','Bid','BidType','BidYield','BidYieldChg','CashAskPrice','CashBidPrice','CashMidPrice','Change','Coupon','CUSIP','Description','DollarFlow','High','ICAPVOL','IndicativeAskPrice','IndicativeAskYield','IndicativeBidPrice','IndicativeBidYield','IssueDate','Last','LastHitorTake','LastYield','Low','MaturityDate','Mid','MidChg','MidSnapChg','MidYield','MidYldSnapChg','Open','SettlementDate','ShortDescription','TreasuryType','VoiceAskPrice','VoiceAskSize','VoiceAskYield','VoiceBidPrice','VoiceBidSize','VoiceBidYield','VoiceTradeSize','VWAP','VWAP10AM-11AM','VWAP11AM-12PM','VWAP12PM-1PM','VWAP1PM-2PM','VWAP2PM-3PM','VWAP3PM-4PM','VWAP8AM-9AM','VWAP9AM-10AM','VWAY','VWAY10AM-11AM','VWAY11AM-12PM','VWAY12PM-1PM','VWAY1PM-2PM','VWAY2PM-3PM','VWAY3PM-4PM','VWAY8AM-9AM','VWAY9AM-10AM']
10 |     govpx_us_tips_cols = ['Timestamp','Producer','Record','Ask','AskYield','Bid','BidYield','BidYieldChg','BidYieldChg','Coupon','CUSIP','Description','High','ICAPVOL','IndicativeAskPrice','IndicativeAskYield','IndicativeBidPrice','IndicativeBidYield','IssueDate','Last','LastHitorTake','LastYield','Low','MaturityDate','Mid','MidChg','MidSnapChg','MidYield','MidYldSnapChg','Open','SettlementDate','ShortDescription','Spread','TreasuryType','VoiceAskPrice','VoiceAskSize','VoiceAskYield','VoiceBidPrice','VoiceBidSize','VoiceBidYield','VoiceTradeSize']
11 |     govpx_us_frn_cols = ['Date','Producer','Record','Ask','AskYield','Bid','BidYield','CashAskPrice','CashBidPrice','CashMidPrice','Coupon','CUSIP','Description','FirstCouponDate','FRNIndexRate','High','IndicativeAskPrice','IndicativeAskYield','IndicativeBidPrice','IssueDate','Last','LastHitorTake','LastYield','Low','MaturityDate','Mid','MidSnapChg','MidYield','MidYldSnapChg','ModifiedDuration','Open','PriceDuration','SettlementDate','TreasuryType','VoiceAskPrice','VoiceAskSize','VoiceAskYield','VoiceBidPrice','VoiceBidSize','VoiceBidYield','VoiceTradeSize']
12 |     govpx_us_agencies_cols = ['Timestamp','Producer','Record','AgencySwapSpd','AgencySwapSprdChg','Ask','AskSpread','AskYield','AskYTMSpread','Bid','BidSpread','BidYield','BidYTMSpread','Change','Coupon','CUSIP','Description','IndicativeAskYield','IndicativeAskSpd','IndicativeBidYield','IndicativeBidSpd','IndicativeBidYield','MaturityDate']
13 |             
14 |     govpx_us_treasury_dtypes = {'category': ('Producer','Record','CUSIP','Description','LastHitorTake','ShortDescription',),
15 |               'int64': ('ICAPVOL','TreasuryType','VoiceAskSize','VoiceBidSize',),
16 |               'float': ('Ask','AskType','AskYield',
17 |                         'Bid','BidType','BidYield','BidYieldChg',
18 |                         'CashAskPrice','CashBidPrice','CashMidPrice',
19 |                         'Change','Coupon','DollarFlow','High',
20 |                         'IndicativeAskPrice','IndicativeAskYield','IndicativeBidPrice','IndicativeBidYield',
21 |                         'Last', 'LastYield', 'Low',
22 |                         'Mid','MidChg','MidSnapChg','MidYield','MidYldSnapChg',
23 |                         'Open','VoiceAskPrice','VoiceAskYield','VoiceBidPrice','VoiceBidYield','VoiceTradeSize',
24 |                         'VWAP','VWAP10AM-11AM','VWAP11AM-12PM','VWAP12PM-1PM','VWAP1PM-2PM','VWAP2PM-3PM','VWAP3PM-4PM','VWAP8AM-9AM','VWAP9AM-10AM',
25 |                         'VWAY','VWAY10AM-11AM','VWAY11AM-12PM','VWAY12PM-1PM','VWAY1PM-2PM','VWAY2PM-3PM','VWAY3PM-4PM','VWAY8AM-9AM','VWAY9AM-10AM',),
26 |               'date': ('Timestamp','IssueDate','MaturityDate','SettlementDate'),
27 |               }
28 |     
29 |     govpx_us_tips_dtypes = {'category': ('Producer','Record','CUSIP','Description','LastHitorTake','ShortDescription',),
30 |               'int64': ('ICAPVOL','TreasuryType','VoiceAskSize','VoiceBidSize',),
31 |               'float': ('Ask','AskYield',
32 |                         'Bid','BidYield','BidYieldChg',
33 |                         'Coupon','High',
34 |                         'IndicativeAskPrice','IndicativeAskYield','IndicativeBidPrice','IndicativeBidYield',
35 |                         'Last', 'LastYield', 'Low',
36 |                         'Mid','MidChg','MidSnapChg','MidYield','MidYldSnapChg',
37 |                         'Open','Spread','VoiceAskPrice','VoiceAskYield','VoiceBidPrice','VoiceBidYield','VoiceTradeSize',),
38 |               'date': ('Timestamp','IssueDate','MaturityDate','SettlementDate'),
39 |               }
40 |     
41 |     govpx_us_frn_dtypes = {'category': ('Producer','Record','CUSIP','Description','LastHitorTake',),
42 |               'int64': ('TreasuryType','VoiceAskSize','VoiceBidSize',),
43 |               'float': ('Ask','AskYield',
44 |                         'Bid','BidYield',
45 |                         'CashAskPrice','CashBidPrice','CashMidPrice',
46 |                         'Coupon','High',
47 |                         'IndicativeAskPrice','IndicativeAskYield','IndicativeBidPrice',
48 |                         'Last', 'LastYield', 'Low',
49 |                         'Mid','MidSnapChg','MidYield','MidYldSnapChg',
50 |                         'Open','VoiceAskPrice','VoiceAskYield','VoiceBidPrice','VoiceBidYield','VoiceTradeSize',
51 |                         'FRNIndexRate','ModifiedDuration','PriceDuration'),
52 |               'date': ('Date', 'IssueDate','MaturityDate','SettlementDate', 'FirstCouponDate'),
53 |               }
54 |     
55 |     govpx_us_agencies_dtypes = {'category': ('Producer','Record','CUSIP','Description',),
56 |               'int64': (),
57 |               'float': ('Ask','AskYield',
58 |                         'Bid','BidYield',
59 |                         'Change','Coupon',
60 |                         'IndicativeAskYield','IndicativeBidYield',
61 |                         'AgencySwapSpd','AgencySwapSprdChg',
62 |                         'AskSpread','AskYTMSpread',
63 |                         'BidSpread','BidYTMSpread',
64 |                         'IndicativeAskSpd','IndicativeBidSpd',),
65 |               'date': ('Timestamp','MaturityDate',),
66 |               }
67 | 
68 |     if Loader.dataset_args == None:
69 |         print("Specify a dataset for the GovPX loader.")
70 |     else:
71 |         for k, v in Loader.dataset_args.items():
72 |             if k == 'dataset':
73 |                 if v == 'treasury':
74 |                     columns = govpx_us_treasury_cols
75 |                     dtypes = govpx_us_treasury_dtypes
76 |                     fileglob = "*_UST_*.csv"
77 |                 elif v == 'tips':
78 |                     columns = govpx_us_tips_cols
79 |                     dtypes = govpx_us_tips_dtypes
80 |                     fileglob = "*_TIPS_*.csv"
81 |                 elif v == 'frn':
82 |                     columns = govpx_us_frn_cols
83 |                     dtypes = govpx_us_frn_dtypes
84 |                     fileglob = "*_FRN_*.csv"
85 |                 elif v == 'agencies':
86 |                     columns = govpx_us_agencies_cols
87 |                     dtypes = govpx_us_agencies_dtypes
88 |                     fileglob = "*_Agencies_*.csv"
89 |             print("Complete reload")
90 |             
91 |     def _load(self, file):
92 |         df = pd.read_csv(file, skiprows=1, header=None, low_memory=False)
93 |         return df
94 | 
95 | govpxLoader = GOVPXLoader()
96 | 


--------------------------------------------------------------------------------
/datamine/loaders/liqtool.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | from datetime import datetime, timedelta
 5 | start = datetime(1970, 1, 1)  # Unix epoch start time
 6 | 
 7 | class LiqLoader(Loader):
 8 |     dataset = 'LIQTOOL'
 9 |     fileglob = 'LIQTOOL_*.csv.gz'
10 |     index = 'tradedate'
11 |     
12 |     dtypes = {'category': ('symbol', 'time_zone'),
13 |               'int64': ('lot_1_size', 'lot_2_size', 'lot_3_size', 'lot_4_size', 'lot_5_size',
14 |                         'lot_6_size', 'lot_7_size', 'lot_8_size', 'lot_9_size', 'lot_10_size',
15 |                         'lot_11_size', 'lot_12_size', 'lot_13_size', 'lot_14_size', 'lot_15_size',
16 |                         'lot_16_size', 'lot_17_size', 'lot_18_size', 'lot_19_size', 'lot_20_size',
17 |                         'lot_21_size', 'lot_22_size', 'lot_23_size', 'lot_24_size', 'lot_25_size', 'frontmonth'),
18 |               'float': ('avg_level_1_spread', 'avg_level_1_midprice', 'avg_level_1_weightedprice', 'avg_level_1_ask_price', 'avg_level_1_bid_price', 'avg_level_1_ask_quantity', 'avg_level_1_bid_quantity', 'avg_level_1_ask_orders', 'avg_level_1_bid_orders',
19 |                         'avg_level_2_spread', 'avg_level_2_midprice', 'avg_level_2_weightedprice', 'avg_level_2_ask_price', 'avg_level_2_bid_price', 'avg_level_2_ask_quantity', 'avg_level_2_bid_quantity', 'avg_level_2_ask_orders', 'avg_level_2_bid_orders',
20 |                         'avg_level_3_spread', 'avg_level_3_midprice', 'avg_level_3_weightedprice', 'avg_level_3_ask_price', 'avg_level_3_bid_price', 'avg_level_3_ask_quantity', 'avg_level_3_bid_quantity', 'avg_level_3_ask_orders', 'avg_level_3_bid_orders',
21 |                         'avg_level_4_spread', 'avg_level_4_midprice', 'avg_level_4_weightedprice', 'avg_level_4_ask_price', 'avg_level_4_bid_price', 'avg_level_4_ask_quantity', 'avg_level_4_bid_quantity', 'avg_level_4_ask_orders', 'avg_level_4_bid_orders',
22 |                         'avg_level_5_spread', 'avg_level_5_midprice', 'avg_level_5_weightedprice', 'avg_level_5_ask_price', 'avg_level_5_bid_price', 'avg_level_5_ask_quantity', 'avg_level_5_bid_quantity', 'avg_level_5_ask_orders', 'avg_level_5_bid_orders',
23 |                         'avg_level_6_spread', 'avg_level_6_midprice', 'avg_level_6_weightedprice', 'avg_level_6_ask_price', 'avg_level_6_bid_price', 'avg_level_6_ask_quantity', 'avg_level_6_bid_quantity', 'avg_level_6_ask_orders', 'avg_level_6_bid_orders',
24 |                         'avg_level_7_spread', 'avg_level_7_midprice', 'avg_level_7_weightedprice', 'avg_level_7_ask_price', 'avg_level_7_bid_price', 'avg_level_7_ask_quantity', 'avg_level_7_bid_quantity', 'avg_level_7_ask_orders', 'avg_level_7_bid_orders',
25 |                         'avg_level_8_spread', 'avg_level_8_midprice', 'avg_level_8_weightedprice', 'avg_level_8_ask_price', 'avg_level_8_bid_price', 'avg_level_8_ask_quantity', 'avg_level_8_bid_quantity', 'avg_level_8_ask_orders', 'avg_level_8_bid_orders',
26 |                         'avg_level_9_spread', 'avg_level_9_midprice', 'avg_level_9_weightedprice', 'avg_level_9_ask_price', 'avg_level_9_bid_price', 'avg_level_9_ask_quantity', 'avg_level_9_bid_quantity', 'avg_level_9_ask_orders', 'avg_level_9_bid_orders',
27 |                         'avg_level_10_spread', 'avg_level_10_midprice', 'avg_level_10_weightedprice', 'avg_level_10_ask_price', 'avg_level_10_bid_price', 'avg_level_10_ask_quantity', 'avg_level_10_bid_quantity', 'avg_level_10_ask_orders', 'avg_level_10_bid_orders',
28 |                         'lot_1_buy_ctt', 'lot_1_sell_ctt', 'lot_1_buy_depth', 'lot_1_sell_depth',
29 |                         'lot_2_buy_ctt', 'lot_2_sell_ctt', 'lot_2_buy_depth', 'lot_2_sell_depth',
30 |                         'lot_3_buy_ctt', 'lot_3_sell_ctt', 'lot_3_buy_depth', 'lot_3_sell_depth',
31 |                         'lot_4_buy_ctt', 'lot_4_sell_ctt', 'lot_4_buy_depth', 'lot_4_sell_depth',
32 |                         'lot_5_buy_ctt', 'lot_5_sell_ctt', 'lot_5_buy_depth', 'lot_5_sell_depth',
33 |                         'lot_6_buy_ctt', 'lot_6_sell_ctt', 'lot_6_buy_depth', 'lot_6_sell_depth',
34 |                         'lot_7_buy_ctt', 'lot_7_sell_ctt', 'lot_7_buy_depth', 'lot_7_sell_depth',
35 |                         'lot_8_buy_ctt', 'lot_8_sell_ctt', 'lot_8_buy_depth', 'lot_8_sell_depth',
36 |                         'lot_9_buy_ctt', 'lot_9_sell_ctt', 'lot_9_buy_depth', 'lot_9_sell_depth',
37 |                         'lot_10_buy_ctt', 'lot_10_sell_ctt', 'lot_10_buy_depth', 'lot_10_sell_depth',
38 |                         'lot_11_buy_ctt', 'lot_11_sell_ctt', 'lot_11_buy_depth', 'lot_11_sell_depth',
39 |                         'lot_12_buy_ctt', 'lot_12_sell_ctt', 'lot_12_buy_depth', 'lot_12_sell_depth',
40 |                         'lot_13_buy_ctt', 'lot_13_sell_ctt', 'lot_13_buy_depth', 'lot_13_sell_depth',
41 |                         'lot_14_buy_ctt', 'lot_14_sell_ctt', 'lot_14_buy_depth', 'lot_14_sell_depth',
42 |                         'lot_15_buy_ctt', 'lot_15_sell_ctt', 'lot_15_buy_depth', 'lot_15_sell_depth',
43 |                         'lot_16_buy_ctt', 'lot_16_sell_ctt', 'lot_16_buy_depth', 'lot_16_sell_depth',
44 |                         'lot_17_buy_ctt', 'lot_17_sell_ctt', 'lot_17_buy_depth', 'lot_17_sell_depth',
45 |                         'lot_18_buy_ctt', 'lot_18_sell_ctt', 'lot_18_buy_depth', 'lot_18_sell_depth',
46 |                         'lot_19_buy_ctt', 'lot_19_sell_ctt', 'lot_19_buy_depth', 'lot_19_sell_depth',
47 |                         'lot_20_buy_ctt', 'lot_20_sell_ctt', 'lot_20_buy_depth', 'lot_20_sell_depth',
48 |                         'lot_21_buy_ctt', 'lot_21_sell_ctt', 'lot_21_buy_depth', 'lot_21_sell_depth',
49 |                         'lot_22_buy_ctt', 'lot_22_sell_ctt', 'lot_22_buy_depth', 'lot_22_sell_depth',
50 |                         'lot_23_buy_ctt', 'lot_23_sell_ctt', 'lot_23_buy_depth', 'lot_23_sell_depth',
51 |                         'lot_24_buy_ctt', 'lot_24_sell_ctt', 'lot_24_buy_depth', 'lot_24_sell_depth',
52 |                         'lot_25_buy_ctt', 'lot_25_sell_ctt', 'lot_25_buy_depth', 'lot_25_sell_depth',),
53 |               'date': ('unixtime',),
54 |               'date:%Y%m%d': ('tradedate',)}
55 |     
56 |     def _load(self, file):
57 |         df = pd.read_csv(file, low_memory = False)
58 |         df['unixtime'] = df['unix_in_sec'].apply(lambda x: start + timedelta(seconds=x))
59 |         df = df.drop(['unix_in_sec'], axis=1)
60 |         return(df)
61 |         
62 | liqLoader = LiqLoader()
63 | 


--------------------------------------------------------------------------------
/datamine/loaders/orbitalinsight.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | import os
 5 | 
 6 | class OrbitalInsightLoader(Loader):
 7 |     dataset = 'ORBITALINSIGHT'
 8 |     fileglob = 'ORBITALINSIGHT_*.csv'
 9 |     
10 |     columns = ['storage.capacity.estimate', 'volume.estimate.stderr', 'scaled.estimate.stderr',
11 |                          'total.available.tanks', 'smoothed.estimate', 'sampled.tanks.1w',
12 |                          'sampled.tanks.1d', 'volume.estimate', 'scaled.estimate', 'truth_value_mb',
13 |                          'sampled.tanks', 'date', 'location']
14 | 
15 |     dtypes = {'category': ('location',),
16 |               'int64': ('sampled.tanks', 'sampled.tanks.1d', 'sampled.tanks.1w', 'total.available.tanks'),
17 |               'float': ('smoothed.estimate', 'storage.capacity.estimate',
18 |                         'truth_value_mb', 'volume.estimate', 'volume.estimate.stderr',
19 |                         'scaled.estimate', 'scaled.estimate.stderr'),
20 |               'date': 'date'}
21 | 
22 |     def _load(self, file):
23 |         _, location, sublocation, _ = os.path.basename(file).split('_', 3)
24 |         if sublocation != '0':
25 |             location = location + '_' + sublocation
26 |         df = pd.read_csv(file, low_memory=False)
27 |         df['location'] = location
28 |         return df
29 | 
30 | orbitalInsightLoader = OrbitalInsightLoader()
31 | 


--------------------------------------------------------------------------------
/datamine/loaders/rsmetrics.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import glob
 4 | import os
 5 | 
 6 | class RSMetricsLoader(Loader):
 7 |     dataset = 'RSMETRICS'
 8 | 
 9 |     names = ['Order', 'Ticker', 'Type', 'Full.Name', 'Name', 'Location.Type', 'Smelter.Storage',
10 |              'Metal.Shape', 'Metal.Type', 'YearMonthDayUTC', 'Address', 'City', 'State', 'Zip',
11 |              'Country', 'Employee.Cars', 'Containers', 'Trucks', 'Tippers', 'Total.Area.Metal.stocks.m2',
12 |              'Area.Piles.m2', 'Area.Concentrate.Bags.m2', 'Area.Cathodes.m2', 'Area.Anodes.m2',
13 |              'Comments', 'Notes', 'Time_Date', 'Time', 'Month', 'Day', 'Year', 'PrePost', 'DOW',
14 |              'Week.End', 'Region', 'Subregion', 'Latitude', 'Longitude', 'DIRECTORY', 'GMP',
15 |              'Location', 'Metal', 'YearMonth', 'Tot.Area', 'Drop']
16 | 
17 |     dtypes = {'category': ('Ticker', 'Type', 'Full.Name', 'Name', 'Location.Type',
18 |                            'Smelter.Storage', 'Metal.Shape', 'Metal.Type', 'Country', 'PrePost', 'PrePost',
19 |                            'Location', 'Metal'),
20 |               'int64': ('Employee.Cars', 'Containers', 'Trucks', 'Tippers', 'Total.Area.Metal.stocks.m2',
21 |                         'Area.Piles.m2', 'Area.Concentrate.Bags.m2', 'Area.Cathodes.m2',
22 |                         'Area.Anodes.m2', 'Tot.Area'),
23 |               'date:%Y-%m-%d': ('Notes', ),
24 |               'date:%H:%M %m-%d-%Y': ('Time_Date', )}
25 | 
26 |     # Return the weekly data first, then the daily
27 |     def _glob(self, path):
28 |         base = os.path.join(path, 'RSMETRICS_*')
29 |         return glob.glob(base + '_WEEKLY_*.csv') + glob.glob(base + '_DAILY_*.csv')
30 | 
31 | rsMetricsLoader = RSMetricsLoader()
32 | 


--------------------------------------------------------------------------------
/datamine/loaders/sofr.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | 
 5 | class SOFROISLoader(Loader):
 6 |     dataset = 'SOFR'
 7 |     fileglob = 'SOFR_OIS_*.csv'
 8 |     columns = ['Trade Date', 'Exchange Code', 'Currency','Commodity Code', 
 9 |                 'Short Description','Long Description', 'Curve Date', 'Offset', 
10 |                 'Discount Factor', 'Forward rate', 'Rate']
11 |     
12 |     
13 |     dtypes = {'category': ('Exchange Code', 'Currency', 'Commodity Code',
14 |                            'Short Description', 'Long Description','Curve Date','Forward rate'),
15 |               'int64': ('Offset',),
16 |               'float': ('Discount Factor','Rate'),
17 |               'date:%Y%m%d': ('Trade Date',)}
18 | 
19 |     def _load(self, file):
20 |         # Assumption: the header from the value column provides
21 |         # the name of the measure for that CSV file.
22 |         df = pd.read_csv(file, low_memory=False)
23 |         return df
24 | 
25 | sofroisLoader = SOFROISLoader()
26 | 


--------------------------------------------------------------------------------
/datamine/loaders/sofrsr.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | import gzip
 5 | import json
 6 | 
 7 | class SOFRStripRatesLoader(Loader):
 8 |     dataset = 'SOFRSR'
 9 |     fileglob = 'SOFRSR_TermRate_Fixings_*.JSON'
10 | 
11 |     columns = ['rate','transactionTime','businessDate','productCode','securityId','productDescription']
12 | 
13 |     dtypes = {
14 |         'category': ('productCode', 'productDescription', 'securityId',),
15 |         'float': ('rate',),
16 |         'date:%m-%d-%Y' : ('businessDate'),
17 |         'date:%m-%d-%Y:%H:%M:%S' : ('transactionTime')
18 |              }
19 | 
20 |     def _load(self, filename):
21 |         result = []
22 |         with open(filename, 'rt', encoding='utf-8') as f:
23 |             for line in f:
24 |                 line = json.loads(line)
25 |             result = pd.json_normalize(line['payload'])
26 |             
27 |         return result
28 | 
29 | SOFRstripratesLoader = SOFRStripRatesLoader()
30 | 


--------------------------------------------------------------------------------
/datamine/loaders/telluslabs.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | 
 5 | class TellusLabsLoader(Loader):
 6 |     dataset = 'TELLUSLABS'
 7 |     fileglob = 'TELLUSLABS_*.csv'
 8 |     index = 'metric_date'
 9 |     columns = ['crop', 'country_iso', 'geo_level', 'geo_id',
10 |                'geo_display_name', 'metric_date',
11 |                'value', 'measure']
12 |     dtypes = {'category': ('crop', 'country_iso', 'geo_level',
13 |                            'geo_display_name', 'measure'),
14 |               'int64': ('geo_id',),
15 |               'float': ('value',),
16 |               'date:%Y-%m-%d': ('metric_date',)}
17 | 
18 |     def _load(self, file):
19 |         # Assumption: the header from the value column provides
20 |         # the name of the measure for that CSV file.
21 |         df = pd.read_csv(file, low_memory=False)
22 |         df['measure'] = df.columns[-1]
23 |         return df
24 | 
25 | tellusLabsLoader = TellusLabsLoader()
26 | 


--------------------------------------------------------------------------------
/datamine/loaders/tick.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | 
 5 | class TickLoader(Loader):
 6 |     dataset = 'TICK'
 7 |     fileglob = '*.gz'
 8 | 
 9 |     columns = ['trade_date_time', 'trade_date', 'trade_time',
10 |                'trade_sequence_number', 'session_indicator',
11 |                'ticker_symbol', 'future_option_index_indicator', 'contract_delivery_date',
12 |                'trade_quantity', 'strike_price', 'trade_price', 'ask_bid_type',
13 |                'indicative_quote_type', 'market_quote', 'close_open_type',
14 |                'valid_open_exception', 'post_close', 'cancel_code_type',
15 |                'insert_code_type', 'fast_late_indicator', 'cabinet_indicator',
16 |                'book_indicator', 'entry_date', 'exchange_code']
17 | 
18 |     dtypes = {'category': ('session_indicator', 'ticker_symbol', 'future_option_index_indicator',
19 |                            'close_open_type', 'exchange_code', 'ask_bid_type', 'indicative_quote_type',
20 |                            'valid_open_exception', 'post_close', 'cancel_code_type',
21 |                            'insert_code_type', 'fast_late_indicator', 'cabinet_indicator', 'book_indicator'),
22 |               'int64': ('trade_sequence_number', 'contract_delivery_date', 'trade_quantity'),
23 |               'float': ('strike_price', 'trade_price'),
24 |               'date:%H:%M:%s': ('trade_time'),
25 |               'date:%Y%m%d': ('trade_date', 'entry_date'),
26 |               'date': ('trade_date_time')}
27 | 
28 |     def _load(self, file):
29 |         df = pd.read_csv(file, header=None, low_memory=False)
30 |         
31 |         # Make trade_date_time the first column
32 |         df.insert(0, -1, df[0].astype(str) + 'T' + df[1].astype(str))
33 |         
34 |         return(df)
35 | 
36 | tickLoader = TickLoader()
37 | 


--------------------------------------------------------------------------------
/datamine/loaders/voi.py:
--------------------------------------------------------------------------------
 1 | from . import Loader
 2 | 
 3 | import pandas as pd
 4 | 
 5 | class VOILoader(Loader):
 6 |     dataset = 'VOI'
 7 |     fileglob = '*.gz'
 8 | 
 9 |     columns = ['Trade Date','Exchange Code','Product Code','Product Description',
10 |                 'Product Type','Put/Call','Strike Price',
11 |                 'Contract Year','Contract Month','Open Interest',
12 |                 'Total Volume','Globex Volume','Floor Volume','PNT Volume',
13 |                 'Block Volume','DataType']
14 | 
15 |     dtypes = {'category': ('Exchange Code','Product Code','Product Description',
16 |                 'Product Type','Put/Call','Strike Price',
17 |                 'Contract Year','Contract Month','DataType'),
18 |               'int64': ('Open Interest',
19 |                 'Total Volume','Globex Volume','Floor Volume','PNT Volume',
20 |                 'Block Volume'),
21 |               'float': (),
22 |               'date:%Y%m%d:%s': ('Trade Date',),
23 |               }
24 | 
25 |     def _load(self, file):
26 |         df = pd.read_csv(file, skiprows=1, header=None, low_memory=False)
27 |         
28 |         #Need to extract the timing of the data from the file name.
29 |         if file[-17] == 'p':
30 |             df['DataType'] = 'Preliminary'
31 |         if file[-17] == 'f':
32 |             df['DataType'] = 'Final'
33 |         return df
34 | 
35 | voiLoader = VOILoader()
36 | 


--------------------------------------------------------------------------------
/datamine/utils.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from tqdm import tqdm
 4 | from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
 5 | 
 6 | MAX_WORKERS = 4
 7 | 
 8 | logger = logging.getLogger(__name__.rsplit('.', 1)[0])
 9 | 
10 | # If we're in a Jupyter notebook, we need to play some tricks
11 | # in order to get the logger output to show up in the notebook.
12 | try:
13 |     from IPython import get_ipython
14 |     if 'IPKernelApp' in get_ipython().config:
15 |         import sys
16 |         logger.handlers = [logging.StreamHandler(sys.stderr)]
17 |         logger.setLevel(logging.INFO)
18 | except Exception:
19 |     pass
20 | 
21 | def tqdm_execute_tasks(fn, keys, desc, max_workers=MAX_WORKERS, mode='process'):
22 |     """
23 |     Equivalent to executor.map(fn, values), but uses a tqdm-based progress bar
24 |     """
25 |     if max_workers == 1:
26 |         return [fn(key) for key in tqdm(keys, desc=desc)]
27 |     # Processes are better for the dataframe loading tasks, but
28 |     # threads are significantly better for downloads
29 |     Executor = ThreadPoolExecutor if mode == 'thread' else ProcessPoolExecutor
30 |     with Executor(max_workers=max_workers) as executor:
31 |         futures = [executor.submit(fn, key) for key in keys]
32 |         for f in tqdm(as_completed(futures), total=len(keys), desc=desc):
33 |             pass
34 |         return [f.result() for f in futures]
35 | 


--------------------------------------------------------------------------------
/docs/CME Query API's - EOD_Block_Tick_BBO - Google Docs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CMEGroup/datamine_python/0454c8f04cb379de10c2949590bf6b92479af520/docs/CME Query API's - EOD_Block_Tick_BBO - Google Docs.pdf


--------------------------------------------------------------------------------
/examples/images/BitcoinEndofDayValue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CMEGroup/datamine_python/0454c8f04cb379de10c2949590bf6b92479af520/examples/images/BitcoinEndofDayValue.png


--------------------------------------------------------------------------------
/examples/images/BitcoinRTIndexValue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CMEGroup/datamine_python/0454c8f04cb379de10c2949590bf6b92479af520/examples/images/BitcoinRTIndexValue.png


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 | 
4 | [flake8]
5 | ignore=W504,E501
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | with open('README.md') as fp:
 4 |     long_description = fp.read()
 5 | 
 6 | setup(
 7 |     name="datamine",
 8 |     version="0.21.post2",
 9 |     description="CME Group DataMine Python Package",
10 |     url="https://github.com/CMEGroup/datamine_python",
11 |     author="Aaron Walters",
12 |     author_email="aaron.walters@cmegroup.com",
13 |     maintainer="Hamza Amjad",
14 |     maintainer_email="hamza.amjad@cmegroup.com",
15 |     license="BSD 3-Clause",
16 |     install_requires=['requests', 'urllib3', 'pandas', 'tqdm', 'futures'],
17 |     packages=find_packages(exclude=['tests']),
18 |     long_description=long_description,
19 |     long_description_content_type="text/markdown",
20 |     classifiers=[
21 |         "Development Status :: 3 - Alpha",
22 |         "Intended Audience :: Financial and Insurance Industry",
23 |         "License :: OSI Approved :: BSD License",
24 |         "Programming Language :: Python :: 3.5",
25 |         "Programming Language :: Python :: 3.6",
26 |         "Programming Language :: Python :: 3.7",
27 |     ])
28 | 


--------------------------------------------------------------------------------