├── .gitignore ├── LICENSE ├── README.md ├── pySecMaster ├── Dockerfile ├── __init__.py ├── build_symbology.py ├── create_tables.py ├── cross_validator.py ├── database │ └── init.sql ├── docker-compose.yml ├── download.py ├── extractor.py ├── icon_rc.py ├── icons │ ├── exit_48px.png │ ├── github_128px.png │ ├── google_128px.png │ ├── molecule_5_black.png │ ├── open_folder_240px.png │ ├── quandl.png │ └── save_240px.png ├── load_aux_tables.py ├── load_tables │ ├── data_vendor.csv │ └── exchanges.csv ├── main_gui.py ├── main_gui.ui ├── pySecMaster.py ├── query_data.py ├── query_database.py ├── requirements.txt ├── symbology_only.py ├── tests │ ├── __init__.py │ ├── test_database.py │ └── test_extractor.py └── utilities │ ├── __init__.py │ ├── database_backup.sh │ ├── database_check.py │ ├── database_queries.py │ ├── database_rebuilds │ ├── __init__.py │ ├── convert_qcode_to_tsid.py │ ├── sqlite_to_postgres.py │ └── verify_min_times.py │ ├── date_conversions.py │ ├── multithread.py │ └── user_dir.py ├── table_structure.md └── table_structure.xlsx /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Data 3 | ################# 4 | 5 | pySecMaster/load_tables/*_wo_data.csv 6 | 7 | 8 | ################# 9 | ## Eclipse 10 | ################# 11 | 12 | *.pydevproject 13 | .project 14 | .metadata 15 | bin/ 16 | tmp/ 17 | *.tmp 18 | *.bak 19 | *.swp 20 | *~.nib 21 | local.properties 22 | .classpath 23 | .settings/ 24 | .loadpath 25 | 26 | # External tool builders 27 | .externalToolBuilders/ 28 | 29 | # Locally stored "Eclipse launch configurations" 30 | *.launch 31 | 32 | # CDT-specific 33 | .cproject 34 | 35 | # PDT-specific 36 | .buildpath 37 | 38 | 39 | ################# 40 | ## Visual Studio 41 | ################# 42 | 43 | ## Ignore Visual Studio temporary files, build results, and 44 | ## files generated by popular Visual Studio add-ons. 
45 | 46 | # User-specific files 47 | *.suo 48 | *.user 49 | *.sln.docstates 50 | 51 | # Build results 52 | 53 | [Dd]ebug/ 54 | [Rr]elease/ 55 | x64/ 56 | build/ 57 | [Bb]in/ 58 | [Oo]bj/ 59 | 60 | # MSTest test Results 61 | [Tt]est[Rr]esult*/ 62 | [Bb]uild[Ll]og.* 63 | 64 | *_i.c 65 | *_p.c 66 | *.ilk 67 | *.meta 68 | *.obj 69 | *.pch 70 | *.pdb 71 | *.pgc 72 | *.pgd 73 | *.rsp 74 | *.sbr 75 | *.tlb 76 | *.tli 77 | *.tlh 78 | *.tmp 79 | *.tmp_proj 80 | *.log 81 | *.vspscc 82 | *.vssscc 83 | .builds 84 | *.pidb 85 | *.log 86 | *.scc 87 | 88 | # Visual C++ cache files 89 | ipch/ 90 | *.aps 91 | *.ncb 92 | *.opensdf 93 | *.sdf 94 | *.cachefile 95 | 96 | # Visual Studio profiler 97 | *.psess 98 | *.vsp 99 | *.vspx 100 | 101 | # Guidance Automation Toolkit 102 | *.gpState 103 | 104 | # ReSharper is a .NET coding add-in 105 | _ReSharper*/ 106 | *.[Rr]e[Ss]harper 107 | 108 | # TeamCity is a build add-in 109 | _TeamCity* 110 | 111 | # DotCover is a Code Coverage Tool 112 | *.dotCover 113 | 114 | # NCrunch 115 | *.ncrunch* 116 | .*crunch*.local.xml 117 | 118 | # Installshield output folder 119 | [Ee]xpress/ 120 | 121 | # DocProject is a documentation generator add-in 122 | DocProject/buildhelp/ 123 | DocProject/Help/*.HxT 124 | DocProject/Help/*.HxC 125 | DocProject/Help/*.hhc 126 | DocProject/Help/*.hhk 127 | DocProject/Help/*.hhp 128 | DocProject/Help/Html2 129 | DocProject/Help/html 130 | 131 | # Click-Once directory 132 | publish/ 133 | 134 | # Publish Web Output 135 | *.Publish.xml 136 | *.pubxml 137 | *.publishproj 138 | 139 | # NuGet Packages Directory 140 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 141 | #packages/ 142 | 143 | # Windows Azure Build Output 144 | csx 145 | *.build.csdef 146 | 147 | # Windows Store app package directory 148 | AppPackages/ 149 | 150 | # Others 151 | sql/ 152 | *.Cache 153 | ClientBin/ 154 | [Ss]tyle[Cc]op.* 155 | ~$* 156 | *~ 157 | *.dbmdl 158 | *.[Pp]ublish.xml 159 | *.pfx 160 | *.publishsettings 161 | 162 | # RIA/Silverlight projects 163 | Generated_Code/ 164 | 165 | # Backup & report files from converting an old project file to a newer 166 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 167 | _UpgradeReport_Files/ 168 | Backup*/ 169 | UpgradeLog*.XML 170 | UpgradeLog*.htm 171 | 172 | # SQL Server files 173 | App_Data/*.mdf 174 | App_Data/*.ldf 175 | 176 | ############# 177 | ## Windows detritus 178 | ############# 179 | 180 | # Windows image file caches 181 | Thumbs.db 182 | ehthumbs.db 183 | 184 | # Folder config file 185 | Desktop.ini 186 | 187 | # Recycle Bin used on file shares 188 | $RECYCLE.BIN/ 189 | 190 | # Mac crap 191 | .DS_Store 192 | 193 | 194 | ############# 195 | ## Python 196 | ############# 197 | 198 | *.py[cod] 199 | 200 | # Packages 201 | *.egg 202 | *.egg-info 203 | dist/ 204 | build/ 205 | eggs/ 206 | parts/ 207 | var/ 208 | sdist/ 209 | develop-eggs/ 210 | .installed.cfg 211 | 212 | # Installer logs 213 | pip-log.txt 214 | 215 | # Unit test / coverage reports 216 | .coverage 217 | .tox 218 | 219 | # Translations 220 | *.mo 221 | 222 | # Mr Developer 223 | .mr.developer.cfg 224 | 225 | 226 | ############# 227 | ## JetBrains 228 | ############# 229 | 230 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio 231 | 232 | *.iml 233 | 234 | ## Directory-based project format: 235 | .idea/ 236 | # if you remove the above rule, at least ignore the following: 237 | 238 | # User-specific stuff: 239 | # .idea/workspace.xml 240 | # .idea/tasks.xml 241 | # .idea/dictionaries 242 | 243 | # Sensitive or high-churn files: 244 | # .idea/dataSources.ids 245 | # .idea/dataSources.xml 246 | # .idea/sqlDataSources.xml 247 | # .idea/dynamic.xml 248 | # .idea/uiDesigner.xml 249 | 250 | # Gradle: 251 | # .idea/gradle.xml 252 | # .idea/libraries 253 | 254 | # Mongo Explorer plugin: 255 | # .idea/mongoSettings.xml 256 | 257 | ## File-based project format: 258 | *.ipr 259 | *.iws 260 | 261 | ## Plugin-specific files: 262 | 263 | # IntelliJ 264 | /out/ 265 | 266 | # mpeltonen/sbt-idea plugin 267 | .idea_modules/ 268 | 269 | # JIRA plugin 270 | atlassian-ide-plugin.xml 271 | 272 | # Crashlytics plugin (for Android Studio and IntelliJ) 273 | com_crashlytics_export_strings.xml 274 | crashlytics.properties 275 | crashlytics-build.properties 276 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pySecMaster 2 | An automated framework to store and maintain financial data. 3 | 4 | [![AGPLv3](https://img.shields.io/badge/License-AGPLv3-blue.svg)](http://opensource.org/licenses/AGPL-3.0) 5 | 6 | The goal of the system is to have a central repository of interrelated financial data that can be used for strategy backtests and live trading systems. 7 | 8 | [Data types](#data-types) that can be stored include historical and live stock prices (daily, minute, tick), option chains, corporate actions, economic events, IPO pricings and financial statements. 9 | 10 | **Contents**: 11 | - [Quick Start Guides](#quick-start-guides) 12 | - [Database](#database) 13 | - [TSID](#tsid) 14 | - [Symbology](#symbology) 15 | - [Data Types](#data-types) 16 | - [Cross Validator](#cross-validator) 17 | - [Adjusted Prices](#adjusted-prices) 18 | - [System Requirements](#system-requirements) 19 | - [Future Goals](#future-goals) 20 | - [Notes](#notes) 21 | - [Disclaimer](#disclaimer) 22 | - [License](#license-gnu-agplv3) 23 | 24 | ## Quick Start Guides 25 | 26 | ### With Docker (Recommended) 27 | 28 | 1. 
Install [Docker Community Edition (CE)](https://www.docker.com/community-edition) on your computer 29 | 30 | 2. Clone (or download) [this](https://github.com/camisatx/pySecMaster) repository to your computer 31 | 32 | 3. Navigate to `pySecMaster/pySecMaster` 33 | 34 | 4. Open the `/utilities/user_dir.py` [file](../master/pySecMaster/utilities/user_dir.py): 35 | - Change the default name (josh) to your system user name (**line 30**) 36 | - Add your [Quandl API key](https://docs.quandl.com/docs#section-authentication) to both the root and your user sections (**lines 28 and 50**) 37 | - [optional] Change the `main_password` (**lines 16 and 38**) and `pysecmaster_password` (**lines 23 and 45**) in both the root and your user sections; Also change the passwords within the `pySecMaster/pySecMaster/database/init.sql` [file](../master/pySecMaster/database/init.sql) on **lines 2 and 12** to the same respective passwords you used in the `user_dir.py` file; You can also change or remove the special `remote_users` password and users created between **lines 16 and 33** within the `init.sql` file 38 | 39 | 5. Run `docker-compose up -d postgres` to start the Postgres container running as a daemon, where it will continue running in the background. 40 | 41 | 6. Run `docker-compose up app` to run the pySecMaster code. You would run this to update the prices daily. If you change a script, run `docker-compose up --build app` to have the updated script loaded into the pySecMaster container. The system will utilize the variables specified [here](../master/pySecMaster/Dockerfile#L17). To view the arguments you can provide, either run `pySecMaster/pySecMaster/pySecMaster.py -h` or view the arguments [here](../master/pySecMaster/pySecMaster.py#L388). 42 | 43 | ### Without Docker using the Script 44 | 45 | 1. Download and install both [PostgreSQL](http://www.postgresql.org/download/) and [Psycopg2](http://initd.org/psycopg/docs/install.html) to your computer. Installing psycopg2 on Windows can be challenging, but I found it easy to use the wheel provided on Christoph Gohlke's [Windows Binaries for Python](http://www.lfd.uci.edu/~gohlke/pythonlibs/#psycopg) page. 46 | 47 | 2. Create a new user in Postgres to use with pySecMaster (i.e. pysecmaster) 48 | 49 | 3. Clone or download [this](https://github.com/camisatx/pySecMaster) repository to your computer 50 | 51 | 4. Navigate to `pySecMaster/pySecMaster` 52 | 53 | 5. Open the `/utilities/user_dir.py` [file](../master/pySecMaster/utilities/user_dir.py): 54 | - Change the default name (josh) to your system user name (**line 30**) 55 | - Add your [Quandl API key](https://docs.quandl.com/docs#section-authentication) to both the root and your user sections (**lines 28 and 50**) 56 | - [optional] Change the `main_password` (**lines 16 and 38**) and `pysecmaster_password` (**lines 23 and 45**) in both the root and your user sections; Also change the passwords within the `pySecMaster/pySecMaster/database/init.sql` [file](../master/pySecMaster/database/init.sql) on **lines 2 and 12** to the same respective passwords you used in the `user_dir.py` file; You can also change or remove the special `remote_users` password and users created between **lines 16 and 33** within the `init.sql` file 57 | 58 | 6. Install the python libraries with `pip3 install -r requirements.txt` 59 | 60 | 7. Run `python pySecMaster/pySecMaster/pySecMaster.py --daily-downloads quandl.wiki -v` for the system to start building itself. 
It'll download Quandl daily prices and run the cross validator for all price values. To view the arguments you can provide, either run `pySecMaster/pySecMaster/pySecMaster.py -h` or view the arguments [here](../master/pySecMaster/pySecMaster.py#L388) 61 | 62 | ### Retrieve Database Values 63 | 1. To retrieve the data from the PostgreSQL database, open the `pySecMaster/pySecMaster/query_data.py` [file](../master/pySecMaster/query_data.py) in a code editor (Vim, PyCharm, Sublime, etc.) 64 | 65 | 2. Navigate to the query options (**lines 242 - 250**): change any of the options within this section to alter the query. Be aware that certain variables may be ignored depending on what type of query is run (i.e. minute data only comes from Google Finance). It is possible to retrieve very specific data by writing a custom SQL query. This file also includes a [price adjustment calculation](#adjusted-prices), which calculates the correct historical adjusted prices based on the dividends and splits. By default the data is returned as a pandas DataFrame, which can be manipulated to any format (visual, CSV, JSON, chart, etc.), or even sent to another file for further processing. 66 | 67 | 3. You can now save and run `python pySecMaster/pySecMaster/query_data.py` 68 | 69 | ## Database 70 | This system utilizes [PostgreSQL](http://www.postgresql.org/) for the database engine. [Postgres](https://en.wikipedia.org/wiki/PostgreSQL) provides an extremely flexible yet powerful database experience. Furthermore, Postgres allows the database to be stored on a remote server, accessible by multiple users. 71 | 72 | Currently, there are 24 tables that make up the pySecMaster database. You can view the database table structure [here](../master/table_structure.md). 73 | 74 | ## TSID 75 | All of the data tables utilize a custom symbol ID, called a `tsid` (trading system ID). This allows for consistent data nomenclature across the system. 76 | 77 | #### TSID Structure 78 | The tsid structure is composed of the following ([Note 1](#notes)): 79 | ``` 80 | <ticker>.<tsid exchange abbreviation>.<duplicate integer> 81 | ``` 82 | Since Apple (AAPL) is traded on NASDAQ (tsid exchange abbreviation is `Q`), its tsid symbol is: 83 | ``` 84 | AAPL.Q.0 85 | ``` 86 | Walmart (WMT) is traded on NYSE (tsid exchange abbreviation is `N`), thus its tsid symbol is: 87 | ``` 88 | WMT.N.0 89 | ``` 90 | 91 | #### TSID Creation 92 | The tsid creation process requires a unique ID as the backbone. At the moment, CSI Data's *CSI Number* system is used as the backbone for ensuring that there are no tsid duplicates. It is possible to use another vendor's ID structure as the backbone (Bloomberg, RIC, etc.), or create a custom one (using a predefined base). 93 | 94 | The biggest hindrance to using CSI Data's CSI Number system is that it restricts tsid codes to **only the US, Toronto and London based exchanges** (as those are the only exchanges they list). I've considered using the [EODData](http://www.eoddata.com/) symbol database to enable the tsid structure to expand to all other global exchanges, but haven't implemented this yet. 95 | 96 | You can view (or download) the CSI Data stock factsheet [here](http://www.csidata.com/factsheets.php?type=stock&format=html). 97 | 98 | #### TSID Exchange Abbreviations 99 | Custom exchange abbreviations are used in the tsid structure to allow for naming flexibility and prevent duplicate abbreviations.
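For illustration, composing and splitting a tsid is simple string manipulation. Below is a minimal sketch (these helper functions and their names are hypothetical, not part of the repository):
```
def build_tsid(ticker, exchange_abbrev, duplicate=0):
    """Compose a tsid, e.g. build_tsid('AAPL', 'Q') -> 'AAPL.Q.0'."""
    return '%s.%s.%d' % (ticker, exchange_abbrev, duplicate)


def split_tsid(tsid):
    """Split a tsid into (ticker, exchange abbreviation, duplicate).

    Splitting from the right keeps tickers that contain periods
    (e.g. 'BRK.A.N.0') intact.
    """
    ticker, exchange_abbrev, duplicate = tsid.rsplit('.', 2)
    return ticker, exchange_abbrev, int(duplicate)


assert build_tsid('WMT', 'N') == 'WMT.N.0'
assert split_tsid('AAPL.Q.0') == ('AAPL', 'Q', 0)
```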
100 | 101 | All abbreviations can be found by looking at the **tsid_symbol** column within [exchanges.csv](../master/pySecMaster/load_tables/exchanges.csv) in `pySecMaster/load_tables` (or from the **exchange** table of the database). 102 | 103 | Some common exchange abbreviations include: 104 | 105 | | Exchange Name | TSID Exchange Abbreviation | 106 | |:------------------------------:|:--------------------------:| 107 | | American Stock Exchange | AMEX | 108 | | New York Stock Exchange (NYSE) | N | 109 | | New York Stock Exchange ARCA | NARCA | 110 | | NASDAQ | Q | 111 | | OTC Markets Pink Sheets | PINK | 112 | | London Stock Exchange | LON | 113 | | Toronto Stock Exchange | TSX | 114 | 115 | ## Symbology 116 | The symbology table is used as a translator between the tsid symbol and other symbol structures (Quandl codes, Yahoo Finance codes, etc.) ([Note 2](#notes)). This structure enables future symbol structures to be seamlessly added to the table to allow for external database communication (RIC, Bloomberg, etc.). 117 | 118 | Not only does this translation ability allow you to convert one source's symbol to another, but it also allows you to query any source's symbols based on characteristics stored in other tables (exchange, sector, industry, etc.). 119 | 120 | By default, the symbology table links the tsid symbol to these data sources ([Note 3](#notes)): 121 | 122 | | Source Name | Source Code | 123 | |:------------------------:|:-------------:| 124 | | CSI Data's CSI Number | csi_data | 125 | | Quandl's Google database | quandl_goog | 126 | | Quandl's WIKI database | quandl_wiki | 127 | | Seeking Alpha | seeking_alpha | 128 | | Yahoo Finance | yahoo | 129 | 130 | ## Data Types 131 | This system is built around the idea of having extractor modules 'plug-in' to the database. Therefore, it is designed for you to build your own data extractors for any type of data you want to store in line with the tsid structure. 132 | 133 | The default extractors handle daily and minute price data, along with basic exchange information. I have built extra tables that custom extractors can fill with data. If you have ideas on additional tables to include, please create an issue with your idea. 134 | 135 | #### Default Extractors 136 | - Daily Historical Stock Prices (Quandl and Yahoo Finance (complete history); Google Finance (restricted to prior 50 days)) 137 | - Minute Historical Stock Prices (Google Finance (restricted to prior 15 days)) 138 | - Exchange Information 139 | - Symbology generator (symbol translator) 140 | 141 | #### Custom Extractors (aka, build your own) 142 | - Cash Dividends (NASDAQ) 143 | - Corporate Activities (conference calls, earnings data, splits) (Yahoo) 144 | - Economic Events Data (Yahoo) 145 | - Financial Statement Data (may require table modification as I haven't tried this yet) (SEC Edgar) 146 | - Historical Indices Membership (?) 147 | - IPO Pricings Data (Yahoo) 148 | 149 | ## Cross Validator 150 | The cross validator automatically selects the most likely prices from all available sources. 151 | 152 | The validator uses a cumulative score to select the most likely value, where the individual data source *weights* are specified in the data_vendor table of the database. The weights range from 0 to 100, with 100 holding the greatest weight. 153 | 154 | The system is set up to work with as many data sources as available, so future data sources can be easily implemented into this consensus value. It is possible for **all** prior consensus values to be replaced by new values.
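To make the weighting concrete, here is a distilled sketch of how a single field's consensus value is chosen for one date (plain dictionaries stand in for the DataFrames that `cross_validator.py` actually uses; the function name and the numbers are illustrative):
```
import operator

def consensus_value(source_values, source_weights):
    """Return the value backed by the highest summed vendor weight.

    source_values:  {data_vendor_id: value} for one price field on one date
    source_weights: {data_vendor_id: consensus_weight} from the data_vendor table
    """
    tally = {}
    for vendor_id, value in source_values.items():
        if value is None:
            continue
        # Vendors quoting the same value pool their weights together
        tally[value] = tally.get(value, 0) + source_weights.get(vendor_id, 0)
    if not tally:
        return -1  # no source had a value; the validator stores -1
    return max(tally.items(), key=operator.itemgetter(1))[0]

# Vendors 1 and 2 (weights 50 and 60) agree on 11.52; vendor 3 (weight 90) says 11.53
print(consensus_value({1: 11.52, 2: 11.52, 3: 11.53}, {1: 50, 2: 60, 3: 90}))
# -> 11.52, since the pooled weight of 110 beats 90
```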
Alternatively, it is possible for only values over the past *n* days to be replaced. 155 | 156 | **Currently, the only way to run the cross validator is to run the system through the pySecMaster.py script. The validator has not been implemented into the GUI yet.** 157 | 158 | This can be multi-processed based on tsids. By default, 5 threads are used. This value is dependent on the disk and processor speed, so you may need to lower this value. 159 | 160 | ## Adjusted Prices 161 | The [query_data.py](../master/pySecMaster/query_data.py) file includes a function for calculating the adjusted prices based on the stock's dividends and splits. You can read more about this calculation [here](https://joshschertz.com/2016/08/27/Vectorizing-Adjusted-Close-with-Python/). 162 | 163 | ## Run without Docker using the GUI 164 | 165 | **NOTE: The Quandl data download is currently broken. Use the pySecMaster.py script instead.** 166 | 167 | 1. Download and install both [PostgreSQL](http://www.postgresql.org/download/) and [Psycopg2](http://initd.org/psycopg/docs/install.html) to your computer. Installing psycopg2 on Windows can be challenging, but I found it easy to use the wheel provided on Christoph Gohlke's [Windows Binaries for Python](http://www.lfd.uci.edu/~gohlke/pythonlibs/#psycopg) page. 168 | 169 | 2. Create a new Postgres user to use with pySecMaster (i.e. pysecmaster) 170 | 171 | 3. Clone or download [this](https://github.com/camisatx/pySecMaster) repository to your computer 172 | 173 | 4. Navigate to the `pySecMaster/pySecMaster` directory and run `main_gui.py` 174 | 175 | 5. Within the GUI, provide the Postgres admin user and password, along with the database name you want to use for the pySecMaster (i.e. pysecmaster). Also, enter the user, password, host and port number that will be used to access this new database. 176 | 177 | #### Quandl daily data 178 | 179 | 6. Enter a Quandl API Key (free at <https://www.quandl.com>) 180 | 181 | 7. In the *Data* tab, change *Download Source* combo-box to **quandl** 182 | 183 | 8. In the *Data* tab, change *Selection* combo-box to: 184 | - **wiki** if you want all Quandl WIKI data ([Note 4](#notes)) (~3,000 symbols) 185 | - **goog** if you want all *US, Toronto and London* Quandl Google Finance data (~38,000 symbols) 186 | - **goog_etf** if you want all Quandl Google Finance ETF data ([Note 5](#notes)) (~3,700 symbols) 187 | - **goog_us_main_no_end_date** if you want main US exchange Quandl Google Finance data ([Note 6](#notes)) (~15,000 symbols) 188 | 189 | 9. If you have an HDD, I'd recommend changing the *Threads* count in the *System Settings* tab to **2** (SSDs can handle 8 threads). If you see the database constantly being locked, lower this number. 190 | 191 | 10. Click on the *Ok* button, and the database will start building itself 192 | 193 | 11. You can save your settings either when you exit the GUI or by going to *File* -> *Save Settings* [ctrl + s] 194 | 195 | #### Yahoo Finance daily data (currently broken - 2/19/18) 196 | 197 | 6. In the *Data* tab, change *Download Source* combo-box to **yahoo** 198 | 199 | 7. 
In the *Data* tab, change *Selection* combo-box to: 200 | - **all** if you want all *US, Toronto and London* Yahoo Finance data (~38,000 symbols) 201 | - **us_main** if you want main US exchange Yahoo Finance data that's been active within the prior two years ([Note 6](#notes)) (~9,000 symbols) 202 | - **us_main_no_end_date** if you want main US exchange Yahoo Finance data ([Note 6](#notes)) (~15,000 symbols) 203 | - **us_canada_london** if you want all *US, Toronto and London* Yahoo Finance data that's been active within the prior two years (~25,000 symbols) 204 | 205 | 8. If you have an HDD, I'd recommend changing the *Threads* count in the *System Settings* tab to **2** (SSDs can handle 8 threads). If you see the database constantly being locked, lower this number. 206 | 207 | 9. Click on the *Ok* button, and the database will start building itself with daily data from Yahoo Finance 208 | 209 | 10. You can save your settings either when you exit the GUI or by going to *File* -> *Save Settings* [ctrl + s] 210 | 211 | #### Google Finance minute data (currently broken - 2/19/18) 212 | 213 | 6. In the *Data* tab, change *Download Source* combo-box to **google** 214 | 215 | 7. In the *Data* tab, change *Selection* combo-box to: 216 | - **all** if you want all *US, Toronto and London* Google Finance data (~38,000 symbols) 217 | - **us_main** if you want main US exchange Google Finance data that's been active within the prior two years ([Note 6](#notes)) (~9,000 symbols) 218 | - **us_canada_london** if you want all *US, Toronto and London* Google Finance data that's been active within the prior two years (~25,000 symbols) 219 | 220 | 8. In the *Data* tab, change *Interval* combo-box to **minute** 221 | 222 | 9. If you have an HDD, I'd recommend changing the *Threads* count in the *System Settings* tab to **2** (SSDs can handle 8 threads). If you see the database constantly being locked, lower this number. 223 | 224 | 10. Click on the *Ok* button, and the database will start building itself with minute data from Google Finance 225 | 226 | 11. You can save your settings either when you exit the GUI or by going to *File* -> *Save Settings* [ctrl + s] 227 | 228 | # System Requirements 229 | - Python 3.4+ 230 | - Numpy 1.14.0 231 | - Pandas 0.22.0 232 | - Psycopg2 2.6.2 233 | - SqlAlchemy 1.2.2 234 | - PostgreSQL 9.5+ 235 | - PyQt 4.11+ 236 | - More than 20GB of storage space (daily Quandl WIKI data is about 4 GB, while a year's worth of Google Finance minute data can become 50+ GB) 237 | 238 | # User Requirements 239 | - Quandl API Token (free at <https://www.quandl.com>) 240 | 241 | # Future Goals 242 | - Add Quandl_YAHOO to symbology 243 | - Add custom holiday table 244 | 245 | # Additional Info 246 | To view the PostgreSQL database, you can use the [pgAdmin](http://www.pgadmin.org) program that is installed when you download PostgreSQL. This allows you to view and edit all characteristics of the database. 247 | 248 | # Notes 249 | - Note 1: I have not implemented the integer of duplicates yet, so all tsid symbols have a 0 (zero) value for that. This is only relevant when you have access to delisted stock data, and for tickers that overlap active tickers (e.g. ABG on NYSE). 250 | - Note 2: All source codes created for the symbology table are built from scratch using a methodology that closely follows the true symbol structure. This means that there will be occurrences where the symbology-built symbol does not match the true symbol. Create an issue if you see this happening.
251 | - Note 3: Google Finance does not have a symbol structure; they only require a ticker and unique exchange abbreviation as separate fields. Thus the Google Finance extractor uses the tsid structure as a symbol source. 252 | - Note 4: The symbology table actually includes about 9,000 symbols classified as quandl_wiki, but only about 3,000 of those actually have data from Quandl. I did this because I do not have a high quality list of all the WIKI codes (I don't trust Quandl's list); this ensures that a good percentage of the WIKI codes are downloaded. 253 | - Note 5: All ETF symbols are derived from the CSI Data Stock Factsheet, which includes listed and delisted ETFs. 254 | - Note 6: US main exchanges include AMEX, NYSE, BATS, NASDAQ (CM, GM, GS) and NYSE ARCA; includes stocks and ETFs. 255 | 256 | # Disclaimer 257 | Before using this software, be sure to understand and follow the terms of all data providers. I am not responsible for how you use this software, so please be responsible in your use of it! Please see the following links for some information: 258 | - [Quandl TOS](http://help.quandl.com/category/133-terms-and-conditions) 259 | - [Google Finance TOS](https://www.google.com/intl/en/googlefinance/disclaimer) 260 | - [Yahoo Finance TOS](https://policies.yahoo.com/us/en/yahoo/terms/utos/index.htm) 261 | 262 | For further information, please seek legal counsel. 263 | 264 | # License (GNU AGPLv3) 265 | pySecMaster - An automated system to store and maintain financial data. 266 | 267 | Copyright (C) 2016 Josh Schertz 268 | 269 | This program is free software: you can redistribute it and/or modify 270 | it under the terms of the GNU Affero General Public License as 271 | published by the Free Software Foundation, either version 3 of the 272 | License, or (at your option) any later version. 273 | 274 | This program is distributed in the hope that it will be useful, 275 | but WITHOUT ANY WARRANTY; without even the implied warranty of 276 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 277 | GNU Affero General Public License for more details. 278 | 279 | You should have received a copy of the GNU Affero General Public License 280 | along with this program. If not, see <http://www.gnu.org/licenses/>. 281 | -------------------------------------------------------------------------------- /pySecMaster/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | LABEL maintainer "joshschertz3@gmail.com" 3 | 4 | # Install all required python libraries 5 | COPY requirements.txt ./ 6 | RUN pip3 install -r requirements.txt 7 | 8 | # Move all pySecMaster files into the container 9 | COPY . .
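# NOTE: requirements.txt was copied and installed separately above so that
# Docker can cache the pip install layer when only the project code changes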
10 | 11 | # Set the default endpoint as the pySecMaster.py file 12 | ENTRYPOINT ["python3", "pySecMaster.py"] 13 | # Download daily prices (the active CMD uses only the Quandl WIKI source; the commented 14 | # alternatives vary), then run the cross-validator for only the 30 prior periods 15 | #CMD ["--daily-downloads", "quandl.wiki", "yahoo", "google", "--validator-period", "30", "--verbose"] 16 | #CMD ["--daily-downloads", "quandl.wiki", "yahoo", "google", "--verbose"] 17 | CMD ["--daily-downloads", "quandl.wiki", "--validator-period", "30", "--verbose"] 18 | #CMD ["--daily-downloads", "quandl.eod", "--validator-period", "30", "--verbose"] 19 | -------------------------------------------------------------------------------- /pySecMaster/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/__init__.py -------------------------------------------------------------------------------- /pySecMaster/cross_validator.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | import operator 3 | import pandas as pd 4 | import time 5 | 6 | from utilities.database_queries import delete_sql_table_rows, df_to_sql,\ 7 | query_all_active_tsids, query_all_tsid_prices, query_source_weights,\ 8 | query_data_vendor_id 9 | from utilities.multithread import multithread 10 | 11 | __author__ = 'Josh Schertz' 12 | __copyright__ = 'Copyright (C) 2018 Josh Schertz' 13 | __description__ = 'An automated system to store and maintain financial data.' 14 | __email__ = 'josh[AT]joshschertz[DOT]com' 15 | __license__ = 'GNU AGPLv3' 16 | __maintainer__ = 'Josh Schertz' 17 | __status__ = 'Development' 18 | __url__ = 'https://joshschertz.com/' 19 | __version__ = '1.5.0' 20 | 21 | ''' 22 | This program is free software: you can redistribute it and/or modify 23 | it under the terms of the GNU Affero General Public License as 24 | published by the Free Software Foundation, either version 3 of the 25 | License, or (at your option) any later version. 26 | 27 | This program is distributed in the hope that it will be useful, 28 | but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | GNU Affero General Public License for more details. 31 | 32 | You should have received a copy of the GNU Affero General Public License 33 | along with this program. If not, see <http://www.gnu.org/licenses/>. 34 | ''' 35 | 36 | 37 | class CrossValidate: 38 | """ Compares the prices from multiple sources, storing the price with the 39 | highest consensus weight. 40 | """ 41 | 42 | def __init__(self, database, user, password, host, port, table, tsid_list, 43 | period=None, verbose=False): 44 | """ 45 | :param database: String of the database name 46 | :param user: String of the username used to login to the database 47 | :param password: String of the password used to login to the database 48 | :param host: String of the database address (localhost, url, ip, etc.) 49 | :param port: Integer of the database port number (5432) 50 | :param table: String of the database table that should be worked on 51 | :param tsid_list: List of strings, with each string being a tsid 52 | :param period: Optional integer indicating the number of days whose 53 | values should be cross validated. If None is provided, then the 54 | entire set of values will be validated.
55 | :param verbose: Boolean of whether to print debugging statements or not 56 | """ 57 | 58 | self.database = database 59 | self.user = user 60 | self.password = password 61 | self.host = host 62 | self.port = port 63 | self.table = table 64 | self.tsid_list = tsid_list 65 | self.period = period 66 | self.verbose = verbose 67 | 68 | # Build a DataFrame with the source id and weight 69 | self.source_weights_df = query_source_weights( 70 | database=self.database, user=self.user, password=self.password, 71 | host=self.host, port=self.port) 72 | 73 | # List of data vendor names to ignore when cross validating the data. 74 | # Relevant for sources whose own output should not feed back into the consensus. 75 | self.source_exclude_list = ['pySecMaster_Consensus'] 76 | 77 | self.source_id_exclude_list = [] 78 | for source in self.source_exclude_list: 79 | source_id = query_data_vendor_id( 80 | database=self.database, user=self.user, password=self.password, 81 | host=self.host, port=self.port, name=source) 82 | self.source_id_exclude_list.append(source_id) 83 | 84 | if self.verbose: 85 | if self.period: 86 | print('Running cross validator for %s tsids only for the prior ' 87 | '%i days\' history.' % (len(tsid_list), self.period)) 88 | else: 89 | print('Running cross validator for %s tsids for the entire ' 90 | 'data history.' % (len(tsid_list),)) 91 | 92 | self.main() 93 | 94 | def main(self): 95 | """ Start the tsid cross validator process using either single or 96 | multiprocessing. """ 97 | 98 | validator_start = time.time() 99 | 100 | # Cycle through each tsid, running the data cross validator on all 101 | # sources and fields available. 102 | """No multiprocessing""" 103 | # [self.validator(tsid=tsid) for tsid in self.tsid_list] 104 | """Multiprocessing using 5 threads""" 105 | multithread(self.validator, self.tsid_list, threads=5) 106 | 107 | if self.verbose: 108 | print('%i tsids have had their sources cross validated taking ' 109 | '%0.2f seconds.' % 110 | (len(self.tsid_list), time.time() - validator_start)) 111 | 112 | def validator(self, tsid): 113 | 114 | tsid_start = time.time() 115 | 116 | # DataFrame of all stored prices for this ticker and interval. This is 117 | # a multi-index DataFrame, with date and data_vendor_id in the index. 118 | tsid_prices_df = query_all_tsid_prices( 119 | database=self.database, user=self.user, password=self.password, 120 | host=self.host, port=self.port, table=self.table, tsid=tsid) 121 | 122 | unique_sources = tsid_prices_df.index.\ 123 | get_level_values('data_vendor_id').unique() 124 | unique_dates = tsid_prices_df.index.get_level_values('date').unique() 125 | 126 | # If a period is provided, limit the unique_dates list to only those 127 | # within the past n period days.
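# (Illustrative: with period=30, only the dates from the prior 30
# calendar days are kept, so older consensus rows stay untouched.)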
128 | if self.period: 129 | beg_date = datetime.today() - timedelta(days=self.period) 130 | unique_dates = unique_dates[unique_dates > beg_date] 131 | 132 | # The consensus_price_df contains the prices from weighted consensus 133 | if self.table == 'daily_prices': 134 | consensus_price_df = pd.DataFrame( 135 | columns=['date', 'open', 'high', 'low', 'close', 'volume', 136 | 'dividend', 'split']) 137 | elif self.table == 'minute_prices': 138 | consensus_price_df = pd.DataFrame( 139 | columns=['date', 'open', 'high', 'low', 'close', 'volume']) 140 | else: 141 | raise NotImplementedError('Table %s is not implemented within ' 142 | 'CrossValidate.validator' % self.table) 143 | 144 | # Set the date as the index 145 | consensus_price_df.set_index(['date'], inplace=True) 146 | 147 | # Cycle through each period, comparing each data source's prices 148 | for date in unique_dates: 149 | 150 | # Either add each field's consensus price to a dictionary, 151 | # which is entered into the consensus_price_df upon all fields 152 | # being processed, or enter each field's consensus price directly 153 | # into the consensus_price_df. Right now, this is doing the latter. 154 | # consensus_prices = {} 155 | 156 | try: 157 | # Create a DataFrame for the current period, with the source_ids 158 | # as the index and the data_columns as the column headers 159 | period_df = tsid_prices_df.xs(date, level='date') 160 | except KeyError: 161 | # Should never happen; skip this date if it does, since period_df would be unbound below 162 | print('Unable to extract the %s period\'s prices from ' 163 | 'the tsid_prices_df for %s' % (date, tsid)) 164 | continue 165 | # Transpose the period_df DataFrame so the source_ids are 166 | # columns and the price fields are the rows 167 | period_df = period_df.transpose() 168 | 169 | # Cycle through each price field for this period's values 170 | for field_index, field_data in period_df.iterrows(): 171 | # field_index: string of the index name 172 | # field_data: pandas Series of the field data 173 | 174 | # Reset the field consensus for every field processed 175 | field_consensus = {} 176 | 177 | # Cycle through each source's values that are in the 178 | # field_data Series. 179 | for source_data in field_data.iteritems(): 180 | # source_data is a tuple, with the first item being 181 | # the data_vendor_id and the second being the value. 182 | 183 | # If the source_data's id is in the exclude list, don't 184 | # use its price when calculating the field consensus. 185 | if source_data[0] not in self.source_id_exclude_list: 186 | 187 | # Only process the source value if it is not None 188 | if source_data[1] is not None: 189 | 190 | # Retrieve weighted consensus for this source 191 | source_weight = self.source_weights_df.loc[ 192 | self.source_weights_df['data_vendor_id'] == 193 | source_data[0], 'consensus_weight'] 194 | 195 | try: 196 | if field_consensus: 197 | # There's already a value for this field 198 | if source_data[1] in field_consensus: 199 | # This source's value has a match in 200 | # the current consensus. Increase 201 | # weight for this price. 202 | field_consensus[source_data[1]] += \ 203 | source_weight.iloc[0] 204 | else: 205 | # Data value from the source does 206 | # not match this field's consensus 207 | field_consensus[source_data[1]] = \ 208 | source_weight.iloc[0] 209 | 210 | else: 211 | # Add first price to the field_consensus 212 | # dictionary, using price as the key 213 | # and the source's weight as the item.
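# (Illustrative: two sources quoting 11.52 with weights 50 and 60
# produce field_consensus == {11.52: 110}; a third source quoting
# 11.53 with weight 90 extends it to {11.52: 110, 11.53: 90}.)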
214 | field_consensus[source_data[1]] = \ 215 | source_weight.iloc[0] 216 | except IndexError: 217 | # No source_weight was found, probably because there 218 | # was no consensus weight for this data_vendor_id 219 | pass 220 | 221 | # Insert the highest consensus value for this period into 222 | # the consensus_price_df (the dictionary key (price) with 223 | # the largest value (consensus sum)). 224 | try: 225 | consensus_value = max(field_consensus.items(), 226 | key=operator.itemgetter(1))[0] 227 | except ValueError: 228 | # None of the sources had any values, thus use -1 229 | consensus_value = -1 230 | consensus_price_df.loc[date, field_index] = consensus_value 231 | 232 | # Make the date index into a normal column 233 | consensus_price_df.reset_index(inplace=True) 234 | # Convert the datetime object to an ISO date 235 | consensus_price_df['date'] = consensus_price_df['date'].\ 236 | apply(lambda x: x.isoformat()) 237 | 238 | # Add the vendor id of the pySecMaster_Consensus as a column 239 | validator_id = query_data_vendor_id( 240 | database=self.database, user=self.user, password=self.password, 241 | host=self.host, port=self.port, name='pySecMaster_Consensus') 242 | 243 | consensus_price_df.insert(0, 'data_vendor_id', validator_id) 244 | consensus_price_df.insert(1, 'source', 'tsid') 245 | consensus_price_df.insert(2, 'source_id', tsid) 246 | 247 | # Add the current date to the last column 248 | consensus_price_df.insert(len(consensus_price_df.columns), 249 | 'updated_date', datetime.now().isoformat()) 250 | 251 | if validator_id in unique_sources: 252 | delete_start = time.time() 253 | 254 | # Data from the cross validation process has already been saved 255 | # to the database before, thus it must be removed before adding 256 | # the new calculated values. 257 | 258 | if self.period: 259 | # Only delete prior consensus values for this tsid that are 260 | # newer than the beg_date (current date - replace period). 261 | delete_query = ("""DELETE FROM %s 262 | WHERE source_id='%s' AND source='tsid' 263 | AND data_vendor_id='%s' 264 | AND date>'%s'""" % 265 | (self.table, tsid, validator_id, 266 | beg_date.isoformat())) 267 | else: 268 | # Delete all existing consensus values for this tsid.
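# (Illustrative rendering, for a hypothetical tsid and vendor id:
# DELETE FROM daily_prices WHERE source_id='AAPL.Q.0'
# AND source='tsid' AND data_vendor_id='15')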
269 | delete_query = ("""DELETE FROM %s 270 | WHERE source_id='%s' AND source='tsid' 271 | AND data_vendor_id='%s'""" % 272 | (self.table, tsid, validator_id)) 273 | 274 | retry_count = 5 275 | while retry_count > 0: 276 | retry_count -= 1 277 | 278 | delete_status = delete_sql_table_rows( 279 | database=self.database, user=self.user, 280 | password=self.password, host=self.host, port=self.port, 281 | query=delete_query, table=self.table, item=tsid) 282 | if delete_status == 'success': 283 | # Add the validated values to the relevant price table AFTER 284 | # ensuring that the duplicates were deleted successfully 285 | df_to_sql(database=self.database, user=self.user, 286 | password=self.password, host=self.host, 287 | port=self.port, df=consensus_price_df, 288 | sql_table=self.table, exists='append', item=tsid) 289 | break 290 | 291 | # print('Data table replacement took %0.2f' % 292 | # (time.time() - delete_start)) 293 | 294 | else: 295 | # Add the validated values to the relevant price table 296 | df_to_sql(database=self.database, user=self.user, 297 | password=self.password, host=self.host, port=self.port, 298 | df=consensus_price_df, sql_table=self.table, 299 | exists='append', item=tsid) 300 | 301 | # For period updates, slow down the process to allow Postgres to catch up 302 | if self.period: 303 | time.sleep(1.5) 304 | 305 | if self.verbose: 306 | print('%s data cross-validation took %0.2f seconds to complete.' % 307 | (tsid, time.time() - tsid_start)) 308 | 309 | 310 | if __name__ == '__main__': 311 | 312 | from utilities.user_dir import user_dir 313 | userdir = user_dir() 314 | 315 | test_table = 'daily_prices' 316 | 317 | test_tsids_df = query_all_active_tsids( 318 | database=userdir['postgresql']['pysecmaster_db'], 319 | user=userdir['postgresql']['pysecmaster_user'], 320 | password=userdir['postgresql']['pysecmaster_password'], 321 | host=userdir['postgresql']['pysecmaster_host'], 322 | port=userdir['postgresql']['pysecmaster_port'], 323 | table=test_table) 324 | test_tsid_list = test_tsids_df['tsid'].values 325 | 326 | CrossValidate( 327 | database=userdir['postgresql']['pysecmaster_db'], 328 | user=userdir['postgresql']['pysecmaster_user'], 329 | password=userdir['postgresql']['pysecmaster_password'], 330 | host=userdir['postgresql']['pysecmaster_host'], 331 | port=userdir['postgresql']['pysecmaster_port'], 332 | table=test_table, 333 | tsid_list=test_tsid_list, verbose=True) 334 | -------------------------------------------------------------------------------- /pySecMaster/database/init.sql: -------------------------------------------------------------------------------- 1 | -- Change default postgres password 2 | ALTER ROLE postgres WITH PASSWORD 'correct horse battery staple'; 3 | 4 | -- Create database called pysecmaster 5 | CREATE DATABASE pysecmaster; 6 | 7 | -- Block all users by default 8 | REVOKE ALL ON DATABASE pysecmaster FROM public; 9 | REVOKE ALL ON SCHEMA public FROM public; 10 | 11 | -- Create pymaster user with all privileges for pysecmaster database 12 | CREATE USER pymaster WITH PASSWORD 'correct horse battery staple'; 13 | GRANT ALL PRIVILEGES ON DATABASE pysecmaster TO pymaster; 14 | 15 | -- Add remote_users role with read only access 16 | CREATE ROLE remote_users WITH PASSWORD 'wrong horse battery staple'; 17 | -- Grant read only access to existing objects 18 | GRANT CONNECT ON DATABASE pysecmaster TO remote_users; 19 | GRANT USAGE ON SCHEMA public TO remote_users; 20 | GRANT SELECT ON ALL TABLES IN SCHEMA public TO remote_users; 21 | GRANT SELECT ON
ALL SEQUENCES IN SCHEMA public TO remote_users; 22 | GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO remote_users; 23 | -- Grant read only access to new objects 24 | ALTER DEFAULT PRIVILEGES IN SCHEMA public 25 | GRANT SELECT ON TABLES TO remote_users; 26 | ALTER DEFAULT PRIVILEGES IN SCHEMA public 27 | GRANT SELECT ON SEQUENCES TO remote_users; 28 | ALTER DEFAULT PRIVILEGES IN SCHEMA public 29 | GRANT EXECUTE ON FUNCTIONS TO remote_users; 30 | 31 | -- Add remote user for josh and ricardo under the remote_users role 32 | CREATE USER remote_josh IN ROLE remote_users; 33 | CREATE USER remote_ricardo IN ROLE remote_users; 34 | 35 | -- Change to pysecmaster database using the user pymaster 36 | \c pysecmaster pymaster 37 | 38 | -- Set the database timezone to Eastern Standard Time 39 | SET TIME ZONE 'EST'; 40 | 41 | CREATE TABLE IF NOT EXISTS symbology ( 42 | symbol_id BIGINT NOT NULL, 43 | source TEXT NOT NULL, 44 | source_id TEXT NOT NULL, 45 | type TEXT, 46 | created_date TIMESTAMP WITH TIME ZONE, 47 | updated_date TIMESTAMP WITH TIME ZONE); 48 | CREATE UNIQUE INDEX IF NOT EXISTS idx_symbology_sources 49 | ON symbology(source, source_id); 50 | 51 | CREATE TABLE IF NOT EXISTS baskets ( 52 | basket_id SERIAL PRIMARY KEY, 53 | name TEXT NOT NULL, 54 | description TEXT, 55 | start_date TIMESTAMP WITH TIME ZONE, 56 | end_date TIMESTAMP WITH TIME ZONE, 57 | created_by TEXT, 58 | created_date TIMESTAMP WITH TIME ZONE, 59 | updated_date TIMESTAMP WITH TIME ZONE); 60 | 61 | CREATE TABLE IF NOT EXISTS basket_values ( 62 | basket_val_id BIGSERIAL PRIMARY KEY, 63 | basket_id INTEGER NOT NULL, 64 | source TEXT NOT NULL, 65 | source_id TEXT NOT NULL, 66 | updated_date TIMESTAMP WITH TIME ZONE, 67 | FOREIGN KEY(basket_id) REFERENCES baskets(basket_id), 68 | FOREIGN KEY(source, source_id) 69 | REFERENCES symbology(source, source_id) 70 | ON UPDATE CASCADE); 71 | 72 | CREATE TABLE IF NOT EXISTS classification ( 73 | classification_id BIGSERIAL PRIMARY KEY, 74 | source TEXT NOT NULL, 75 | source_id TEXT NOT NULL, 76 | standard TEXT, 77 | code INTEGER, 78 | level_1 TEXT, 79 | level_2 TEXT, 80 | level_3 TEXT, 81 | level_4 TEXT, 82 | created_date TIMESTAMP WITH TIME ZONE, 83 | updated_date TIMESTAMP WITH TIME ZONE, 84 | FOREIGN KEY(source, source_id) 85 | REFERENCES symbology(source, source_id) 86 | ON UPDATE CASCADE); 87 | CREATE INDEX IF NOT EXISTS idx_classification_values 88 | ON classification(source, source_id, standard, level_1, level_2, level_3, 89 | level_4); 90 | 91 | CREATE TABLE IF NOT EXISTS csidata_stock_factsheet ( 92 | csi_number TEXT PRIMARY KEY, 93 | symbol TEXT, 94 | name TEXT, 95 | exchange TEXT, 96 | sub_exchange TEXT, 97 | is_active SMALLINT, 98 | start_date DATE, 99 | end_date DATE, 100 | conversion_factor SMALLINT, 101 | switch_cf_date DATE, 102 | pre_switch_cf SMALLINT, 103 | created_date TIMESTAMP WITH TIME ZONE, 104 | updated_date TIMESTAMP WITH TIME ZONE); 105 | CREATE INDEX IF NOT EXISTS idx_csidata_symbol 106 | ON csidata_stock_factsheet(symbol); 107 | 108 | CREATE TABLE IF NOT EXISTS data_vendor ( 109 | data_vendor_id INTEGER PRIMARY KEY, 110 | name TEXT UNIQUE, 111 | url TEXT, 112 | support_email TEXT, 113 | api TEXT, 114 | consensus_weight SMALLINT, 115 | created_date TIMESTAMP WITH TIME ZONE, 116 | updated_date TIMESTAMP WITH TIME ZONE); 117 | 118 | CREATE TABLE IF NOT EXISTS exchanges ( 119 | exchange_id SMALLINT PRIMARY KEY, 120 | symbol TEXT UNIQUE NOT NULL, 121 | goog_symbol TEXT, 122 | yahoo_symbol TEXT, 123 | csi_symbol TEXT, 124 | tsid_symbol TEXT NOT NULL, 125 | name 
TEXT, 126 | country TEXT, 127 | city TEXT, 128 | currency TEXT, 129 | time_zone TEXT, 130 | utc_offset REAL, 131 | open TIME, 132 | close TIME, 133 | lunch TEXT, 134 | created_date TIMESTAMP WITH TIME ZONE, 135 | updated_date TIMESTAMP WITH TIME ZONE); 136 | 137 | CREATE TABLE IF NOT EXISTS indices ( 138 | index_id SERIAL PRIMARY KEY, 139 | stock_index TEXT NOT NULL, 140 | source TEXT NOT NULL, 141 | source_id TEXT NOT NULL, 142 | as_of_date TIMESTAMP WITH TIME ZONE, 143 | created_date TIMESTAMP WITH TIME ZONE, 144 | updated_date TIMESTAMP WITH TIME ZONE, 145 | FOREIGN KEY(source, source_id) 146 | REFERENCES symbology(source, source_id) 147 | ON UPDATE CASCADE); 148 | 149 | CREATE TABLE IF NOT EXISTS quandl_codes ( 150 | q_code_id BIGSERIAL PRIMARY KEY, 151 | data_vendor TEXT NOT NULL, 152 | data TEXT NOT NULL, 153 | component TEXT NOT NULL, 154 | period TEXT, 155 | symbology_source TEXT NOT NULL, 156 | q_code TEXT NOT NULL, 157 | name TEXT, 158 | start_date TIMESTAMP WITH TIME ZONE, 159 | end_date TIMESTAMP WITH TIME ZONE, 160 | frequency TEXT, 161 | last_updated TIMESTAMP WITH TIME ZONE, 162 | page_num INTEGER, 163 | created_date TIMESTAMP WITH TIME ZONE, 164 | updated_date TIMESTAMP WITH TIME ZONE, 165 | FOREIGN KEY(data_vendor) 166 | REFERENCES data_vendor(name) 167 | ON UPDATE CASCADE); 168 | CREATE INDEX IF NOT EXISTS idx_qc_data 169 | ON quandl_codes(data); 170 | 171 | CREATE TABLE IF NOT EXISTS tickers ( 172 | tsid TEXT PRIMARY KEY, 173 | ticker TEXT NOT NULL, 174 | name TEXT, 175 | exchange_id INT NOT NULL, 176 | is_active SMALLINT, 177 | start_date TIMESTAMP WITH TIME ZONE, 178 | end_date TIMESTAMP WITH TIME ZONE, 179 | type TEXT, 180 | sector TEXT, 181 | industry TEXT, 182 | sub_industry TEXT, 183 | currency TEXT, 184 | hq_country TEXT, 185 | symbology_source TEXT NOT NULL, 186 | created_date TIMESTAMP WITH TIME ZONE, 187 | updated_date TIMESTAMP WITH TIME ZONE, 188 | FOREIGN KEY(symbology_source, tsid) 189 | REFERENCES symbology(source, source_id) ON UPDATE CASCADE, 190 | FOREIGN KEY(exchange_id) 191 | REFERENCES exchanges(exchange_id) ON UPDATE CASCADE); 192 | CREATE INDEX IF NOT EXISTS idx_tickers_sector 193 | ON tickers(sector); 194 | 195 | CREATE TABLE IF NOT EXISTS daily_prices ( 196 | daily_price_id BIGSERIAL PRIMARY KEY, 197 | data_vendor_id SMALLINT, 198 | source TEXT NOT NULL, 199 | source_id TEXT NOT NULL, 200 | date TIMESTAMP WITH TIME ZONE NOT NULL, 201 | open DECIMAL(11,4), 202 | high DECIMAL(11,4), 203 | low DECIMAL(11,4), 204 | close DECIMAL(11,4), 205 | volume BIGINT, 206 | dividend DECIMAL(6,3), 207 | split DECIMAL(11,4), 208 | updated_date TIMESTAMP WITH TIME ZONE, 209 | FOREIGN KEY(data_vendor_id) 210 | REFERENCES data_vendor(data_vendor_id), 211 | FOREIGN KEY(source, source_id) 212 | REFERENCES symbology(source, source_id) 213 | ON UPDATE CASCADE); 214 | CREATE INDEX IF NOT EXISTS idx_dp_identifiers 215 | ON daily_prices(source, source_id, data_vendor_id, date DESC NULLS LAST, 216 | updated_date); 217 | 218 | CREATE TABLE IF NOT EXISTS finra_data ( 219 | finra_id SERIAL PRIMARY KEY, 220 | source TEXT NOT NULL, 221 | source_id TEXT NOT NULL, 222 | date TIMESTAMP WITH TIME ZONE NOT NULL, 223 | short_volume INTEGER, 224 | short_exempt_volume INTEGER, 225 | total_volume INTEGER, 226 | updated_date TIMESTAMP WITH TIME ZONE, 227 | FOREIGN KEY(source, source_id) 228 | REFERENCES symbology(source, source_id) 229 | ON UPDATE CASCADE); 230 | CREATE INDEX IF NOT EXISTS idx_finra_source_id 231 | ON finra_data(source, source_id); 232 | 233 | CREATE TABLE IF NOT EXISTS 
fundamental_data ( 234 | fundamental_id BIGSERIAL PRIMARY KEY, 235 | data_vendor_id SMALLINT, 236 | source TEXT NOT NULL, 237 | source_id TEXT NOT NULL, 238 | date TIMESTAMP WITH TIME ZONE NOT NULL, 239 | field TEXT, 240 | value DECIMAL(14,2), 241 | note TEXT, 242 | created_date TIMESTAMP WITH TIME ZONE, 243 | updated_date TIMESTAMP WITH TIME ZONE, 244 | FOREIGN KEY(data_vendor_id) 245 | REFERENCES data_vendor(data_vendor_id), 246 | FOREIGN KEY(source, source_id) 247 | REFERENCES symbology(source, source_id) 248 | ON UPDATE CASCADE); 249 | CREATE INDEX IF NOT EXISTS idx_fund_source_id 250 | ON fundamental_data(source, source_id, data_vendor_id, date DESC NULLS LAST); 251 | 252 | CREATE TABLE IF NOT EXISTS minute_prices ( 253 | minute_price_id BIGSERIAL PRIMARY KEY, 254 | data_vendor_id SMALLINT, 255 | source TEXT NOT NULL, 256 | source_id TEXT NOT NULL, 257 | date TIMESTAMP WITH TIME ZONE NOT NULL, 258 | close DECIMAL(11,4), 259 | high DECIMAL(11,4), 260 | low DECIMAL(11,4), 261 | open DECIMAL(11,4), 262 | volume BIGINT, 263 | updated_date TIMESTAMP WITH TIME ZONE, 264 | FOREIGN KEY(data_vendor_id) 265 | REFERENCES data_vendor(data_vendor_id), 266 | FOREIGN KEY(source, source_id) 267 | REFERENCES symbology(source, source_id) 268 | ON UPDATE CASCADE); 269 | CREATE INDEX IF NOT EXISTS idx_mp_identifiers 270 | ON minute_prices(source, source_id, data_vendor_id, date DESC NULLS LAST, 271 | updated_date); 272 | 273 | CREATE TABLE IF NOT EXISTS option_chains ( 274 | option_id BIGSERIAL PRIMARY KEY, 275 | data_vendor_id SMALLINT, 276 | source TEXT NOT NULL, 277 | source_id TEXT NOT NULL, 278 | symbol TEXT, 279 | exchange TEXT, 280 | currency TEXT, 281 | multiplier SMALLINT, 282 | contract_id BIGINT NOT NULL, 283 | expiry DATE, 284 | type TEXT, 285 | strike DECIMAL(8,2), 286 | pre_split BOOLEAN, 287 | created_date TIMESTAMP WITH TIME ZONE, 288 | updated_date TIMESTAMP WITH TIME ZONE, 289 | FOREIGN KEY(data_vendor_id) 290 | REFERENCES data_vendor(data_vendor_id), 291 | FOREIGN KEY(source, source_id) 292 | REFERENCES symbology(source, source_id) 293 | ON UPDATE CASCADE); 294 | CREATE INDEX IF NOT EXISTS idx_option_chains_values 295 | ON option_chains(data_vendor_id, source, source_id, contract_id, expiry, 296 | strike, pre_split); 297 | 298 | CREATE TABLE IF NOT EXISTS option_prices ( 299 | option_prices_id BIGSERIAL PRIMARY KEY, 300 | data_vendor_id SMALLINT, 301 | option_id BIGINT NOT NULL, 302 | date TIMESTAMP WITH TIME ZONE NOT NULL, 303 | bid DECIMAL(10,4), 304 | bid_size INTEGER, 305 | ask DECIMAL(10,4), 306 | ask_size INTEGER, 307 | close DECIMAL(10,4), 308 | open_interest INTEGER, 309 | volume INTEGER, 310 | imp_volatility DECIMAL(6,4), 311 | delta DECIMAL(6,5), 312 | gamma DECIMAL(6,5), 313 | rho DECIMAL(6,5), 314 | theta DECIMAL(6,5), 315 | vega DECIMAL(6,5), 316 | updated_date TIMESTAMP WITH TIME ZONE, 317 | FOREIGN KEY(data_vendor_id) 318 | REFERENCES data_vendor(data_vendor_id), 319 | FOREIGN KEY(option_id) 320 | REFERENCES option_chains(option_id) 321 | ON UPDATE CASCADE); 322 | CREATE INDEX IF NOT EXISTS idx_option_prices 323 | ON option_prices(option_id, data_vendor_id, date DESC NULLS LAST); 324 | 325 | CREATE TABLE IF NOT EXISTS tick_prices ( 326 | tick_id BIGSERIAL PRIMARY KEY, 327 | data_vendor_id SMALLINT, 328 | source TEXT NOT NULL, 329 | source_id TEXT NOT NULL, 330 | date TIMESTAMP WITH TIME ZONE, 331 | bid DECIMAL(11,4), 332 | ask DECIMAL(11,4), 333 | last DECIMAL(11,4), 334 | high DECIMAL(11,4), 335 | low DECIMAL(11,4), 336 | close DECIMAL(11,4), 337 | bid_size INTEGER, 338 
| ask_size INTEGER, 339 | last_size INTEGER, 340 | volume INTEGER, 341 | FOREIGN KEY(data_vendor_id) 342 | REFERENCES data_vendor(data_vendor_id), 343 | FOREIGN KEY(source, source_id) 344 | REFERENCES symbology(source, source_id) 345 | ON UPDATE CASCADE); 346 | CREATE INDEX IF NOT EXISTS idx_tick_values 347 | ON tick_prices(source, source_id, date DESC NULLS LAST); 348 | 349 | CREATE TABLE IF NOT EXISTS tick_prices_stream ( 350 | tick_id BIGSERIAL PRIMARY KEY, 351 | data_vendor_id SMALLINT, 352 | source TEXT NOT NULL, 353 | source_id TEXT NOT NULL, 354 | date TIMESTAMP WITH TIME ZONE, 355 | field TEXT, 356 | value DECIMAL(11,4), 357 | FOREIGN KEY(data_vendor_id) 358 | REFERENCES data_vendor(data_vendor_id), 359 | FOREIGN KEY(source, source_id) 360 | REFERENCES symbology(source, source_id) 361 | ON UPDATE CASCADE); 362 | CREATE INDEX IF NOT EXISTS idx_tick_stream_values 363 | ON tick_prices_stream(source, source_id, date DESC NULLS LAST, field); 364 | 365 | CREATE TABLE IF NOT EXISTS conference_calls ( 366 | conf_call_id SERIAL PRIMARY KEY, 367 | source TEXT NOT NULL, 368 | source_id TEXT NOT NULL, 369 | symbol TEXT, 370 | date TIMESTAMP WITH TIME ZONE NOT NULL, 371 | event_title TEXT, 372 | created_date TIMESTAMP WITH TIME ZONE, 373 | updated_date TIMESTAMP WITH TIME ZONE, 374 | FOREIGN KEY(source, source_id) 375 | REFERENCES symbology(source, source_id) 376 | ON UPDATE CASCADE); 377 | CREATE INDEX IF NOT EXISTS idx_conf_source_id 378 | ON conference_calls(source, source_id, date); 379 | 380 | CREATE TABLE IF NOT EXISTS dividends ( 381 | dividend_id SERIAL PRIMARY KEY, 382 | source TEXT NOT NULL, 383 | source_id TEXT NOT NULL, 384 | symbol TEXT, 385 | company TEXT, 386 | dividend DECIMAL(6,3), 387 | ex_dividend_date TIMESTAMP WITH TIME ZONE NOT NULL, 388 | record_date TIMESTAMP WITH TIME ZONE, 389 | announcement_date TIMESTAMP WITH TIME ZONE, 390 | payment_date TIMESTAMP WITH TIME ZONE, 391 | created_date TIMESTAMP WITH TIME ZONE, 392 | updated_date TIMESTAMP WITH TIME ZONE, 393 | FOREIGN KEY(source, source_id) 394 | REFERENCES symbology(source, source_id) 395 | ON UPDATE CASCADE); 396 | CREATE INDEX IF NOT EXISTS idx_div_source_id 397 | ON dividends(source, source_id, ex_dividend_date); 398 | 399 | CREATE TABLE IF NOT EXISTS earnings ( 400 | earnings_id SERIAL PRIMARY KEY, 401 | source TEXT NOT NULL, 402 | source_id TEXT NOT NULL, 403 | symbol TEXT, 404 | company_name TEXT, 405 | date TIMESTAMP WITH TIME ZONE NOT NULL, 406 | reported_eps DECIMAL(6,3), 407 | consensus_eps DECIMAL(6,3), 408 | created_date TIMESTAMP WITH TIME ZONE, 409 | updated_date TIMESTAMP WITH TIME ZONE, 410 | FOREIGN KEY(source, source_id) 411 | REFERENCES symbology(source, source_id) 412 | ON UPDATE CASCADE); 413 | CREATE INDEX IF NOT EXISTS idx_earn_source_id 414 | ON earnings(source, source_id, date); 415 | 416 | CREATE TABLE IF NOT EXISTS economic_events ( 417 | event_id SERIAL PRIMARY KEY, 418 | source TEXT NOT NULL, 419 | source_id TEXT NOT NULL, 420 | event_name TEXT, 421 | date TIMESTAMP WITH TIME ZONE, 422 | date_for TIMESTAMP WITH TIME ZONE, 423 | actual TEXT, 424 | briefing_forecast TEXT, 425 | market_expects TEXT, 426 | prior TEXT, 427 | revised_from TEXT, 428 | created_date TIMESTAMP WITH TIME ZONE, 429 | updated_date TIMESTAMP WITH TIME ZONE, 430 | FOREIGN KEY(source, source_id) 431 | REFERENCES symbology(source, source_id) 432 | ON UPDATE CASCADE); 433 | CREATE INDEX IF NOT EXISTS idx_econ_event_source_id 434 | ON economic_events(source, source_id, date, event_name); 435 | 436 | CREATE TABLE IF NOT 
EXISTS ipo_pricings ( 437 | ipo_id SERIAL PRIMARY KEY, 438 | source TEXT NOT NULL, 439 | source_id TEXT NOT NULL, 440 | symbol TEXT, 441 | company_name TEXT, 442 | offer_date TIMESTAMP WITH TIME ZONE, 443 | shares_offered TEXT, 444 | proposed_price TEXT, 445 | initial_price TEXT, 446 | created_date TIMESTAMP WITH TIME ZONE, 447 | updated_date TIMESTAMP WITH TIME ZONE, 448 | FOREIGN KEY(source, source_id) 449 | REFERENCES symbology(source, source_id) 450 | ON UPDATE CASCADE); 451 | CREATE INDEX IF NOT EXISTS idx_ipop_source_id 452 | ON ipo_pricings(source, source_id, offer_date); 453 | 454 | CREATE TABLE IF NOT EXISTS splits ( 455 | split_id SERIAL PRIMARY KEY, 456 | source TEXT NOT NULL, 457 | source_id TEXT NOT NULL, 458 | symbol TEXT, 459 | company_name TEXT, 460 | payable_date TIMESTAMP WITH TIME ZONE, 461 | ex_date TIMESTAMP WITH TIME ZONE, 462 | announced_date TIMESTAMP WITH TIME ZONE, 463 | optionable BOOLEAN, 464 | ratio DECIMAL(11,4), 465 | created_date TIMESTAMP WITH TIME ZONE, 466 | updated_date TIMESTAMP WITH TIME ZONE, 467 | FOREIGN KEY(source, source_id) 468 | REFERENCES symbology(source, source_id) 469 | ON UPDATE CASCADE); 470 | CREATE INDEX IF NOT EXISTS idx_splits_source_id 471 | ON splits(source, source_id, ex_date, ratio); 472 | -------------------------------------------------------------------------------- /pySecMaster/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | postgres: 5 | container_name: postgres_pysecmaster 6 | image: postgres:10-alpine 7 | volumes: 8 | - ./database:/docker-entrypoint-initdb.d/ 9 | - postgres_pysecmaster_data:/var/lib/postgresql 10 | restart: always 11 | ports: 12 | - "127.0.0.1:5432:5432" 13 | 14 | app: 15 | container_name: pysecmaster 16 | build: . 
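# Build the app image from the local Dockerfile; rerun with
# `docker-compose up --build app` after changing a script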
17 | environment: 18 | - "PYTHONUNBUFFERED=1" # Prevent python prints from using buffer 19 | volumes: 20 | - ./load_tables:/load_tables/ 21 | depends_on: 22 | - postgres 23 | 24 | volumes: 25 | postgres_pysecmaster_data: 26 | -------------------------------------------------------------------------------- /pySecMaster/icons/exit_48px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/icons/exit_48px.png -------------------------------------------------------------------------------- /pySecMaster/icons/github_128px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/icons/github_128px.png -------------------------------------------------------------------------------- /pySecMaster/icons/google_128px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/icons/google_128px.png -------------------------------------------------------------------------------- /pySecMaster/icons/molecule_5_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/icons/molecule_5_black.png -------------------------------------------------------------------------------- /pySecMaster/icons/open_folder_240px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/icons/open_folder_240px.png -------------------------------------------------------------------------------- /pySecMaster/icons/quandl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/icons/quandl.png -------------------------------------------------------------------------------- /pySecMaster/icons/save_240px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/icons/save_240px.png -------------------------------------------------------------------------------- /pySecMaster/load_aux_tables.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime 3 | import os 4 | import pandas as pd 5 | import psycopg2 6 | 7 | from utilities.database_queries import df_to_sql, query_load_table,\ 8 | update_load_table 9 | 10 | __author__ = 'Josh Schertz' 11 | __copyright__ = 'Copyright (C) 2018 Josh Schertz' 12 | __description__ = 'An automated system to store and maintain financial data.' 
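# A minimal usage sketch for the LoadTables class defined below (the
# connection values are illustrative placeholders, not real settings):
#
#   LoadTables(database='pysecmaster', user='pymaster', password='secret',
#              host='localhost', port=5432,
#              tables_to_load=['data_vendor', 'exchanges'])
#
# The constructor connects to the given PostgreSQL database and moves each
# listed CSV file from the load_tables directory into its matching table.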
13 | __email__ = 'josh[AT]joshschertz[DOT]com' 14 | __license__ = 'GNU AGPLv3' 15 | __maintainer__ = 'Josh Schertz' 16 | __status__ = 'Development' 17 | __url__ = 'https://joshschertz.com/' 18 | __version__ = '1.5.0' 19 | 20 | ''' 21 | This program is free software: you can redistribute it and/or modify 22 | it under the terms of the GNU Affero General Public License as 23 | published by the Free Software Foundation, either version 3 of the 24 | License, or (at your option) any later version. 25 | 26 | This program is distributed in the hope that it will be useful, 27 | but WITHOUT ANY WARRANTY; without even the implied warranty of 28 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 29 | GNU Affero General Public License for more details. 30 | 31 | You should have received a copy of the GNU Affero General Public License 32 | along with this program. If not, see <http://www.gnu.org/licenses/>. 33 | ''' 34 | 35 | 36 | class LoadTables(object): 37 | 38 | def __init__(self, database, user, password, host, port, tables_to_load, 39 | load_tables='load_tables'): 40 | self.database = database 41 | self.user = user 42 | self.password = password 43 | self.host = host 44 | self.port = port 45 | self.load_to_sql(tables_to_load, load_tables) 46 | 47 | @staticmethod 48 | def altered_values(existing_df, new_df): 49 | """ Compare the two provided DataFrames, returning a new DataFrame that 50 | only includes rows from the new_df that are different from the 51 | existing_df. 52 | 53 | :param existing_df: DataFrame of the existing values 54 | :param new_df: DataFrame of the next values 55 | :return: DataFrame with the altered/new values 56 | """ 57 | 58 | # DataFrame with the similar values from both the existing_df and the 59 | # new_df. 60 | combined_df = pd.merge(left=existing_df, right=new_df, how='inner', 61 | on=list(new_df.columns.values)) 62 | 63 | # In a new DataFrame, only keep the new_df rows that did NOT have a 64 | # match to the existing_df 65 | id_col_name = list(new_df.columns.values)[0] 66 | altered_df = new_df[~new_df[id_col_name].isin(combined_df[id_col_name])] 67 | 68 | return altered_df 69 | 70 | def find_tsid(self, table_df): 71 | """ This only converts the stock's ticker to its respective symbol_id. 72 | This requires knowing the ticker, the exchange and data vendor. 73 | 74 | :param table_df: DataFrame with the ticker and index 75 | :return: DataFrame with symbol_ids instead of tickers 76 | """ 77 | 78 | try: 79 | conn = psycopg2.connect(database=self.database, user=self.user, 80 | password=self.password, host=self.host, 81 | port=self.port) 82 | with conn: 83 | cur = conn.cursor() 84 | # Determine if the quandl_codes table is empty; stop if it is. 85 | cur.execute('SELECT q_code FROM quandl_codes LIMIT 1') 86 | if not cur.fetchall(): 87 | print('The quandl_codes table is empty. Run the code to ' 88 | 'download the Quandl Codes and then run this again.') 89 | else: 90 | table_df = self.find_symbol_id_process(table_df, cur) 91 | return table_df 92 | except psycopg2.Error as e: 93 | print('Error when trying to retrieve data from the %s database ' 94 | 'in LoadTables.find_tsid' % self.database) 95 | print(e) 96 | 97 | @staticmethod 98 | def find_symbol_id_process(table_df, cur): 99 | """ Finds the ticker's symbol_id. If the table provided has an exchange 100 | column, then the ticker and exchange will be used to find the 101 | symbol_id. The result should be a perfect match to the quandl_codes
table. If an exchange column doesn't exist, then only the ticker will 103 | be used, along with an implied US exchange. Thus, only tickers traded 104 | on US exchanges will have their symbol_ids found. A way around this is 105 | to provide the exchange in the load file. 106 | 107 | :param table_df: A DataFrame with each row a ticker plus extra items 108 | :param cur: A cursor for navigating the SQL database. 109 | :return: A DataFrame with the original ticker replaced with a symbol_id 110 | """ 111 | 112 | if 'exchange' in table_df.columns: 113 | # ToDo: Find a new source for the tickers table 114 | 115 | cur.execute("""SELECT symbol_id, component, data 116 | FROM quandl_codes""") 117 | data = cur.fetchall() 118 | q_codes_df = pd.DataFrame(data, columns=['symbol_id', 'ticker', 119 | 'exchange']) 120 | q_codes_df.drop_duplicates('symbol_id', inplace=True) 121 | 122 | # Match the rows that have the same ticker and exchange 123 | df = pd.merge(table_df, q_codes_df, how='inner', 124 | on=['ticker', 'exchange']) 125 | 126 | df = df[['symbol_id', 'ticker', 'exchange', 'sector', 'industry', 127 | 'sub_industry', 'currency', 'hq_country', 'created_date', 128 | 'updated_date']] 129 | 130 | else: 131 | exchanges = ['NYSE', 'NYSEMKT', 'NYSEARCA', 'NASDAQ'] 132 | 133 | cur.execute("""SELECT symbol_id, component, data 134 | FROM quandl_codes""") 135 | data = cur.fetchall() 136 | q_codes_df = pd.DataFrame(data, columns=['symbol_id', 'ticker', 137 | 'exchange']) 138 | q_codes_df.drop_duplicates('symbol_id', inplace=True) 139 | 140 | # Match the rows that have the same ticker and exchange 141 | # The merge is broken into two steps, using an intermediate DataFrame 142 | df = pd.merge(table_df, q_codes_df, how='left', on='ticker') 143 | df = df[df['exchange'].isin(exchanges)] 144 | 145 | df = df.drop(['ticker', 'exchange'], axis=1) 146 | df.rename(columns={'index': 'stock_index'}, inplace=True) 147 | df = df[['stock_index', 'symbol_id', 'as_of', 'created_date', 148 | 'updated_date']] 149 | 150 | # ToDo: Implement a way to show the tickers that are not included 151 | return df 152 | 153 | def load_to_sql(self, tables_to_load, table_location): 154 | """ The main function that processes and loads the auxiliary data into 155 | the database. For each table listed in the tables_to_load list, its 156 | CSV file is loaded and the data moved into the SQL database. If the 157 | table is for indices, the CSV data is passed to the find_symbol_id 158 | function, where the ticker is replaced with its respective symbol_id. 159 | 160 | :param tables_to_load: List of strings 161 | :param table_location: String of the directory for the load tables 162 | :return: Nothing. Data is just loaded into the SQL database. 163 | """ 164 | 165 | start_time = time.time() 166 | for table, query in tables.items(): 167 | if table in tables_to_load: 168 | try: 169 | file = os.path.abspath(os.path.join(table_location, 170 | table + '.csv')) 171 | table_df = pd.read_csv(file, encoding='ISO-8859-1') 172 | except Exception as e: 173 | print('Unable to load the %s csv load file. 
Skipping it' % 174 | table) 175 | print(e) 176 | continue 177 | 178 | if table == 'indices' or table == 'tickers': 179 | # ToDo: Re-implement these tables; need symbol_id 180 | print('Unable to process indices and tickers table ' 181 | 'since there is no system to create a unique ' 182 | 'symbol_id for each item.') 183 | continue 184 | # # Removes the column that has the company's name 185 | # table_df.drop('ticker_name', 1, inplace=True) 186 | # # Finds the tsid for each ticker 187 | # table_df = self.find_tsid(table_df) 188 | 189 | # if table == 'tickers': 190 | # table_df.to_csv('load_tables/tickers_df.csv', 191 | # index=False) 192 | 193 | # Retrieve any existing values for this table 194 | existing_df = query_load_table( 195 | database=self.database, user=self.user, 196 | password=self.password, host=self.host, port=self.port, 197 | table=table) 198 | 199 | # Find the values that are different between the two DataFrames 200 | altered_df = self.altered_values( 201 | existing_df=existing_df, new_df=table_df) 202 | 203 | altered_df.insert(len(altered_df.columns), 'created_date', 204 | datetime.now().isoformat()) 205 | altered_df.insert(len(altered_df.columns), 'updated_date', 206 | datetime.now().isoformat()) 207 | 208 | # Get the id column for the current table (first column) 209 | id_col_name = list(altered_df.columns.values)[0] 210 | 211 | # Separate out the new and updated values from the altered_df 212 | new_df = (altered_df[~altered_df[id_col_name]. 213 | isin(existing_df[id_col_name])]) 214 | updated_df = (altered_df[altered_df[id_col_name]. 215 | isin(existing_df[id_col_name])]) 216 | 217 | # Update all modified values within the database 218 | update_load_table(database=self.database, user=self.user, 219 | password=self.password, host=self.host, 220 | port=self.port, values_df=updated_df, 221 | table=table) 222 | 223 | # Append all new values to the database 224 | df_to_sql(database=self.database, user=self.user, 225 | password=self.password, host=self.host, 226 | port=self.port, df=new_df, sql_table=table, 227 | exists='append', item=table) 228 | 229 | print('Loaded %s into the %s database' % 230 | (table, self.database)) 231 | 232 | load_tables_excluded = [table for table in tables_to_load 233 | if table not in tables.keys()] 234 | if load_tables_excluded: 235 | print('Unable to load the following tables: %s' % 236 | (", ".join(load_tables_excluded))) 237 | print("If the CSV file exists, make sure its name matches the " 238 | "name in the tables dictionary.") 239 | 240 | print('Finished loading all selected tables, taking %0.1f seconds' 241 | % (time.time() - start_time)) 242 | 243 | 244 | # NOTE: make sure the table name (dict key) matches the csv load file name 245 | tables = { 246 | 'data_vendor': '(%s,%s,%s,%s,%s,%s,%s,%s)', 247 | 'exchanges': '(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)', 248 | 'tickers': '(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)', 249 | 'indices': '(NULL,%s,%s,%s,%s,%s,%s)', 250 | } 251 | -------------------------------------------------------------------------------- /pySecMaster/load_tables/data_vendor.csv: -------------------------------------------------------------------------------- 1 | data_vendor_id,name,url,support_email,api,consensus_weight 2 | 1,Quandl_WIKI,https://www.quandl.com/data/WIKI,connect@quandl.com,WIKI,25 3 | 2,Quandl_GOOG,https://www.quandl.com/data/GOOG,connect@quandl.com,GOOG,15 4 | 3,Quandl_YAHOO,https://www.quandl.com/data/YAHOO,connect@quandl.com,YAHOO,15 5 | 
4,Quandl_FINRA,https://www.quandl.com/data/FINRA,connect@quandl.com,FINRA, 6 | 5,Quandl_EIA,https://www.quandl.com/data/EIA,connect@quandl.com,EIA, 7 | 6,Quandl_JODI,https://www.quandl.com/data/JODI,connect@quandl.com,JODI, 8 | 7,Quandl_ZFA,https://www.quandl.com/data/ZFA,connect@quandl.com,ZFA, 9 | 8,Quandl_ZFB,https://www.quandl.com/data/ZFB,connect@quandl.com,ZFB, 10 | 9,Quandl_RAYMOND,https://www.quandl.com/data/RAYMOND,connect@quandl.com,RAYMOND, 11 | 10,Quandl_ZEP,https://www.quandl.com/data/ZEP,connect@quandl.com,ZEP, 12 | 11,Quandl_EOD,https://www.quandl.com/data/EOD,connect@quandl.com,EOD,50 13 | 12,Google_Finance,https://www.google.com/finance,,,20 14 | 13,Yahoo_Finance,https://finance.yahoo.com,,,20 15 | 14,CSI_Data,http://www.csidata.com,support@csidata.com,,50 16 | 15,pySecMaster_Consensus,https://github.com/camisatx/pySecMaster,,, 17 | 16,IB_API,https://www.interactivebrokers.com,,, 18 | 17,Seeking_Alpha,http://www.seekingalpha.com,,, 19 | 18,Bloomberg,http://www.bloomberg.com,,, 20 | 19,Reuters,http://www.reuters.com,,, 21 | -------------------------------------------------------------------------------- /pySecMaster/load_tables/exchanges.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/load_tables/exchanges.csv -------------------------------------------------------------------------------- /pySecMaster/main_gui.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from PyQt4 import QtGui, QtCore, uic 3 | import os.path 4 | from queue import Queue 5 | import sys 6 | 7 | # Required to use resource file icons 8 | # Compile the qrc file in terminal "pyrcc4.exe -py3 'icons.qrc' -o 'icon_rc.py'" 9 | from icon_rc import * 10 | 11 | from pySecMaster import maintenance, data_download 12 | 13 | __author__ = 'Josh Schertz' 14 | __copyright__ = 'Copyright (C) 2018 Josh Schertz' 15 | __description__ = 'An automated system to store and maintain financial data.' 16 | __email__ = 'josh[AT]joshschertz[DOT]com' 17 | __license__ = 'GNU AGPLv3' 18 | __maintainer__ = 'Josh Schertz' 19 | __status__ = 'Development' 20 | __url__ = 'https://joshschertz.com/' 21 | __version__ = '1.5.0' 22 | 23 | ''' 24 | This program is free software: you can redistribute it and/or modify 25 | it under the terms of the GNU Affero General Public License as 26 | published by the Free Software Foundation, either version 3 of the 27 | License, or (at your option) any later version. 28 | 29 | This program is distributed in the hope that it will be useful, 30 | but WITHOUT ANY WARRANTY; without even the implied warranty of 31 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 32 | GNU Affero General Public License for more details. 33 | 34 | You should have received a copy of the GNU Affero General Public License 35 | along with this program. If not, see <http://www.gnu.org/licenses/>.
36 | ''' 37 | 38 | 39 | class MainWindow(QtGui.QMainWindow): 40 | 41 | def __init__(self, parent=None): 42 | super(MainWindow, self).__init__(parent) 43 | 44 | # Set the default name of the ini file; used to load/save GUI settings 45 | self.ini_name = 'pySecMaster_gui.ini' 46 | 47 | # Load the GUI structure from the ui file 48 | uic.loadUi('main_gui.ui', self) 49 | 50 | # Establish all menu bar connections 51 | self.actionLoad_Settings.triggered.connect(lambda: self.select_restore()) 52 | self.actionSave_Settings.triggered.connect(lambda: self.save_settings(self.ini_name)) 53 | self.actionStart.triggered.connect(self.process) 54 | self.actionExit.triggered.connect(lambda: self.confirm_close(self.ini_name)) 55 | self.actionPySecMaster.triggered.connect(lambda: self.open_url('https://github.com/camisatx/pySecMaster')) 56 | self.actionCSI_Data.triggered.connect(lambda: self.open_url('http://www.csidata.com/')) 57 | self.actionGoogle_Finance.triggered.connect(lambda: self.open_url('https://www.google.com/finance')) 58 | self.actionQuandl.triggered.connect(lambda: self.open_url('https://www.quandl.com/')) 59 | self.actionInstall_PostgreSQL.triggered.connect(lambda: self.open_url('http://www.postgresql.org/download/')) 60 | self.actionInstall_Psycopg.triggered.connect(lambda: self.open_url('http://initd.org/psycopg/docs/install.html')) 61 | self.actionJosh_Schertz.triggered.connect(lambda: self.open_url('https://joshschertz.com/')) 62 | 63 | # Establish all form button connections 64 | self.toolbtn_details.clicked.connect(self.txtbrwsr_details_toggle) 65 | self.btnbox_action.button(self.btnbox_action.Ok).\ 66 | clicked.connect(self.process) 67 | self.btnbox_action.button(self.btnbox_action.Abort).\ 68 | clicked.connect(self.worker_finished) 69 | self.btnbox_action.button(self.btnbox_action.Cancel).\ 70 | clicked.connect(lambda: self.confirm_close(self.ini_name)) 71 | 72 | # Set the default items for 'Quandl Databases' 73 | quandl_databases_index = self.cmb_tickers_quandl_db.findText('WIKI') 74 | self.cmb_tickers_quandl_db.setCurrentIndex(quandl_databases_index) 75 | 76 | # Hide the data fields if data won't be downloaded for them 77 | self.data_provider_toggle() 78 | # If 'Download Source' (Data tab) is changed, re-run the 79 | # data_provider_toggle method to re-process items 80 | self.cmb_data_source.currentIndexChanged.\ 81 | connect(self.data_provider_toggle) 82 | self.cmb_data_source.currentIndexChanged.\ 83 | connect(self.data_selection_toggle) 84 | 85 | # Modify the combobox items of 'Selection' (Data tab) to make sure it 86 | # only shows valid options. 
87 | self.data_selection_toggle() 88 | 89 | # Hide the details text browser by default 90 | # ToDo: Doesn't hide at startup; .isVisible() always returned 'False' 91 | self.txtbrwsr_details_toggle() 92 | 93 | # Hide the Abort button; only show when pySecMaster function is running 94 | self.btnbox_action.button(self.btnbox_action.Abort).hide() 95 | # Change the default name from 'Abort' to 'Stop' 96 | self.btnbox_action.button(self.btnbox_action.Abort).setText('Stop') 97 | 98 | # ToDo: Integrate the progress bar 99 | self.progressBar.hide() 100 | 101 | # Load the prior settings if an ini file exists 102 | if os.path.isfile(self.ini_name): 103 | self.restore_settings(self.ini_name) 104 | 105 | def closeEvent(self, event): 106 | """ 107 | closeEvent method is called when the user clicks the window close button 108 | 109 | :param event: A default system variable specifying a user action (exit) 110 | """ 111 | 112 | self.confirm_close(self.ini_name, event) 113 | 114 | def confirm_close(self, ini_name, event=None): 115 | """ 116 | Popup message box requiring user consent to close the program 117 | 118 | :param ini_name: String of the name of the ini file to save the 119 | settings to 120 | :param event: A Qt object that is only used via the closeEvent method 121 | """ 122 | 123 | reply = QtGui.QMessageBox.question(self, 'Confirm Exit', 124 | 'Do you want to save the current ' 125 | 'settings?', 126 | QtGui.QMessageBox.Yes | 127 | QtGui.QMessageBox.No | 128 | QtGui.QMessageBox.Cancel, 129 | QtGui.QMessageBox.Yes) 130 | 131 | if event: 132 | # Request originated from the closeEvent method 133 | if reply == QtGui.QMessageBox.Yes: 134 | self.save_settings(ini_name) 135 | event.accept() 136 | elif reply == QtGui.QMessageBox.No: 137 | event.accept() 138 | else: 139 | event.ignore() 140 | else: 141 | # Request originated from a specific exit feature 142 | if reply == QtGui.QMessageBox.Yes: 143 | self.save_settings(ini_name) 144 | sys.exit() 145 | elif reply == QtGui.QMessageBox.No: 146 | sys.exit() 147 | else: 148 | pass 149 | 150 | def data_provider_toggle(self): 151 | """ 152 | Hides the data fields if the data won't be downloaded for them. 153 | """ 154 | 155 | provider_selected = self.cmb_data_source.currentText() 156 | 157 | # The default interval is daily; all sources have daily data.
158 | intervals = ['daily'] 159 | 160 | if provider_selected in ['google', 'yahoo']: 161 | # Downloading Google or Yahoo Finance data; hide Quandl options 162 | self.lbl_quandlkey.hide() 163 | self.lineedit_quandlkey.hide() 164 | self.lbl_tickers_quandl.hide() 165 | self.cmb_tickers_quandl.hide() 166 | self.lbl_tickers_quandl_db.hide() 167 | self.cmb_tickers_quandl_db.hide() 168 | 169 | # Set the data interval 170 | self.cmb_data_interval.clear() 171 | if provider_selected == 'google': 172 | google_intervals = ['daily', 'minute'] 173 | self.cmb_data_interval.addItems(google_intervals) 174 | self.cmb_data_interval.setCurrentIndex(0) 175 | else: 176 | self.cmb_data_interval.addItems(intervals) 177 | self.cmb_data_interval.setCurrentIndex(0) 178 | 179 | elif provider_selected == 'quandl': 180 | # Downloading Quandl data; show all the Quandl options 181 | self.lbl_quandlkey.show() 182 | self.lineedit_quandlkey.show() 183 | self.lbl_tickers_quandl.show() 184 | self.cmb_tickers_quandl.show() 185 | self.lbl_tickers_quandl_db.show() 186 | self.cmb_tickers_quandl_db.show() 187 | 188 | # Set the data interval 189 | self.cmb_data_interval.clear() 190 | self.cmb_data_interval.addItems(intervals) 191 | self.cmb_data_interval.setCurrentIndex(0) 192 | 193 | else: 194 | raise NotImplementedError('%s is not implemented in the ' 195 | 'data_provider_toggle function within ' 196 | 'main_gui.py' % provider_selected) 197 | 198 | def data_selection_toggle(self): 199 | """ 200 | Modify the combobox items of 'Selection' (Data tab) to make sure it 201 | only shows valid options. Each one of these options has explicit SQL 202 | queries established in the database_queries.query_codes function. 203 | """ 204 | 205 | # The selected provider 206 | provider_selected = self.cmb_data_source.currentText() 207 | 208 | # The data selections for the currently selected data provider.
209 | google_fin_possible_selections = ['all', 'us_main', 210 | 'us_main_no_end_date', 211 | 'us_canada_london'] 212 | google_default_selection = 1 213 | 214 | yahoo_fin_possible_selections = ['all', 'us_main', 215 | 'us_main_no_end_date', 216 | 'us_canada_london'] 217 | yahoo_default_selection = 2 218 | 219 | quandl_possible_selections = ['wiki', 'goog', 'goog_us_main', 220 | 'goog_us_main_no_end_date', 221 | 'goog_us_canada_london'] 222 | quandl_default_selection = 0 223 | 224 | self.cmb_data_selection.clear() 225 | if provider_selected == 'google': 226 | self.cmb_data_selection.addItems(google_fin_possible_selections) 227 | self.cmb_data_selection.setCurrentIndex(google_default_selection) 228 | elif provider_selected == 'yahoo': 229 | self.cmb_data_selection.addItems(yahoo_fin_possible_selections) 230 | self.cmb_data_selection.setCurrentIndex(yahoo_default_selection) 231 | elif provider_selected == 'quandl': 232 | self.cmb_data_selection.addItems(quandl_possible_selections) 233 | self.cmb_data_selection.setCurrentIndex(quandl_default_selection) 234 | else: 235 | raise NotImplementedError('%s is not implemented in the ' 236 | 'data_selection_toggle function within ' 237 | 'main_gui.py' % provider_selected) 238 | 239 | def onDataReady(self, string): 240 | """ 241 | Special PyQt name; writes code output to txtbrwsr_details 242 | """ 243 | # ToDo: Build functionality to handle stderr, using red font in GUI 244 | 245 | cursor = self.txtbrwsr_details.textCursor() 246 | cursor.movePosition(cursor.End) 247 | cursor.insertText(str(string)) 248 | self.txtbrwsr_details.ensureCursorVisible() 249 | 250 | def open_url(self, url): 251 | """ 252 | Open the provided url in the system default browser 253 | 254 | :param url: String of the url 255 | """ 256 | 257 | print('Opening %s in the default browser' % (url,)) 258 | q_url = QtCore.QUrl(url) 259 | if not QtGui.QDesktopServices.openUrl(q_url): 260 | QtGui.QMessageBox.warning(self, 'Open Url', 261 | 'Could not open %s' % url) 262 | 263 | def process(self): 264 | """ 265 | Invoke the thread worker, prepare the worker by providing it with the 266 | variables that its function needs, and then pass the thread 267 | to the Worker class where it will be executed. 268 | """ 269 | 270 | # Determine if any of the postgres database options were not provided 271 | if '' in (self.lineedit_admin_user.text(), 272 | self.lineedit_admin_password.text(), 273 | self.lineedit_name.text(), self.lineedit_user.text(), 274 | self.lineedit_password.text(), self.lineedit_host.text(), 275 | self.lineedit_port.text()): 276 | raise ValueError('One or multiple database options were not ' 277 | 'provided. Ensure there is a value in each field ' 278 | 'within the PostgreSQL Database Options section.') 279 | 280 | # Determine if the Quandl API Key is required; if so, was it provided?
281 | if (self.cmb_data_source.currentText() in ['quandl'] and 282 | self.lineedit_quandlkey.text() == ''): 283 | raise ValueError('No Quandl API key provided') 284 | 285 | # # Deprecated when DB switched to PostgreSQL; kept for posterity 286 | # # Combine the directory path with the database name 287 | # db_link = os.path.abspath(os.path.join(self.lineedit_dbdir.text(), 288 | # self.lineedit_dbname.text())) 289 | 290 | # PostgreSQL database options 291 | database_options = {'admin_user': self.lineedit_admin_user.text(), 292 | 'admin_password': self.lineedit_admin_password.text(), 293 | 'database': self.lineedit_name.text(), 294 | 'user': self.lineedit_user.text(), 295 | 'password': self.lineedit_password.text(), 296 | 'host': self.lineedit_host.text(), 297 | 'port': self.lineedit_port.text()} 298 | 299 | # Change the quandl database string to a list 300 | quandl_db_list = [self.cmb_tickers_quandl_db.currentText()] 301 | 302 | # ToDo: Add these source options as an interactive setup 303 | symbology_sources = ['csi_data', 'tsid', 'quandl_wiki', 'quandl_goog', 304 | 'seeking_alpha', 'yahoo'] 305 | 306 | download_list = [{'source': self.cmb_data_source.currentText(), 307 | 'selection': self.cmb_data_selection.currentText(), 308 | 'interval': self.cmb_data_interval.currentText(), 309 | 'redownload_time': 60 * 60 * 12, 310 | 'data_process': 'replace', 311 | 'replace_days_back': 60, 312 | 'period': 60}] 313 | 314 | # Build the dictionary with all the pySecMaster settings 315 | settings_dict = { 316 | 'database_options': database_options, 317 | 'quandl_ticker_source': self.cmb_tickers_quandl.currentText(), 318 | 'quandl_db_list': quandl_db_list, 319 | 'download_list': download_list, 320 | 'quandl_update_range': self.spinbx_settings_quandl_update.value(), 321 | 'google_fin_update_range': self.spinbx_settings_csi_update.value(), 322 | 'threads': self.spinbx_settings_threads.value(), 323 | 'quandl_key': self.lineedit_quandlkey.text(), 324 | 'symbology_sources': symbology_sources 325 | } 326 | 327 | self.thread_worker = QtCore.QThread() 328 | self.worker = Worker() 329 | self.worker.dataReady.connect(self.onDataReady) 330 | 331 | self.worker.moveToThread(self.thread_worker) 332 | 333 | # Stops the thread after the worker is done. To start it again, call 334 | # thread.start() 335 | self.worker.finished.connect(self.thread_worker.quit) 336 | # ToDo: Figure out why worker_finished is unable to kill the thread 337 | # self.worker.finished.connect(self.worker_finished) 338 | self.worker.finished.connect(self.worker.deleteLater) 339 | 340 | # # Calls the Worker process directly, but it's difficult to send data 341 | # # to the worker object from the main gui thread. 342 | # self.thread_worker.started.connect(self.worker.processA) 343 | # self.thread_worker.finished.connect(main().app.exit) 344 | 345 | # Tell the thread to start working 346 | self.thread_worker.start() 347 | 348 | # Invoke the Worker process with the ability to safely communicate 349 | # with the worker through signals and slots. Worker must already be 350 | # running in order for the process to be invoked. If you need to pass 351 | # arguments to the worker process, add a "QtCore.Q_ARG(str, 'arg')" 352 | # variable for each argument in the invokeMethod statement after 353 | # the QueuedConnection variable. Only able to handle 10 arguments.
354 | # QtCore.Q_ARG(str, 'Hello'), 355 | # QtCore.Q_ARG(list, ['Hello', 0, 1])) 356 | QtCore.QMetaObject.invokeMethod(self.worker, 'pysecmaster', 357 | QtCore.Qt.QueuedConnection, 358 | QtCore.Q_ARG(dict, settings_dict)) 359 | 360 | # Disable the 'Ok' button while the worker thread is running 361 | self.btnbox_action.button(self.btnbox_action.Ok).setEnabled(False) 362 | 363 | # ToDo: Figure out why worker_finished is unable to kill the thread 364 | # # Show the 'Stop' button and hide the 'Cancel' button 365 | # self.btnbox_action.button(self.btnbox_action.Abort).show() 366 | # self.btnbox_action.button(self.btnbox_action.Cancel).hide() 367 | 368 | def restore_settings(self, ini_name): 369 | """ 370 | Technique adapted from: "https://stackoverflow.com 371 | /questions/23279125/python-pyqt4-functions-to-save-and-restore-ui- 372 | widget-values" 373 | 374 | :param ini_name: Name/path of the .ini file (Ex. pySecMaster_gui.ini) 375 | """ 376 | 377 | settings = QtCore.QSettings(ini_name, QtCore.QSettings.IniFormat) 378 | 379 | for name, obj in inspect.getmembers(self): 380 | if isinstance(obj, QtGui.QComboBox): 381 | name = obj.objectName() 382 | value = str(settings.value(name)) # .toString()) 383 | 384 | if value == "": 385 | continue 386 | 387 | # Get the corresponding index for specified string in combobox 388 | index = obj.findText(value) 389 | # Check if the value exists, otherwise add it to the combobox 390 | if index == -1: 391 | obj.insertItems(0, [value]) 392 | index = obj.findText(value) 393 | obj.setCurrentIndex(index) 394 | else: 395 | obj.setCurrentIndex(index) 396 | 397 | elif isinstance(obj, QtGui.QLineEdit): 398 | name = obj.objectName() 399 | value = str(settings.value(name)) 400 | obj.setText(value) 401 | 402 | elif isinstance(obj, QtGui.QSpinBox): 403 | name = obj.objectName() 404 | value = int(settings.value(name)) 405 | obj.setValue(value) 406 | 407 | elif isinstance(obj, QtGui.QCheckBox): 408 | name = obj.objectName() 409 | value = settings.value(name) 410 | if value: 411 | obj.setChecked(value) # setCheckState enables tristate 412 | 413 | def save_settings(self, ini_name): 414 | """ 415 | Technique adapted from: "https://stackoverflow.com 416 | /questions/23279125/python-pyqt4-functions-to-save-and-restore-ui- 417 | widget-values" 418 | 419 | :param ini_name: Name of the .ini file (Ex. pysecmaster.ini) 420 | :return: 421 | """ 422 | 423 | settings = QtCore.QSettings(ini_name, QtCore.QSettings.IniFormat) 424 | 425 | # For child in ui.children(): # works like getmembers, but because it 426 | # traverses the hierarchy, you would have to call the method recursively 427 | # to traverse down the tree. 428 | 429 | for name, obj in inspect.getmembers(self): 430 | if isinstance(obj, QtGui.QComboBox): 431 | name = obj.objectName() 432 | text = obj.currentText() 433 | settings.setValue(name, text) 434 | 435 | elif isinstance(obj, QtGui.QLineEdit): 436 | name = obj.objectName() 437 | value = obj.text() 438 | settings.setValue(name, value) 439 | 440 | elif isinstance(obj, QtGui.QSpinBox): 441 | name = obj.objectName() 442 | value = obj.value() 443 | settings.setValue(name, value) 444 | 445 | elif isinstance(obj, QtGui.QCheckBox): 446 | name = obj.objectName() 447 | state = obj.checkState() 448 | settings.setValue(name, state) 449 | 450 | def select_dir(self): 451 | """ 452 | Opens a PyQt folder search. If a folder is selected, it will 453 | populate the db_dir text editor box.
454 | 455 | DEPRECATED 456 | """ 457 | 458 | db_dir = QtGui.QFileDialog.getExistingDirectory(self, 459 | 'Select Directory') 460 | if db_dir: 461 | self.lineedit_dbdir.setText(db_dir) 462 | 463 | def select_restore(self): 464 | """ 465 | Opens a PyQt file search. If a file is selected, it will populate 466 | the gui settings with the values from the selected ini file. 467 | """ 468 | 469 | file = QtGui.QFileDialog.getOpenFileName(self, 'Select Saved Settings', 470 | '', 'INI (*.ini)') 471 | if file: 472 | self.restore_settings(file) 473 | 474 | def txtbrwsr_details_toggle(self): 475 | 476 | mw_size = [self.size().width(), self.size().height()] 477 | 478 | if self.txtbrwsr_details.isVisible(): 479 | 480 | mw_size[1] -= self.txtbrwsr_details.size().height() 481 | self.txtbrwsr_details.hide() 482 | 483 | # Resize the main window 484 | while self.size().height() > mw_size[1]: 485 | QtGui.QApplication.sendPostedEvents() 486 | self.resize(mw_size[0], mw_size[1]) 487 | 488 | else: 489 | self.txtbrwsr_details.show() 490 | 491 | def worker_finished(self): 492 | 493 | # Enable the 'Ok' button and change the Stop button back to Cancel 494 | self.btnbox_action.button(self.btnbox_action.Ok).setEnabled(True) 495 | # Hide the 'Stop' button and show the 'Cancel' button 496 | self.btnbox_action.button(self.btnbox_action.Abort).hide() 497 | self.btnbox_action.button(self.btnbox_action.Cancel).show() 498 | 499 | # ToDo: Figure out why none of these kill the thread... 500 | # Safely shut down the thread 501 | # self.thread_worker.quit() 502 | self.thread_worker.terminate() 503 | # self.thread_worker.wait() 504 | 505 | print('Current process has been halted.') 506 | 507 | 508 | class Worker(QtCore.QObject): 509 | finished = QtCore.pyqtSignal() 510 | dataReady = QtCore.pyqtSignal(str) 511 | 512 | @QtCore.pyqtSlot(dict) 513 | def pysecmaster(self, settings_dict): 514 | """ 515 | Calls the functions that operate the pySecMaster. Emits signals back to 516 | the main gui for further processing, using the dataReady process. 517 | 518 | :param settings_dict: Dictionary of all parameters to be passed back 519 | to the pySecMaster.py functions.
520 | """ 521 | 522 | self.dataReady.emit('Building the pySecMaster in the %s database ' 523 | 'located at host %s\n' % 524 | (settings_dict['database_options']['database'], 525 | settings_dict['database_options']['host'])) 526 | 527 | maintenance(database_options=settings_dict['database_options'], 528 | quandl_key=settings_dict['quandl_key'], 529 | quandl_ticker_source=settings_dict['quandl_ticker_source'], 530 | database_list=settings_dict['quandl_db_list'], 531 | threads=settings_dict['threads'], 532 | quandl_update_range=settings_dict['quandl_update_range'], 533 | csidata_update_range=settings_dict['google_fin_update_range'], 534 | symbology_sources=settings_dict['symbology_sources']) 535 | data_download(database_options=settings_dict['database_options'], 536 | quandl_key=settings_dict['quandl_key'], 537 | download_list=settings_dict['download_list'], 538 | threads=settings_dict['threads'], 539 | verbose=True) 540 | 541 | self.dataReady.emit('Finished running the pySecMaster process\n') 542 | self.finished.emit() 543 | 544 | 545 | class StdoutQueue(object): 546 | """ 547 | This is a queue that acts like the default system standard output (stdout) 548 | """ 549 | 550 | def __init__(self, queue): 551 | self.queue = queue 552 | 553 | def write(self, string): 554 | self.queue.put(string) 555 | 556 | def flush(self): 557 | sys.__stdout__.flush() 558 | 559 | 560 | class Receiver(QtCore.QObject): 561 | """ 562 | A QObject (to be run in a QThread) that sits waiting for data to come 563 | through a Queue.Queue(). It blocks until data is available, and once it's 564 | received something from the queue, it sends it to the "MainThread" by 565 | emitting a Qt Signal. 566 | """ 567 | 568 | signal = QtCore.pyqtSignal(str) 569 | 570 | def __init__(self, queue, *args, **kwargs): 571 | QtCore.QObject.__init__(self, *args, **kwargs) 572 | self.queue = queue 573 | 574 | @QtCore.pyqtSlot() 575 | def run(self): 576 | while True: 577 | text = self.queue.get() 578 | self.signal.emit(text) 579 | 580 | 581 | def main(): 582 | 583 | # Create Queue and redirect sys.stdout to this queue 584 | queue = Queue() 585 | sys.stdout = StdoutQueue(queue) 586 | 587 | # Start the main GUI class 588 | app = QtGui.QApplication(sys.argv) 589 | form = MainWindow() 590 | form.show() 591 | 592 | # Create thread that will listen for new strings in the queue. Upon new 593 | # items, Receiver will emit a signal, which will be sent to the 594 | # onDataReady method in the MainWindow class. The onDataReady method 595 | # will add the string to the text editor in the GUI. 596 | thread = QtCore.QThread() 597 | receiver = Receiver(queue) 598 | receiver.signal.connect(form.onDataReady) 599 | receiver.moveToThread(thread) 600 | thread.started.connect(receiver.run) 601 | thread.start() 602 | 603 | sys.exit(app.exec_()) 604 | 605 | if __name__ == '__main__': 606 | 607 | main() 608 | -------------------------------------------------------------------------------- /pySecMaster/query_data.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import numpy as np 3 | import pandas as pd 4 | import psycopg2 5 | import re 6 | import time 7 | 8 | __author__ = 'Josh Schertz' 9 | __copyright__ = 'Copyright (C) 2018 Josh Schertz' 10 | __description__ = 'An automated system to store and maintain financial data.' 
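# Worked sketch of the backwards price adjustment implemented in
# calculate_adjusted_prices below (the numbers are illustrative, not real
# quotes). Rows are processed newest-first; the newest close seeds the
# adjusted series, and each older close is scaled by the split- and
# dividend-corrected return between the two rows:
#
#   2-for-1 split recorded on the newest row (close 50, split 2.0):
#     older close 98  -> adj close = 50 + 50 * ((98/2 - 50) / 50) = 49.0
#   1.00 dividend recorded on the newest row (close 100, dividend 1.0):
#     older close 100 -> adj close = 100 + 100 * ((100 - 100 - 1) / 100) = 99.0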
11 | __email__ = 'josh[AT]joshschertz[DOT]com' 12 | __license__ = 'GNU AGPLv3' 13 | __maintainer__ = 'Josh Schertz' 14 | __status__ = 'Development' 15 | __url__ = 'https://joshschertz.com/' 16 | __version__ = '1.5.0' 17 | 18 | ''' 19 | This program is free software: you can redistribute it and/or modify 20 | it under the terms of the GNU Affero General Public License as 21 | published by the Free Software Foundation, either version 3 of the 22 | License, or (at your option) any later version. 23 | 24 | This program is distributed in the hope that it will be useful, 25 | but WITHOUT ANY WARRANTY; without even the implied warranty of 26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27 | GNU Affero General Public License for more details. 28 | 29 | You should have received a copy of the GNU Affero General Public License 30 | along with this program. If not, see <http://www.gnu.org/licenses/>. 31 | ''' 32 | 33 | 34 | def calculate_adjusted_prices(df, column): 35 | """ Iterative approach for calculating the adjusted prices for the 36 | specified column in the provided DataFrame. This creates a new column 37 | called 'adj_<column>' with the adjusted prices. This function requires 38 | that the DataFrame have columns with dividend and split values. 39 | 40 | NOTE: This assumes the input split values are direct. E.g. 7-for-1 split = 7 41 | 42 | :param df: DataFrame with raw prices along with dividend and split_ratio 43 | values 44 | :param column: String of which price column should have adjusted prices 45 | created for it 46 | :return: DataFrame with the addition of the adjusted price column 47 | """ 48 | 49 | adj_column = 'adj_' + column 50 | 51 | # Reverse the DataFrame order, sorting by date in descending order 52 | df.sort_index(ascending=False, inplace=True) 53 | 54 | price_col = df[column].values 55 | split_col = df['split'].values 56 | dividend_col = df['dividend'].values 57 | adj_price_col = np.zeros(len(df.index)) 58 | adj_price_col[0] = price_col[0] 59 | 60 | for i in range(1, len(price_col)): 61 | adj_price_col[i] = \ 62 | round((adj_price_col[i - 1] + adj_price_col[i - 1] * 63 | (((price_col[i] * (1/split_col[i - 1])) - 64 | price_col[i - 1] - 65 | dividend_col[i - 1]) / price_col[i - 1])), 4) 66 | 67 | df[adj_column] = adj_price_col 68 | 69 | # Change the DataFrame order back to dates ascending 70 | df.sort_index(ascending=True, inplace=True) 71 | 72 | return df 73 | 74 | 75 | def pull_daily_prices(database, user, password, host, port, query_type, 76 | data_vendor_id, beg_date, end_date, adjust=True, 77 | source='tsid', *args): 78 | """ Query the daily prices from the database for the tsid provided between 79 | the start and end dates. Return a DataFrame with the prices. 80 | 81 | :param database: String of the database name 82 | :param user: String of the username used to login to the database 83 | :param password: String of the password used to login to the database 84 | :param host: String of the database address (localhost, url, ip, etc.)
85 | :param port: Integer of the database port number (5432) 86 | :param query_type: String of which query to run 87 | :param data_vendor_id: Integer of which data vendor id to return prices for 88 | :param beg_date: String of the ISO date to start with 89 | :param end_date: String of the ISO date to end with 90 | :param adjust: Boolean of whether to adjust the values or not; default True 91 | :param source: String of the ticker's source 92 | :return: DataFrame of the returned prices 93 | """ 94 | 95 | conn = psycopg2.connect(database=database, user=user, password=password, 96 | host=host, port=port) 97 | 98 | try: 99 | with conn: 100 | cur = conn.cursor() 101 | if query_type == 'ticker': 102 | tsid, = args 103 | print('Extracting the daily prices for %s' % (tsid,)) 104 | 105 | cur.execute("""SELECT date, source_id AS tsid, open, high, low, 106 | close, volume, dividend, split 107 | FROM daily_prices 108 | WHERE source_id=%s AND source=%s 109 | AND data_vendor_id=%s 110 | AND date>=%s AND date<=%s""", 111 | (tsid, source, data_vendor_id, beg_date, end_date)) 112 | 113 | else: 114 | raise NotImplementedError('Query type %s is not implemented ' 115 | 'within pull_daily_prices' % 116 | query_type) 117 | 118 | rows = cur.fetchall() 119 | if rows: 120 | df = pd.DataFrame(rows, 121 | columns=['date', 'tsid', 'open', 'high', 122 | 'low', 'close', 'volume', 123 | 'dividend', 'split']) 124 | else: 125 | raise SystemExit('No data returned from table query. Try ' 126 | 'adjusting the criteria for the query.') 127 | 128 | # The next two lines change the index of the df to be the date. 129 | df.set_index(['date'], inplace=True) 130 | df.index.name = 'date' 131 | 132 | df.sort_index(inplace=True) 133 | 134 | if adjust: 135 | # Change the columns from decimal to float 136 | df['dividend'] = df['dividend'].apply(lambda x: float(x)) 137 | df['split'] = df['split'].apply(lambda x: float(x)) 138 | df['close'] = df['close'].apply(lambda x: float(x)) 139 | 140 | # Calculate the adjusted prices for the close column 141 | df = calculate_adjusted_prices(df=df, column='close') 142 | 143 | return df 144 | 145 | except psycopg2.Error as e: 146 | print('Error when trying to retrieve price data from the %s database ' 147 | 'in pull_daily_prices' % database) 148 | print(e) 149 | except conn.OperationalError: 150 | raise SystemError('Unable to connect to the %s database in ' 151 | 'pull_daily_prices. Make sure the database ' 152 | 'address/name are correct.' % database) 153 | except Exception as e: 154 | print(e) 155 | raise SystemError('Error: Unknown issue occurred in pull_daily_prices') 156 | 157 | 158 | def pull_minute_prices(database, user, password, host, port, query_type, 159 | data_vendor_id, beg_date, end_date, source='tsid', 160 | *args): 161 | """ Query the minute prices from the database for the tsid provided between 162 | the start and end dates. Return a DataFrame with the prices. 163 | 164 | :param database: String of the database name 165 | :param user: String of the username used to login to the database 166 | :param password: String of the password used to login to the database 167 | :param host: String of the database address (localhost, url, ip, etc.) 
168 | :param port: Integer of the database port number (5432) 169 | :param query_type: String of which query to run 170 | :param data_vendor_id: Integer of which data vendor id to return prices for 171 | :param beg_date: String of the ISO date to start with 172 | :param end_date: String of the ISO date to end with 173 | :param source: String of the source 174 | :param args: Extra arguments; for the 'ticker' query type, the tsid string 175 | :return: DataFrame of the returned prices 176 | """ 177 | 178 | conn = psycopg2.connect(database=database, user=user, password=password, 179 | host=host, port=port) 180 | 181 | try: 182 | with conn: 183 | cur = conn.cursor() 184 | if query_type == 'ticker': 185 | tsid, = args 186 | print('Extracting the minute prices for %s' % (tsid,)) 187 | 188 | cur.execute("""SELECT date, source_id AS tsid, open, high, low, 189 | close, volume 190 | FROM minute_prices 191 | WHERE source_id=%s AND source=%s 192 | AND data_vendor_id=%s 193 | AND date>=%s AND date<=%s""", 194 | (tsid, source, data_vendor_id, beg_date, end_date)) 195 | else: 196 | raise NotImplementedError('Query type %s is not implemented ' 197 | 'within pull_minute_prices' % 198 | query_type) 199 | 200 | rows = cur.fetchall() 201 | if rows: 202 | df = pd.DataFrame(rows, 203 | columns=['date', 'tsid', 'open', 'high', 204 | 'low', 'close', 'volume']) 205 | else: 206 | raise SystemExit('No data returned from table query. Try ' 207 | 'adjusting the criteria for the query.') 208 | 209 | # The next two lines change the index of the df to be the date. 210 | df.set_index(['date'], inplace=True) 211 | df.index.name = 'date' 212 | 213 | df.sort_index(inplace=True) 214 | 215 | return df 216 | 217 | except psycopg2.Error as e: 218 | print('Error when trying to retrieve price data from the %s database ' 219 | 'in pull_minute_prices' % database) 220 | print(e) 221 | except conn.OperationalError: 222 | raise SystemError('Unable to connect to the %s database in ' 223 | 'pull_minute_prices. Make sure the database ' 224 | 'address/name are correct.'
% database) 225 | except Exception as e: 226 | print(e) 227 | raise SystemError('Error: Unknown issue occurred in pull_minute_prices') 228 | 229 | 230 | if __name__ == '__main__': 231 | 232 | from utilities.user_dir import user_dir 233 | 234 | userdir = user_dir() 235 | 236 | test_database = userdir['postgresql']['pysecmaster_db'] 237 | test_user = userdir['postgresql']['pysecmaster_user'] 238 | test_password = userdir['postgresql']['pysecmaster_password'] 239 | test_host = userdir['postgresql']['pysecmaster_host'] 240 | test_port = userdir['postgresql']['pysecmaster_port'] 241 | 242 | test_query_type = 'ticker' # index, ticker 243 | test_tsid = 'AAPL.Q.0' 244 | test_data_vendor_id = 1 # Quandl WIKI 245 | # test_data_vendor_id = 11 # Quandl EOD 246 | # test_data_vendor_id = 15 # pySecMaster_Consensus 247 | # test_data_vendor_id = 12 # Google_Finance 248 | test_beg_date = '1950-01-01 00:00:00' 249 | test_end_date = '2018-12-30 00:00:00' 250 | frequency = 'daily' # daily, minute 251 | 252 | start_time = time.time() 253 | if test_query_type == 'ticker': 254 | if frequency == 'daily': 255 | prices_df = pull_daily_prices(test_database, test_user, 256 | test_password, test_host, test_port, 257 | test_query_type, test_data_vendor_id, 258 | test_beg_date, test_end_date, 259 | True, 'tsid', test_tsid) 260 | 261 | elif frequency == 'minute': 262 | prices_df = pull_minute_prices(test_database, test_user, 263 | test_password, test_host, test_port, 264 | test_query_type, test_data_vendor_id, 265 | test_beg_date, test_end_date, 266 | 'tsid', test_tsid) 267 | 268 | else: 269 | raise NotImplementedError('Frequency %s is not implemented within ' 270 | 'query_data.py' % frequency) 271 | else: 272 | raise NotImplementedError('Query type %s is not implemented within ' 273 | 'query_data.py' % test_query_type) 274 | 275 | csv_friendly_tsid = re.sub('[.]', '_', test_tsid) 276 | print('Query took %0.2f seconds' % (time.time() - start_time)) 277 | print(prices_df) 278 | #prices_df.to_csv('output/%s_%s_%s.csv' % 279 | # (csv_friendly_tsid, frequency, 280 | # datetime.today().strftime('%Y%m%d'))) 281 | 282 | unique_codes = pd.unique((prices_df['tsid']).values) 283 | print('There are %i unique tsid codes' % (len(unique_codes))) 284 | print('There are %s rows' % ('{:,}'.format(len(prices_df.index)))) 285 | print(datetime.today().strftime('%Y%m%d')) 286 | -------------------------------------------------------------------------------- /pySecMaster/query_database.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psycopg2 3 | import time 4 | 5 | __author__ = 'Josh Schertz' 6 | __copyright__ = 'Copyright (C) 2018 Josh Schertz' 7 | __description__ = 'An automated system to store and maintain financial data.' 8 | __email__ = 'josh[AT]joshschertz[DOT]com' 9 | __license__ = 'GNU AGPLv3' 10 | __maintainer__ = 'Josh Schertz' 11 | __status__ = 'Development' 12 | __url__ = 'https://joshschertz.com/' 13 | __version__ = '1.5.0' 14 | 15 | ''' 16 | This program is free software: you can redistribute it and/or modify 17 | it under the terms of the GNU Affero General Public License as 18 | published by the Free Software Foundation, either version 3 of the 19 | License, or (at your option) any later version. 20 | 21 | This program is distributed in the hope that it will be useful, 22 | but WITHOUT ANY WARRANTY; without even the implied warranty of 23 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 24 | GNU Affero General Public License for more details.
25 | 26 | You should have received a copy of the GNU Affero General Public License 27 | along with this program. If not, see <http://www.gnu.org/licenses/>. 28 | ''' 29 | 30 | 31 | def query_entire_table(database, user, password, host, port, table): 32 | """ Query all of the tsid values from the symbology table that have at least one row in the specified table. 33 | 34 | :param database: String of the database name 35 | :param user: String of the username used to login to the database 36 | :param password: String of the password used to login to the database 37 | :param host: String of the database address (localhost, url, ip, etc.) 38 | :param port: Integer of the database port number (5432) 39 | :param table: String of the table whose values should be returned 40 | :return: DataFrame of the returned values 41 | """ 42 | 43 | conn = psycopg2.connect(database=database, user=user, password=password, 44 | host=host, port=port) 45 | 46 | try: 47 | with conn: 48 | cur = conn.cursor() 49 | query = ("""SELECT sym.source_id AS tsid 50 | FROM symbology AS sym, 51 | LATERAL ( 52 | SELECT source_id 53 | FROM %s 54 | WHERE source_id = sym.source_id 55 | ORDER BY source_id ASC NULLS LAST 56 | LIMIT 1) AS prices""" % 57 | (table,)) 58 | cur.execute(query) 59 | rows = cur.fetchall() 60 | if rows: 61 | df = pd.DataFrame(rows) 62 | else: 63 | raise SystemExit('No data returned from query_entire_table') 64 | 65 | return df 66 | except psycopg2.Error as e: 67 | print( 68 | 'Error when trying to retrieve data from the %s database in ' 69 | 'query_entire_table' % database) 70 | print(e) 71 | except conn.OperationalError: 72 | raise SystemError('Unable to connect to the %s database in ' 73 | 'query_entire_table. Make sure the database ' 74 | 'address/name are correct.' % database) 75 | except Exception as e: 76 | print(e) 77 | raise SystemError('Error: Unknown issue occurred in query_entire_table') 78 | 79 | if __name__ == '__main__': 80 | 81 | from utilities.user_dir import user_dir 82 | 83 | userdir = user_dir() 84 | 85 | test_database = userdir['postgresql']['pysecmaster_db'] 86 | test_user = userdir['postgresql']['pysecmaster_user'] 87 | test_password = userdir['postgresql']['pysecmaster_password'] 88 | test_host = userdir['postgresql']['pysecmaster_host'] 89 | test_port = userdir['postgresql']['pysecmaster_port'] 90 | 91 | test_table = 'daily_prices' # daily_prices, minute_prices, quandl_codes 92 | 93 | start_time = time.time() 94 | 95 | table_df = query_entire_table(test_database, test_user, test_password, 96 | test_host, test_port, test_table) 97 | 98 | print('Query took %0.2f seconds' % (time.time() - start_time)) 99 | 100 | # table_df.to_csv('%s.csv' % test_table) 101 | print(table_df) 102 | -------------------------------------------------------------------------------- /pySecMaster/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.14.0 2 | pandas==0.22.0 3 | psycopg2==2.8.2 4 | sqlalchemy==1.3.3 5 | -------------------------------------------------------------------------------- /pySecMaster/symbology_only.py: -------------------------------------------------------------------------------- 1 | from build_symbology import create_symbology 2 | from extractor import CSIDataExtractor 3 | from load_aux_tables import LoadTables 4 | from utilities.user_dir import user_dir 5 | 6 | 7 | __author__ = 'Josh Schertz' 8 | __copyright__ = 'Copyright (C) 2018 Josh Schertz' 9 | __description__ = 'An automated system to store and maintain financial data.'
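# A minimal usage sketch for the build_symbology function defined below (the
# connection values are illustrative placeholders; in practice they come from
# utilities.user_dir, as in the __main__ block at the bottom of this file):
#
#   build_symbology(database_options={
#       'database': 'pysecmaster', 'user': 'pymaster', 'password': 'secret',
#       'host': 'localhost', 'port': 5432})
#
# This loads the data_vendor and exchanges tables, refreshes the CSI Data
# stock factsheet, and rebuilds the symbology cross-reference table.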
10 | __email__ = 'josh[AT]joshschertz[DOT]com' 11 | __license__ = 'GNU AGPLv3' 12 | __maintainer__ = 'Josh Schertz' 13 | __status__ = 'Development' 14 | __url__ = 'https://joshschertz.com/' 15 | __version__ = '1.5.0' 16 | 17 | ''' 18 | This program is free software: you can redistribute it and/or modify 19 | it under the terms of the GNU Affero General Public License as 20 | published by the Free Software Foundation, either version 3 of the 21 | License, or (at your option) any later version. 22 | 23 | This program is distributed in the hope that it will be useful, 24 | but WITHOUT ANY WARRANTY; without even the implied warranty of 25 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 26 | GNU Affero General Public License for more details. 27 | 28 | You should have received a copy of the GNU Affero General Public License 29 | along with this program. If not, see <http://www.gnu.org/licenses/>. 30 | ''' 31 | 32 | 33 | def build_symbology(database_options): 34 | 35 | tables_to_load = ['data_vendor', 'exchanges'] 36 | 37 | csidata_type = 'stock' # stock, commodity 38 | csidata_update_range = 7 39 | 40 | # Don't change these unless you know what you are doing 41 | # http://www.csidata.com/factsheets.php?type=stock&format=html 42 | csidata_url = 'http://www.csidata.com/factsheets.php?' 43 | symbology_sources = ['csi_data', 'tsid', 'quandl_wiki', 'quandl_eod', 44 | 'quandl_goog', 'seeking_alpha', 'yahoo'] 45 | 46 | LoadTables(database=database_options['database'], 47 | user=database_options['user'], 48 | password=database_options['password'], 49 | host=database_options['host'], 50 | port=database_options['port'], 51 | tables_to_load=tables_to_load) 52 | 53 | CSIDataExtractor(database=database_options['database'], 54 | user=database_options['user'], 55 | password=database_options['password'], 56 | host=database_options['host'], 57 | port=database_options['port'], 58 | db_url=csidata_url, 59 | data_type=csidata_type, 60 | redownload_time=csidata_update_range) 61 | 62 | create_symbology(database=database_options['database'], 63 | user=database_options['user'], 64 | password=database_options['password'], 65 | host=database_options['host'], 66 | port=database_options['port'], 67 | source_list=symbology_sources) 68 | 69 | if __name__ == '__main__': 70 | 71 | userdir = user_dir() 72 | 73 | sayvmaster_database_options = { 74 | 'admin_user': userdir['postgresql']['main_user'], 75 | 'admin_password': userdir['postgresql']['main_password'], 76 | 'database': userdir['postgresql']['sayvmaster_db'], 77 | 'user': userdir['postgresql']['sayvmaster_user'], 78 | 'password': userdir['postgresql']['sayvmaster_password'], 79 | 'host': userdir['postgresql']['sayvmaster_host'], 80 | 'port': userdir['postgresql']['sayvmaster_port'], 81 | } 82 | 83 | newsmaster_database_options = { 84 | 'admin_user': userdir['postgresql']['main_user'], 85 | 'admin_password': userdir['postgresql']['main_password'], 86 | 'database': userdir['postgresql']['newsmaster_db'], 87 | 'user': userdir['postgresql']['newsmaster_user'], 88 | 'password': userdir['postgresql']['newsmaster_password'], 89 | 'host': userdir['postgresql']['newsmaster_host'], 90 | 'port': userdir['postgresql']['newsmaster_port'], 91 | } 92 | 93 | # build_symbology(database_options=sayvmaster_database_options) 94 | build_symbology(database_options=newsmaster_database_options) 95 | -------------------------------------------------------------------------------- /pySecMaster/tests/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/tests/__init__.py -------------------------------------------------------------------------------- /pySecMaster/tests/test_database.py: -------------------------------------------------------------------------------- 1 | import psycopg2 2 | from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT 3 | import unittest 4 | 5 | from create_tables import create_database, main_tables, data_tables,\ 6 | events_tables 7 | from utilities.user_dir import user_dir 8 | 9 | 10 | class DatabaseCreationTests(unittest.TestCase): 11 | 12 | def setUp(self): 13 | 14 | self.userdir = user_dir()['postgresql'] 15 | 16 | self.db_name = self.userdir['pysecmaster_test_db'] 17 | self.user = self.userdir['pysecmaster_test_user'] 18 | self.password = self.userdir['pysecmaster_test_password'] 19 | self.host = self.userdir['pysecmaster_test_host'] 20 | self.port = self.userdir['pysecmaster_test_port'] 21 | 22 | def test_database_creation(self): 23 | 24 | create_database(database=self.db_name) 25 | 26 | conn = psycopg2.connect(database=self.userdir['main_db'], 27 | user=self.userdir['main_user'], 28 | password=self.userdir['main_password'], 29 | host=self.userdir['main_host'], 30 | port=self.userdir['main_port']) 31 | conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) 32 | 33 | with conn: 34 | cur = conn.cursor() 35 | cur.execute("""SELECT datname FROM pg_catalog.pg_database 36 | WHERE lower(datname)=lower('%s')""" % self.db_name) 37 | database_exist = cur.fetchone() 38 | 39 | self.assertEqual(len(database_exist), 1) 40 | 41 | # cur.execute("""SELECT pg_terminate_backend(pg_stat_activity.pid) 42 | # FROM pg_stat_activity 43 | # WHERE datname = current_database() 44 | # AND pid <> pg_backend_pid()""") 45 | cur.execute("""DROP DATABASE IF EXISTS %s""" % self.db_name) 46 | cur.close() 47 | conn.close() 48 | 49 | def test_table_creation(self): 50 | 51 | create_database(database=self.db_name, user=self.user) 52 | 53 | main_tables(database=self.db_name, user=self.user, 54 | password=self.password, host=self.host, port=self.port) 55 | data_tables(database=self.db_name, user=self.user, 56 | password=self.password, host=self.host, port=self.port) 57 | events_tables(database=self.db_name, user=self.user, 58 | password=self.password, host=self.host, port=self.port) 59 | 60 | tables_to_create = ['fundamental_data', 'daily_prices', 'finra_data', 61 | 'minute_prices', 'conference_calls', 'dividends', 62 | 'earnings', 'exchange', 'economic_events', 63 | 'ipo_pricings', 'symbology', 'splits', 64 | 'csidata_stock_factsheet', 'baskets', 65 | 'basket_values', 'indices', 'quandl_codes', 66 | 'data_vendor', 'option_chains', 'tick_prices', 67 | 'tick_prices_stream'] 68 | tables_created = [] 69 | extra_table = [] 70 | missing_table = [] 71 | 72 | conn = psycopg2.connect(database=self.db_name, user=self.user, 73 | password=self.password, host=self.host, 74 | port=self.port) 75 | 76 | with conn: 77 | cur = conn.cursor() 78 | cur.execute("""SELECT table_name 79 | FROM information_schema.tables 80 | WHERE table_schema='public' 81 | AND table_type='BASE TABLE'""") 82 | tables_exists = cur.fetchall() 83 | 84 | if tables_exists: 85 | 86 | for table in tables_exists: 87 | tables_created.append(table[0]) 88 | if table[0] not in tables_to_create: 89 | extra_table.append(table[0]) 90 | 91 | for table in tables_to_create: 92 | if table not in tables_created: 93 | missing_table.append(table) 94 | 95 | cur.close() 96 | conn.close() 97 | 98 | if 
missing_table: 99 | print('Missing tables: %s' % missing_table) 100 | if extra_table: 101 | print('Extra tables: %s' % extra_table) 102 | 103 | self.assertEqual(len(tables_to_create), len(tables_created)) 104 | self.assertEqual(len(missing_table), 0) 105 | self.assertEqual(len(extra_table), 0) 106 | 107 | # Connect as the server super user to drop the test database 108 | conn = psycopg2.connect(database=self.userdir['main_db'], 109 | user=self.userdir['main_user'], 110 | password=self.userdir['main_password'], 111 | host=self.userdir['main_host'], 112 | port=self.userdir['main_port']) 113 | conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) 114 | 115 | with conn: 116 | cur = conn.cursor() 117 | cur.execute("""DROP DATABASE IF EXISTS %s""" % self.db_name) 118 | cur.close() 119 | conn.close() 120 | -------------------------------------------------------------------------------- /pySecMaster/tests/test_extractor.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from datetime import datetime 3 | import os 4 | import pandas as pd 5 | import psycopg2 6 | import sys 7 | import unittest 8 | 9 | sys.path.append('..') 10 | 11 | from utilities.user_dir import user_dir 12 | from extractor import NASDAQSectorIndustryExtractor 13 | from download import QuandlDownload, download_google_data, download_yahoo_data 14 | 15 | 16 | class GoogleFinanceDownloadTests(unittest.TestCase): 17 | 18 | def setUp(self): 19 | userdir = user_dir() 20 | self.database = userdir['postgresql']['pysecmaster_db'] 21 | self.user = userdir['postgresql']['pysecmaster_user'] 22 | self.password = userdir['postgresql']['pysecmaster_password'] 23 | self.host = userdir['postgresql']['pysecmaster_host'] 24 | self.port = userdir['postgresql']['pysecmaster_port'] 25 | 26 | self.google_fin_url = { 27 | 'root': 'http://www.google.com/finance/getprices?', 28 | 'ticker': 'q=', 29 | 'exchange': 'x=', 30 | 'interval': 'i=', # 60; 60 seconds is the shortest interval 31 | # 'sessions': 'sessions=ext_hours', 32 | 'period': 'p=', # 20d; 15d is the longest period for min 33 | 'fields': 'f=d,c,v,o,h,l', 34 | } # order doesn't change anything 35 | 36 | self.exchanges_df = self.query_exchanges() 37 | 38 | def test_download_google_daily_price_data(self): 39 | self.google_fin_url['interval'] += str(60*60*24) 40 | self.google_fin_url['period'] += str(60) + 'd' 41 | tsid = 'AAPL.Q.0' 42 | 43 | csv_wo_data = 'test_goog_daily_wo_data.csv' 44 | with open(csv_wo_data, 'w') as csv_file: 45 | writer = csv.writer(csv_file) 46 | writer.writerow(['tsid', 'date_tried']) 47 | 48 | test_df = download_google_data(db_url=self.google_fin_url, 49 | tsid=tsid, exchanges_df=self.exchanges_df, csv_out=csv_wo_data) 50 | print(test_df) 51 | self.assertGreater(len(test_df.index), 1) 52 | os.remove(csv_wo_data) 53 | 54 | def test_download_google_minute_price_data(self): 55 | self.google_fin_url['interval'] += str(60) 56 | self.google_fin_url['period'] += str(20) + 'd' 57 | tsid = 'AAPL.Q.0' 58 | 59 | csv_wo_data = 'test_goog_minute_wo_data.csv' 60 | with open(csv_wo_data, 'w') as csv_file: 61 | writer = csv.writer(csv_file) 62 | writer.writerow(['tsid', 'date_tried']) 63 | 64 | test_df = download_google_data(db_url=self.google_fin_url, 65 | tsid=tsid, exchanges_df=self.exchanges_df, csv_out=csv_wo_data) 66 | print(test_df) 67 | self.assertGreater(len(test_df.index), 1) 68 | os.remove(csv_wo_data) 69 | 70 | def query_exchanges(self): 71 | """ Retrieve the exchange symbols for goog and tsid, which will be used 72 | to 
translate the tsid symbols to goog symbols. Remove the symbols for 73 | which there are no goog symbols. 74 | 75 | :return: DataFrame with exchange symbols 76 | """ 77 | 78 | conn = psycopg2.connect(database=self.database, user=self.user, 79 | password=self.password, host=self.host, 80 | port=self.port) 81 | try: 82 | with conn: 83 | cur = conn.cursor() 84 | cur.execute("""SELECT DISTINCT ON (tsid_symbol) 85 | symbol, goog_symbol, tsid_symbol 86 | FROM exchanges 87 | WHERE goog_symbol IS NOT NULL 88 | AND goog_symbol != 'NaN' 89 | ORDER BY tsid_symbol ASC NULLS LAST""") 90 | rows = cur.fetchall() 91 | df = pd.DataFrame(rows, columns=['symbol', 'goog_symbol', 92 | 'tsid_symbol']) 93 | except psycopg2.Error as e: 94 | print(e) 95 | raise SystemError('Failed to query the data from the exchange ' 96 | 'table within query_exchanges in ' 97 | 'GoogleFinanceDownloadTests') 98 | except conn.OperationalError: 99 | raise SystemError('Unable to connect to the SQL Database in ' 100 | 'query_exchanges in GoogleFinanceDownloadTests. ' 101 | 'Make sure the database address is correct.') 102 | except Exception as e: 103 | print(e) 104 | raise SystemError('Error: Unknown issue occurred in ' 105 | 'query_exchanges in GoogleFinanceDownloadTests.') 106 | 107 | conn.close() 108 | return df 109 | 110 | 111 | class YahooFinanceDownloadTests(unittest.TestCase): 112 | 113 | def setUp(self): 114 | userdir = user_dir() 115 | self.database = userdir['postgresql']['pysecmaster_db'] 116 | self.user = userdir['postgresql']['pysecmaster_user'] 117 | self.password = userdir['postgresql']['pysecmaster_password'] 118 | self.host = userdir['postgresql']['pysecmaster_host'] 119 | self.port = userdir['postgresql']['pysecmaster_port'] 120 | 121 | cur_posix_time = str(datetime.now().timestamp()) 122 | cur_posix_time = cur_posix_time[:cur_posix_time.find('.')] 123 | self.yahoo_fin_url = { 124 | 'root': 'https://query1.finance.yahoo.com/v7/finance/download/', 125 | 'start_date': 'period1=0', # First POSIX time (whole hist) 126 | 'end_date': 'period2=' + cur_posix_time, # Cur POSIX time 127 | 'interval': 'interval=', # 1d, 1w, 1mo: (daily, wkly, mthly) 128 | 'events': 'events=', # history, div, split 129 | 'cookie': 'crumb=', # Cookie value 130 | } 131 | 132 | self.csv_wo_data = 'test_yahoo_daily_wo_data.csv' 133 | with open(self.csv_wo_data, 'w') as csv_file: 134 | writer = csv.writer(csv_file) 135 | writer.writerow(['tsid', 'date_tried']) 136 | 137 | self.exchanges_df = self.query_exchanges() 138 | 139 | def tearDown(self): 140 | os.remove(self.csv_wo_data) 141 | 142 | def test_download_yahoo_daily_price_data(self): 143 | self.yahoo_fin_url['interval'] += '1d' 144 | self.yahoo_fin_url['events'] += 'history' 145 | tsid = 'AAPL.Q.0' 146 | 147 | test_df = download_yahoo_data(db_url=self.yahoo_fin_url, tsid=tsid, 148 | exchanges_df=self.exchanges_df, csv_out=self.csv_wo_data) 149 | print(test_df) 150 | self.assertGreater(len(test_df.index), 1) 151 | 152 | def query_exchanges(self): 153 | """ Retrieve the exchange symbols for yahoo and tsid, which will be used 154 | to translate the tsid symbols to yahoo symbols. Remove the symbols for 155 | which there are no yahoo symbols. 
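        Note: DISTINCT ON (tsid_symbol) is PostgreSQL-specific syntax; for
        each distinct tsid_symbol it keeps only the first row produced by
        the ORDER BY. A roughly equivalent standard-SQL sketch (illustrative
        only, not the query actually used below):

            SELECT symbol, yahoo_symbol, tsid_symbol
            FROM (SELECT *, row_number() OVER (PARTITION BY tsid_symbol
                                               ORDER BY tsid_symbol) AS rn
                  FROM exchanges
                  WHERE yahoo_symbol IS NOT NULL) t
            WHERE rn = 1;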
156 | 157 | :return: DataFrame with exchange symbols 158 | """ 159 | 160 | conn = psycopg2.connect(database=self.database, user=self.user, 161 | password=self.password, host=self.host, 162 | port=self.port) 163 | try: 164 | with conn: 165 | cur = conn.cursor() 166 | cur.execute("""SELECT DISTINCT ON (tsid_symbol) 167 | symbol, yahoo_symbol, tsid_symbol 168 | FROM exchanges 169 | WHERE yahoo_symbol IS NOT NULL 170 | AND yahoo_symbol != 'NaN' 171 | ORDER BY tsid_symbol ASC NULLS LAST""") 172 | rows = cur.fetchall() 173 | df = pd.DataFrame(rows, columns=['symbol', 'yahoo_symbol', 174 | 'tsid_symbol']) 175 | except psycopg2.Error as e: 176 | print(e) 177 | raise SystemError('Failed to query the data from the exchange ' 178 | 'table within query_exchanges in ' 179 | 'YahooFinanceDownloadTests') 180 | except conn.OperationalError: 181 | raise SystemError('Unable to connect to the SQL Database in ' 182 | 'query_exchanges in YahooFinanceDownloadTests. ' 183 | 'Make sure the database address is correct.') 184 | except Exception as e: 185 | print(e) 186 | raise SystemError('Error: Unknown issue occurred in ' 187 | 'query_exchanges in YahooFinanceDownloadTests.') 188 | 189 | conn.close() 190 | return df 191 | 192 | 193 | class NASDAQSectorIndustryExtractorTests(unittest.TestCase): 194 | 195 | def setUp(self): 196 | userdir = user_dir() 197 | self.database = userdir['postgresql']['pysecmaster_db'] 198 | self.user = userdir['postgresql']['pysecmaster_user'] 199 | self.password = userdir['postgresql']['pysecmaster_password'] 200 | self.host = userdir['postgresql']['pysecmaster_host'] 201 | self.port = userdir['postgresql']['pysecmaster_port'] 202 | 203 | self.nasdaq_sector_industry_url = 'http://www.nasdaq.com/screening/' \ 204 | 'companies-by-industry.aspx?' 205 | self.nasdaq_sector_industry_extractor_exchanges = ['NASDAQ', 'NYSE', 206 | 'AMEX'] 207 | self.nasdaq_sector_industry_redownload_time = 0 208 | 209 | def test_nasdaq_extractor(self): 210 | NASDAQSectorIndustryExtractor( 211 | database=self.database, 212 | user=self.user, 213 | password=self.password, 214 | host=self.host, 215 | port=self.port, 216 | db_url=self.nasdaq_sector_industry_url, 217 | exchange_list=self.nasdaq_sector_industry_extractor_exchanges, 218 | redownload_time=self.nasdaq_sector_industry_redownload_time) 219 | 220 | 221 | class QuandlDownloadTests(unittest.TestCase): 222 | 223 | def setUp(self): 224 | userdir = user_dir() 225 | quandl_token = userdir['quandl']['quandl_token'] 226 | db_url = ['https://www.quandl.com/api/v1/datasets/', '.csv'] 227 | self.qd = QuandlDownload(quandl_token=quandl_token, db_url=db_url) 228 | 229 | self.csv_wo_data = 'test_quandl_codes_wo_data.csv' 230 | with open(self.csv_wo_data, 'w') as csv_file: 231 | writer = csv.writer(csv_file) 232 | writer.writerow(['q_code', 'date_tried']) 233 | 234 | def tearDown(self): 235 | os.remove(self.csv_wo_data) 236 | 237 | def test_download_quandl_data(self): 238 | test_df = self.qd.download_quandl_data('WIKI/AAPL', self.csv_wo_data) 239 | print(test_df.head(5)) 240 | self.assertGreater(len(test_df.index), 1) 241 | 242 | 243 | if __name__ == '__main__': 244 | unittest.main() 245 | -------------------------------------------------------------------------------- /pySecMaster/utilities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/utilities/__init__.py 
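# A hypothetical restore counterpart to database_backup.sh below: the database
# dumps are written with pg_dump's custom format (-Fc), so pg_restore is
# needed to load them, while the globals dump (-g) is plain SQL and replays
# through psql. The file names here are illustrative only:
#
#   pg_restore -U postgres -h localhost -d pysecmaster --clean --if-exists \
#       ${database_dump_location}/pysecmaster_20180101T000000.dump
#   psql -U postgres -h localhost \
#       -f ${database_dump_location}/postgres_misc_20180101T000000.dump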
-------------------------------------------------------------------------------- /pySecMaster/utilities/database_backup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # NOTE: To automate this with cron, you must set database passwords in .pgpass 4 | # sudo nano ~/.pgpass 5 | # localhost:5432:postgres:postgres:mypass 6 | # localhost:5432:mydbname:postgres:mypass 7 | # chmod 600 ~/.pgpass 8 | 9 | # On Linux, you must make this script executable 10 | # chmod u+x ~/Programming/Code/database_backup.sh 11 | 12 | cur_time=$(date "+%Y%m%dT%H%M%S") 13 | cur_database="pysecmaster" 14 | 15 | #database_dump_location="O:/Programming/Databases/pySecMaster/postgres_dumps" 16 | database_dump_location="/mnt/backups/Programming/Databases/pySecMaster/postgres_dumps" 17 | 18 | echo "Starting the Postgres database dump for ${cur_database}" 19 | 20 | 21 | # Local database dumps: 22 | # Dump the database that is on the local machine, saving it to a local folder 23 | 24 | # Windows Backup 25 | #"C:\Program Files\PostgreSQL\9.5\bin\pg_dump" -Fc -U postgres -h localhost $cur_database > ${database_dump_location}/${cur_database}_${cur_time}.dump 26 | #"C:\Program Files\PostgreSQL\9.5\bin\pg_dumpall" -g -U postgres -h localhost > ${database_dump_location}/postgres_misc_${cur_time}.dump 27 | 28 | # Linux Backup 29 | pg_dump -Fc -U postgres -h localhost $cur_database > ${database_dump_location}/${cur_database}_${cur_time}.dump 30 | pg_dumpall -g -U postgres -h localhost > ${database_dump_location}/postgres_misc_${cur_time}.dump 31 | 32 | 33 | # Remote database dumps: 34 | # SSH into the database server to run pg_dump, saving it to the local machine 35 | 36 | #remote_user="xxxxx" 37 | #remote_ip="xxx.xxx.xxx.xxx" 38 | 39 | #ssh -o "Compression=no" ${remote_user}@${remote_ip} "pg_dump -Fc -U postgres -h localhost ${cur_database}" > ${database_dump_location}/${cur_database}_${cur_time}.dump 40 | #ssh -o "Compression=no" ${remote_user}@${remote_ip} "pg_dumpall -g -U postgres -h localhost" > ${database_dump_location}/postgres_misc_${cur_time}.dump 41 | 42 | 43 | # Find and delete ALL files that are older than the days_to_retain period 44 | 45 | days_to_retain=60 46 | 47 | find ${database_dump_location} -type f -mtime +${days_to_retain} -delete 48 | -------------------------------------------------------------------------------- /pySecMaster/utilities/database_check.py: -------------------------------------------------------------------------------- 1 | import psycopg2 2 | 3 | 4 | def postgres_test(database_options): 5 | """Test the connection to the postgres database.
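    A minimal usage sketch (illustrative values only):

        database_options = {'host': 'localhost', 'port': 5432,
                            'database': 'pysecmaster', 'user': 'pymaster',
                            'password': 'secret'}
        if postgres_test(database_options):
            print('Connection succeeded')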
6 | 7 | :param database_options: Dictionary with database parameters 8 | :return: Boolean indicating whether the database connection was successful 9 | """ 10 | 11 | host = database_options['host'] 12 | port = database_options['port'] 13 | database = database_options['database'] 14 | user = database_options['user'] 15 | password = database_options['password'] 16 | try: 17 | conn = psycopg2.connect(host=host, port=port, database=database, 18 | user=user, password=password) 19 | conn.close() 20 | return True 21 | except psycopg2.Error as e: 22 | print(e) 23 | return False 24 | -------------------------------------------------------------------------------- /pySecMaster/utilities/database_rebuilds/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/pySecMaster/utilities/database_rebuilds/__init__.py -------------------------------------------------------------------------------- /pySecMaster/utilities/database_rebuilds/convert_qcode_to_tsid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import sqlite3 4 | import time 5 | 6 | from pySecMaster import maintenance 7 | 8 | __author__ = 'Josh Schertz' 9 | __copyright__ = 'Copyright (C) 2018 Josh Schertz' 10 | __description__ = 'An automated system to store and maintain financial data.' 11 | __email__ = 'josh[AT]joshschertz[DOT]com' 12 | __license__ = 'GNU AGPLv3' 13 | __maintainer__ = 'Josh Schertz' 14 | __status__ = 'Development' 15 | __url__ = 'https://joshschertz.com/' 16 | __version__ = '1.3.2' 17 | 18 | ''' 19 | This program is free software: you can redistribute it and/or modify 20 | it under the terms of the GNU Affero General Public License as 21 | published by the Free Software Foundation, either version 3 of the 22 | License, or (at your option) any later version. 23 | 24 | This program is distributed in the hope that it will be useful, 25 | but WITHOUT ANY WARRANTY; without even the implied warranty of 26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27 | GNU Affero General Public License for more details. 28 | 29 | You should have received a copy of the GNU Affero General Public License 30 | along with this program. If not, see <http://www.gnu.org/licenses/>. 31 | ''' 32 | 33 | 34 | def query_existing_qcodes(db_location, table, verbose=False): 35 | 36 | start_time = time.time() 37 | if verbose: 38 | print('Retrieving the q_codes from %s...' % db_location) 39 | 40 | conn = sqlite3.connect(db_location) 41 | try: 42 | with conn: 43 | cur = conn.cursor() 44 | query = """SELECT q_code 45 | FROM %s 46 | GROUP BY q_code""" % table 47 | cur.execute(query) 48 | data = cur.fetchall() 49 | if data: 50 | df = pd.DataFrame(data, columns=['q_code']) 51 | if verbose: 52 | print('The query of the existing q_codes for %s took %0.1f ' 53 | 'seconds.' % (table, time.time() - start_time)) 54 | return df 55 | else: 56 | raise SystemError('Not able to determine the q_codes from the ' 57 | 'SQL query in query_existing_qcodes') 58 | except sqlite3.Error as e: 59 | conn.rollback() 60 | print('Failed to query the q_codes from %s within ' 61 | 'query_existing_qcodes' % table) 62 | print(e) 63 | except conn.OperationalError: 64 | print('Unable to connect to the SQL Database in ' 65 | 'query_existing_qcodes. 
Make sure the database ' 66 | 'address/name are correct.') 67 | except Exception as e: 68 | print(e) 69 | print('Error: Unknown issue occurred in query_existing_qcodes') 70 | 71 | 72 | def query_qcode_data(db_location, table, qcode, verbose=False): 73 | 74 | start_time = time.time() 75 | if verbose: 76 | print('Retrieving all the %s data for %s...' % (table, qcode)) 77 | 78 | conn = sqlite3.connect(db_location) 79 | try: 80 | with conn: 81 | cur = conn.cursor() 82 | # Group by date to remove any duplicate values 83 | query = """SELECT * 84 | FROM %s 85 | WHERE q_code='%s' 86 | GROUP BY date""" % (table, qcode) 87 | cur.execute(query) 88 | data = cur.fetchall() 89 | if data: 90 | daily_prices_col = ['daily_price_id', 'data_vendor_id', 91 | 'q_code', 'date', 'open', 'high', 'low', 92 | 'close', 'volume', 'ex_dividend', 93 | 'split_ratio', 'adj_open', 'adj_high', 94 | 'adj_low', 'adj_close', 'adj_volume', 95 | 'updated_date'] 96 | minute_prices_col = ['minute_price_id', 'data_vendor_id', 97 | 'q_code', 'date', 'close', 'high', 'low', 98 | 'open', 'volume', 'updated_date'] 99 | if table == 'daily_prices': 100 | df = pd.DataFrame(data, columns=daily_prices_col) 101 | elif table == 'minute_prices': 102 | df = pd.DataFrame(data, columns=minute_prices_col) 103 | else: 104 | raise SystemError('Incorrect table type provided to ' 105 | 'query_qcode_data. Valid table types ' 106 | 'include daily_prices and minute_prices') 107 | if verbose: 108 | print('The query of the %s q_code data for %s took %0.2f ' 109 | 'seconds.' % (table, qcode, time.time() - start_time)) 110 | return df 111 | else: 112 | raise SystemError('Not able to retrieve the price data from the ' 113 | 'SQL query in query_qcode_data') 114 | except sqlite3.Error as e: 115 | conn.rollback() 116 | print('Failed to query the price data from %s within query_qcode_data' % 117 | table) 118 | print(e) 119 | except conn.OperationalError: 120 | print('Unable to connect to the SQL Database in query_qcode_data. Make ' 121 | 'sure the database address/name are correct.') 122 | except Exception as e: 123 | print(e) 124 | print('Error: Unknown issue occurred in query_qcode_data') 125 | 126 | 127 | def query_symbology(db_location): 128 | 129 | conn = sqlite3.connect(db_location) 130 | try: 131 | with conn: 132 | cur = conn.cursor() 133 | cur.execute("""SELECT tsid.source_id, goog.source_id 134 | FROM symbology tsid 135 | INNER JOIN symbology goog 136 | ON tsid.symbol_id = goog.symbol_id 137 | WHERE tsid.source='tsid' 138 | AND goog.source='quandl_goog' 139 | GROUP BY goog.source_id""") 140 | data = cur.fetchall() 141 | if data: 142 | df = pd.DataFrame(data, columns=['tsid', 'goog']) 143 | return df 144 | else: 145 | raise SystemError('Not able to determine the quandl_goog codes ' 146 | 'from the SQL query in query_symbology') 147 | except sqlite3.Error as e: 148 | conn.rollback() 149 | print('Failed to query the symbology data within ' 150 | 'query_symbology') 151 | print(e) 152 | except conn.OperationalError: 153 | print('Unable to connect to the SQL Database in query_symbology. 
Make ' 154 | 'sure the database address/name are correct.') 155 | except Exception as e: 156 | print(e) 157 | print('Error: Unknown issue occurred in query_symbology') 158 | 159 | 160 | def convert_qcode_to_tsid(db_location, price_df, table, qcode): 161 | 162 | # Remove the price_id and the q_code columns 163 | if table == 'daily_prices': 164 | price_df.drop('daily_price_id', axis=1, inplace=True) 165 | elif table == 'minute_prices': 166 | price_df.drop('minute_price_id', axis=1, inplace=True) 167 | price_df.drop('q_code', axis=1, inplace=True) 168 | 169 | # Translate the q_code to a tsid 170 | sym_codes = query_symbology(db_location=db_location) 171 | tsid = sym_codes.loc[sym_codes['goog'] == qcode, 'tsid'].values 172 | if tsid: 173 | tsid = tsid[0] 174 | else: 175 | tsid = None 176 | print('Unable to find a tsid for %s' % qcode) 177 | 178 | # Add a tsid column with the appropriate tsid value 179 | price_df.insert(0, 'tsid', tsid) 180 | 181 | return price_df 182 | 183 | 184 | def df_to_sql(db_location, df, sql_table, exists, item, verbose=False): 185 | 186 | if verbose: 187 | print('Entering the data for %s into %s.' % (item, sql_table)) 188 | 189 | conn = sqlite3.connect(db_location) 190 | # Try and except block writes the new data to the SQL Database. 191 | try: 192 | # if_exists options: append new df rows, replace all table values 193 | df.to_sql(sql_table, conn, if_exists=exists, index=False) 194 | conn.execute("PRAGMA journal_mode = MEMORY") 195 | conn.execute("PRAGMA busy_timeout = 60000") 196 | if verbose: 197 | print('Successfully entered %s into %s' % (item, sql_table)) 198 | except conn.Error: 199 | conn.rollback() 200 | print("Failed to insert the DataFrame into %s for %s" % 201 | (sql_table, item)) 202 | except conn.OperationalError: 203 | raise ValueError('Unable to connect to the SQL Database in df_to_sql. ' 204 | 'Make sure the database address/name are correct.') 205 | except Exception as e: 206 | print('Error: Unknown issue when adding the DataFrame for %s to %s' % 207 | (item, sql_table)) 208 | print(e) 209 | 210 | 211 | def delete_sql_table_rows(db_location, query, table, tsid): 212 | 213 | # print('Deleting all rows in %s that fit the provided criteria' % (table,)) 214 | conn = sqlite3.connect(db_location) 215 | try: 216 | with conn: 217 | cur = conn.cursor() 218 | cur.execute(query) 219 | return 'success' 220 | except sqlite3.Error as e: 221 | conn.rollback() 222 | print(e) 223 | print('Error: Not able to delete the overlapping rows for %s in ' 224 | 'the %s table.' % (tsid, table)) 225 | return 'failure' 226 | except conn.OperationalError: 227 | print('Unable to connect to the SQL Database in delete_sql_table_rows. ' 228 | 'Make sure the database address/name are correct.') 229 | return 'failure' 230 | except Exception as e: 231 | print('Error: Unknown issue when trying to delete overlapping rows for ' 232 | '%s in the %s table.' 
% (tsid, table)) 233 | print(e) 234 | return 'failure' 235 | 236 | 237 | def insert_df_to_db(db_location, price_df, table, verbose=False): 238 | 239 | # Information about the new data 240 | tsid = price_df.loc[0, 'tsid'] 241 | max_date = price_df['date'].max() 242 | min_date = price_df['date'].min() 243 | 244 | # Check if the database table already has data for this ticker 245 | conn = sqlite3.connect(db_location) 246 | try: 247 | with conn: 248 | cur = conn.cursor() 249 | query = ("""SELECT tsid, MAX(date), MIN(date) 250 | FROM %s 251 | WHERE tsid='%s'""" % (table, tsid)) 252 | cur.execute(query) 253 | data = cur.fetchall() 254 | existing_df = pd.DataFrame(data, columns=['tsid', 'max', 'min']) 255 | except sqlite3.Error as e: 256 | conn.rollback() 257 | raise SystemError('Failed to query the existing data from %s within ' 258 | 'insert_df_to_db because of %s' % (table, e)) 259 | except conn.OperationalError: 260 | raise SystemError('Unable to connect to the SQL Database in ' 261 | 'insert_df_to_db. Make sure the database ' 262 | 'address/name are correct.') 263 | except Exception as e: 264 | raise SystemError('Error occurred in insert_df_to_db: %s' % e) 265 | 266 | # If there is existing data and the new data's date range is more extensive 267 | # than the stored data, delete the old data and add the new data 268 | if existing_df.loc[0, 'tsid']: 269 | if (max_date > existing_df.loc[0, 'max'] and 270 | min_date <= existing_df.loc[0, 'min']): 271 | if verbose: 272 | print('Replacing the %s values because it had more data than ' 273 | 'the currently stored data.' % tsid) 274 | 275 | # Delete the existing data for this tsid 276 | query = ("""DELETE FROM %s 277 | WHERE tsid='%s'""" % (table, tsid)) 278 | del_success = delete_sql_table_rows(db_location=db_location, 279 | query=query, table=table, 280 | tsid=tsid) 281 | if del_success == 'success': 282 | # Delete was successful, so insert the new data into the table 283 | df_to_sql(df=price_df, db_location=db_location, sql_table=table, 284 | exists='append', item=tsid, verbose=False) 285 | elif del_success == 'failure': 286 | # delete_sql_table_rows will issue a failure notice 287 | pass 288 | else: 289 | if verbose: 290 | print('Not inserting data for %s because duplicate data was ' 291 | 'found in the database' % tsid) 292 | else: 293 | # There is no existing data for this ticker, so insert the data 294 | df_to_sql(df=price_df, db_location=db_location, sql_table=table, 295 | exists='append', item=tsid, verbose=False) 296 | 297 | 298 | def main(verbose=False): 299 | 300 | old_db_location = 'C:/Users/Josh/Desktop/pySecMaster_m old.db' 301 | new_db_location = 'C:/Users/Josh/Desktop/pySecMaster_m.db' 302 | table = 'minute_prices' # daily_prices, minute_prices 303 | 304 | # Create a new database where the old prices will be copied to 305 | symbology_sources = ['csi_data', 'tsid', 'quandl_wiki', 'quandl_goog', 306 | 'seeking_alpha', 'yahoo'] 307 | 308 | os.chdir('..') # Need to move up a folder in order to access load_tables 309 | maintenance(database_link=new_db_location, quandl_ticker_source='csidata', 310 | database_list=['WIKI'], threads=8, quandl_key='', 311 | quandl_update_range=30, csidata_update_range=5, 312 | symbology_sources=symbology_sources) 313 | 314 | # Retrieve a list of all the tickers from the existing database table 315 | qcodes_df = query_existing_qcodes(db_location=old_db_location, 316 | table=table, verbose=True) 317 | 318 | for index, row in qcodes_df.iterrows(): 319 | ticker = row['q_code'] 320 | copy_start = time.time() 321 
| 322 | # Retrieve all price data for this ticker 323 | raw_price_df = query_qcode_data(db_location=old_db_location, 324 | table=table, qcode=ticker, 325 | verbose=False) 326 | 327 | # Change the q_code column to a tsid column 328 | clean_price_df = convert_qcode_to_tsid(db_location=new_db_location, 329 | price_df=raw_price_df, 330 | table=table, qcode=ticker) 331 | 332 | tsid = clean_price_df.loc[0, 'tsid'] 333 | # If there is no tsid, don't attempt to insert the data to the database 334 | if tsid: 335 | # Add the data to the database if there is not existing data 336 | insert_df_to_db(db_location=new_db_location, 337 | price_df=clean_price_df, table=table, verbose=True) 338 | if verbose: 339 | print('Moving the %s from %s to %s took %0.2f seconds' % 340 | (table, ticker, tsid, time.time() - copy_start)) 341 | 342 | 343 | if __name__ == '__main__': 344 | 345 | main(verbose=True) 346 | -------------------------------------------------------------------------------- /pySecMaster/utilities/database_rebuilds/sqlite_to_postgres.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import psycopg2 3 | from sqlalchemy import create_engine 4 | import sqlite3 5 | import time 6 | 7 | __author__ = 'Josh Schertz' 8 | __copyright__ = 'Copyright (C) 2018 Josh Schertz' 9 | __description__ = 'An automated system to store and maintain financial data.' 10 | __email__ = 'josh[AT]joshschertz[DOT]com' 11 | __license__ = 'GNU AGPLv3' 12 | __maintainer__ = 'Josh Schertz' 13 | __status__ = 'Development' 14 | __url__ = 'https://joshschertz.com/' 15 | __version__ = '1.4.0' 16 | 17 | ''' 18 | This program is free software: you can redistribute it and/or modify 19 | it under the terms of the GNU Affero General Public License as 20 | published by the Free Software Foundation, either version 3 of the 21 | License, or (at your option) any later version. 22 | 23 | This program is distributed in the hope that it will be useful, 24 | but WITHOUT ANY WARRANTY; without even the implied warranty of 25 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 26 | GNU Affero General Public License for more details. 27 | 28 | You should have received a copy of the GNU Affero General Public License 29 | along with this program. If not, see <http://www.gnu.org/licenses/>. 30 | ''' 31 | 32 | 33 | def query_all_tsids_from_table(database, table): 34 | """ Get a list of all unique tsids from the specified table. 35 | 36 | :param database: String of the database name and directory 37 | :param table: String of the table that should be queried from 38 | :return: List of all of the unique tsids for the specified table 39 | """ 40 | 41 | conn = sqlite3.connect(database) 42 | 43 | try: 44 | with conn: 45 | cur = conn.cursor() 46 | query = ("""SELECT tsid 47 | FROM %s 48 | GROUP BY tsid""" % (table,)) 49 | 50 | cur.execute(query) 51 | data = cur.fetchall() 52 | if data: 53 | df = pd.DataFrame(data, columns=['tsid']) 54 | df.drop_duplicates(inplace=True) 55 | 56 | tsid_unique_list = pd.unique((df['tsid']).values) 57 | return tsid_unique_list 58 | else: 59 | raise TypeError('Not able to query any tsid codes in ' 60 | 'query_all_tsids_from_table') 61 | except sqlite3.Error as e: 62 | print(e) 63 | raise TypeError('Error when trying to connect to the %s database ' 64 | 'in query_all_tsids_from_table' % database) 65 | except conn.OperationalError: 66 | raise SystemError('Unable to connect to the %s database in ' 67 | 'query_all_tsids_from_table. Make sure the database ' 68 | 'address/name are correct.' 
% database) 69 | except Exception as e: 70 | print(e) 71 | raise SystemError('Error: Unknown issue occurred in ' 72 | 'query_all_tsids_from_table') 73 | 74 | 75 | def query_all_tsid_prices(database, table, tsid): 76 | """ Query all of the data for this ticker from the sqlite database. 77 | 78 | :param database: String of the database name 79 | :param table: String of the table that should be queried from 80 | :param tsid: String of which tsid to check 81 | :return: DataFrame of all the price data for the specified tsid 82 | """ 83 | 84 | conn = sqlite3.connect(database) 85 | try: 86 | with conn: 87 | cur = conn.cursor() 88 | query = """SELECT data_vendor_id, tsid, date, close, high, low, 89 | open, volume, updated_date 90 | FROM %s 91 | WHERE tsid='%s'""" % (table, tsid) 92 | cur.execute(query) 93 | data = cur.fetchall() 94 | if data: 95 | columns = ['data_vendor_id', 'tsid', 'date', 'close', 'high', 96 | 'low', 'open', 'volume', 'updated_date'] 97 | df = pd.DataFrame(data, columns=columns) 98 | return df 99 | else: 100 | return pd.DataFrame() 101 | except sqlite3.Error as e: 102 | print(e) 103 | raise SystemError('Failed to query the %s data within ' 104 | 'query_all_tsid_prices' % tsid) 105 | except conn.OperationalError: 106 | raise SystemError('Unable to connect to the %s database in ' 107 | 'query_all_tsid_prices. Make sure the database ' 108 | 'address/name are correct.' % database) 109 | except Exception as e: 110 | print(e) 111 | raise SystemError('Error: Unknown issue occurred in ' 112 | 'query_all_tsid_prices') 113 | 114 | 115 | def delete_sql_table_rows(database, user, password, host, port, query, table, 116 | item, verbose=False): 117 | """ Execute the provided query in the specified table in the database. 118 | Normally, this will delete the existing prices over dates where the new 119 | prices would overlap. Returns a string value indicating whether the query 120 | was successfully executed. 121 | 122 | :param database: String of the database name 123 | :param user: String of the username used to login to the database 124 | :param password: String of the password used to login to the database 125 | :param host: String of the database address (localhost, url, ip, etc.) 126 | :param port: Integer of the database port number (5432) 127 | :param query: String representing the SQL query to perform on the database. 128 | :param table: String indicating which table should be worked on. 129 | :param item: String of the tsid being worked on. 130 | :param verbose: Boolean indicating whether debugging prints should occur. 131 | :return: String of either 'success' or 'failure', which the function that 132 | called this function uses to determine whether it should add new values. 133 | """ 134 | 135 | if verbose: 136 | print('Deleting all rows in %s that fit the provided criteria' % table) 137 | 138 | conn = psycopg2.connect(database=database, user=user, password=password, 139 | host=host, port=port) 140 | 141 | try: 142 | with conn: 143 | cur = conn.cursor() 144 | cur.execute(query) 145 | return 'success' 146 | except psycopg2.Error as e: 147 | conn.rollback() 148 | print(e) 149 | print('Error: Not able to delete the overlapping rows for %s in ' 150 | 'the %s table.' % (item, table)) 151 | return 'failure' 152 | except conn.OperationalError: 153 | print('Unable to connect to the %s database in delete_sql_table_rows. ' 154 | 'Make sure the database address/name are correct.' 
% database) 155 | return 'failure' 156 | except Exception as e: 157 | print('Error: Unknown issue when trying to delete overlapping rows for ' 158 | '%s in the %s table.' % (item, table)) 159 | print(e) 160 | return 'failure' 161 | 162 | 163 | def df_to_sql(database, user, password, host, port, df, sql_table, exists, 164 | item, verbose=False): 165 | """ Save a DataFrame to a specified PostgreSQL database table. 166 | 167 | :param database: String of the database name 168 | :param user: String of the username used to login to the database 169 | :param password: String of the password used to login to the database 170 | :param host: String of the database address (localhost, url, ip, etc.) 171 | :param port: Integer of the database port number (5432) 172 | :param df: Pandas DataFrame with values to insert into the SQL database. 173 | :param sql_table: String indicating which table the DataFrame should be 174 | put into. 175 | :param exists: String indicating how the DataFrame values should interact 176 | with the existing values in the table. Valid parameters include 177 | 'append' [new rows] and 'replace' [all existing table rows]. 178 | :param item: String representing the item being inserted (i.e. the tsid) 179 | :param verbose: Boolean indicating whether debugging statements should print 180 | """ 181 | 182 | if verbose: 183 | print('Entering the data for %s into the %s database.' % 184 | (item, database)) 185 | 186 | engine = create_engine('postgresql://%s:%s@%s:%s/%s' % 187 | (user, password, host, port, database)) 188 | conn = engine.connect() 189 | 190 | # Try and except block writes the new data to the SQL Database. 191 | try: 192 | # if_exists options: append new df rows, replace all table values 193 | df.to_sql(sql_table, conn, if_exists=exists, index=False) 194 | if verbose: 195 | print('Successfully entered the values into the %s database' % 196 | database) 197 | except Exception as e: 198 | print('Error: Unknown issue when adding the DataFrame to the %s ' 199 | 'database for %s' % (database, item)) 200 | print(e) 201 | 202 | 203 | def insert_df_to_db(database, user, password, host, port, price_df, table, 204 | verbose=False): 205 | """ Insert a tsid's price DataFrame into the specified database table. 206 | 207 | :param database: String of the database name 208 | :param user: String of the username used to login to the database 209 | :param password: String of the password used to login to the database 210 | :param host: String of the database address (localhost, url, ip, etc.) 
211 | :param port: Integer of the database port number (5432) 212 | :param price_df: DataFrame of the tsid values 213 | :param table: String of the table to insert the DataFrame into 214 | :param verbose: Boolean of whether to print debugging statements 215 | """ 216 | 217 | # Information about the new data 218 | tsid = price_df.loc[0, 'source_id'] 219 | max_date = price_df['date'].max() 220 | min_date = price_df['date'].min() 221 | 222 | # Check if the postgre database table already has data for this ticker 223 | conn = psycopg2.connect(database=database, user=user, password=password, 224 | host=host, port=port) 225 | try: 226 | with conn: 227 | cur = conn.cursor() 228 | query = ("""SELECT source_id AS tsid, MAX(date), MIN(date) 229 | FROM %s 230 | WHERE source_id='%s' 231 | GROUP BY source_id""" % (table, tsid)) 232 | cur.execute(query) 233 | data = cur.fetchall() 234 | existing_df = pd.DataFrame(data, columns=['tsid', 'max', 'min']) 235 | existing_df['max'] = pd.to_datetime(existing_df['max'], utc=True) 236 | existing_df['min'] = pd.to_datetime(existing_df['min'], utc=True) 237 | except psycopg2.Error as e: 238 | conn.rollback() 239 | raise SystemError('Failed to query the existing data from %s within ' 240 | 'insert_df_to_db because of %s' % (table, e)) 241 | except conn.OperationalError: 242 | raise SystemError('Unable to connect to the %s database in ' 243 | 'insert_df_to_db. Make sure the database ' 244 | 'address/name are correct.' % database) 245 | except Exception as e: 246 | raise SystemError('Error occurred in insert_df_to_db: %s' % e) 247 | 248 | # If there is existing data and the new data's date range is more extensive 249 | # than the stored data, delete the old data and add the new data 250 | # if existing_df.loc[0, 'tsid']: 251 | if len(existing_df) > 0: 252 | if (max_date > existing_df.loc[0, 'max'] and 253 | min_date <= existing_df.loc[0, 'min']): 254 | if verbose: 255 | print('Replacing the %s values because the new data had more ' 256 | 'values than the currently stored data.' % tsid) 257 | 258 | # Delete the existing data for this tsid 259 | query = ("""DELETE FROM %s 260 | WHERE source_id='%s'""" % (table, tsid)) 261 | del_success = delete_sql_table_rows(database=database, user=user, 262 | password=password, host=host, 263 | port=port, query=query, 264 | table=table, item=tsid) 265 | if del_success == 'success': 266 | # Delete was successful, so insert the new data into the table 267 | df_to_sql(database=database, user=user, password=password, 268 | host=host, port=port, df=price_df, sql_table=table, 269 | exists='append', item=tsid) 270 | elif del_success == 'failure': 271 | # delete_sql_table_rows will issue a failure notice 272 | pass 273 | else: 274 | if verbose: 275 | print('Not inserting data for %s because duplicate data was ' 276 | 'found in the %s database' % (tsid, database)) 277 | else: 278 | # There is no existing data for this ticker, so insert the data 279 | df_to_sql(database=database, user=user, password=password, host=host, 280 | port=port, df=price_df, sql_table=table, exists='append', 281 | item=tsid) 282 | 283 | 284 | def main(verbose=False): 285 | """ Move all values from the minute_prices table of the sqlite_database 286 | to the postgre database. 
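    Per tsid, the transformation is roughly (illustrative sketch):

        sqlite columns:   data_vendor_id, tsid, date, ...
        postgres columns: data_vendor_id, source='tsid', source_id, date, ...

    i.e. a constant 'source' column is inserted, the 'tsid' column is
    renamed to 'source_id', and the date strings are parsed into UTC
    timestamps before the DataFrame is handed to insert_df_to_db.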
287 | 288 | :param verbose: Boolean of whether to print debugging statements 289 | """ 290 | 291 | sqlite_database = 'C:/Users/joshs/Programming/Databases/pySecMaster/' \ 292 | 'pySecMaster_m.db' 293 | 294 | from utilities.user_dir import user_dir 295 | userdir = user_dir() 296 | postgre_database = userdir['postgresql']['pysecmaster_db'] 297 | postgre_user = userdir['postgresql']['pysecmaster_user'] 298 | postgre_password = userdir['postgresql']['pysecmaster_password'] 299 | postgre_host = userdir['postgresql']['pysecmaster_host'] 300 | postgre_port = userdir['postgresql']['pysecmaster_port'] 301 | 302 | table = 'minute_prices' 303 | 304 | # Get a list of unique tsids from the sqlite database's table 305 | tsid_list = query_all_tsids_from_table(database=sqlite_database, 306 | table=table) 307 | 308 | # Iterate through each tsid from the unique list 309 | for tsid in tsid_list: 310 | 311 | tsid_start = time.time() 312 | 313 | # Query all of the tsid's table prices from the sqlite database 314 | existing_price_df = query_all_tsid_prices(database=sqlite_database, 315 | table=table, tsid=tsid) 316 | 317 | if len(existing_price_df) > 0: 318 | 319 | # Add the source column using 'tsid' between data_vendor_id and tsid 320 | existing_price_df.insert(1, 'source', 'tsid') 321 | 322 | # Change the tsid column name to 'source_id' 323 | existing_price_df.rename(columns={'tsid': 'source_id'}, 324 | inplace=True) 325 | 326 | # existing_price_df['date'].tz_localize('UTC') 327 | existing_price_df['date'] = \ 328 | pd.to_datetime(existing_price_df['date'], utc=True) 329 | 330 | # print(existing_price_df.head()) 331 | # existing_price_df.to_csv('%s_min_prices.csv' % tsid) 332 | 333 | # Insert all the prices into the postgre database 334 | insert_df_to_db(database=postgre_database, user=postgre_user, 335 | password=postgre_password, host=postgre_host, 336 | port=postgre_port, price_df=existing_price_df, 337 | table=table, verbose=verbose) 338 | else: 339 | # Should never happen, since the tsid wouldn't have been used anyway 340 | if verbose: 341 | print('No existing data found in the Sqlite3 database for %s' % 342 | tsid) 343 | 344 | if verbose: 345 | print('Verifying the %s times for %s took %0.2f seconds' % 346 | (table, tsid, time.time() - tsid_start)) 347 | 348 | 349 | if __name__ == '__main__': 350 | 351 | main(verbose=True) 352 | -------------------------------------------------------------------------------- /pySecMaster/utilities/database_rebuilds/verify_min_times.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | from dateutil import tz 3 | import time 4 | 5 | import os 6 | import pandas as pd 7 | import sqlite3 8 | 9 | from pySecMaster import maintenance 10 | 11 | 12 | __author__ = 'Josh Schertz' 13 | __copyright__ = 'Copyright (C) 2018 Josh Schertz' 14 | __description__ = 'An automated system to store and maintain financial data.' 15 | __email__ = 'josh[AT]joshschertz[DOT]com' 16 | __license__ = 'GNU AGPLv3' 17 | __maintainer__ = 'Josh Schertz' 18 | __status__ = 'Development' 19 | __url__ = 'https://joshschertz.com/' 20 | __version__ = '1.3.2' 21 | 22 | ''' 23 | This program is free software: you can redistribute it and/or modify 24 | it under the terms of the GNU Affero General Public License as 25 | published by the Free Software Foundation, either version 3 of the 26 | License, or (at your option) any later version. 
27 | 28 | This program is distributed in the hope that it will be useful, 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 31 | GNU Affero General Public License for more details. 32 | 33 | You should have received a copy of the GNU Affero General Public License 34 | along with this program. If not, see <http://www.gnu.org/licenses/>. 35 | ''' 36 | 37 | 38 | def query_existing_tsids(db_location, table, verbose=False): 39 | 40 | start_time = time.time() 41 | if verbose: 42 | print('Retrieving the tsids from %s...' % db_location) 43 | 44 | conn = sqlite3.connect(db_location) 45 | try: 46 | with conn: 47 | cur = conn.cursor() 48 | query = """SELECT tsid 49 | FROM %s 50 | GROUP BY tsid""" % table 51 | cur.execute(query) 52 | data = cur.fetchall() 53 | if data: 54 | df = pd.DataFrame(data, columns=['tsid']) 55 | if verbose: 56 | print('The query of the existing tsids for %s took %0.1f ' 57 | 'seconds.' % (table, time.time() - start_time)) 58 | return df 59 | else: 60 | raise SystemError('Not able to determine the tsids from the ' 61 | 'SQL query in query_existing_tsids') 62 | except sqlite3.Error as e: 63 | conn.rollback() 64 | print('Failed to query the tsids from %s within ' 65 | 'query_existing_tsids' % table) 66 | print(e) 67 | except conn.OperationalError: 68 | print('Unable to connect to the SQL Database in ' 69 | 'query_existing_tsids. Make sure the database ' 70 | 'address/name are correct.') 71 | except Exception as e: 72 | print(e) 73 | print('Error: Unknown issue occurred in query_existing_tsids') 74 | 75 | 76 | def query_tsid_data(db_location, table, tsid, verbose=False): 77 | 78 | start_time = time.time() 79 | if verbose: 80 | print('Retrieving all the %s data for %s...' % (table, tsid)) 81 | 82 | conn = sqlite3.connect(db_location) 83 | try: 84 | with conn: 85 | cur = conn.cursor() 86 | query = """SELECT * 87 | FROM %s 88 | WHERE tsid='%s'""" % (table, tsid) 89 | cur.execute(query) 90 | data = cur.fetchall() 91 | if data: 92 | minute_prices_col = ['minute_price_id', 'data_vendor_id', 93 | 'tsid', 'date', 'close', 'high', 'low', 94 | 'open', 'volume', 'updated_date'] 95 | if table == 'minute_prices': 96 | df = pd.DataFrame(data, columns=minute_prices_col) 97 | else: 98 | raise SystemError('Incorrect table type provided to ' 99 | 'query_tsid_data. Valid table types ' 100 | 'include minute_prices.') 101 | if verbose: 102 | print('The query of the %s tsid data for %s took %0.2f ' 103 | 'seconds.' % (table, tsid, time.time() - start_time)) 104 | return df 105 | else: 106 | raise SystemError('Not able to determine the tsid from the ' 107 | 'SQL query in query_tsid_data') 108 | except sqlite3.Error as e: 109 | conn.rollback() 110 | print('Failed to query the price data from %s within query_tsid_data' % 111 | table) 112 | print(e) 113 | except conn.OperationalError: 114 | print('Unable to connect to the SQL Database in query_tsid_data. Make ' 115 | 'sure the database address/name are correct.') 116 | except Exception as e: 117 | print(e) 118 | print('Error: Unknown issue occurred in query_tsid_data') 119 | 120 | 121 | def verify_minute_time(price_df, tsid): 122 | """ 123 | Determine if each period's time is correct. First this calculates the 124 | correct start and end time for each day. These times are compared to the 125 | existing times, and if there is any discrepancy, the difference is applied 126 | to the times. 
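    For example (hypothetical values): if a day's bars should start at
    9:30AM EST (14:30 UTC) but the stored bars run from 13:30 to 20:00 UTC,
    both the start and end deltas are +1 hour, so every bar in that day is
    shifted forward by 3600 seconds.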
127 | 128 | :param price_df: DataFrame of the original times 129 | :param tsid: String of the current tsid 130 | :return: DataFrame with adjusted times 131 | """ 132 | 133 | def split_day(row, column): 134 | date = row[column] 135 | return date[:10] 136 | 137 | nyc_tz = tz.gettz('America/New_York') 138 | utc_tz = tz.gettz('UTC') 139 | 140 | minute_prices_col = ['data_vendor_id', 'tsid', 'date', 'close', 'high', 141 | 'low', 'open', 'volume', 'updated_date'] 142 | 143 | price_df['day'] = price_df.apply(split_day, axis=1, args=('date',)) 144 | price_df['date_obj'] = pd.to_datetime(price_df['date']) 145 | 146 | # Build a unique list of all the days (YYYY-MM-DD) 147 | unique_days = pd.unique(price_df['day']) 148 | 149 | # Calculate each day's time adjustments to use (in seconds) 150 | day_adjustments = {} 151 | for day in unique_days: 152 | # Retrieve the existing start and end times for this day 153 | day_price_df = price_df.loc[price_df['day'] == day] 154 | start_time = day_price_df.loc[day_price_df['date_obj'].idxmin(), 155 | 'date_obj'] 156 | start_time_utc = start_time.replace(tzinfo=utc_tz) 157 | end_time = day_price_df.loc[day_price_df['date_obj'].idxmax(), 158 | 'date_obj'] 159 | end_time_utc = end_time.replace(tzinfo=utc_tz) 160 | 161 | # Calculate the actual start and end times for this day 162 | working_day = datetime.strptime(day, '%Y-%m-%d') 163 | 164 | # Market opens at 9:30AM EST 165 | actual_start_time = working_day.replace(hour=9, minute=30, tzinfo=nyc_tz) 166 | actual_start_time_utc = actual_start_time.astimezone(tz=utc_tz) 167 | 168 | if day in ['2015-11-27', '2015-12-24']: 169 | # Market closes at 1PM EST 170 | actual_end_time = working_day.replace(hour=13, tzinfo=nyc_tz) 171 | else: 172 | # Market closes at 4PM EST 173 | actual_end_time = working_day.replace(hour=16, tzinfo=nyc_tz) 174 | actual_end_time_utc = actual_end_time.astimezone(tz=utc_tz) 175 | 176 | start_time_delta = actual_start_time_utc - start_time_utc 177 | end_time_delta = actual_end_time_utc - end_time_utc 178 | 179 | start_end_delta = abs(end_time_delta) - abs(start_time_delta) 180 | 181 | # The end time delta is viewed as higher quality, as the first time 182 | # period could be missing (which would mess up the time adjustment). 183 | if abs(start_end_delta.total_seconds()) < (15*60): 184 | # Normal: start and end time delta are within 15 min of each other. 185 | # Adjust all times by the end time delta. 186 | time_delta = end_time_delta.total_seconds() 187 | elif abs(end_time_delta.total_seconds()) < (15*60): 188 | # If the end time delta is within 15 min of the actual, use its 189 | # delta to adjust all the times. Indicates the start time is 190 | # missing 191 | time_delta = end_time_delta.total_seconds() 192 | elif abs(start_time_delta.total_seconds()) < (15*60): 193 | # Shouldn't ever happen. The end time delta is greater than 15 min 194 | # from the actual (missing), so if the start time is within 15 195 | # min from the actual start, use its delta to adjust all times. 196 | time_delta = start_time_delta.total_seconds() 197 | else: 198 | # Shouldn't ever happen. Both the start and end time delta are 199 | # greater than 15 min from the actual times. No way to determine 200 | # what to adjust by, so just adjust using the end time delta. 201 | time_delta = end_time_delta.total_seconds() 202 | print("Don't know what time delta to use for %s on %s. Using the " 203 | "end delta." 
% (tsid, day)) 204 | 205 | day_adjustments[day] = int(time_delta) 206 | 207 | updated_times = [] 208 | # Add the calculated time adjustments to the existing time 209 | for index, row in price_df.iterrows(): 210 | 211 | # Check if this time is the first of the day 212 | prior_date = price_df.iloc[(index - 1), price_df.columns.get_loc('day')] 213 | if prior_date != row['day']: 214 | # This is the first time for this day 215 | try: 216 | next_time = price_df.iloc[(index + 1), 217 | price_df.columns.get_loc('date_obj')] 218 | except IndexError: 219 | pass 220 | finally: 221 | # -60 indicates the next period is 1 min away (normal). 222 | # +3480, +3300 indicates that the next period is behind this time 223 | # by roughly an hour. 224 | next_time_delta = row['date_obj'] - next_time 225 | 226 | if int(next_time_delta.total_seconds()) >= (10*60): 227 | # This time period was not affected by the unix bug, but 228 | # needs to be moved back by the next time period delta 229 | # (plus 1 min) so it will be aligned with the other times, 230 | # so that when the adjustment occurs below, all times 231 | # will be aligned. 232 | price_df.loc[index, 'date_obj'] -= \ 233 | timedelta(seconds=next_time_delta.total_seconds() + 60) 234 | 235 | day_time_delta = day_adjustments[row['day']] 236 | cur_time = {} 237 | new_date = (price_df.loc[index, 'date_obj'] + 238 | timedelta(seconds=day_time_delta)) 239 | cur_time['minute_price_id'] = row['minute_price_id'] 240 | cur_time['date'] = new_date.isoformat() 241 | updated_times.append(cur_time) 242 | 243 | updated_time_df = pd.DataFrame(updated_times, 244 | columns=['minute_price_id', 'date']) 245 | 246 | price_df.drop('date', axis=1, inplace=True) 247 | price_df.drop('day', axis=1, inplace=True) 248 | price_df.drop('date_obj', axis=1, inplace=True) 249 | price_df.drop('updated_date', axis=1, inplace=True) 250 | 251 | new_price_df = pd.merge(price_df, updated_time_df, on=['minute_price_id']) 252 | 253 | new_price_df.drop('minute_price_id', axis=1, inplace=True) 254 | new_price_df.insert(len(updated_time_df.columns), 'updated_date', 255 | datetime.utcnow().isoformat()) 256 | 257 | # Rearrange the DataFrame columns based on the minute_prices_col list 258 | new_price_df = new_price_df.loc[:, minute_prices_col] 259 | 260 | return new_price_df 261 | 262 | 263 | def update_db_times(db_location, table, price_df): 264 | """ 265 | For any periods that were changed, update the database values based on the 266 | minute_price_id variable. 
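    Note that this opens a new sqlite3 connection and issues one UPDATE per
    changed row, which is simple but slow on large tables. A batched
    alternative (an illustrative sketch, not the original behavior):

        with sqlite3.connect(db_location) as conn:
            conn.executemany("UPDATE %s SET date=?, updated_date=? "
                             "WHERE minute_price_id=?" % table,
                             [(row['date'].isoformat(), row['updated_date'],
                               row['minute_price_id'])
                              for index, row in price_df.iterrows()
                              if row['updated_date']])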
267 | 268 | :param db_location: String of the database location 269 | :param table: String of the database table to be updated 270 | :param price_df: DataFrame of one symbol's times that are corrected 271 | """ 272 | 273 | for index, row in price_df.iterrows(): 274 | 275 | min_price_id = row['minute_price_id'] 276 | date = row['date'].isoformat() 277 | updated_date = row['updated_date'] 278 | 279 | if updated_date: 280 | # Only update the database time if the existing time was updated, 281 | # which is indicated by the updated_date variable not being None 282 | 283 | conn = sqlite3.connect(db_location) 284 | try: 285 | with conn: 286 | cur = conn.cursor() 287 | cur.execute("""UPDATE %s 288 | SET date='%s', updated_date='%s' 289 | WHERE minute_price_id='%s'""" % 290 | (table, date, updated_date, min_price_id)) 291 | conn.commit() 292 | except sqlite3.Error as e: 293 | conn.rollback() 294 | raise SystemError('Failed to update the times in %s within ' 295 | 'update_db_times because of %s' % (table, e)) 296 | except conn.OperationalError: 297 | raise SystemError('Unable to connect to the SQL Database in ' 298 | 'update_db_times. Make sure the database ' 299 | 'address/name are correct.') 300 | except Exception as e: 301 | raise SystemError('Error occurred in update_db_times: %s' % e) 302 | 303 | 304 | def df_to_sql(db_location, df, sql_table, exists, item, verbose=False): 305 | 306 | if verbose: 307 | print('Entering the data for %s into %s.' % (item, sql_table)) 308 | 309 | conn = sqlite3.connect(db_location) 310 | # Try and except block writes the new data to the SQL Database. 311 | try: 312 | # if_exists options: append new df rows, replace all table values 313 | df.to_sql(sql_table, conn, if_exists=exists, index=False) 314 | conn.execute("PRAGMA journal_mode = MEMORY") 315 | conn.execute("PRAGMA busy_timeout = 60000") 316 | if verbose: 317 | print('Successfully entered %s into %s' % (item, sql_table)) 318 | except conn.Error: 319 | conn.rollback() 320 | print("Failed to insert the DataFrame into %s for %s" % 321 | (sql_table, item)) 322 | except conn.OperationalError: 323 | raise ValueError('Unable to connect to the SQL Database in df_to_sql. ' 324 | 'Make sure the database address/name are correct.') 325 | except Exception as e: 326 | print('Error: Unknown issue when adding the DataFrame for %s to %s' % 327 | (item, sql_table)) 328 | print(e) 329 | 330 | 331 | def delete_sql_table_rows(db_location, query, table, tsid): 332 | 333 | # print('Deleting all rows in %s that fit the provided criteria' % (table,)) 334 | conn = sqlite3.connect(db_location) 335 | try: 336 | with conn: 337 | cur = conn.cursor() 338 | cur.execute(query) 339 | return 'success' 340 | except sqlite3.Error as e: 341 | conn.rollback() 342 | print(e) 343 | print('Error: Not able to delete the overlapping rows for %s in ' 344 | 'the %s table.' % (tsid, table)) 345 | return 'failure' 346 | except conn.OperationalError: 347 | print('Unable to connect to the SQL Database in delete_sql_table_rows. ' 348 | 'Make sure the database address/name are correct.') 349 | return 'failure' 350 | except Exception as e: 351 | print('Error: Unknown issue when trying to delete overlapping rows for ' 352 | '%s in the %s table.' 
330 | 
331 | def delete_sql_table_rows(db_location, query, table, tsid):
332 | 
333 |     # print('Deleting all rows in %s that fit the provided criteria' % (table,))
334 |     conn = sqlite3.connect(db_location)
335 |     try:
336 |         with conn:
337 |             cur = conn.cursor()
338 |             cur.execute(query)
339 |         return 'success'
340 |     except sqlite3.OperationalError:
341 |         print('Unable to connect to the SQL Database in delete_sql_table_rows. '
342 |               'Make sure the database address/name are correct.')
343 |         return 'failure'
344 |     except sqlite3.Error as e:
345 |         conn.rollback()
346 |         print(e)
347 |         print('Error: Not able to delete the overlapping rows for %s in '
348 |               'the %s table.' % (tsid, table))
349 |         return 'failure'
350 |     except Exception as e:
351 |         print('Error: Unknown issue when trying to delete overlapping rows '
352 |               'for %s in the %s table.' % (tsid, table))
353 |         print(e)
354 |         return 'failure'
355 | 
356 | 
357 | def insert_df_to_db(db_location, price_df, table, verbose=False):
358 | 
359 |     # Information about the new data
360 |     tsid = price_df.loc[0, 'tsid']
361 |     max_date = price_df['date'].max()
362 |     min_date = price_df['date'].min()
363 | 
364 |     # Check if the database table already has data for this ticker
365 |     conn = sqlite3.connect(db_location)
366 |     try:
367 |         with conn:
368 |             cur = conn.cursor()
369 |             query = ("""SELECT tsid, MAX(date), MIN(date)
370 |                         FROM %s
371 |                         WHERE tsid=?""" % table)
372 |             cur.execute(query, (tsid,))
373 |             data = cur.fetchall()
374 |             existing_df = pd.DataFrame(data, columns=['tsid', 'max', 'min'])
375 |     except sqlite3.OperationalError:
376 |         raise SystemError('Unable to connect to the SQL Database in '
377 |                           'insert_df_to_db. Make sure the database '
378 |                           'address/name are correct.')
379 |     except sqlite3.Error as e:
380 |         conn.rollback()
381 |         raise SystemError('Failed to query the existing data from %s within '
382 |                           'insert_df_to_db because of %s' % (table, e))
383 |     except Exception as e:
384 |         raise SystemError('Error occurred in insert_df_to_db: %s' % e)
385 | 
386 |     # If there is existing data and the new data's date range is more extensive
387 |     # than the stored data, delete the old data and add the new data
388 |     if existing_df.loc[0, 'tsid']:
389 |         if (max_date > existing_df.loc[0, 'max'] and
390 |                 min_date <= existing_df.loc[0, 'min']):
391 |             if verbose:
392 |                 print('Replacing the %s values because the new data has a '
393 |                       'wider date range than the stored data.' % tsid)
394 | 
395 |             # Delete the existing data for this tsid
396 |             query = ("""DELETE FROM %s
397 |                         WHERE tsid='%s'""" % (table, tsid))
398 |             del_success = delete_sql_table_rows(db_location=db_location,
399 |                                                 query=query, table=table,
400 |                                                 tsid=tsid)
401 |             if del_success == 'success':
402 |                 # Delete was successful, so insert the new data into the table
403 |                 df_to_sql(df=price_df, db_location=db_location, sql_table=table,
404 |                           exists='append', item=tsid, verbose=False)
405 |             elif del_success == 'failure':
406 |                 # delete_sql_table_rows will issue a failure notice
407 |                 pass
408 |         else:
409 |             if verbose:
410 |                 print('Not inserting data for %s because duplicate data was '
411 |                       'found in the database' % tsid)
412 |     else:
413 |         # There is no existing data for this ticker, so insert the data
414 |         df_to_sql(df=price_df, db_location=db_location, sql_table=table,
415 |                   exists='append', item=tsid, verbose=False)
416 | 
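# A short sketch of the replace-or-skip rule implemented above, using
# hypothetical dates and variable names: new data spanning 2016-01-04 to
# 2016-06-30 replaces stored data spanning 2016-01-04 to 2016-03-31, since
# the new range is wider; new data covering only a subset of the stored
# range is skipped as a duplicate.
#
#   >>> insert_df_to_db(db_location='pySecMaster_m.db', table='minute_prices',
#   ...                 price_df=updated_price_df, verbose=True)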
417 | 
418 | def main(verbose=False):
419 | 
420 |     old_db_location = 'C:/Users/joshs/Programming/Databases/pySecMaster/' \
421 |                       'pySecMaster_m old.db'
422 |     new_db_location = 'C:/Users/joshs/Programming/Databases/pySecMaster/' \
423 |                       'pySecMaster_m new.db'
424 |     table = 'minute_prices'
425 | 
426 |     # Create a new pySecMaster minute database
427 |     symbology_sources = ['csi_data', 'tsid', 'quandl_wiki', 'quandl_goog',
428 |                          'seeking_alpha', 'yahoo']
429 |     os.chdir('..')  # Need to move up a folder in order to access load_tables
430 |     maintenance(database_link=new_db_location, quandl_ticker_source='csidata',
431 |                 database_list=['WIKI'], threads=8, quandl_key='',
432 |                 quandl_update_range=30, csidata_update_range=5,
433 |                 symbology_sources=symbology_sources)
434 | 
435 |     # Retrieve a list of all existing tsids
436 |     current_tsid_df = query_existing_tsids(db_location=old_db_location,
437 |                                            table=table, verbose=verbose)
438 | 
439 |     # Cycle through each tsid
440 |     for index, row in current_tsid_df.iterrows():
441 |         tsid = row['tsid']
442 |         tsid_start = time.time()
443 | 
444 |         # Query the existing tsid's price times
445 |         existing_price_df = query_tsid_data(db_location=old_db_location,
446 |                                             table=table, tsid=tsid,
447 |                                             verbose=verbose)
448 | 
449 |         # Change any incorrect times to best guess times (98% confident)
450 |         updated_price_df = verify_minute_time(price_df=existing_price_df,
451 |                                               tsid=tsid)
452 | 
453 |         # Update the database times with the corrected times
454 |         insert_df_to_db(db_location=new_db_location, table=table,
455 |                         price_df=updated_price_df, verbose=verbose)
456 | 
457 |         if verbose:
458 |             print('Verifying the %s times for %s took %0.2f seconds' %
459 |                   (table, tsid, time.time() - tsid_start))
460 | 
461 | if __name__ == '__main__':
462 | 
463 |     main(verbose=True)
464 | 
--------------------------------------------------------------------------------
/pySecMaster/utilities/date_conversions.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | 
3 | __author__ = 'Josh Schertz'
4 | __copyright__ = 'Copyright (C) 2018 Josh Schertz'
5 | __description__ = 'An automated system to store and maintain financial data.'
6 | __email__ = 'josh[AT]joshschertz[DOT]com'
7 | __license__ = 'GNU AGPLv3'
8 | __maintainer__ = 'Josh Schertz'
9 | __status__ = 'Development'
10 | __url__ = 'https://joshschertz.com/'
11 | __version__ = '1.5.0'
12 | 
13 | '''
14 |     This program is free software: you can redistribute it and/or modify
15 |     it under the terms of the GNU Affero General Public License as
16 |     published by the Free Software Foundation, either version 3 of the
17 |     License, or (at your option) any later version.
18 | 
19 |     This program is distributed in the hope that it will be useful,
20 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
21 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 |     GNU Affero General Public License for more details.
23 | 
24 |     You should have received a copy of the GNU Affero General Public License
25 |     along with this program. If not, see <http://www.gnu.org/licenses/>.
26 | '''
27 | 
28 | 
29 | def dt_from_iso(row, column):
30 |     """
31 |     Changes the ISO 8601 date string to a datetime object.
32 |     """
33 | 
34 |     iso = row[column]
35 |     try:
36 |         return datetime.strptime(iso, '%Y-%m-%dT%H:%M:%S.%f')
37 |     except ValueError:
38 |         return datetime.strptime(iso, '%Y-%m-%dT%H:%M:%S')
39 |     except TypeError:
40 |         # Occurs if there is no date provided ("nan")
41 |         return 'NaN'
42 | 
43 | 
44 | def date_to_iso(row, column):
45 |     """
46 |     Changes the default date format of "YYYY-MM-DD" to an ISO 8601 format.
47 |     """
48 | 
49 |     raw_date = row[column]
50 |     try:
51 |         raw_date_obj = datetime.strptime(raw_date, '%Y-%m-%d')
52 |     except TypeError:  # Occurs if there is no date provided ("nan")
53 |         raw_date_obj = datetime.today()
54 |     return raw_date_obj.isoformat()
55 | 
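A short usage sketch for the two converters above, with a hypothetical DataFrame and assuming the pySecMaster directory is on the path; both helpers are written for pandas' DataFrame.apply with axis=1:

    import pandas as pd
    from utilities.date_conversions import date_to_iso, dt_from_iso

    df = pd.DataFrame({'date': ['2016-01-04', '2016-01-05']})
    # "YYYY-MM-DD" strings -> ISO 8601 strings
    df['date'] = df.apply(date_to_iso, axis=1, args=('date',))
    # ISO 8601 strings -> datetime objects, ready for time arithmetic
    df['date_obj'] = df.apply(dt_from_iso, axis=1, args=('date',))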
--------------------------------------------------------------------------------
/pySecMaster/utilities/multithread.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import Pool
2 | 
3 | __author__ = 'Josh Schertz'
4 | __copyright__ = 'Copyright (C) 2018 Josh Schertz'
5 | __description__ = 'An automated system to store and maintain financial data.'
6 | __email__ = 'josh[AT]joshschertz[DOT]com'
7 | __license__ = 'GNU AGPLv3'
8 | __maintainer__ = 'Josh Schertz'
9 | __status__ = 'Development'
10 | __url__ = 'https://joshschertz.com/'
11 | __version__ = '1.5.0'
12 | 
13 | '''
14 |     This program is free software: you can redistribute it and/or modify
15 |     it under the terms of the GNU Affero General Public License as
16 |     published by the Free Software Foundation, either version 3 of the
17 |     License, or (at your option) any later version.
18 | 
19 |     This program is distributed in the hope that it will be useful,
20 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
21 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 |     GNU Affero General Public License for more details.
23 | 
24 |     You should have received a copy of the GNU Affero General Public License
25 |     along with this program. If not, see <http://www.gnu.org/licenses/>.
26 | '''
27 | 
28 | 
29 | def multithread(function, items, threads=4):
30 |     """ Runs the given function over each item in parallel and returns the
31 |     collected results.
32 | 
33 |     :param function: The function to process in parallel.
34 |     :param items: A list of items; each one is passed to the function as
35 |         its single argument.
36 |     :param threads: The number of pool workers to use. The default is 4;
37 |         the count does not need to match the number of CPU cores.
38 |     :return: A list of the function's results, in the order of the items.
39 |     """
40 | 
41 |     # Use the async variant, which submits all the tasks at once and
42 |     # retrieves the results as soon as they are done
43 |     pool = Pool(threads)
44 |     output = [pool.apply_async(function, args=(item,)) for item in items]
45 |     results = [p.get() for p in output]
46 |     pool.close()
47 |     pool.join()
48 | 
49 |     return results
50 | 
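A minimal usage sketch with a hypothetical worker function. Despite the module's name, Pool starts separate worker processes, so the function must be defined at module level (picklable), and callers on Windows need the __main__ guard:

    from utilities.multithread import multithread

    def square(x):
        return x * x

    if __name__ == '__main__':
        results = multithread(square, [1, 2, 3, 4], threads=2)
        # results == [1, 4, 9, 16], in the same order as the input items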
--------------------------------------------------------------------------------
/pySecMaster/utilities/user_dir.py:
--------------------------------------------------------------------------------
1 | import getpass
2 | 
3 | 
4 | def user_dir():
5 |     """ This function returns the relevant file directories and passwords for
6 |     the current system user in a dictionary. """
7 | 
8 |     if getpass.getuser() == 'root':
9 |         # Docker container will use these variables
10 | 
11 |         load_tables = '/load_tables'
12 | 
13 |         # PostgreSQL default database information
14 |         main_db = 'postgres'
15 |         main_user = 'postgres'
16 |         main_password = 'correct horse battery staple'  # Change this!!
17 |         main_host = 'postgres_pysecmaster'  # the docker container name
18 |         main_port = '5432'
19 | 
20 |         # PostgreSQL pysecmaster database information
21 |         pysecmaster_db = 'pysecmaster'
22 |         pysecmaster_user = 'pymaster'
23 |         pysecmaster_password = 'correct horse battery staple'  # Change this!!
24 |         pysecmaster_host = 'postgres_pysecmaster'  # the docker container name
25 |         pysecmaster_port = '5432'
26 | 
27 |         # Quandl information
28 |         quandl_token = 'XXXXXXXXX'  # Keep this secret!!
29 | 
30 |     elif getpass.getuser() == 'josh':
31 |         # Local user will use these variables
32 | 
33 |         load_tables = '/load_tables'
34 | 
35 |         # PostgreSQL default database information
36 |         main_db = 'postgres'
37 |         main_user = 'postgres'
38 |         main_password = 'correct horse battery staple'  # Change this!!
39 |         main_host = '127.0.0.1'
40 |         main_port = '5432'
41 | 
42 |         # PostgreSQL pysecmaster database information
43 |         pysecmaster_db = 'pysecmaster'
44 |         pysecmaster_user = 'pymaster'
45 |         pysecmaster_password = 'correct horse battery staple'  # Change this!!
46 |         pysecmaster_host = '127.0.0.1'
47 |         pysecmaster_port = '5432'
48 | 
49 |         # Quandl information
50 |         quandl_token = 'XXXXXXXXX'  # Keep this secret!!
51 | 
52 |     else:
53 |         raise NotImplementedError('Need to set data variables for user %s in '
54 |                                   'pySecMaster/utilities/user_dir.py' %
55 |                                   getpass.getuser())
56 | 
57 |     return {'load_tables': load_tables,
58 |             'postgresql':
59 |                 {'main_db': main_db,
60 |                  'main_user': main_user,
61 |                  'main_password': main_password,
62 |                  'main_host': main_host,
63 |                  'main_port': main_port,
64 |                  'pysecmaster_db': pysecmaster_db,
65 |                  'pysecmaster_user': pysecmaster_user,
66 |                  'pysecmaster_password': pysecmaster_password,
67 |                  'pysecmaster_host': pysecmaster_host,
68 |                  'pysecmaster_port': pysecmaster_port,
69 |                  },
70 |             'quandl':
71 |                 {'quandl_token': quandl_token},
72 |             }
73 | 
--------------------------------------------------------------------------------
/table_structure.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/camisatx/pySecMaster/3b4704e5e678d232cff7898f8152720e3a30913f/table_structure.xlsx
--------------------------------------------------------------------------------
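For reference, a hedged sketch of how the user_dir() settings above are typically consumed when opening a PostgreSQL connection (assuming the psycopg2 driver; the dictionary keys match user_dir.py):

    import psycopg2
    from utilities.user_dir import user_dir

    pg = user_dir()['postgresql']
    conn = psycopg2.connect(database=pg['pysecmaster_db'],
                            user=pg['pysecmaster_user'],
                            password=pg['pysecmaster_password'],
                            host=pg['pysecmaster_host'],
                            port=pg['pysecmaster_port'])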