├── results └── .gitkeep ├── tests ├── __init__.py ├── data │ ├── __init__.py │ └── test_data.py ├── experiments │ ├── __init__.py │ └── test_simple_experiment.py ├── test_indicators.py └── test_filters.py ├── ta_scanner ├── __init__.py ├── data │ ├── __init__.py │ ├── base_connector.py │ ├── csv_file_fetcher.py │ ├── constants.py │ ├── ib.py │ └── data.py ├── experiements │ ├── __init__.py │ └── simple.py ├── experiments │ ├── __init__.py │ └── simple_experiment.py ├── version.py ├── signals.py ├── reports.py ├── models.py ├── filters.py └── indicators.py ├── .github ├── PULL_REQUEST_TEMPLATE.md ├── ISSUE_TEMPLATE.md └── workflows │ └── conda-run-tests.yml ├── lab ├── csv_file_load.py ├── query_futures_data.py ├── fetch_futures_data.py ├── experiment_futures.py └── TradingDay.ipynb ├── .vscode └── settings.json ├── RELEASE_NOTES.md ├── docker-compose.yml ├── Makefile ├── requirements.dev ├── environment.yml ├── examples ├── db_stocks.py ├── db_futures.py ├── moving_average_crossover_stocks.py ├── moving_average_crossover_futures.py ├── moving_average_crossover_range_stocks.py ├── moving_average_crossover_range_futures.py └── combine_indcators.py ├── .gitignore ├── LICENSE ├── setup.py └── README.md /results/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ta_scanner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/ta_scanner/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ta_scanner/experiements/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ta_scanner/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ta_scanner/version.py: -------------------------------------------------------------------------------- 1 | VERSION = "0.0.1" 2 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | changes include: 2 | - 3 | -------------------------------------------------------------------------------- /ta_scanner/signals.py: -------------------------------------------------------------------------------- 1 | class Signal: 2 | def __init__(self): 3 | pass 4 | -------------------------------------------------------------------------------- /ta_scanner/data/base_connector.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | # python3 4 | class DataFetcherBase(object, metaclass=ABCMeta): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/experiments/test_simple_experiment.py: -------------------------------------------------------------------------------- 1 | from ta_scanner.experiments.simple_experiment import 
SimpleExperiment 2 | 3 | 4 | def test_x(): 5 | assert SimpleExperiment.x() == "x" 6 | -------------------------------------------------------------------------------- /lab/csv_file_load.py: -------------------------------------------------------------------------------- 1 | from ta_scanner.data.data import aggregate_bars 2 | from ta_scanner.data.csv_file_fetcher import CsvFileFetcher 3 | 4 | data = CsvFileFetcher("example.csv") 5 | df = data.request_instrument() 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "ECBOT", 4 | "GLOBEX", 5 | "NYMEX", 6 | "groupby", 7 | "insync", 8 | "iterrows", 9 | "sfilter", 10 | "strftime" 11 | ] 12 | } -------------------------------------------------------------------------------- /RELEASE_NOTES.md: -------------------------------------------------------------------------------- 1 | ## (in progress) Version 0.1 2 | - Create Indicator objects 3 | - Create Filter objects 4 | - Pull live data from IB (and optionally cache) 5 | - Create ranges of param permutations and range of PnL 6 | 7 | ## Version 0.0.1 8 | - started 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Expected Behavior 2 | 3 | 4 | ## Actual Behavior 5 | 6 | 7 | ## Steps to Reproduce the Problem 8 | 9 | 1. 10 | 1. 11 | 1. 
12 | 13 | ## Specifications 14 | 15 | - Version: 16 | - Platform: 17 | - Subsystem: 18 | 19 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.0' 2 | 3 | services: 4 | pg: 5 | image: postgres:12.2 6 | restart: always 7 | ports: 8 | - 65432:5432 9 | volumes: 10 | - ..:/db_data 11 | environment: 12 | - POSTGRES_USER=ta_scanner 13 | - POSTGRES_PASSWORD=ta_scanner 14 | 15 | volumes: 16 | db_data: -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CONDA_ENV ?= ta_scanner 2 | 3 | test: 4 | @pytest -s . 5 | 6 | release: 7 | @python setup.py sdist 8 | @twine upload dist/* 9 | 10 | example: 11 | @python examples/moving_average_crossover.py 12 | 13 | 14 | env.create: 15 | @conda create -y -n ${CONDA_ENV} python=3.7 16 | 17 | env.update: 18 | @conda env update -n ${CONDA_ENV} -f environment.yml 19 | 20 | -------------------------------------------------------------------------------- /requirements.dev: -------------------------------------------------------------------------------- 1 | # Keep these development dependencies out of the requirements.txt & environment.yml files; 2 | # conda cannot install some of these development libs so use pip to install 3 | # them into a conda env after conda installs everything else. 
4 | 5 | # dev utils 6 | ipdb 7 | ipython 8 | 9 | # docs 10 | sphinx 11 | 12 | # tests 13 | codecov 14 | pytest 15 | pytest-benchmark 16 | pytest-cov 17 | pytest-datadir 18 | pytest-mock 19 | pytest-voluptuous 20 | 21 | # static analysis and style 22 | black 23 | pylint 24 | 25 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: ta_scanner 2 | 3 | channels: 4 | - default 5 | 6 | dependencies: 7 | - python=3.7 8 | - click 9 | - sqlalchemy 10 | - pandas >= 1.0.0 11 | - psycopg2 12 | - numpy 13 | - requests 14 | - pytz 15 | - matplotlib 16 | # test resources 17 | - pytest 18 | - pip 19 | - pip: 20 | - twine 21 | - ipdb 22 | - TA-Lib 23 | - loguru 24 | - ib_insync 25 | - trading_calendars 26 | - "--editable ." 27 | # testing resources -------------------------------------------------------------------------------- /examples/db_stocks.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from loguru import logger 3 | from ta_scanner.data.data import load_and_cache 4 | from ta_scanner.data.ib import IbDataFetcher 5 | 6 | ib_data_fetcher = IbDataFetcher() 7 | 8 | symbols = ["SPY", "QQQ", "AAPL"] 9 | 10 | for symbol in symbols: 11 | df = load_and_cache( 12 | symbol, 13 | ib_data_fetcher, 14 | start_date=date(2020, 6, 1), 15 | end_date=date(2020, 6, 4), 16 | use_rth=False, 17 | groupby_minutes=15, 18 | ) 19 | logger.info(f"{symbol} - {len(df.index)}") 20 | -------------------------------------------------------------------------------- /ta_scanner/reports.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Tuple 3 | 4 | 5 | class BasicReport: 6 | def __init__(self): 7 | pass 8 | 9 | def analyze(self, df, field_name) -> Tuple[np.float64, int, np.float64, np.float64]: 10 | trades = df.query(f"0 < 
{field_name} or {field_name} < 0") 11 | 12 | trades.to_csv("trades.csv") 13 | 14 | pnl = trades[field_name].sum() 15 | count = trades[field_name].count() 16 | average = np.average(trades[field_name]) 17 | median = np.median(trades[field_name]) 18 | 19 | return pnl, count, average, median 20 | -------------------------------------------------------------------------------- /tests/data/test_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from ta_scanner.data.data import ( 3 | __gen_values, 4 | __gen_cols, 5 | db_insert_df_conflict_on_do_nothing, 6 | ) 7 | 8 | 9 | def fake_df_ab(): 10 | data = {"a": [1, 2, 3], "b": [11, 22, 33]} 11 | df = pd.DataFrame(data, columns=["a", "b"]) 12 | return df 13 | 14 | 15 | def test_gen_values(): 16 | df = fake_df_ab() 17 | expected_values = [("1", "11"), ("2", "22"), ("3", "33")] 18 | assert __gen_values(df) == expected_values 19 | 20 | 21 | def test_gen_cols(): 22 | df = fake_df_ab() 23 | expected_values = ["a", "b"] 24 | assert __gen_cols(df) == expected_values 25 | -------------------------------------------------------------------------------- /lab/query_futures_data.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from loguru import logger 3 | import sys 4 | 5 | from ta_scanner.data.data import db_data_fetch_between, aggregate_bars 6 | from ta_scanner.models import gen_engine 7 | 8 | 9 | symbol = "/MES" 10 | sd = datetime.date(2020, 9, 18) 11 | ed = sd 12 | 13 | engine = gen_engine() 14 | groupby_minutes = 1 15 | 16 | 17 | def query_data(engine, symbol, sd, ed, groupby_minutes): 18 | df = db_data_fetch_between(engine, symbol, sd, ed) 19 | df.set_index("ts", inplace=True) 20 | df = aggregate_bars(df, groupby_minutes=groupby_minutes) 21 | df["ts"] = df.index 22 | return df 23 | 24 | 25 | df = query_data(engine, symbol, sd, ed, groupby_minutes) 
-------------------------------------------------------------------------------- /lab/fetch_futures_data.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | from ta_scanner.data.data import load_and_cache 3 | from ta_scanner.data.ib import IbDataFetcher 4 | import datetime 5 | 6 | ib_data_fetcher = IbDataFetcher() 7 | 8 | # symbols = ["/MES", "/MNQ", "/MGC"] 9 | symbols = ["/MES"] 10 | 11 | get_last_n_days = 1 12 | get_last_n_days = 5 13 | 14 | sd = datetime.date.today() - datetime.timedelta(days=get_last_n_days) 15 | ed = datetime.date.today() - datetime.timedelta(days=1) 16 | 17 | for symbol in symbols: 18 | params = dict( 19 | start_date=sd, 20 | end_date=ed, 21 | use_rth=False, 22 | groupby_minutes=1, 23 | ) 24 | df = load_and_cache(symbol, ib_data_fetcher, **params) 25 | 26 | logger.info("Done") 27 | -------------------------------------------------------------------------------- /examples/db_futures.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | from ta_scanner.data.data import load_and_cache 3 | from ta_scanner.data.ib import IbDataFetcher 4 | import datetime 5 | 6 | ib_data_fetcher = IbDataFetcher() 7 | 8 | symbol = "/MES" 9 | sd = datetime.date(2020, 9, 2) 10 | ed = datetime.date(2020, 9, 10) 11 | params = dict( 12 | start_date=sd, 13 | end_date=ed, 14 | use_rth=False, 15 | groupby_minutes=1, 16 | ) 17 | 18 | df = load_and_cache(symbol, ib_data_fetcher, **params) 19 | logger.info(f"{symbol} - All hours / 1min bars - {len(df.index)}") 20 | 21 | params["use_rth"] = True 22 | df = load_and_cache(symbol, ib_data_fetcher, **params) 23 | logger.info(f"{symbol} - Only RTH / 1min bars - {len(df.index)}") 24 | 25 | params["use_rth"] = False 26 | params["groupby_minutes"] = 12 27 | df = load_and_cache(symbol, ib_data_fetcher, **params) 28 | logger.info(f"{symbol} - All hours / 12min bars - {len(df.index)}") 29 | 
logger.info(f"\n{df.head(10)}") 30 | -------------------------------------------------------------------------------- /tests/test_indicators.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from typing import Any, Dict 4 | 5 | from ta_scanner.indicators import ( 6 | IndicatorSmaCrossover, 7 | IndicatorParams, 8 | IndicatorException, 9 | ) 10 | 11 | 12 | def gen_df_zeros(field_name="some_field_name"): 13 | return pd.DataFrame(0, index=[1, 2, 3], columns=[field_name]) 14 | 15 | 16 | def test_abstract_methods_present(): 17 | field_name, params = "field_name", [] 18 | IndicatorSmaCrossover(field_name=field_name, params=params) 19 | 20 | 21 | def test_ensure_required_filter_options(): 22 | field_name = "fake_some_name" 23 | fake_df = gen_df_zeros(field_name) 24 | 25 | params = { 26 | IndicatorParams.fast_sma: 20, 27 | # IndicatorParams.slow_sma: 50, # intentionally missing param 28 | } 29 | 30 | sma_crossover = IndicatorSmaCrossover(field_name=field_name, params=params) 31 | 32 | with pytest.raises(IndicatorException) as e: 33 | sma_crossover.apply(fake_df) 34 | 35 | expected_message = "IndicatorSmaCrossover requires key = IndicatorParams.slow_sma" 36 | assert expected_message == str(e.value) 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # Installer logs 29 | pip-log.txt 30 | pip-delete-this-directory.txt 31 | 32 | # Unit test / coverage reports 33 | htmlcov/ 34 | .tox/ 35 
| .coverage 36 | .coverage.* 37 | .cache 38 | nosetests.xml 39 | coverage.xml 40 | *.cover 41 | .hypothesis/ 42 | .pytest_cache/ 43 | 44 | # Translations 45 | *.mo 46 | *.pot 47 | 48 | # Django stuff: 49 | *.log 50 | local_settings.py 51 | db.sqlite3 52 | 53 | # Flask stuff: 54 | instance/ 55 | .webassets-cache 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | # Jupyter Notebook 64 | .ipynb_checkpoints 65 | 66 | # pyenv 67 | .python-version 68 | 69 | # celery beat schedule file 70 | celerybeat-schedule 71 | 72 | # SageMath parsed files 73 | *.sage.py 74 | 75 | # Environments 76 | .env 77 | .venv 78 | env/ 79 | venv/ 80 | ENV/ 81 | env.bak/ 82 | venv.bak/ 83 | 84 | # mkdocs documentation 85 | /site 86 | 87 | # mypy 88 | .mypy_cache/ 89 | 90 | *.egg-info 91 | *.csv 92 | *.txt 93 | 94 | results/ 95 | -------------------------------------------------------------------------------- /ta_scanner/models.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.ext.declarative import declarative_base 2 | from sqlalchemy import ( 3 | create_engine, 4 | Column, 5 | Integer, 6 | Numeric, 7 | String, 8 | DateTime, 9 | Index, 10 | Boolean, 11 | ) 12 | 13 | 14 | Base = declarative_base() 15 | 16 | 17 | NUMERIC_OPTIONS = dict(precision=8, scale=2, decimal_return_scale=None, asdecimal=True) 18 | 19 | 20 | def gen_engine(): 21 | connection_string: str = ( 22 | "postgresql://ta_scanner:ta_scanner@localhost:65432/ta_scanner" 23 | ) 24 | engine = create_engine(connection_string, convert_unicode=True) 25 | return engine 26 | 27 | 28 | def init_db(): 29 | engine = gen_engine() 30 | Base.metadata.create_all(bind=engine) 31 | 32 | 33 | class Quote(Base): 34 | __tablename__ = "quote" 35 | 36 | id = Column(Integer, primary_key=True) 37 | ts = Column(DateTime(timezone=True), index=True) 38 | symbol = Column(String(10)) 39 | open = Column(Numeric(**NUMERIC_OPTIONS)) 40 | close = 
Column(Numeric(**NUMERIC_OPTIONS)) 41 | high = Column(Numeric(**NUMERIC_OPTIONS)) 42 | low = Column(Numeric(**NUMERIC_OPTIONS)) 43 | average = Column(Numeric(**NUMERIC_OPTIONS)) 44 | volume = Column(Integer) 45 | bar_count = Column(Integer) 46 | rth = Column(Boolean) 47 | 48 | __table_args__ = (Index("ix_quote_symbol_ts", symbol, ts, unique=True),) 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020, Weston Platter 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /ta_scanner/data/csv_file_fetcher.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from loguru import logger 3 | 4 | import datetime 5 | from trading_calendars import get_calendar, TradingCalendar 6 | from typing import Optional, Dict, Any, List, Tuple, Optional 7 | 8 | from ta_scanner.data.base_connector import DataFetcherBase 9 | from ta_scanner.data.constants import ( 10 | TimezoneNames, 11 | WhatToShow, 12 | Exchange, 13 | Calendar, 14 | Currency, 15 | ) 16 | 17 | 18 | class CsvFileFetcher(DataFetcherBase): 19 | def __init__(self, file_path: str): 20 | self.file_path = file_path 21 | self.df = None 22 | self._load_data_from_file() 23 | 24 | def _load_data_from_file(self): 25 | df = pd.read_csv(self.file_path) 26 | self.df = self._prepare_columns(df) 27 | 28 | def _prepare_columns(self, ddf): 29 | rename_columns = { 30 | "Date": "date", 31 | " Time": "time", 32 | " Open": "open", 33 | " High": "high", 34 | " Low": "low", 35 | " Last": "close", 36 | " Volume": "volume", 37 | " Bid Volume": "bid_volume", 38 | " Ask Volume": "ask_volume", 39 | " Close": "cumulative_delta_bars", 40 | } 41 | ddf = ddf.rename(columns=rename_columns) 42 | ddf["ts"] = pd.to_datetime(ddf["date"].map(str) + ddf["time"].map(str)) 43 | ddf.set_index("ts", drop=False, inplace=True) 44 | ddf = ddf.tz_localize("US/Mountain") 45 | return ddf 
46 | 47 | def request_instrument(self): 48 | return self.df 49 | -------------------------------------------------------------------------------- /tests/test_filters.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from typing import Any, Dict 4 | 5 | from ta_scanner.filters import FilterCumsum, FilterOptions, FilterException 6 | 7 | 8 | def gen_df_zeros(field_name="some_field_name"): 9 | return pd.DataFrame(0, index=[1, 2, 3], columns=[field_name]) 10 | 11 | 12 | def test_abstract_methods_present(): 13 | field_name, result_field_name, params = "some_field_name", "result", [] 14 | FilterCumsum( 15 | field_name=field_name, result_field_name=result_field_name, params=params 16 | ) 17 | 18 | 19 | def test_required_filter_options(): 20 | field_name = "indicator_name" 21 | result_field_name = f"{field_name}_png" 22 | df = gen_df_zeros(field_name) 23 | 24 | params: Dict[FilterOptions, Any] = { 25 | FilterOptions.win_points: 20.0, 26 | FilterOptions.loss_points: 10.0, 27 | # FilterOptions.threshold_intervals: 50, 28 | } 29 | 30 | filter_cumsum = FilterCumsum( 31 | field_name=field_name, result_field_name=result_field_name, params=params 32 | ) 33 | 34 | with pytest.raises(FilterException) as execinfo: 35 | filter_cumsum.apply(df) 36 | assert "FilterOptions.loss_points" in str(excinfo.value) 37 | 38 | params: Dict[FilterOptions, Any] = { 39 | FilterOptions.win_points: 20.0, 40 | FilterOptions.loss_points: 10.0, 41 | FilterOptions.threshold_intervals: 50, 42 | } 43 | 44 | filter_cumsum = FilterCumsum( 45 | field_name=field_name, result_field_name=result_field_name, params=params 46 | ) 47 | 48 | filter_cumsum.apply(df) 49 | -------------------------------------------------------------------------------- /examples/moving_average_crossover_stocks.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | from ta_scanner.data.data 
import load_and_cache 4 | from ta_scanner.data.ib import IbDataFetcher 5 | from ta_scanner.indicators import IndicatorSmaCrossover, IndicatorParams 6 | from ta_scanner.signals import Signal 7 | from ta_scanner.filters import FilterCumsum, FilterOptions, FilterNames 8 | from ta_scanner.reports import BasicReport 9 | 10 | 11 | # get SPY data 12 | ib_data_fetcher = IbDataFetcher() 13 | df = load_and_cache( 14 | "SPY", 15 | ib_data_fetcher, 16 | start_date=date(2020, 7, 1), 17 | end_date=date(2020, 7, 20), 18 | use_rth=True, 19 | ) 20 | 21 | # store signals in this field 22 | field_name = "moving_avg_cross" 23 | result_field_name = f"{field_name}_pnl" 24 | 25 | indicator_sma_cross = IndicatorSmaCrossover( 26 | field_name=field_name, 27 | params={ 28 | IndicatorParams.fast_sma: 20, 29 | IndicatorParams.slow_sma: 50, 30 | }, 31 | ) 32 | 33 | # apply indicator to generate signals 34 | indicator_sma_cross.apply(df) 35 | 36 | # initialize filter 37 | sfilter = FilterCumsum( 38 | field_name=field_name, 39 | result_field_name=result_field_name, 40 | params={ 41 | FilterOptions.win_points: 10, 42 | FilterOptions.loss_points: 5, 43 | FilterOptions.threshold_intervals: 30, 44 | }, 45 | ) 46 | 47 | # generate results 48 | results = sfilter.apply(df) 49 | 50 | # analyze results 51 | basic_report = BasicReport() 52 | pnl = basic_report.analyze(df, FilterNames.filter_cumsum.value) 53 | 54 | logger.info("------------------------") 55 | 56 | logger.info(f"Final PnL = {pnl}") 57 | -------------------------------------------------------------------------------- /ta_scanner/experiements/simple.py: -------------------------------------------------------------------------------- 1 | class Simple: 2 | def __init__( 3 | self, df_train, df_test, indicator, indicator_params, sfilter, sfilter_params 4 | ): 5 | self.df_train = df_train 6 | self.df_test = df_test 7 | self.indicator = indicator 8 | self.indicator_params = indicator_params 9 | self.sfilter = sfilter 10 | self.sfilter_params = 
sfilter_params 11 | 12 | # the goal here is to 13 | # - apply range of indicators configs to the train data 14 | # - pick a couple of the bottom, middle, and top results 15 | # - apply those to the test data 16 | # - analyze how well they translate 17 | 18 | 19 | # indicator_sma_cross = IndicatorSmaCrossover() 20 | 21 | # # store signals in this field 22 | # field_name = "moving_avg_cross" 23 | 24 | # # Moving Average Crossover, 20 vs 50 25 | # indicator_params = { 26 | # IndicatorParams.fast_sma: 30, 27 | # IndicatorParams.slow_sma: 60, 28 | # } 29 | # # apply indicator to generate signals 30 | # indicator_sma_cross.apply(df, field_name, indicator_params) 31 | 32 | # # initialize filter 33 | # sfilter = FilterCumsum() 34 | 35 | # filter_options = { 36 | # FilterOptions.win_points: 10, 37 | # FilterOptions.loss_points: 3, 38 | # FilterOptions.threshold_intervals: 20, 39 | # } 40 | 41 | # # generate results 42 | # results = sfilter.apply(df, field_name, filter_options) 43 | 44 | # # analyze results 45 | # basic_report = BasicReport() 46 | # pnl, count, average, median = basic_report.analyze(df, FilterNames.filter_cumsum.value) 47 | 48 | # logger.info("------------------------") 49 | 50 | # logger.info(f"Final PnL = {pnl}") 51 | -------------------------------------------------------------------------------- /.github/workflows/conda-run-tests.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - "issue-**" 7 | pull_request: 8 | branches: 9 | - "dev" 10 | 11 | jobs: 12 | example-1: 13 | name: Ex1 (${{ matrix.python-version }}, ${{ matrix.os }}) 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | os: ["ubuntu-latest"] #, "macos-latest", "windows-latest"] 19 | python-version: ["3.7"] #, "2.7"] 20 | 21 | steps: 22 | - name: Install xmllint 23 | run: | 24 | sudo apt install build-essential wget -y 25 | wget 
https://artiya4u.keybase.pub/TA-lib/ta-lib-0.4.0-src.tar.gz 26 | tar -xvf ta-lib-0.4.0-src.tar.gz 27 | cd ta-lib/ 28 | ./configure --prefix=/usr 29 | make 30 | sudo make install 31 | - uses: actions/checkout@v2 32 | - name: Cache Conda 33 | uses: actions/cache@v1 34 | with: 35 | path: /usr/share/miniconda/envs/ta_scanner 36 | key: ${{ runner.os }}-conda-${{ hashFiles('environment.yml') }} 37 | restore-keys: | 38 | ${{ runner.os }}-conda- 39 | - name: Setup Conda dependencies 40 | uses: goanpeca/setup-miniconda@v1 41 | with: 42 | activate-environment: ta_scanner 43 | environment-file: environment.yml 44 | python-version: "${{ matrix.python-version }}" 45 | auto-activate-base: false 46 | - name: Conda info 47 | shell: bash -l {0} 48 | run: | 49 | conda info 50 | conda list 51 | - name: Run tests 52 | shell: bash -l {0} 53 | run: | 54 | pytest 55 | -------------------------------------------------------------------------------- /examples/moving_average_crossover_futures.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, date 2 | from loguru import logger 3 | 4 | from ta_scanner.data.data import load_and_cache 5 | from ta_scanner.data.ib import IbDataFetcher 6 | from ta_scanner.indicators import IndicatorSmaCrossover, IndicatorParams 7 | from ta_scanner.signals import Signal 8 | from ta_scanner.filters import FilterCumsum, FilterOptions, FilterNames 9 | from ta_scanner.reports import BasicReport 10 | 11 | 12 | ib_data_fetcher = IbDataFetcher() 13 | df = load_and_cache( 14 | "/MES", 15 | ib_data_fetcher, 16 | start_date=date(2020, 7, 10), 17 | end_date=date(2020, 7, 20), 18 | use_rth=True, 19 | ) 20 | 21 | # store signals in this field 22 | field_name = "moving_avg_cross" 23 | 24 | # Moving Average Crossover, 20 vs 50 25 | indicator_params = { 26 | IndicatorParams.fast_sma: 30, 27 | IndicatorParams.slow_sma: 60, 28 | } 29 | 30 | # init 31 | indicator_sma_cross = IndicatorSmaCrossover( 32 | 
field_name=field_name, params=indicator_params 33 | ) 34 | 35 | # apply indicator to generate signals 36 | indicator_sma_cross.apply(df) 37 | 38 | 39 | filter_options = { 40 | FilterOptions.win_points: 10, 41 | FilterOptions.loss_points: 3, 42 | FilterOptions.threshold_intervals: 20, 43 | } 44 | # initialize filter 45 | result_field_name = f"{field_name}_pnl" 46 | sfilter = FilterCumsum( 47 | field_name=field_name, result_field_name=result_field_name, params=filter_options 48 | ) 49 | 50 | # generate results 51 | results = sfilter.apply(df) 52 | 53 | # analyze results 54 | basic_report = BasicReport() 55 | pnl, count, average, median = basic_report.analyze(df, FilterNames.filter_cumsum.value) 56 | 57 | logger.info("------------------------") 58 | 59 | logger.info(f"Final PnL = {pnl}") 60 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from setuptools import setup 3 | from setuptools.command.test import test as TestCommand 4 | 5 | 6 | class PyTest(TestCommand): 7 | user_options = [("pytest-args=", "a", "Arguments to pass into pytest")] 8 | 9 | def initialize_options(self): 10 | TestCommand.initialize_options(self) 11 | self.pytest_args = "-n auto" 12 | 13 | def run_tests(self): 14 | import shlex 15 | import pytest 16 | 17 | errno = pytest.main(shlex.split(self.pytest_args)) 18 | sys.exit(errno) 19 | 20 | 21 | # version_contents = {} 22 | # with open("fast_arrow/version.py", "r", encoding="utf-8") as f: 23 | # exec(f.read(), version_contents) 24 | 25 | 26 | with open("README.md", "r") as f: 27 | long_description = f.read() 28 | 29 | 30 | # deps = [ 31 | # "datetime", 32 | # "deprecation", 33 | # "pathlib2", 34 | # "requests>=2.20.0", 35 | # "pandas>=0.23.2", 36 | # "numpy", 37 | # "yarl", 38 | # "urllib3>=1.24.2", 39 | # ] 40 | 41 | 42 | # test_deps = ["pipenv", "pytest", "pytest-cov", "detox", "flake8", "vcrpy"] 43 | 44 | 
45 | setup( 46 | name="ta_scanner", 47 | version="0.0.1", 48 | description="Technical analysis scanner and bavhior matcher", 49 | long_description=long_description, 50 | long_description_content_type="text/markdown", 51 | author="Weston Platter", 52 | author_email="westonplatter+github@gmail.com", 53 | url="https://github.com/westonplatter/ta_scanner/", 54 | license="BSD-3-Clause", 55 | python_requires=">=3.5", 56 | packages=["ta_scanner"], 57 | cmdclass={"test": PyTest}, 58 | project_urls={ 59 | "Issue Tracker": "https://github.com/westonplatter/ta_scanner/issues", 60 | "Source Code": "https://github.com/westonplatter/ta_scanner", 61 | }, 62 | ) 63 | -------------------------------------------------------------------------------- /ta_scanner/experiments/simple_experiment.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | 3 | 4 | class BaseExperiment(ABC): 5 | @staticmethod 6 | def x() -> str: 7 | return "x" 8 | 9 | 10 | class SimpleExperiment(BaseExperiment): 11 | def __init__( 12 | self, df_train, df_test, indicator, indicator_params, sfilter, sfilter_params 13 | ): 14 | self.df_train = df_train 15 | self.df_test = df_test 16 | self.indicator = indicator 17 | self.indicator_params = indicator_params 18 | self.sfilter = sfilter 19 | self.sfilter_params = sfilter_params 20 | 21 | # the goal here is to 22 | # - apply range of indicators configs to the train data 23 | # - pick a couple of the bottom, middle, and top results 24 | # - apply those to the test data 25 | # - analyze how well they translate 26 | 27 | 28 | # indicator_sma_cross = IndicatorSmaCrossover() 29 | 30 | # # store signals in this field 31 | # field_name = "moving_avg_cross" 32 | 33 | # # Moving Average Crossover, 20 vs 50 34 | # indicator_params = { 35 | # IndicatorParams.fast_sma: 30, 36 | # IndicatorParams.slow_sma: 60, 37 | # } 38 | # # apply indicator to generate signals 39 | # indicator_sma_cross.apply(df, field_name, 
indicator_params) 40 | 41 | # # initialize filter 42 | # sfilter = FilterCumsum() 43 | 44 | # filter_options = { 45 | # FilterOptions.win_points: 10, 46 | # FilterOptions.loss_points: 3, 47 | # FilterOptions.threshold_intervals: 20, 48 | # } 49 | 50 | # # generate results 51 | # results = sfilter.apply(df, field_name, filter_options) 52 | 53 | # # analyze results 54 | # basic_report = BasicReport() 55 | # pnl, count, average, median = basic_report.analyze(df, FilterNames.filter_cumsum.value) 56 | 57 | # logger.info("------------------------") 58 | 59 | # logger.info(f"Final PnL = {pnl}") 60 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Technical Analysis Scanner 2 | 3 | ## Goals 4 | 5 | This software attempts to provide a framework that does a combination of 6 | (1) scanning and (2) backtesting to ask and answer such questions as, 7 | 8 | - which instruments have responded well to the 4 hr MACD(26, 9, 12) in the 9 | last quarter? 10 | 11 | - which stocks are getting close to their 20/50 Moving Average Crossover, and 12 | of those stocks, which have responded more than +/- 2.5% in the past? 13 | 14 | - which instruments in the last 2 weeks have trended out of their opening 15 | ranges? What is the 1 and 2 std dev band for each instrument at 30 minute 16 | intervals? 17 | 18 | ## Features 19 | 20 | - [x] Pull data from IB 21 | - [ ] Apply various indicators against single or multiple instruments 22 | - [ ] Simple Moving Average Crossover 23 | - [ ] MACD Crossover 24 | 25 | ### Digging a little deeper 26 | 27 | Technical analysis sometimes works, and sometimes doesn't.
The goal of this 28 | codebase is to provide means/methods for measuring a universe of instruments 29 | and determining which ones are behaving in line with various TA patterns. 30 | 31 | It's intended to work differently than a traditional backtester (e.g., 32 | Quantopian or QuantConnect). From what I understand about backtesting, the 33 | goal is to provide predetermined entry and exit rules, and measure the 34 | results for a single or multiple instruments. This software is different 35 | in that it intends to experiment with the entry and exit rules and see how 36 | those adjustments impact results. 37 | 38 | ## Structure 39 | 40 | Core Framework lives in this repo, and your secret sauce parameter/configs, 41 | research findings live in another one. 42 | 43 | This repo will feel like a mono repo to many, and focuses on 44 | 45 | - downloading market data (currently using IB API Gateway) 46 | - exposing market data 47 | - running multi-variate simulations 48 | - reporting results 49 | 50 | 51 | ## Releasing 52 | 53 | PyPI release commands, 54 | 55 | ``` 56 | python setup.py sdist bdist_wheel 57 | twine upload dist/* 58 | ``` 59 | -------------------------------------------------------------------------------- /examples/moving_average_crossover_range_stocks.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from loguru import logger 3 | import sys 4 | 5 | from ta_scanner.data.data import load_and_cache 6 | from ta_scanner.data.ib import IbDataFetcher 7 | from ta_scanner.indicators import IndicatorSmaCrossover, IndicatorParams 8 | from ta_scanner.signals import Signal 9 | from ta_scanner.filters import FilterCumsum, FilterOptions, FilterNames 10 | from ta_scanner.reports import BasicReport 11 | 12 | 13 | # mute the noisy data debug statements 14 | logger.remove() 15 | logger.add(sys.stderr, level="INFO") 16 | 17 | # get SPY data 18 | ib_data_fetcher = IbDataFetcher() 19 | df_original =
def run_cross(fast_sma: int, slow_sma: int):
    """Score one fast/slow SMA crossover pair on a copy of ``df_original``.

    Args:
        fast_sma: period of the fast moving average.
        slow_sma: period of the slow moving average.

    Returns:
        Whatever ``BasicReport.analyze`` returns for the filtered frame.
    """
    frame = df_original.copy()

    # write crossover signals into the ``field_name`` column
    sma_params = {
        IndicatorParams.fast_sma: fast_sma,
        IndicatorParams.slow_sma: slow_sma,
    }
    IndicatorSmaCrossover(field_name=field_name, params=sma_params).apply(frame)

    # turn each signal into a win/loss/timeout result
    cumsum_params = {
        FilterOptions.win_points: 10,
        FilterOptions.loss_points: 5,
        FilterOptions.threshold_intervals: 30,
    }
    cumsum_filter = FilterCumsum(
        field_name=field_name,
        result_field_name=result_field_name,
        params=cumsum_params,
    )
    cumsum_filter.apply(frame)

    # NOTE(review): the other example scripts unpack four values
    # (pnl, count, average, median) from analyze(); here the whole result is
    # returned and printed as "Final PnL" - confirm that is intended.
    return BasicReport().analyze(frame, FilterNames.filter_cumsum.value)
def query_data(engine, symbol, sd, ed, groupby_minutes):
    """Load stored quotes for ``symbol`` between ``sd`` and ``ed`` as bars.

    The rows are indexed by ``ts``, aggregated to ``groupby_minutes`` bars,
    and the index is copied back into a ``ts`` column before returning.
    """
    quotes = db_data_fetch_between(engine, symbol, sd, ed)
    quotes.set_index("ts", inplace=True)

    bars = aggregate_bars(quotes, groupby_minutes=groupby_minutes)
    # keep the timestamp available as a regular column as well as the index
    bars["ts"] = bars.index
    return bars
class TimezoneNames(Enum):
    """Named timezone identifiers; each member's value is the zone name string."""

    # US zones
    US_EASTERN = "US/Eastern"
    US_CENTRAL = "US/Central"
    US_MOUNTAIN = "US/Mountain"
    US_PACIFIC = "US/Pacific"
    # universal time
    UTC = "UTC"
class Calendar(Enum):
    """Trading-calendar codes and helpers for picking one by symbol."""

    # https://github.com/quantopian/trading_calendars
    DEFAULT = "XNYS"  # default to NYSE
    NYSE = "XNYS"  # same value as DEFAULT, so Enum treats this as an alias
    CME = "CMES"
    CBOE = "XCBF"
    ICE = "IEPA"

    @staticmethod
    def futures_lookup_hash() -> Dict:
        """Return the mapping of calendar member -> futures symbols it covers."""
        return {
            Calendar.CME: [
                # equities
                "/ES",
                "/MES",
                "/NQ",
                "/MNQ",
                # metals
                "/GC",
                "/MGC",
                # energy
                "/CL",
                "/QM",
                # currencies
                "/M6A",
                "/M6B",
                "/M6E",
                # interest rates
                "/GE",
                "/ZN",
                "/ZT",
                # grains
                "/ZC",
                "/YC",
                "/ZS",
                "/YK",
                "/ZW",
                "/YW",
            ],
            Calendar.CBOE: [],
            Calendar.ICE: [],
        }

    @staticmethod
    def select_exchange_by_symbol(symbol: str):
        """Return the calendar member listing ``symbol``.

        Falls back to ``Calendar.DEFAULT`` (after logging a warning) when the
        symbol is not present in ``futures_lookup_hash()``.
        """
        # this module has no module-level logger, so use the stdlib one here
        import logging

        for calendar, symbols in Calendar.futures_lookup_hash().items():
            if symbol in symbols:
                return calendar

        logging.getLogger(__name__).warning(
            "Did not find a calendar entry for symbol=%s", symbol
        )
        return Calendar.DEFAULT

    @staticmethod
    def init_by_symbol(symbol: str) -> "TradingCalendar":
        """Build the trading calendar for ``symbol``.

        Symbols containing "/" are looked up as futures; every other symbol
        uses the NYSE calendar.
        """
        if "/" in symbol:
            name = Calendar.select_exchange_by_symbol(symbol).value
        else:
            name = Calendar.NYSE.value
        return get_calendar(name)
def query_data(engine, symbol, sd, ed, groupby_minutes):
    """Fetch stored rows for ``symbol`` in [sd, ed] and return them as bars.

    The frame is indexed by ``ts`` before aggregation; afterwards the index
    is mirrored into a ``ts`` column.
    """
    rows = db_data_fetch_between(engine, symbol, sd, ed)
    rows.set_index("ts", inplace=True)
    aggregated = aggregate_bars(rows, groupby_minutes=groupby_minutes)
    aggregated["ts"] = aggregated.index
    return aggregated
class BaseFitler(metaclass=abc.ABCMeta):
    """Base class for filters that score the signals stored in a DataFrame column.

    Attributes are plain values stored by the constructor:
    ``field_name`` (signal column), ``result_field_name`` (column results are
    written to) and ``params`` (filter options).
    """

    def __init__(
        self, field_name: str, result_field_name: str, params: Dict[FilterOptions, Any]
    ):
        self.field_name = field_name
        self.result_field_name = result_field_name
        self.params = params

    def ensure_required_filter_options(
        self, expected: List[FilterOptions], actual: Dict[FilterOptions, Any]
    ):
        """Raise ``FilterException`` for the first key of ``expected`` missing in ``actual``."""
        for fo_key in expected:
            if fo_key not in actual:
                raise FilterException(f"expected key = {fo_key}")

    @abc.abstractmethod
    def apply(self, df: pd.DataFrame, inverse: int = 1):
        """Score the signals in ``df``; signature matches ``FilterCumsum.apply``."""


class FilterCumsum(BaseFitler):
    """Follow each signal forward until it wins, loses or runs out of time."""

    required_filter_options = [
        FilterOptions.win_points,
        FilterOptions.loss_points,
        FilterOptions.threshold_intervals,
    ]

    def log_exit(self, action: str, diff, row):
        """Debug-log how a trade was closed."""
        logger.debug(f"Action={action}. Ts={row.ts}. Diff={diff}. Close={row.close}")

    def log_entry(self, action, row):
        """Debug-log the row a trade was opened on."""
        logger.debug(f"Action={action}. Ts={row.ts}. Close={row.close}")

    def apply(self, df: pd.DataFrame, inverse: int = 1):
        """Write the result of every non-zero signal into ``result_field_name``.

        For each row whose ``field_name`` value is non-zero, up to
        ``threshold_intervals`` rows (starting with the signal row itself) are
        inspected. The trade closes on the first row where the signed move in
        ``close`` reaches ``win_points`` or falls to ``-loss_points``;
        otherwise it closes on the last inspected row, or on the last row of
        ``df`` if the data ends first. The signed move is stored on the
        closing row.

        Args:
            df: frame with ``close``, ``ts`` and the signal column; mutated in place.
            inverse: multiplier applied to the signal direction (``-1`` trades
                against the signal).

        Raises:
            FilterException: if a required filter option is missing.
        """
        self.ensure_required_filter_options(self.required_filter_options, self.params)

        # option values do not change while the frame is processed
        threshold = self.params[FilterOptions.threshold_intervals]
        win_points = self.params[FilterOptions.win_points]
        loss_floor = self.params[FilterOptions.loss_points] * -1.0

        signals = df.query(f"{self.field_name} != 0")
        row_count = len(df.index)

        for index, rs in signals.iterrows():
            signal_direction = rs[self.field_name] * inverse
            self.log_entry(signal_direction, rs)

            # position of the entry row; constant for the whole look-ahead
            entry_pos = df.index.get_loc(index)

            for index_after in range(0, threshold):
                df_index = entry_pos + index_after

                if df_index >= row_count:
                    # ran past the data: close on the last available row
                    rx = df.iloc[df_index - 1]
                    diff = (rx.close - rs.close) * signal_direction
                    self.log_exit("MaxTime", diff, rx)
                    df.loc[rx.name, self.result_field_name] = diff
                    break

                rx = df.iloc[df_index]
                diff = (rx.close - rs.close) * signal_direction

                if diff >= win_points:
                    action = "Won"
                elif diff <= loss_floor:
                    action = "Lost"
                elif index_after == threshold - 1:
                    action = "MaxTime"
                else:
                    continue

                self.log_exit(action, diff, rx)
                df.loc[rx.name, self.result_field_name] = diff
                break
def crossover(series, value=0):
    """Mark the bars on which ``series`` crosses ``value``.

    A bar is marked ``+1`` when the series moves from below ``value`` to above
    it and ``-1`` when it moves from above to below. Bars that sit exactly on
    ``value`` keep the side the series was on before, so a flat stretch at
    ``value`` produces no signals and a crossing that passes through ``value``
    is reported once, on the bar where the series actually reaches the other
    side.

    Args:
        series: pandas Series to inspect.
        value: level to cross (default 0).

    Returns:
        numpy array of -1 / 0 / +1, one entry per element of ``series``.
    """
    # +1 above the level, -1 below it; exact touches inherit the previous side
    side = np.sign(series - value)
    side = side.where(side != 0).ffill()
    previous_side = side.shift(1)

    conditions = [
        (side < 0) & (previous_side > 0),
        (side > 0) & (previous_side < 0),
    ]
    choices = [-1, +1]
    # comparisons against the leading NaN are False, so the first bar is 0
    return np.select(conditions, choices, default=0)
class CombinedBindary(BaseIndicator):
    """Combine several +1/-1 signal columns into one agreeing signal column."""

    def apply(self, df: pd.DataFrame) -> None:
        """Fill ``self.field_name`` with a signal wherever all inputs agree.

        The most recent non-zero value of every column listed under
        ``IndicatorParams.field_names`` is tracked. On each row where at least
        one of those columns is +1 or -1, if the tracked values sum (in
        absolute value) to the number of columns, the first tracked value is
        written to ``self.field_name`` on that row. All other rows stay 0.

        Raises:
            IndicatorException: if ``IndicatorParams.field_names`` is missing.
        """
        self.ensure_required_filter_options([IndicatorParams.field_names], self.params)
        field_names = self.params[IndicatorParams.field_names]

        df[self.field_name] = 0
        length = len(field_names)
        # latest non-zero signal per input column; None until one is seen
        field_name_values = [None for _ in range(length)]

        # axis must be passed by keyword: pandas 2.x no longer accepts any(1)
        has_signal = df[field_names].isin([1, -1]).any(axis=1)
        signals = df.loc[has_signal][field_names]

        for i, row in signals.iterrows():
            for ii, fn in enumerate(field_names):
                if row[fn] != 0:
                    field_name_values[ii] = row[fn]
            # filter(None, ...) skips columns that have not signalled yet
            if abs(sum(filter(None, field_name_values))) == length:
                df.loc[i, self.field_name] = field_name_values[0]
class IbDataFetcher(DataFetcherBase):
    """Fetch historical bars through an ib_insync ``IB`` connection."""

    def __init__(self, client_id: int = 0):
        self.client_id = client_id
        # the IB client is created on the first historical request
        self.ib = None

    def _init_client(self, host: str = "127.0.0.1", port: int = 4001) -> None:
        """Connect a new ``IB`` client and keep it on ``self.ib``."""
        client = IB()
        client.connect(host, port, clientId=self.client_id)
        self.ib = client

    def _execute_req_historical(
        self, contract, dt, duration, bar_size_setting, what_to_show, use_rth
    ) -> pd.DataFrame:
        """Request bars for ``contract`` and return them as one DataFrame.

        Both the regular-hours and the all-hours data sets are requested and
        tagged with an ``rth`` column; ``use_rth`` is not consulted. Returns
        ``None`` when neither request yields data.
        """
        not_connected = self.ib is None or not self.ib.isConnected()
        if not_connected:
            self._init_client()

        frames = []
        for rth in (True, False):
            bars = self.ib.reqHistoricalData(
                contract,
                endDateTime=dt,
                durationStr=duration,
                barSizeSetting=bar_size_setting,
                whatToShow=what_to_show,
                useRTH=rth,  # use_rth,
                formatDate=2,  # return as UTC time
            )
            frame = ib_insync_util.df(bars)
            if frame is not None:
                frame["rth"] = rth
                frames.append(frame)

        if not frames:
            return None
        combined = pd.concat(frames).drop_duplicates()
        return combined.reset_index(drop=True)

    def request_stock_instrument(
        self, instrument_symbol: str, dt: datetime.datetime, what_to_show: str
    ) -> pd.DataFrame:
        """Request two days of 1-minute bars for a stock routed via SMART."""
        contract = Stock(instrument_symbol, Exchange.SMART.value, Currency.USD.value)
        return self._execute_req_historical(
            contract, dt, "2 D", "1 min", what_to_show, False
        )

    def select_exchange_by_symbol(self, symbol):
        """Return the ``Exchange`` member listing ``symbol``.

        Raises:
            NotImplementedError: if no exchange lists the symbol.
        """
        symbols_by_exchange = {
            Exchange.GLOBEX: [
                # fmt: off
                # equities
                "/ES", "/MES",
                "/NQ", "/MNQ",
                # currencies
                "/M6A", "/M6B", "/M6E",
                # interest rates
                # '/GE', '/ZN', '/ZN', '/ZT',
                # fmt: on
            ],
            Exchange.ECBOT: ["/ZC", "/YC", "/ZS", "/YK", "/ZW", "/YW"],
            Exchange.NYMEX: ["/GC", "/MGC", "/CL", "/QM"],
        }

        for exchange, symbols in symbols_by_exchange.items():
            if symbol in symbols:
                return exchange
        raise NotImplementedError

    def request_future_instrument(
        self,
        symbol: str,
        dt: datetime.datetime,
        what_to_show: str,
        contract_date: Optional[str] = None,
    ) -> pd.DataFrame:
        """Request one day of 1-minute bars for a continuous future.

        Raises:
            NotImplementedError: if ``contract_date`` is given, or if the
                symbol has no exchange mapping.
        """
        exchange_name = self.select_exchange_by_symbol(symbol).value

        # dated contracts are not supported; only the continuous future is
        if contract_date:
            raise NotImplementedError

        contract = ContFuture(symbol, exchange_name, currency=Currency.USD.value)
        return self._execute_req_historical(
            contract, dt, "1 D", "1 min", what_to_show, False
        )

    def request_instrument(
        self,
        symbol: str,
        dt,
        what_to_show,
        contract_date: Optional[str] = None,
    ):
        """Dispatch to the futures request for "/" symbols, else to stocks."""
        is_future = "/" in symbol
        if not is_future:
            return self.request_stock_instrument(symbol, dt, what_to_show)
        return self.request_future_instrument(symbol, dt, what_to_show, contract_date)
def run_cross(original_df, fast_sma: int, slow_sma: int):
    """Run one EMA crossover configuration on a copy of ``original_df``.

    Args:
        original_df: bar data; it is copied, not modified.
        fast_sma: period used for the fast EMA.
        slow_sma: period used for the slow EMA.

    Returns:
        The ``(pnl, count, avg, median)`` tuple produced by ``BasicReport.analyze``.
    """
    df = original_df.copy()

    # indicator setup
    indicator_params = {
        IndicatorParams.fast_ema: fast_sma,
        IndicatorParams.slow_ema: slow_sma,
    }
    indicator = IndicatorEmaCrossover(field_name, indicator_params)
    indicator.apply(df)

    # filter setup; FilterCumsum needs the result column name as well
    filter_params = {
        FilterOptions.win_points: win_pts,
        FilterOptions.loss_points: loss_pts,
        FilterOptions.threshold_intervals: trade_interval,
    }
    result_field_name = f"{field_name}_pnl"
    sfilter = FilterCumsum(field_name, result_field_name, filter_params)

    # generate results; an inverted filter trades against the signal (-1)
    sfilter.apply(df, inverse=-1 if filter_inverse else 1)

    # get aggregate pnl
    # NOTE(review): analyze() is given the signal column here, while results
    # are written to result_field_name - confirm which column it expects.
    basic_report = BasicReport()
    pnl, count, avg, median = basic_report.analyze(df, field_name)

    return pnl, count, avg, median
def fetch_data():
    """Run ``load_and_cache`` for ``instrument_symbol`` over 2020-07-01..2020-08-15.

    The returned frame is discarded; the call is made for its side effects.
    """
    window = (datetime.date(2020, 7, 1), datetime.date(2020, 8, 15))
    request_kwargs = gen_params(*window)
    load_and_cache(instrument_symbol, ib_data_fetcher, **request_kwargs)
def extract_kwarg(kwargs: Dict, key: str, default_value: Any = None) -> Optional[Any]:
    """Return ``kwargs[key]`` when the key is present, else ``default_value``.

    A key that is present with value ``None`` yields ``None``, not the default.
    """
    return kwargs.get(key, default_value)
data_fetcher: DataFetcherBase, **kwargs 35 | ) -> pd.DataFrame: 36 | """Fetch data from IB or postgres 37 | 38 | Args: 39 | instrument_symbol (str): [description] 40 | data_fetcher (DataFetcherBase): [description] 41 | kwargs (Dict): [description] 42 | 43 | Returns: 44 | pd.DataFrame: [description] 45 | """ 46 | engine = gen_engine() 47 | init_db() 48 | 49 | # turn kwargs into variables 50 | start_date = extract_kwarg(kwargs, "start_date", None) 51 | end_date = extract_kwarg(kwargs, "end_date", None) 52 | 53 | use_rth = extract_kwarg(kwargs, "use_rth", False) 54 | contract_date = extract_kwarg(kwargs, "contract_date") 55 | groupby_minutes = extract_kwarg(kwargs, "groupby_minutes", 1) 56 | return_tz = extract_kwarg(kwargs, "return_tz", TimezoneNames.US_EASTERN.value) 57 | 58 | what_to_show = WhatToShow.TRADES.value 59 | 60 | # this is temp - start 61 | dfs = [] 62 | # temp - stop 63 | 64 | calendar = Calendar.init_by_symbol(instrument_symbol) 65 | 66 | for dt in gen_datetime_range(start_date, end_date): 67 | # if market was closed - skip 68 | # if calendar.is_session(dt.date()) == False: 69 | # logger.debug(f"Market closed on {dt.date()} for {instrument_symbol}") 70 | 71 | # if db already has values - skip 72 | # if db_data_exists(engine, instrument_symbol, dt): 73 | # df = db_data_fetch(engine, instrument_symbol, dt) 74 | # else: 75 | if True: 76 | df = data_fetcher.request_instrument(instrument_symbol, dt, what_to_show) 77 | 78 | if df is None: 79 | continue 80 | 81 | df["symbol"] = instrument_symbol 82 | transform_rename_df_columns(df) 83 | # convert time from UTC to US/Eastern 84 | # df["ts"] = df["ts"].dt.tz_convert(TimezoneNames.UTC.value) 85 | # df["ts"] = df["ts"].dt.tz_localize(TimezoneNames.US_EASTERN.value) 86 | # apply_rth(df, calendar) 87 | db_insert_df_conflict_on_do_nothing(engine, df, "quote") 88 | 89 | if use_rth: 90 | df = reduce_to_only_rth(df) 91 | 92 | transform_set_index_ts(df) 93 | 94 | df = aggregate_bars(df, groupby_minutes) 95 | 
transform_ts_result_tz(df, return_tz) 96 | 97 | logger.debug(f"--- fetched {instrument_symbol} - {dt}") 98 | 99 | # temp - start 100 | dfs.append(df) 101 | # temp - stop 102 | 103 | logger.debug(f"finished {instrument_symbol}") 104 | 105 | df = pd.concat(dfs) 106 | df.sort_values(by=["ts"], inplace=True, ascending=True) 107 | df.reset_index(inplace=True) 108 | return df 109 | 110 | 111 | def gen_datetime_range(start, end) -> List[datetime.datetime]: 112 | result = [] 113 | span = end - start 114 | for i in range(span.days + 1): 115 | d = start + datetime.timedelta(days=i) 116 | result.append(datetime.date(d.year, d.month, d.day)) 117 | return result 118 | 119 | 120 | def reduce_to_only_rth(df) -> pd.DataFrame: 121 | return df[df["rth"] == True] 122 | 123 | 124 | def apply_rth(df: pd.DataFrame, calendar: TradingCalendar) -> None: 125 | calendar_name: str = "XNYS" 126 | calendar = get_calendar(calendar_name) 127 | 128 | def is_open(ts: pd.Timestamp): 129 | return calendar.is_open_on_minute(ts) 130 | 131 | df["rth"] = df.ts.apply(is_open) 132 | 133 | 134 | def aggregate_bars(df: pd.DataFrame, groupby_minutes: int) -> pd.DataFrame: 135 | if groupby_minutes == 1: 136 | return df 137 | 138 | # this method is only intended for 139 | # aggregating data at intervals of less than 1 day 140 | assert groupby_minutes < 1440 141 | 142 | groupby = f"{groupby_minutes}min" 143 | 144 | agg_expression = { 145 | "open": "first", 146 | "high": "max", 147 | "low": "min", 148 | "close": "last", 149 | "volume": "sum", 150 | } 151 | df = df.resample(groupby).agg(agg_expression) 152 | df.dropna(subset=["open", "close", "high", "low"], inplace=True) 153 | return df 154 | 155 | 156 | def transform_set_index_ts(df: pd.DataFrame) -> None: 157 | df.set_index("ts", inplace=True) 158 | 159 | 160 | def transform_rename_df_columns(df) -> None: 161 | df.rename(columns={"date": "ts", "barCount": "bar_count"}, inplace=True) 162 | 163 | 164 | def transform_ts_result_tz(df: pd.DataFrame, 
return_tz: str) -> None: 165 | return_tz_value = pytz.timezone(return_tz) 166 | df.index = df.index.tz_convert(return_tz_value) 167 | 168 | 169 | def clean_query(query: str) -> str: 170 | return query.replace("\n", "").replace("\t", "") 171 | 172 | 173 | def db_data_exists(engine, instrument_symbol: str, date: datetime.datetime) -> bool: 174 | date_str: str = date.strftime("%Y-%m-%d") 175 | 176 | query = f""" 177 | select count(*) 178 | from {Quote.__tablename__} 179 | where 180 | symbol = '{instrument_symbol}' 181 | and date(ts AT TIME ZONE '{TimezoneNames.US_EASTERN.value}') = date('{date_str}') 182 | """ 183 | with engine.connect() as con: 184 | result = con.execute(clean_query(query)) 185 | counts = [x for x in result] 186 | 187 | return counts != [(0,)] 188 | 189 | 190 | def db_data_fetch( 191 | engine, instrument_symbol: str, date: datetime.datetime 192 | ) -> pd.DataFrame: 193 | date_str: str = date.strftime("%Y-%m-%d") 194 | 195 | query = f""" 196 | select * 197 | from {Quote.__tablename__} 198 | where 199 | symbol = '{instrument_symbol}' 200 | and date(ts AT TIME ZONE '{TimezoneNames.US_EASTERN.value}') = date('{date_str}') 201 | """ 202 | return pd.read_sql(clean_query(query), con=engine) 203 | 204 | 205 | def db_data_fetch_between( 206 | engine, instrument_symbol: str, sd: datetime.datetime, ed: datetime.datetime 207 | ) -> pd.DataFrame: 208 | sd_str: str = sd.strftime("%Y-%m-%d") 209 | ed_str: str = ed.strftime("%Y-%m-%d") 210 | 211 | query = f""" 212 | select * 213 | from {Quote.__tablename__} 214 | where 215 | symbol = '{instrument_symbol}' 216 | and date(ts AT TIME ZONE '{TimezoneNames.US_EASTERN.value}') BETWEEN date('{sd}') AND date('{ed}') 217 | """ 218 | return pd.read_sql(clean_query(query), con=engine) 219 | 220 | 221 | def db_insert_df_conflict_on_do_nothing( 222 | engine, df: pd.DataFrame, table_name: str 223 | ) -> None: 224 | cols = __gen_cols(df) 225 | values = __gen_values(df) 226 | 227 | query_template = "INSERT INTO {table_name} 
({cols}) VALUES ({values});" 228 | 229 | query = sql.SQL(query_template).format( 230 | table_name=sql.Identifier(table_name), 231 | cols=sql.SQL(', ').join(map(sql.Identifier, cols)), 232 | values=sql.SQL(', ').join(sql.Placeholder() * len(cols)), 233 | ) 234 | 235 | with engine.connect() as con: 236 | with con.connection.cursor() as cur: 237 | for v in values: 238 | try: 239 | cur.execute(query, v) 240 | con.connection.commit() 241 | except UniqueViolation as e: 242 | cur.execute("rollback") 243 | con.connection.commit() 244 | except Exception as e: 245 | cur.execute("rollback") 246 | con.connection.commit() 247 | 248 | 249 | def __gen_values(df: pd.DataFrame) -> List[Tuple[str]]: 250 | """ 251 | return array of tuples for the df values 252 | """ 253 | return [tuple([str(xx) for xx in x]) for x in df.to_records(index=False)] 254 | 255 | 256 | def __gen_cols(df) -> List[str]: 257 | """ 258 | return column names 259 | """ 260 | return list(df.columns) 261 | -------------------------------------------------------------------------------- /lab/TradingDay.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import datetime\n", 12 | "\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import matplotlib.dates as mdates\n", 15 | "import matplotlib.gridspec as gridspec\n", 16 | "%matplotlib inline\n", 17 | "\n", 18 | "from ta_scanner.data import load_and_cache, IbDataFetcher, db_data_fetch_between\n", 19 | "from ta_scanner.models import gen_engine" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import nest_asyncio\n", 29 | "nest_asyncio.apply()\n", 30 | "\n", 31 | "colors = {\n", 32 | " \"pink\": \"#ff1493\"\n", 33 | "}" 34 | ] 35 | }, 36 | { 37 | 
"cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "ib_data_fetcher = IbDataFetcher()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "engine = gen_engine()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 141, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "symbol = \"/M6E\"\n", 61 | "\n", 62 | "sd = datetime.date(2020, 7, 30)\n", 63 | "ed = datetime.date(2020, 8, 13)\n", 64 | "\n", 65 | "df = db_data_fetch_between(engine, symbol, sd, ed)\n", 66 | "\n", 67 | "\n", 68 | "# df = load_and_cache(\n", 69 | "# symbol,\n", 70 | "# ib_data_fetcher,\n", 71 | "# start_date=datetime.date(2020, 7, 30),\n", 72 | "# end_date=datetime.date(2020, 8, 8),\n", 73 | "# # use_rth=False,\n", 74 | "# # groupby_minutes=1,\n", 75 | "# )\n", 76 | "# df.drop(['id'], axis=1, inplace=True)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 142, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/html": [ 87 | "
\n", 88 | "\n", 101 | "\n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " 
\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | "
idtssymbolopenclosehighlowaveragevolumebar_countrth
04812020-07-30 04:00:00+00:00/M6E1.181.181.181.181.182112False
14822020-07-30 04:01:00+00:00/M6E1.181.181.181.181.1864False
24832020-07-30 04:02:00+00:00/M6E1.181.181.181.181.1833False
34842020-07-30 04:03:00+00:00/M6E1.181.181.181.181.1887False
44852020-07-30 04:04:00+00:00/M6E1.181.181.181.181.1844False
....................................
8275120562020-08-07 03:55:00+00:00/M6E1.191.191.191.191.1976False
8276120572020-08-07 03:56:00+00:00/M6E1.191.191.191.191.1962False
8277120582020-08-07 03:57:00+00:00/M6E1.191.191.191.191.1986False
8278120592020-08-07 03:58:00+00:00/M6E1.191.191.191.191.19178False
8279120602020-08-07 03:59:00+00:00/M6E1.191.191.191.191.1952False
\n", 275 | "

8280 rows × 11 columns

\n", 276 | "
" 277 | ], 278 | "text/plain": [ 279 | " id ts symbol open close high low \\\n", 280 | "0 481 2020-07-30 04:00:00+00:00 /M6E 1.18 1.18 1.18 1.18 \n", 281 | "1 482 2020-07-30 04:01:00+00:00 /M6E 1.18 1.18 1.18 1.18 \n", 282 | "2 483 2020-07-30 04:02:00+00:00 /M6E 1.18 1.18 1.18 1.18 \n", 283 | "3 484 2020-07-30 04:03:00+00:00 /M6E 1.18 1.18 1.18 1.18 \n", 284 | "4 485 2020-07-30 04:04:00+00:00 /M6E 1.18 1.18 1.18 1.18 \n", 285 | "... ... ... ... ... ... ... ... \n", 286 | "8275 12056 2020-08-07 03:55:00+00:00 /M6E 1.19 1.19 1.19 1.19 \n", 287 | "8276 12057 2020-08-07 03:56:00+00:00 /M6E 1.19 1.19 1.19 1.19 \n", 288 | "8277 12058 2020-08-07 03:57:00+00:00 /M6E 1.19 1.19 1.19 1.19 \n", 289 | "8278 12059 2020-08-07 03:58:00+00:00 /M6E 1.19 1.19 1.19 1.19 \n", 290 | "8279 12060 2020-08-07 03:59:00+00:00 /M6E 1.19 1.19 1.19 1.19 \n", 291 | "\n", 292 | " average volume bar_count rth \n", 293 | "0 1.18 21 12 False \n", 294 | "1 1.18 6 4 False \n", 295 | "2 1.18 3 3 False \n", 296 | "3 1.18 8 7 False \n", 297 | "4 1.18 4 4 False \n", 298 | "... ... ... ... ... \n", 299 | "8275 1.19 7 6 False \n", 300 | "8276 1.19 6 2 False \n", 301 | "8277 1.19 8 6 False \n", 302 | "8278 1.19 17 8 False \n", 303 | "8279 1.19 5 2 False \n", 304 | "\n", 305 | "[8280 rows x 11 columns]" 306 | ] 307 | }, 308 | "execution_count": 142, 309 | "metadata": {}, 310 | "output_type": "execute_result" 311 | } 312 | ], 313 | "source": [ 314 | "df" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 143, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "df['date'] = df['ts'].dt.date" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 144, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "data": { 333 | "image/png": "\n", 334 | "text/plain": [ 335 | "
" 336 | ] 337 | }, 338 | "metadata": { 339 | "needs_background": "light" 340 | }, 341 | "output_type": "display_data" 342 | } 343 | ], 344 | "source": [ 345 | "fig = plt.figure(figsize=(25, 15))\n", 346 | "\n", 347 | "num_days = 4\n", 348 | "\n", 349 | "dates = []\n", 350 | "for k, v in df.groupby('date'):\n", 351 | " v.set_index('ts', inplace=True)\n", 352 | "\n", 353 | " if len(v.between_time('6:30', '9:00')) > 10:\n", 354 | " dates.append(k)\n", 355 | "\n", 356 | "gs = gridspec.GridSpec(num_days, 1, height_ratios=([1] * num_days))\n", 357 | "\n", 358 | "for i, date in enumerate(reversed(dates[-num_days:])):\n", 359 | " ax = plt.subplot(gs[i])\n", 360 | " idf = df.query(\"date == @date\").copy()\n", 361 | " idf.set_index('ts', inplace=True)\n", 362 | " idf = idf.between_time('4:30', '12:00').copy()\n", 363 | " \n", 364 | " idf.sort_index(inplace=True)\n", 365 | "\n", 366 | " ax.plot(idf.close, color='black')\n", 367 | " ax.grid(True)\n", 368 | " ax.set_ylabel(f\"{date}, {symbol}\")\n", 369 | " \n", 370 | " for index, row in idf[(idf.index.hour == 8) & (idf.index.minute == 30)].iterrows():\n", 371 | " ax.axvline(index, color=colors[\"pink\"], linestyle='solid')\n", 372 | " ax.axhline(row.close, color=colors[\"pink\"], linestyle='solid', lw=0.7)\n", 373 | " \n", 374 | "fig.tight_layout()" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 145, 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "data": { 384 | "text/plain": [ 385 | "" 386 | ] 387 | }, 388 | "execution_count": 145, 389 | "metadata": {}, 390 | "output_type": "execute_result" 391 | }, 392 | { 393 | "data": { 394 | "image/png": "\n", 395 | "text/plain": [ 396 | "
" 397 | ] 398 | }, 399 | "metadata": { 400 | "needs_background": "light" 401 | }, 402 | "output_type": "display_data" 403 | } 404 | ], 405 | "source": [ 406 | "# idf.query(\"6 < index.hour\").query(\"index.hour < 10\")\n", 407 | "idf.between_time('7:15', '10:45').close.plot()" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 146, 413 | "metadata": {}, 414 | "outputs": [ 415 | { 416 | "name": "stdout", 417 | "output_type": "stream", 418 | "text": [ 419 | "2020-07-30. 2020-07-30T04:00:00.000000000 - 2020-07-30T23:59:00.000000000. 1140\n", 420 | "2020-07-31. 2020-07-31T00:00:00.000000000 - 2020-07-31T18:59:00.000000000. 1140\n", 421 | "2020-08-03. 2020-08-03T00:00:00.000000000 - 2020-08-03T23:59:00.000000000. 1380\n", 422 | "2020-08-04. 2020-08-04T11:50:00.000000000 - 2020-08-04T11:49:00.000000000. 1380\n", 423 | "2020-08-05. 2020-08-05T04:08:00.000000000 - 2020-08-05T23:59:00.000000000. 1380\n", 424 | "2020-08-06. 2020-08-06T00:00:00.000000000 - 2020-08-06T23:59:00.000000000. 1380\n", 425 | "2020-08-07. 2020-08-07T00:00:00.000000000 - 2020-08-07T03:59:00.000000000. 240\n" 426 | ] 427 | } 428 | ], 429 | "source": [ 430 | "grouped = df.groupby('date').sum().reset_index()\n", 431 | "grouped.sort_values('date', ascending=False)\n", 432 | "\n", 433 | "for k, v in df.groupby('date'):\n", 434 | " if k.weekday() in [5, 6]:\n", 435 | " continue\n", 436 | " \n", 437 | " msg = f\"{k}. {v.ts.values[0]} - {v.ts.values[-1]}. 
{len(v.index)}\"\n", 438 | " print(msg)" 439 | ] 440 | } 441 | ], 442 | "metadata": { 443 | "kernelspec": { 444 | "display_name": "Python 3", 445 | "language": "python", 446 | "name": "python3" 447 | }, 448 | "language_info": { 449 | "codemirror_mode": { 450 | "name": "ipython", 451 | "version": 3 452 | }, 453 | "file_extension": ".py", 454 | "mimetype": "text/x-python", 455 | "name": "python", 456 | "nbconvert_exporter": "python", 457 | "pygments_lexer": "ipython3", 458 | "version": "3.7.7" 459 | } 460 | }, 461 | "nbformat": 4, 462 | "nbformat_minor": 4 463 | } 464 | --------------------------------------------------------------------------------