├── assets
│   ├── arch.png
│   └── chart.png
├── 1_Data
│   ├── stocks_history_target.h5
│   ├── data_prep.py
│   ├── Load_Hist_Data_Daily.ipynb
│   └── Load_Hist_Data_Daily_Public.ipynb
├── 5_SageMakerStudio
│   ├── init_schema.sh
│   ├── stocks_history_target.h5
│   ├── init_s3_bucket.sh
│   ├── docker
│   │   ├── model
│   │   │   ├── wsgi.py
│   │   │   ├── train
│   │   │   ├── nginx.conf
│   │   │   ├── predictor.py
│   │   │   ├── algo_live_feed.py
│   │   │   ├── serve
│   │   │   ├── algo_sim_feed.py
│   │   │   └── algo_base.py
│   │   ├── default-user-settings.json
│   │   ├── app-image-config-input.json
│   │   └── Dockerfile
│   ├── s3_bucket.yaml
│   ├── data_prep.py
│   ├── 2_Load_Data.ipynb
│   ├── schema.yaml
│   ├── 1_Setup.ipynb
│   └── 3_Backtest_Strategy.ipynb
├── 2_Strategies
│   ├── model
│   │   ├── model_long_short_predict.h5
│   │   ├── wsgi.py
│   │   ├── train
│   │   ├── nginx.conf
│   │   ├── predictor.py
│   │   ├── algo_live_feed.py
│   │   ├── serve
│   │   ├── algo_sim_feed.py
│   │   └── algo_base.py
│   ├── leaderboard.py
│   ├── docker-compose.yml.template
│   ├── init_model.py
│   ├── ecs-params.yml.template
│   ├── update_config.py
│   ├── Dockerfile
│   ├── build_and_push.sh
│   ├── Run_Strategy_ECS.ipynb
│   ├── deploy.sh
│   ├── Strategy_SMA.ipynb
│   ├── Strategy_Breakout.ipynb
│   └── Strategy_ML_Forecast.ipynb
├── 3_Models
│   ├── model
│   │   ├── wsgi.py
│   │   ├── nginx.conf
│   │   ├── serve
│   │   └── predictor.py
│   ├── Dockerfile
│   └── build_and_push.sh
├── 4_Kinesis
│   ├── model
│   │   ├── wsgi.py
│   │   ├── train
│   │   ├── nginx.conf
│   │   ├── predictor.py
│   │   ├── algo_live_feed.py
│   │   ├── serve
│   │   ├── algo_sim_feed.py
│   │   └── algo_base.py
│   ├── init_model.py
│   ├── update_config_kinesis.py
│   ├── Dockerfile
│   ├── build_and_push.sh
│   └── algo-kinesis.yaml
├── CODE_OF_CONDUCT.md
├── LICENSE
├── CONTRIBUTING.md
├── README.md
└── 0_Setup
    └── algo-reference.yaml
/assets/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/assets/arch.png
--------------------------------------------------------------------------------
/assets/chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/assets/chart.png
--------------------------------------------------------------------------------
/1_Data/stocks_history_target.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/1_Data/stocks_history_target.h5
--------------------------------------------------------------------------------
/5_SageMakerStudio/init_schema.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | aws cloudformation deploy --template-file=schema.yaml --stack-name=algotrading-schema
--------------------------------------------------------------------------------
/5_SageMakerStudio/stocks_history_target.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/5_SageMakerStudio/stocks_history_target.h5
--------------------------------------------------------------------------------
/2_Strategies/model/model_long_short_predict.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/2_Strategies/model/model_long_short_predict.h5
--------------------------------------------------------------------------------
/5_SageMakerStudio/init_s3_bucket.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | aws
cloudformation deploy --template-file=s3_bucket.yaml --stack-name=algotrading-s3bucket --capabilities=CAPABILITY_IAM
--------------------------------------------------------------------------------
/3_Models/model/wsgi.py:
--------------------------------------------------------------------------------
1 | import predictor as myapp
2 | 
3 | # This is just a simple wrapper for gunicorn to find your app.
4 | # If you want to change the algorithm file, simply change "predictor" above to the
5 | # new file.
6 | 
7 | app = myapp.app
8 | 
--------------------------------------------------------------------------------
/4_Kinesis/model/wsgi.py:
--------------------------------------------------------------------------------
1 | import predictor as myapp
2 | 
3 | # This is just a simple wrapper for gunicorn to find your app.
4 | # If you want to change the algorithm file, simply change "predictor" above to the
5 | # new file.
6 | 
7 | app = myapp.app
8 | 
--------------------------------------------------------------------------------
/2_Strategies/model/wsgi.py:
--------------------------------------------------------------------------------
1 | import predictor as myapp
2 | 
3 | # This is just a simple wrapper for gunicorn to find your app.
4 | # If you want to change the algorithm file, simply change "predictor" above to the
5 | # new file.
6 | 
7 | app = myapp.app
8 | 
--------------------------------------------------------------------------------
/5_SageMakerStudio/docker/model/wsgi.py:
--------------------------------------------------------------------------------
1 | import predictor as myapp
2 | 
3 | # This is just a simple wrapper for gunicorn to find your app.
4 | # If you want to change the algorithm file, simply change "predictor" above to the
5 | # new file.
6 | 
7 | app = myapp.app
8 | 
--------------------------------------------------------------------------------
/2_Strategies/leaderboard.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | 
4 | try:
5 |     with open('algo_event.config', 'r') as f:
6 |         event_config = json.load(f)
7 |         print(event_config['leaderboard'])
8 | except:
9 |     print("No leaderboard for this event")
10 | 
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | -------------------------------------------------------------------------------- /2_Strategies/docker-compose.yml.template: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | $SERVICE: 4 | image: $IMAGE 5 | environment: 6 | - AWS_DEFAULT_REGION=$REGION 7 | - ALGO_CMD=train 8 | logging: 9 | driver: awslogs 10 | options: 11 | awslogs-group: algo 12 | awslogs-region: $REGION 13 | awslogs-stream-prefix: $SERVICE -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/default-user-settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "DefaultUserSettings": { 3 | "KernelGatewayAppSettings": { 4 | "CustomImages": [ 5 | { 6 | "ImageName": "algotrading", 7 | "AppImageConfigName": "algotrading-config" 8 | } 9 | ] 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /2_Strategies/init_model.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | algo_name=sys.argv[1] 5 | 6 | Path('local/'+algo_name+'/input/data/training').mkdir(parents=True, exist_ok=True) 7 | Path('local/'+algo_name+'/input/config').mkdir(parents=True, exist_ok=True) 8 | Path('local/'+algo_name+'/model').mkdir(parents=True, exist_ok=True) 9 | 10 | model_name=algo_name 11 | print(algo_name) -------------------------------------------------------------------------------- /4_Kinesis/init_model.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | algo_name=sys.argv[1] 5 | 6 | Path('local/'+algo_name+'/input/data/training').mkdir(parents=True, exist_ok=True) 7 | Path('local/'+algo_name+'/input/config').mkdir(parents=True, exist_ok=True) 8 | Path('local/'+algo_name+'/model').mkdir(parents=True, exist_ok=True) 9 | 10 | model_name=algo_name 11 | print(algo_name) -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/app-image-config-input.json: -------------------------------------------------------------------------------- 1 | { 2 | "AppImageConfigName": "algotrading-config", 3 | "KernelGatewayImageConfig": { 4 | "KernelSpecs": [ 5 | { 6 | "Name": "python3", 7 | "DisplayName": "tensorflow 2.9.1-py3.9-cpu" 8 | } 9 | ], 10 | "FileSystemConfig": { 11 | "MountPath": "/root", 12 | "DefaultUid": 0, 13 | "DefaultGid": 0 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /4_Kinesis/model/train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import backtrader as bt 4 | from algo_base import * 5 | import importlib 6 | import json 7 | 8 | prefix = '/opt/ml/' 9 | hyper_params_path = os.path.join(prefix, 'input/config/hyperparameters.json') 10 | with open(hyper_params_path, 'r') as f: 11 | config = json.load(f) 12 | 13 | algo_name=config['algo_name'] 14 | print("import:%s" % algo_name) 15 | 16 | cls = getattr(importlib.import_module(algo_name), 'MyStrategy') 17 | print(cls) 18 | 19 | algo=AlgoStrategy(config,cls) 20 | algo.run() -------------------------------------------------------------------------------- /2_Strategies/model/train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import backtrader as bt 4 | from algo_base import * 5 | 
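# Illustrative note (not part of the original file): this entrypoint reads
# /opt/ml/input/config/hyperparameters.json below. The keys shown here mirror what
# update_config.py writes; a strategy notebook may add its own parameters, and the
# values are placeholders only:
#
#   {"algo_name": "algo_sma", "user": "user", "account": "123456789012", "region": "us-east-1"}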
import importlib 6 | import json 7 | 8 | prefix = '/opt/ml/' 9 | hyper_params_path = os.path.join(prefix, 'input/config/hyperparameters.json') 10 | with open(hyper_params_path, 'r') as f: 11 | config = json.load(f) 12 | 13 | algo_name=config['algo_name'] 14 | print("import:%s" % algo_name) 15 | 16 | cls = getattr(importlib.import_module(algo_name), 'MyStrategy') 17 | print(cls) 18 | 19 | algo=AlgoStrategy(config,cls) 20 | algo.run() -------------------------------------------------------------------------------- /2_Strategies/ecs-params.yml.template: -------------------------------------------------------------------------------- 1 | version: 1 2 | task_definition: 3 | task_execution_role: $TASK_ROLE 4 | task_role_arn: $ROLE_ARN 5 | ecs_network_mode: awsvpc 6 | task_size: 7 | mem_limit: 512 8 | cpu_limit: 256 9 | run_params: 10 | network_configuration: 11 | awsvpc_configuration: 12 | subnets: 13 | - $SUBNET1 14 | - $SUBNET2 15 | security_groups: 16 | - $SG 17 | assign_public_ip: DISABLED 18 | service_discovery: 19 | private_dns_namespace: 20 | vpc: $VPC 21 | name: algo 22 | -------------------------------------------------------------------------------- /3_Models/model/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | 27 | location ~ ^/(ping|invocations) { 28 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 29 | proxy_set_header Host $http_host; 30 | proxy_redirect off; 31 | proxy_pass http://gunicorn; 32 | } 33 | 34 | location / { 35 | return 404 "{}"; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /2_Strategies/model/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | 27 | location ~ ^/(ping|invocations) { 28 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 29 | proxy_set_header Host $http_host; 30 | proxy_redirect off; 31 | proxy_pass http://gunicorn; 32 | } 33 | 34 | location / { 35 | return 404 "{}"; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /4_Kinesis/model/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 
| 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | 27 | location ~ ^/(ping|invocations) { 28 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 29 | proxy_set_header Host $http_host; 30 | proxy_redirect off; 31 | proxy_pass http://gunicorn; 32 | } 33 | 34 | location / { 35 | return 404 "{}"; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import backtrader as bt 4 | from algo_base import * 5 | import importlib 6 | import json 7 | import boto3 8 | 9 | prefix = '/opt/ml/' 10 | hyper_params_path = os.path.join(prefix, 'input/config/hyperparameters.json') 11 | with open(hyper_params_path, 'r') as f: 12 | config = json.load(f) 13 | 14 | algo_name=config['algo_name'] 15 | print("import:%s" % algo_name) 16 | 17 | if 's3' in config: 18 | try: 19 | s3_bucket=config['s3'] 20 | print("s3_bucket=%s" % s3_bucket) 21 | s3 = boto3.client('s3') 22 | s3.download_file(s3_bucket, algo_name+'/'+algo_name+'.py', '/opt/program/'+algo_name+'.py') 23 | except Exception as e: 24 | print(e) 25 | 26 | cls = getattr(importlib.import_module(algo_name), 'MyStrategy') 27 | print(cls) 28 | 29 | algo=AlgoStrategy(config,cls) 30 | algo.run() -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | 27 | location ~ ^/(ping|invocations) { 28 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 29 | proxy_set_header Host $http_host; 30 | proxy_redirect off; 31 | proxy_pass http://gunicorn; 32 | } 33 | 34 | location / { 35 | return 404 "{}"; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /2_Strategies/update_config.py: -------------------------------------------------------------------------------- 1 | # Update configurations 2 | import json 3 | import sys 4 | import boto3 5 | import sagemaker as sage 6 | 7 | sess = sage.Session() 8 | 9 | algo_name=sys.argv[1] 10 | conf_file='local/'+algo_name+'/input/config/hyperparameters.json' 11 | with open(conf_file, 'r') as f: 12 | config = json.load(f) 13 | 14 | config["algo_name"]=algo_name 15 | 16 | account=boto3.client('sts').get_caller_identity().get('Account') 17 | 18 | if 'user' not in config: 19 | config['user']='user' 20 | config["account"] = account 21 | config["region"]=sess.boto_session.region_name 22 | 23 | #try: 24 | # s3 = boto3.client('s3') 25 | # s3.download_file('', 'algo_event.config', 'algo_event.config') 26 | # with open('algo_event.config', 'r') as f: 27 | # event_config = json.load(f) 28 | # config['submitUrl']=event_config['submitUrl'] 29 | #except: 30 | # print("Skipped event config") 31 | 32 | with open(conf_file, "w") as text_file: 33 | text_file.write(json.dumps(config)) 34 | 35 | print("config=%s" % json.dumps(config)) -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM 763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:2.9.1-cpu-py39-ubuntu20.04-sagemaker 2 | 3 | RUN pip install ipykernel && \ 4 | python -m ipykernel install --sys-prefix 5 | 6 | RUN wget https://sourceforge.net/projects/ta-lib/files/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz && tar -xzf ta-lib-0.4.0-src.tar.gz && cd ta-lib/ && ./configure --prefix=/usr && make && make install && cd ../ && rm -rf ta-lib && rm ta-lib-0.4.0-src.tar.gz 7 | 8 | # Install all of the packages 9 | RUN pip install pandas 10 | RUN pip install flask 11 | RUN pip install gevent 12 | RUN pip install backtrader 13 | RUN pip install matplotlib==3.2.2 14 | RUN pip install ta-lib 15 | RUN pip install pyathena 16 | 17 | RUN mkdir -p /opt/ml/input/data/training 18 | RUN mkdir -p /opt/ml/input/config 19 | RUN mkdir -p /opt/ml/model 20 | 21 | # Env Variables 22 | ENV PYTHONUNBUFFERED=TRUE 23 | ENV PYTHONDONTWRITEBYTECODE=TRUE 24 | ENV PATH="/opt/program:${PATH}" 25 | 26 | # Set up the program in the image 27 | COPY model /opt/program 28 | WORKDIR /opt/program -------------------------------------------------------------------------------- /4_Kinesis/update_config_kinesis.py: -------------------------------------------------------------------------------- 1 | # Update configurations 2 | import json 3 | import sys 4 | import boto3 5 | import sagemaker as sage 6 | 7 | sess = sage.Session() 8 | 9 | algo_name=sys.argv[1] 10 | kinesis_stream=sys.argv[2] 11 | sym=sys.argv[3] 12 | 13 | conf_file='local/'+algo_name+'/input/config/hyperparameters.json' 14 | with open(conf_file, 'r') as f: 15 | config = json.load(f) 16 | 17 | config["algo_name"]=algo_name 18 | config["kinesis_stream"]=kinesis_stream 19 | config["sym"]=sym 20 | 21 | account=boto3.client('sts').get_caller_identity().get('Account') 22 | 23 | if 'user' not in config: 24 | config['user']='user' 25 | 
config["account"] = account 26 | config["region"]=sess.boto_session.region_name 27 | 28 | #try: 29 | # s3 = boto3.client('s3') 30 | # s3.download_file('', 'algo_event.config', 'algo_event.config') 31 | # with open('algo_event.config', 'r') as f: 32 | # event_config = json.load(f) 33 | # config['submitUrl']=event_config['submitUrl'] 34 | #except: 35 | # print("Skipped event config") 36 | 37 | with open(conf_file, "w") as text_file: 38 | text_file.write(json.dumps(config)) 39 | 40 | print("config=%s" % json.dumps(config)) -------------------------------------------------------------------------------- /4_Kinesis/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.1.0rc2-py3 2 | 3 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 4 | wget \ 5 | python3 \ 6 | nginx \ 7 | ca-certificates \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | RUN wget https://sourceforge.net/projects/ta-lib/files/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz && tar -xzf ta-lib-0.4.0-src.tar.gz && cd ta-lib/ && ./configure --prefix=/usr && make && make install && cd ../ && rm -rf ta-lib && rm ta-lib-0.4.0-src.tar.gz 11 | 12 | # Install all of the packages 13 | RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py 14 | RUN pip install numpy 15 | RUN pip install scipy 16 | RUN pip install scikit-learn 17 | RUN pip install pandas 18 | RUN pip install flask 19 | RUN pip install gevent 20 | RUN pip install gunicorn 21 | RUN pip install tensorflow==2.2.0 22 | RUN pip install keras 23 | RUN pip install backtrader 24 | RUN pip install matplotlib==3.2.2 25 | RUN pip install ta-lib 26 | RUN pip install boto3 27 | 28 | # Env Variables 29 | ENV PYTHONUNBUFFERED=TRUE 30 | ENV PYTHONDONTWRITEBYTECODE=TRUE 31 | ENV PATH="/opt/program:${PATH}" 32 | 33 | # Set up the program in the image 34 | COPY model /opt/program 35 | WORKDIR /opt/program -------------------------------------------------------------------------------- /2_Strategies/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.1.0rc2-py3 2 | 3 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 4 | wget \ 5 | python3 \ 6 | nginx \ 7 | ca-certificates \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | RUN wget https://sourceforge.net/projects/ta-lib/files/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz && tar -xzf ta-lib-0.4.0-src.tar.gz && cd ta-lib/ && ./configure --prefix=/usr && make && make install && cd ../ && rm -rf ta-lib && rm ta-lib-0.4.0-src.tar.gz 11 | 12 | # Install all of the packages 13 | # RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py 14 | RUN pip install numpy 15 | RUN pip install scipy 16 | RUN pip install scikit-learn 17 | RUN pip install pandas 18 | RUN pip install flask 19 | RUN pip install gevent 20 | RUN pip install gunicorn 21 | RUN pip install tensorflow==2.2.0 22 | RUN pip install keras 23 | RUN pip install backtrader 24 | RUN pip install matplotlib==3.2.2 25 | RUN pip install ta-lib 26 | RUN pip install boto3 27 | 28 | # Env Variables 29 | ENV PYTHONUNBUFFERED=TRUE 30 | ENV PYTHONDONTWRITEBYTECODE=TRUE 31 | ENV PATH="/opt/program:${PATH}" 32 | 33 | # Set up the program in the image 34 | COPY model /opt/program 35 | WORKDIR /opt/program 36 | -------------------------------------------------------------------------------- /3_Models/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM 
tensorflow/tensorflow:2.1.0rc2-py3 2 | 3 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 4 | wget \ 5 | python3 \ 6 | nginx \ 7 | ca-certificates \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | RUN wget https://sourceforge.net/projects/ta-lib/files/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz && tar -xzf ta-lib-0.4.0-src.tar.gz && cd ta-lib/ && ./configure --prefix=/usr && make && make install && cd ../ && rm -rf ta-lib && rm ta-lib-0.4.0-src.tar.gz 11 | 12 | # Install all of the packages 13 | # RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py 14 | RUN pip install numpy 15 | RUN pip install scipy 16 | RUN pip install scikit-learn 17 | RUN pip install pandas 18 | RUN pip install flask 19 | RUN pip install gevent 20 | RUN pip install gunicorn 21 | RUN pip install tensorflow==2.2.0 22 | RUN pip install keras 23 | RUN pip install backtrader 24 | RUN pip install matplotlib==3.2.2 25 | RUN pip install ta-lib 26 | RUN pip install boto3 27 | 28 | # Env Variables 29 | ENV PYTHONUNBUFFERED=TRUE 30 | ENV PYTHONDONTWRITEBYTECODE=TRUE 31 | ENV PATH="/opt/program:${PATH}" 32 | 33 | # Set up the program in the image 34 | COPY model /opt/program 35 | WORKDIR /opt/program 36 | -------------------------------------------------------------------------------- /3_Models/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script shows how to build the Docker image and push it to ECR to be ready for use 4 | # by SageMaker. 5 | 6 | image=$1 7 | 8 | chmod +x model/train 9 | chmod +x model/serve 10 | 11 | # Get the account number associated with the current IAM credentials 12 | account=$(aws sts get-caller-identity --query Account --output text) 13 | 14 | if [ $? -ne 0 ] 15 | then 16 | exit 255 17 | fi 18 | 19 | 20 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 21 | region=$(aws configure get region) 22 | region=${region:-us-east-1} 23 | 24 | 25 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image}:latest" 26 | 27 | # If the repository doesn't exist in ECR, create it. 28 | 29 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 30 | 31 | if [ $? -ne 0 ] 32 | then 33 | aws ecr create-repository --repository-name "${image}" > /dev/null 34 | fi 35 | 36 | # Get the login command from ECR and execute it directly 37 | $(aws ecr get-login --region ${region} --no-include-email) 38 | 39 | # Build the docker image locally with the image name and then push it to ECR 40 | # with the full name. 41 | 42 | docker build -t ${image} . 43 | docker tag ${image} ${fullname} 44 | 45 | docker push ${fullname} -------------------------------------------------------------------------------- /2_Strategies/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script shows how to build the Docker image and push it to ECR to be ready for use 4 | # by SageMaker. 5 | 6 | image=$1 7 | 8 | chmod +x model/train 9 | chmod +x model/serve 10 | 11 | # Get the account number associated with the current IAM credentials 12 | account=$(aws sts get-caller-identity --query Account --output text) 13 | 14 | if [ $? 
-ne 0 ] 15 | then 16 | exit 255 17 | fi 18 | 19 | 20 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 21 | region=$(aws configure get region) 22 | region=${region:-us-east-1} 23 | 24 | 25 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image}:latest" 26 | 27 | # If the repository doesn't exist in ECR, create it. 28 | 29 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 30 | 31 | if [ $? -ne 0 ] 32 | then 33 | aws ecr create-repository --repository-name "${image}" > /dev/null 34 | fi 35 | 36 | # Get the login command from ECR and execute it directly 37 | $(aws ecr get-login --region ${region} --no-include-email) 38 | 39 | # Build the docker image locally with the image name and then push it to ECR 40 | # with the full name. 41 | 42 | docker build -t ${image} . 43 | docker tag ${image} ${fullname} 44 | 45 | docker push ${fullname} -------------------------------------------------------------------------------- /4_Kinesis/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script shows how to build the Docker image and push it to ECR to be ready for use 4 | # by SageMaker. 5 | 6 | image=$1 7 | 8 | chmod +x model/train 9 | chmod +x model/serve 10 | 11 | # Get the account number associated with the current IAM credentials 12 | account=$(aws sts get-caller-identity --query Account --output text) 13 | 14 | if [ $? -ne 0 ] 15 | then 16 | exit 255 17 | fi 18 | 19 | 20 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 21 | region=$(aws configure get region) 22 | region=${region:-us-east-1} 23 | 24 | 25 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image}:latest" 26 | 27 | # If the repository doesn't exist in ECR, create it. 28 | 29 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 30 | 31 | if [ $? -ne 0 ] 32 | then 33 | aws ecr create-repository --repository-name "${image}" > /dev/null 34 | fi 35 | 36 | # Get the login command from ECR and execute it directly 37 | $(aws ecr get-login --region ${region} --no-include-email) 38 | 39 | # Build the docker image locally with the image name and then push it to ECR 40 | # with the full name. 41 | 42 | docker build -t ${image} . 43 | docker tag ${image} ${fullname} 44 | 45 | docker push ${fullname} -------------------------------------------------------------------------------- /2_Strategies/Run_Strategy_ECS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Please run through the strategy notebook first. This will create the container image." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "strategy_image=\"algo_daily_breakout\"" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "!./deploy.sh $strategy_image" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "!./ecs-cli compose up" 35 | ] 36 | } 37 | ], 38 | "metadata": { 39 | "kernelspec": { 40 | "display_name": "conda_python3", 41 | "language": "python", 42 | "name": "conda_python3" 43 | }, 44 | "language_info": { 45 | "codemirror_mode": { 46 | "name": "ipython", 47 | "version": 3 48 | }, 49 | "file_extension": ".py", 50 | "mimetype": "text/x-python", 51 | "name": "python", 52 | "nbconvert_exporter": "python", 53 | "pygments_lexer": "ipython3", 54 | "version": "3.6.10" 55 | } 56 | }, 57 | "nbformat": 4, 58 | "nbformat_minor": 4 59 | } 60 | -------------------------------------------------------------------------------- /5_SageMakerStudio/s3_bucket.yaml: -------------------------------------------------------------------------------- 1 | Resources: 2 | dataBucket: 3 | Metadata: 4 | 'aws:copilot:description': 'An Amazon S3 bucket to store and retrieve objects' 5 | Type: AWS::S3::Bucket 6 | DeletionPolicy: Retain 7 | Properties: 8 | VersioningConfiguration: 9 | Status: Enabled 10 | AccessControl: Private 11 | BucketEncryption: 12 | ServerSideEncryptionConfiguration: 13 | - ServerSideEncryptionByDefault: 14 | SSEAlgorithm: AES256 15 | PublicAccessBlockConfiguration: 16 | BlockPublicAcls: true 17 | BlockPublicPolicy: true 18 | 19 | dataBucketPolicy: 20 | Metadata: 21 | 'aws:copilot:description': 'A bucket policy to deny unencrypted access to the bucket and its contents' 22 | Type: AWS::S3::BucketPolicy 23 | DeletionPolicy: Retain 24 | Properties: 25 | PolicyDocument: 26 | Version: 2012-10-17 27 | Statement: 28 | - Sid: ForceHTTPS 29 | Effect: Deny 30 | Principal: '*' 31 | Action: 's3:*' 32 | Resource: 33 | - !Sub ${ dataBucket.Arn}/* 34 | - !Sub ${ dataBucket.Arn} 35 | Condition: 36 | Bool: 37 | "aws:SecureTransport": false 38 | Bucket: !Ref dataBucket 39 | 40 | Outputs: 41 | databucket: 42 | Description: "The name of a user-defined bucket." 
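# Illustrative note (not part of the original template): other stacks can consume
# this export with Fn::ImportValue "algotrading-s3bucket", and scripts can look the
# bucket name up with `aws cloudformation list-exports`.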
43 | Value: !Ref dataBucket 44 | Export: 45 | Name: "algotrading-s3bucket" -------------------------------------------------------------------------------- /5_SageMakerStudio/data_prep.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import datetime 3 | import pandas as pd 4 | import sys 5 | 6 | START_DATE = '2012-08-13' 7 | END_DATE = '2017-08-11' 8 | DATE_FORMAT = '%Y-%m-%d' 9 | START_DATETIME = datetime.datetime.strptime(START_DATE, DATE_FORMAT) 10 | 11 | def read_stock_history(filepath): 12 | """ Read data from extracted h5 13 | Args: 14 | filepath: path of file 15 | Returns: 16 | history: 17 | abbreviation: 18 | """ 19 | with h5py.File(filepath, 'r') as f: 20 | history = f['history'][:] 21 | abbreviation = f['abbreviation'][:].tolist() 22 | abbreviation = [abbr.decode('utf-8') for abbr in abbreviation] 23 | return history, abbreviation 24 | 25 | def index_to_date(index): 26 | return (START_DATETIME + datetime.timedelta(index)).strftime(DATE_FORMAT) 27 | 28 | def save_stock_data(stk,history,abbreviation): 29 | p=abbreviation.index(stk) 30 | h=history[p] 31 | tData=[] 32 | hData=['dt','sym','open','high','low','close','vol'] 33 | for x in range(0,h.shape[0]): 34 | row=[] 35 | row.append(index_to_date(x)) 36 | row.append(stk) 37 | v=h[x] 38 | for y in range(0,len(v)): 39 | row.append(v[y]) 40 | tData.append(row) 41 | df=pd.DataFrame(tData,columns=hData) 42 | df.set_index(pd.DatetimeIndex(df['dt']), inplace=True) 43 | del df['dt'] 44 | df.to_csv(stk+".csv") 45 | print("store:"+stk) 46 | return df 47 | 48 | stk=sys.argv[1] 49 | history,abbreviation=read_stock_history('stocks_history_target.h5') 50 | save_stock_data(stk,history,abbreviation) -------------------------------------------------------------------------------- /1_Data/data_prep.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import datetime 3 | import pandas as pd 4 | import sys 5 | 6 | START_DATE = '2012-08-13' 7 | END_DATE = '2017-08-11' 8 | DATE_FORMAT = '%Y-%m-%d' 9 | START_DATETIME = datetime.datetime.strptime(START_DATE, DATE_FORMAT) 10 | 11 | def read_stock_history(filepath): 12 | """ Read data from extracted h5 13 | Args: 14 | filepath: path of file 15 | Returns: 16 | history: 17 | abbreviation: 18 | """ 19 | with h5py.File(filepath, 'r') as f: 20 | history = f['history'][:] 21 | abbreviation = f['abbreviation'][:].tolist() 22 | abbreviation = [abbr.decode('utf-8') for abbr in abbreviation] 23 | return history, abbreviation 24 | 25 | def index_to_date(index): 26 | return (START_DATETIME + datetime.timedelta(index)).strftime(DATE_FORMAT) 27 | 28 | def save_stock_data(stk,history,abbreviation): 29 | p=abbreviation.index(stk) 30 | h=history[p] 31 | tData=[] 32 | hData=['dt','sym','open','high','low','close','vol'] 33 | for x in range(0,h.shape[0]): 34 | row=[] 35 | row.append(index_to_date(x)) 36 | row.append(stk) 37 | v=h[x] 38 | for y in range(0,len(v)): 39 | row.append(v[y]) 40 | tData.append(row) 41 | df=pd.DataFrame(tData,columns=hData) 42 | df.set_index(pd.DatetimeIndex(df['dt']), inplace=True) 43 | del df['dt'] 44 | df.to_csv("../1_Data/"+stk+".csv") 45 | print("store:"+stk) 46 | return df 47 | 48 | stk=sys.argv[1] 49 | history,abbreviation=read_stock_history('../1_Data/stocks_history_target.h5') 50 | save_stock_data(stk,history,abbreviation) -------------------------------------------------------------------------------- /2_Strategies/deploy.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | image=$1 3 | service=$1 4 | 5 | # Get the account number associated with the current IAM credentials 6 | account=$(aws sts get-caller-identity --query Account --output text) 7 | 8 | if [ $? -ne 0 ] 9 | then 10 | exit 255 11 | fi 12 | 13 | # Get the region defined in the current configuration 14 | region=$(aws configure get region) 15 | region=${region:-us-east-1} 16 | 17 | echo "create docker-compose.yml" 18 | 19 | cp docker-compose.yml.template docker-compose.yml 20 | sed -i "s/\$ENV/${env}/g" docker-compose.yml 21 | sed -i "s/\$REGION/${region}/g" docker-compose.yml 22 | sed -i "s/\$IMAGE/${account}.dkr.ecr.${region}.amazonaws.com\/${image}/g" docker-compose.yml 23 | sed -i "s/\$SERVICE/${service}/g" docker-compose.yml 24 | 25 | AWS_EXPORTS=`aws cloudformation list-exports` 26 | 27 | VPC=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-VPC") | .Value'` 28 | SUBNET1=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-PrivateSubnet1") | .Value'` 29 | SUBNET2=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-PrivateSubnet2") | .Value'` 30 | SG=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-ECSHostSecurityGroup") | .Value'` 31 | TASK_ROLE=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-ECSTaskExecutionRole") | .Value'` 32 | ROLE_ARN=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-AlgoExecutionRole-ARN") | .Value'` 33 | CLUSTER=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-ECSCluster") | .Value'` 34 | 35 | echo "create ecs-params.yml" 36 | 37 | cp ecs-params.yml.template ecs-params.yml 38 | sed -i "s/\$VPC/${VPC}/g" ecs-params.yml 39 | sed -i "s/\$SUBNET1/${SUBNET1}/g" ecs-params.yml 40 | sed -i "s/\$SUBNET2/${SUBNET2}/g" ecs-params.yml 41 | sed -i "s/\$SG/${SG}/g" ecs-params.yml 42 | sed -i "s/\$TASK_ROLE/${TASK_ROLE}/g" ecs-params.yml 43 | sed -i "s@\$ROLE_ARN@${ROLE_ARN}@g" ecs-params.yml 44 | 45 | if [ ! -f "ecs-cli" ] ; then 46 | curl -Lo ecs-cli https://amazon-ecs-cli.s3.amazonaws.com/ecs-cli-linux-amd64-latest 47 | chmod 777 ecs-cli 48 | echo "ecs-cli installed" 49 | fi 50 | 51 | ./ecs-cli configure --region ${region} --cluster ${CLUSTER} --default-launch-type FARGATE -------------------------------------------------------------------------------- /2_Strategies/model/predictor.py: -------------------------------------------------------------------------------- 1 | # This is the file that implements a flask server to do inferences. It's the 2 | # file that you will modify to implement the scoring for your own algorithm. 3 | from __future__ import print_function 4 | 5 | import os 6 | from io import StringIO 7 | #import StringIO 8 | import flask 9 | 10 | import tensorflow as tf 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from keras import backend as K 15 | from keras.models import load_model 16 | from sklearn.preprocessing import StandardScaler 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | 19 | prefix = '/opt/ml/' 20 | model_path = os.path.join(prefix, 'model') 21 | 22 | # A singleton for holding the model. This simply loads the model and holds it. 23 | # It has a predict function that does a prediction based on the model and the 24 | # input data. 
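# Minimal usage sketch (illustrative, not part of the original sample): the model is
# loaded lazily on the first call, and predict() returns one prediction per row of
# the supplied DataFrame, e.g.
#
#   features = pd.DataFrame([[0.1, -0.2, 0.3]])   # placeholder feature values
#   predictions = ScoringService.predict(features)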
25 | class ScoringService(object): 26 | model = None # Where we keep the model when it's loaded 27 | 28 | @classmethod 29 | def get_model(cls): 30 | """ 31 | Get the model object for this instance, 32 | loading it if it's not already loaded. 33 | """ 34 | if cls.model is None: 35 | cls.model = load_model( 36 | os.path.join(model_path, 'model.h5')) 37 | return cls.model 38 | 39 | @classmethod 40 | def predict(cls, input): 41 | """For the input, do the predictions and return them. 42 | 43 | Args: 44 | input (a pandas dataframe): The data on which to do the 45 | predictions. 46 | 47 | There will be one prediction per row in the dataframe 48 | """ 49 | sess = K.get_session() 50 | with sess.graph.as_default(): 51 | clf = cls.get_model() 52 | return clf.predict(input) 53 | 54 | # The flask app for serving predictions 55 | app = flask.Flask(__name__) 56 | 57 | 58 | @app.route('/ping', methods=['GET']) 59 | def ping(): 60 | """ 61 | Determine if the container is working and healthy. 62 | In this sample container, we declare it healthy if we can load the model 63 | successfully. 64 | """ 65 | # Health check -- You can insert a health check here 66 | health = True 67 | status = 200 if health else 404 68 | return flask.Response( 69 | response='{"status":"ok"}', 70 | status=status, 71 | mimetype='application/json') 72 | 73 | 74 | @app.route('/invocations', methods=['POST']) 75 | def transformation(): 76 | return flask.Response( 77 | response='{"status":"ok"}', 78 | status=status, 79 | mimetype='application/json') 80 | -------------------------------------------------------------------------------- /4_Kinesis/model/predictor.py: -------------------------------------------------------------------------------- 1 | # This is the file that implements a flask server to do inferences. It's the 2 | # file that you will modify to implement the scoring for your own algorithm. 3 | from __future__ import print_function 4 | 5 | import os 6 | from io import StringIO 7 | #import StringIO 8 | import flask 9 | 10 | import tensorflow as tf 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from keras import backend as K 15 | from keras.models import load_model 16 | from sklearn.preprocessing import StandardScaler 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | 19 | prefix = '/opt/ml/' 20 | model_path = os.path.join(prefix, 'model') 21 | 22 | # A singleton for holding the model. This simply loads the model and holds it. 23 | # It has a predict function that does a prediction based on the model and the 24 | # input data. 25 | class ScoringService(object): 26 | model = None # Where we keep the model when it's loaded 27 | 28 | @classmethod 29 | def get_model(cls): 30 | """ 31 | Get the model object for this instance, 32 | loading it if it's not already loaded. 33 | """ 34 | if cls.model is None: 35 | cls.model = load_model( 36 | os.path.join(model_path, 'model.h5')) 37 | return cls.model 38 | 39 | @classmethod 40 | def predict(cls, input): 41 | """For the input, do the predictions and return them. 42 | 43 | Args: 44 | input (a pandas dataframe): The data on which to do the 45 | predictions. 
46 | 47 | There will be one prediction per row in the dataframe 48 | """ 49 | sess = K.get_session() 50 | with sess.graph.as_default(): 51 | clf = cls.get_model() 52 | return clf.predict(input) 53 | 54 | # The flask app for serving predictions 55 | app = flask.Flask(__name__) 56 | 57 | 58 | @app.route('/ping', methods=['GET']) 59 | def ping(): 60 | """ 61 | Determine if the container is working and healthy. 62 | In this sample container, we declare it healthy if we can load the model 63 | successfully. 64 | """ 65 | # Health check -- You can insert a health check here 66 | health = True 67 | status = 200 if health else 404 68 | return flask.Response( 69 | response='{"status":"ok"}', 70 | status=status, 71 | mimetype='application/json') 72 | 73 | 74 | @app.route('/invocations', methods=['POST']) 75 | def transformation(): 76 | return flask.Response( 77 | response='{"status":"ok"}', 78 | status=status, 79 | mimetype='application/json') 80 | -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/predictor.py: -------------------------------------------------------------------------------- 1 | # This is the file that implements a flask server to do inferences. It's the 2 | # file that you will modify to implement the scoring for your own algorithm. 3 | from __future__ import print_function 4 | 5 | import os 6 | from io import StringIO 7 | #import StringIO 8 | import flask 9 | 10 | import tensorflow as tf 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from keras import backend as K 15 | from keras.models import load_model 16 | from sklearn.preprocessing import StandardScaler 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | 19 | prefix = '/opt/ml/' 20 | model_path = os.path.join(prefix, 'model') 21 | 22 | # A singleton for holding the model. This simply loads the model and holds it. 23 | # It has a predict function that does a prediction based on the model and the 24 | # input data. 25 | class ScoringService(object): 26 | model = None # Where we keep the model when it's loaded 27 | 28 | @classmethod 29 | def get_model(cls): 30 | """ 31 | Get the model object for this instance, 32 | loading it if it's not already loaded. 33 | """ 34 | if cls.model is None: 35 | cls.model = load_model( 36 | os.path.join(model_path, 'model.h5')) 37 | return cls.model 38 | 39 | @classmethod 40 | def predict(cls, input): 41 | """For the input, do the predictions and return them. 42 | 43 | Args: 44 | input (a pandas dataframe): The data on which to do the 45 | predictions. 46 | 47 | There will be one prediction per row in the dataframe 48 | """ 49 | sess = K.get_session() 50 | with sess.graph.as_default(): 51 | clf = cls.get_model() 52 | return clf.predict(input) 53 | 54 | # The flask app for serving predictions 55 | app = flask.Flask(__name__) 56 | 57 | 58 | @app.route('/ping', methods=['GET']) 59 | def ping(): 60 | """ 61 | Determine if the container is working and healthy. 62 | In this sample container, we declare it healthy if we can load the model 63 | successfully. 
64 | """ 65 | # Health check -- You can insert a health check here 66 | health = True 67 | status = 200 if health else 404 68 | return flask.Response( 69 | response='{"status":"ok"}', 70 | status=status, 71 | mimetype='application/json') 72 | 73 | 74 | @app.route('/invocations', methods=['POST']) 75 | def transformation(): 76 | return flask.Response( 77 | response='{"status":"ok"}', 78 | status=status, 79 | mimetype='application/json') 80 | -------------------------------------------------------------------------------- /4_Kinesis/model/algo_live_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | import math 10 | import numpy as np 11 | import pandas as pd 12 | import json 13 | import boto3 14 | 15 | class AlgoLiveData(DataBase): 16 | def __init__(self,region): 17 | super(AlgoLiveData, self).__init__() 18 | self.region=region 19 | self.lambda_client = boto3.client('lambda',region_name=self.region) 20 | self.connected=False 21 | 22 | #start_date = '2017-08-11' 23 | #now = datetime.datetime.now() # current date and time 24 | #end_date = now.strftime("%Y-%m-%d") 25 | 26 | #self.fromdate=pd.to_datetime(start_date, format = "%Y-%m-%d") 27 | #self.todate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.timeframe=bt.TimeFrame.Ticks 29 | print(self.lines.datetime.array) 30 | 31 | def start(self): 32 | print("start feed") 33 | print(self.lines.datetime.array) 34 | 35 | def stop(self): 36 | print("stop feed") 37 | 38 | def islive(self): 39 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 40 | should be deactivated''' 41 | return True 42 | 43 | def haslivedata(self): 44 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 45 | should be deactivated''' 46 | return self.connected 47 | 48 | def _load(self): 49 | #print("A:%s" % self.lines.datetime.array) 50 | if not self.connected: 51 | while not self.connected: 52 | self.pull() 53 | else: 54 | self.pull() 55 | return True 56 | 57 | def pull(self): 58 | #print("B:%s" % self.lines.datetime.array) 59 | if math.isnan(self.lines.datetime[0]): 60 | now = datetime.datetime.now() 61 | self.lines.datetime[0]=date2num(now) 62 | now=datetime.datetime.now() 63 | try: 64 | item={} 65 | res=self.lambda_client.invoke( 66 | FunctionName='algo_market_data', 67 | InvocationType='RequestResponse', 68 | Payload=json.dumps(item) 69 | ) 70 | t=res['Payload'] 71 | l=json.loads(t.read().decode('utf-8')) 72 | print("load:%s" % l) 73 | 74 | #print(self.lines.datetime.array) 75 | #print(self.lines.close.array) 76 | 77 | for x in l: 78 | dt=pd.to_datetime(x['date'], format = "%Y-%m-%d") 79 | #print(dt) 80 | close=x['close'] 81 | 82 | self.lines.datetime[0] = date2num(datetime.datetime.now()) 83 | self.lines.open[0] = close 84 | self.lines.high[0] = close 85 | self.lines.low[0] = close 86 | self.lines.close[0] = close 87 | self.lines.volume[0] = 0 88 | 89 | self.connected=True 90 | self._laststatus=self.LIVE 91 | #print("connected") 92 | except Exception as e: 93 | print("err:%s" % e) 94 | time.sleep(5) -------------------------------------------------------------------------------- /2_Strategies/model/algo_live_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import 
DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | import math 10 | import numpy as np 11 | import pandas as pd 12 | import json 13 | import boto3 14 | 15 | class AlgoLiveData(DataBase): 16 | def __init__(self,region): 17 | super(AlgoLiveData, self).__init__() 18 | self.region=region 19 | self.lambda_client = boto3.client('lambda',region_name=self.region) 20 | self.connected=False 21 | 22 | #start_date = '2017-08-11' 23 | #now = datetime.datetime.now() # current date and time 24 | #end_date = now.strftime("%Y-%m-%d") 25 | 26 | #self.fromdate=pd.to_datetime(start_date, format = "%Y-%m-%d") 27 | #self.todate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.timeframe=bt.TimeFrame.Ticks 29 | print(self.lines.datetime.array) 30 | 31 | def start(self): 32 | print("start feed") 33 | print(self.lines.datetime.array) 34 | 35 | def stop(self): 36 | print("stop feed") 37 | 38 | def islive(self): 39 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 40 | should be deactivated''' 41 | return True 42 | 43 | def haslivedata(self): 44 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 45 | should be deactivated''' 46 | return self.connected 47 | 48 | def _load(self): 49 | #print("A:%s" % self.lines.datetime.array) 50 | if not self.connected: 51 | while not self.connected: 52 | self.pull() 53 | else: 54 | self.pull() 55 | return True 56 | 57 | def pull(self): 58 | #print("B:%s" % self.lines.datetime.array) 59 | if math.isnan(self.lines.datetime[0]): 60 | now = datetime.datetime.now() 61 | self.lines.datetime[0]=date2num(now) 62 | now=datetime.datetime.now() 63 | try: 64 | item={} 65 | res=self.lambda_client.invoke( 66 | FunctionName='algo_market_data', 67 | InvocationType='RequestResponse', 68 | Payload=json.dumps(item) 69 | ) 70 | t=res['Payload'] 71 | l=json.loads(t.read().decode('utf-8')) 72 | print("load:%s" % l) 73 | 74 | #print(self.lines.datetime.array) 75 | #print(self.lines.close.array) 76 | 77 | for x in l: 78 | dt=pd.to_datetime(x['date'], format = "%Y-%m-%d") 79 | #print(dt) 80 | close=x['close'] 81 | 82 | self.lines.datetime[0] = date2num(datetime.datetime.now()) 83 | self.lines.open[0] = close 84 | self.lines.high[0] = close 85 | self.lines.low[0] = close 86 | self.lines.close[0] = close 87 | self.lines.volume[0] = 0 88 | 89 | self.connected=True 90 | self._laststatus=self.LIVE 91 | #print("connected") 92 | except Exception as e: 93 | print("err:%s" % e) 94 | time.sleep(5) -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/algo_live_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | import math 10 | import numpy as np 11 | import pandas as pd 12 | import json 13 | import boto3 14 | 15 | class AlgoLiveData(DataBase): 16 | def __init__(self,region): 17 | super(AlgoLiveData, self).__init__() 18 | self.region=region 19 | self.lambda_client = boto3.client('lambda',region_name=self.region) 20 | self.connected=False 21 | 22 | #start_date = '2017-08-11' 23 | #now = datetime.datetime.now() # current date and time 24 | #end_date = now.strftime("%Y-%m-%d") 25 | 26 | #self.fromdate=pd.to_datetime(start_date, format = "%Y-%m-%d") 27 | #self.todate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 
| self.timeframe=bt.TimeFrame.Ticks 29 | print(self.lines.datetime.array) 30 | 31 | def start(self): 32 | print("start feed") 33 | print(self.lines.datetime.array) 34 | 35 | def stop(self): 36 | print("stop feed") 37 | 38 | def islive(self): 39 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 40 | should be deactivated''' 41 | return True 42 | 43 | def haslivedata(self): 44 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 45 | should be deactivated''' 46 | return self.connected 47 | 48 | def _load(self): 49 | #print("A:%s" % self.lines.datetime.array) 50 | if not self.connected: 51 | while not self.connected: 52 | self.pull() 53 | else: 54 | self.pull() 55 | return True 56 | 57 | def pull(self): 58 | #print("B:%s" % self.lines.datetime.array) 59 | if math.isnan(self.lines.datetime[0]): 60 | now = datetime.datetime.now() 61 | self.lines.datetime[0]=date2num(now) 62 | now=datetime.datetime.now() 63 | try: 64 | item={} 65 | res=self.lambda_client.invoke( 66 | FunctionName='algo_market_data', 67 | InvocationType='RequestResponse', 68 | Payload=json.dumps(item) 69 | ) 70 | t=res['Payload'] 71 | l=json.loads(t.read().decode('utf-8')) 72 | print("load:%s" % l) 73 | 74 | #print(self.lines.datetime.array) 75 | #print(self.lines.close.array) 76 | 77 | for x in l: 78 | dt=pd.to_datetime(x['date'], format = "%Y-%m-%d") 79 | #print(dt) 80 | close=x['close'] 81 | 82 | self.lines.datetime[0] = date2num(datetime.datetime.now()) 83 | self.lines.open[0] = close 84 | self.lines.high[0] = close 85 | self.lines.low[0] = close 86 | self.lines.close[0] = close 87 | self.lines.volume[0] = 0 88 | 89 | self.connected=True 90 | self._laststatus=self.LIVE 91 | #print("connected") 92 | except Exception as e: 93 | print("err:%s" % e) 94 | time.sleep(5) -------------------------------------------------------------------------------- /2_Strategies/model/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
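# Illustrative override of the tunables listed below when starting the container
# (image name and values are placeholders, not part of the original sample):
#
#   docker run -e MODEL_SERVER_WORKERS=2 -e MODEL_SERVER_TIMEOUT=120 <image> serve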
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | print("python:%s" % sys.version) 24 | 25 | import backtrader as bt 26 | from algo_base import * 27 | import importlib 28 | from algo_live_feed import AlgoLiveData 29 | 30 | algo_name='' 31 | with open('algo_name', 'r') as file: 32 | algo_name = file.read().replace('\n', '') 33 | algo_package='algo_'+algo_name 34 | print("import:%s" % algo_package) 35 | 36 | cls = getattr(importlib.import_module(algo_package), 'MyStrategy') 37 | print(cls) 38 | 39 | cpu_count = multiprocessing.cpu_count() 40 | 41 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 42 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 43 | 44 | 45 | def sigterm_handler(nginx_pid, gunicorn_pid): 46 | try: 47 | os.kill(nginx_pid, signal.SIGQUIT) 48 | except OSError: 49 | pass 50 | try: 51 | os.kill(gunicorn_pid, signal.SIGTERM) 52 | except OSError: 53 | pass 54 | 55 | sys.exit(0) 56 | 57 | 58 | def start_server(): 59 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 60 | 61 | # link the log streams to stdout/err so they will be logged to the container logs 62 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 63 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 64 | 65 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 66 | gunicorn = subprocess.Popen(['gunicorn', 67 | '--timeout', str(model_server_timeout), 68 | '-k', 'gevent', 69 | '-b', 'unix:/tmp/gunicorn.sock', 70 | '-w', str(model_server_workers), 71 | 'wsgi:app']) 72 | 73 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 74 | 75 | with open('algo_config', 'r') as f: 76 | config = json.load(f) 77 | print("config=%s" % (config)) 78 | 79 | data=AlgoLiveData(config["region"]) 80 | algo=AlgoStrategy(cls,data) 81 | algo.run() 82 | 83 | # If either subprocess exits, so do we. 84 | pids = set([nginx.pid, gunicorn.pid]) 85 | while True: 86 | pid, _ = os.wait() 87 | if pid in pids: 88 | break 89 | 90 | sigterm_handler(nginx.pid, gunicorn.pid) 91 | print('Inference server exiting') 92 | 93 | 94 | # The main routine just invokes the start function. 95 | if __name__ == '__main__': 96 | start_server() 97 | -------------------------------------------------------------------------------- /4_Kinesis/model/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | print("python:%s" % sys.version) 24 | 25 | import backtrader as bt 26 | from algo_base import * 27 | import importlib 28 | from algo_live_feed import AlgoLiveData 29 | 30 | algo_name='' 31 | with open('algo_name', 'r') as file: 32 | algo_name = file.read().replace('\n', '') 33 | algo_package='algo_'+algo_name 34 | print("import:%s" % algo_package) 35 | 36 | cls = getattr(importlib.import_module(algo_package), 'MyStrategy') 37 | print(cls) 38 | 39 | cpu_count = multiprocessing.cpu_count() 40 | 41 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 42 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 43 | 44 | 45 | def sigterm_handler(nginx_pid, gunicorn_pid): 46 | try: 47 | os.kill(nginx_pid, signal.SIGQUIT) 48 | except OSError: 49 | pass 50 | try: 51 | os.kill(gunicorn_pid, signal.SIGTERM) 52 | except OSError: 53 | pass 54 | 55 | sys.exit(0) 56 | 57 | 58 | def start_server(): 59 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 60 | 61 | # link the log streams to stdout/err so they will be logged to the container logs 62 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 63 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 64 | 65 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 66 | gunicorn = subprocess.Popen(['gunicorn', 67 | '--timeout', str(model_server_timeout), 68 | '-k', 'gevent', 69 | '-b', 'unix:/tmp/gunicorn.sock', 70 | '-w', str(model_server_workers), 71 | 'wsgi:app']) 72 | 73 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 74 | 75 | with open('algo_config', 'r') as f: 76 | config = json.load(f) 77 | print("config=%s" % (config)) 78 | 79 | data=AlgoLiveData(config["region"]) 80 | algo=AlgoStrategy(cls,data) 81 | algo.run() 82 | 83 | # If either subprocess exits, so do we. 84 | pids = set([nginx.pid, gunicorn.pid]) 85 | while True: 86 | pid, _ = os.wait() 87 | if pid in pids: 88 | break 89 | 90 | sigterm_handler(nginx.pid, gunicorn.pid) 91 | print('Inference server exiting') 92 | 93 | 94 | # The main routine just invokes the start function. 95 | if __name__ == '__main__': 96 | start_server() 97 | -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | print("python:%s" % sys.version) 24 | 25 | import backtrader as bt 26 | from algo_base import * 27 | import importlib 28 | from algo_live_feed import AlgoLiveData 29 | 30 | algo_name='' 31 | with open('algo_name', 'r') as file: 32 | algo_name = file.read().replace('\n', '') 33 | algo_package='algo_'+algo_name 34 | print("import:%s" % algo_package) 35 | 36 | cls = getattr(importlib.import_module(algo_package), 'MyStrategy') 37 | print(cls) 38 | 39 | cpu_count = multiprocessing.cpu_count() 40 | 41 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 42 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 43 | 44 | 45 | def sigterm_handler(nginx_pid, gunicorn_pid): 46 | try: 47 | os.kill(nginx_pid, signal.SIGQUIT) 48 | except OSError: 49 | pass 50 | try: 51 | os.kill(gunicorn_pid, signal.SIGTERM) 52 | except OSError: 53 | pass 54 | 55 | sys.exit(0) 56 | 57 | 58 | def start_server(): 59 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 60 | 61 | # link the log streams to stdout/err so they will be logged to the container logs 62 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 63 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 64 | 65 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 66 | gunicorn = subprocess.Popen(['gunicorn', 67 | '--timeout', str(model_server_timeout), 68 | '-k', 'gevent', 69 | '-b', 'unix:/tmp/gunicorn.sock', 70 | '-w', str(model_server_workers), 71 | 'wsgi:app']) 72 | 73 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 74 | 75 | with open('algo_config', 'r') as f: 76 | config = json.load(f) 77 | print("config=%s" % (config)) 78 | 79 | data=AlgoLiveData(config["region"]) 80 | algo=AlgoStrategy(cls,data) 81 | algo.run() 82 | 83 | # If either subprocess exits, so do we. 84 | pids = set([nginx.pid, gunicorn.pid]) 85 | while True: 86 | pid, _ = os.wait() 87 | if pid in pids: 88 | break 89 | 90 | sigterm_handler(nginx.pid, gunicorn.pid) 91 | print('Inference server exiting') 92 | 93 | 94 | # The main routine just invokes the start function. 95 | if __name__ == '__main__': 96 | start_server() 97 | -------------------------------------------------------------------------------- /1_Data/Load_Hist_Data_Daily.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Load Daily Data from AWS Data Exchange into S3 Bucket\n", 8 | "\n", 9 | "### Obtaining Data\n", 10 | "\n", 11 | "We obtain EOD stock data from AWS Data Exchange and export it to a S3 bucket. Then we format the data for our daily dataset. 
In this example we use the following data: https://aws.amazon.com/marketplace/pp/prodview-e2aizdzkos266\n", 12 | "\n", 13 | "### Output dataset \n", 14 | "\n", 15 | "- Contains 20 years of EOD data for one of the top 10 US companies\n", 16 | "- The data is saved into the specified S3 bucket as CSV.\n", 17 | "\n", 18 | "```\n", 19 | "hist_data_daily/{sym}.csv (columns: dt, sym,open,high,low,close,vol)\n", 20 | "```" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# get S3 bucket\n", 30 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 31 | "s3bucket=s3bucket[0]\n", 32 | "s3bucket" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# symbol\n", 42 | "sym='JNJ'" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# copy daily eod data to local\n", 52 | "! aws s3 cp s3://{s3bucket}/daily_adjusted_{sym}.csv ./" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "import pandas as pd\n", 62 | "\n", 63 | "df = pd.read_csv(\"daily_adjusted_\"+sym+\".csv\",infer_datetime_format=True, parse_dates=['timestamp'], index_col=['timestamp'])\n", 64 | "del df[\"split_coefficient\"]\n", 65 | "del df[\"dividend_amount\"]\n", 66 | "del df[\"adjusted_close\"]\n", 67 | "df.rename(inplace=True,columns={'volume':'vol'})\n", 68 | "df.index=df.index.rename('dt')\n", 69 | "df['sym']=sym\n", 70 | "df = df[['sym', 'open', 'high', 'low', 'close','vol']]\n", 71 | "df.sort_index(inplace=True)\n", 72 | "df.head()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "df.to_csv(sym+'.csv')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "!aws s3 cp {sym}.csv s3://{s3bucket}/hist_data_daily/\n", 91 | "!rm daily_adjusted_{sym}.csv\n", 92 | "!rm {sym}.csv" 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "conda_python3", 99 | "language": "python", 100 | "name": "conda_python3" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.6.10" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 2 117 | } 118 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 
13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /3_Models/model/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | print("python:%s" % sys.version) 24 | 25 | cpu_count = multiprocessing.cpu_count() 26 | 27 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 28 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 29 | 30 | 31 | def sigterm_handler(nginx_pid, gunicorn_pid): 32 | try: 33 | os.kill(nginx_pid, signal.SIGQUIT) 34 | except OSError: 35 | pass 36 | try: 37 | os.kill(gunicorn_pid, signal.SIGTERM) 38 | except OSError: 39 | pass 40 | 41 | sys.exit(0) 42 | 43 | 44 | def start_server(): 45 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 46 | 47 | # link the log streams to stdout/err so they will be logged to the container logs 48 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 49 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 50 | 51 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 52 | gunicorn = subprocess.Popen(['gunicorn', 53 | '--timeout', str(model_server_timeout), 54 | '-k', 'gevent', 55 | '-b', 'unix:/tmp/gunicorn.sock', 56 | '-w', str(model_server_workers), 57 | 'wsgi:app']) 58 | 59 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 60 | 61 | 62 | from threading import Timer, Thread, Event 63 | from datetime import datetime 64 | 65 | class PT(): 66 | def __init__(self, t, hFunction): 67 | self.t = t 68 | self.hFunction = hFunction 69 | self.thread = Timer(self.t, self.handle_function) 70 | 71 | def handle_function(self): 72 | self.hFunction() 73 | self.thread = Timer(self.t, self.handle_function) 74 | self.thread.start() 75 | 76 | def start(self): 77 | self.thread.start() 78 | 79 | def printer(): 80 | tempo = datetime.today() 81 | h,m,s = tempo.hour, tempo.minute, tempo.second 82 | print(f"{h}:{m}:{s}") 83 | 84 | t = PT(5, printer) 85 | t.start() 86 | 87 | # If either subprocess exits, so do we. 88 | pids = set([nginx.pid, gunicorn.pid]) 89 | while True: 90 | pid, _ = os.wait() 91 | if pid in pids: 92 | break 93 | 94 | sigterm_handler(nginx.pid, gunicorn.pid) 95 | print('Inference server exiting') 96 | 97 | 98 | # The main routine just invokes the start function. 99 | if __name__ == '__main__': 100 | start_server() 101 | -------------------------------------------------------------------------------- /3_Models/model/predictor.py: -------------------------------------------------------------------------------- 1 | # This is the file that implements a flask server to do inferences. It's the 2 | # file that you will modify to implement the scoring for your own algorithm. 
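#
# The server defined here exposes two routes: GET /ping for health checks and
# POST /invocations for scoring. A minimal client sketch (the host/port and
# the column names are illustrative assumptions; the CSV columns must match
# the features the model was trained on):
#
#   import requests
#   payload = "f1,f2,f3\n0.1,0.2,0.3\n"
#   r = requests.post("http://localhost:8080/invocations",
#                     data=payload, headers={"Content-Type": "text/csv"})
#   print(r.text)  # one prediction per line, returned as CSV
#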
3 | from __future__ import print_function 4 | 5 | import os 6 | from io import StringIO 7 | #import StringIO 8 | import flask 9 | 10 | import tensorflow as tf 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from keras import backend as K 15 | from keras.models import load_model 16 | from sklearn.preprocessing import StandardScaler 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | 19 | prefix = '/opt/ml/' 20 | model_path = os.path.join(prefix, 'model') 21 | 22 | # A singleton for holding the model. This simply loads the model and holds it. 23 | # It has a predict function that does a prediction based on the model and the 24 | # input data. 25 | class ScoringService(object): 26 | model = None # Where we keep the model when it's loaded 27 | 28 | @classmethod 29 | def get_model(cls): 30 | """ 31 | Get the model object for this instance, 32 | loading it if it's not already loaded. 33 | """ 34 | if cls.model is None: 35 | cls.model = load_model( 36 | os.path.join(model_path, 'model.h5')) 37 | return cls.model 38 | 39 | @classmethod 40 | def predict(cls, input): 41 | """For the input, do the predictions and return them. 42 | 43 | Args: 44 | input (a pandas dataframe): The data on which to do the 45 | predictions. 46 | 47 | There will be one prediction per row in the dataframe 48 | """ 49 | sess = K.get_session() 50 | with sess.graph.as_default(): 51 | clf = cls.get_model() 52 | return clf.predict(input) 53 | 54 | # The flask app for serving predictions 55 | app = flask.Flask(__name__) 56 | 57 | 58 | @app.route('/ping', methods=['GET']) 59 | def ping(): 60 | """ 61 | Determine if the container is working and healthy. 62 | In this sample container, we declare it healthy if we can load the model 63 | successfully. 64 | """ 65 | 66 | # Health check -- You can insert a health check here 67 | health = ScoringService.get_model() is not None 68 | status = 200 if health else 404 69 | return flask.Response( 70 | response='\n', 71 | status=status, 72 | mimetype='application/json') 73 | 74 | 75 | @app.route('/invocations', methods=['POST']) 76 | def transformation(): 77 | """ 78 | Do an inference on a single batch of data. In this sample server, we take 79 | data as CSV, convert it to a pandas data frame for internal use and then 80 | convert the predictions back to CSV (which really just means one prediction 81 | per line, since there's a single column. 
82 | """ 83 | data = None 84 | 85 | # Convert from CSV to pandas 86 | if flask.request.content_type == 'text/csv': 87 | data = flask.request.data.decode('utf-8') 88 | f = StringIO(data) 89 | data = pd.read_csv(f) 90 | else: 91 | return flask.Response( 92 | response='This predictor only supports CSV data', 93 | status=415, 94 | mimetype='text/plain') 95 | 96 | print('Invoked with {} records'.format(data.shape[0])) 97 | 98 | # Do the prediction 99 | print("data=%s" % data) 100 | predictions = ScoringService.predict(data) 101 | print("predictions=%s" % predictions) 102 | 103 | # Convert from numpy back to CSV 104 | 105 | result = pd.DataFrame(predictions).to_csv(header=False, index=False) 106 | print("result=%s" % result) 107 | #result = out.getvalue() 108 | 109 | return flask.Response(response=result, status=200, mimetype='text/csv') 110 | -------------------------------------------------------------------------------- /4_Kinesis/model/algo_sim_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | # Based on this: https://towardsdatascience.com/simulating-stock-prices-in-python-using-geometric-brownian-motion-8dfd6e8c6b18 14 | 15 | class AlgoSimData(DataBase): 16 | def __init__(self,datafile): 17 | super(AlgoSimData, self).__init__() 18 | 19 | df = pd.read_csv(datafile,infer_datetime_format=True, parse_dates=['dt']) 20 | 21 | start_date = '2012-08-13' 22 | end_date = '2017-08-11' 23 | 24 | now = datetime.datetime.now() # current date and time 25 | pred_end_date = now.strftime("%Y-%m-%d") 26 | 27 | self.fromdate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.todate=pd.to_datetime(pred_end_date, format = "%Y-%m-%d") 29 | self.timeframe=bt.TimeFrame.Days 30 | 31 | S_eon = df[["dt","close"]] 32 | 33 | returns = (S_eon.loc[1:, 'close'] - \ 34 | S_eon.shift(1).loc[1:, 'close']) / \ 35 | S_eon.shift(1).loc[1:, 'close'] 36 | 37 | # Parameter Assignments 38 | So = S_eon.loc[S_eon.shape[0] - 1, "close"] 39 | dt = 1 # day # User input 40 | n_of_wkdays = pd.date_range(start = pd.to_datetime(end_date, 41 | format = "%Y-%m-%d") + pd.Timedelta('1 days'), 42 | end = pd.to_datetime(pred_end_date, 43 | format = "%Y-%m-%d")).to_series().map(lambda x: 44 | 1 if x.isoweekday() in range(1,6) else 0).sum() 45 | T = n_of_wkdays # days # User input -> follows from pred_end_date 46 | N = T / dt 47 | t = np.arange(1, int(N) + 1) 48 | mu = np.mean(returns) 49 | sigma = np.std(returns) 50 | scen_size = 1 # User input 51 | b = {str(scen): np.random.normal(0, 1, int(N)) for scen in range(1, scen_size + 1)} 52 | W = {str(scen): b[str(scen)].cumsum() for scen in range(1, scen_size + 1)} 53 | 54 | # Calculating drift and diffusion components 55 | drift = (mu - 0.5 * sigma**2) * t 56 | diffusion = {str(scen): sigma * W[str(scen)] for scen in range(1, scen_size + 1)} 57 | 58 | # Making the predictions 59 | S = np.array([So * np.exp(drift + diffusion[str(scen)]) for scen in range(1, scen_size + 1)]) 60 | S = np.hstack((np.array([[So] for scen in range(scen_size)]), S)) # add So to the beginning series 61 | 62 | # Dataframe format for predictions - first 10 scenarios only 63 | self.df = pd.DataFrame(S.swapaxes(0, 1)[:, :10]).set_index( 64 | pd.date_range(start = S_eon["dt"].max(), 65 | end = pred_end_date, freq = 
'D').map(lambda x: 66 | x if x.isoweekday() in range(1, 6) else np.nan).dropna() 67 | ).reset_index(drop = False) 68 | print("SimData generated:from=%s,to=%s,count=%s" % (self.fromdate,self.todate,len(self.df))) 69 | self.n=0 70 | 71 | def start(self): 72 | print("start feed") 73 | 74 | def stop(self): 75 | print("stop feed") 76 | 77 | def _load(self): 78 | #print("load feed") 79 | if self.n>=len(self.df): 80 | return False 81 | 82 | v=self.df.values 83 | #print(v) 84 | #print(self.n) 85 | dt=v[self.n][0] 86 | close=v[self.n][1] 87 | #print("%s:%s:%s" % (self.n,dt,close)) 88 | 89 | self.lines.datetime[0] = date2num(dt) 90 | 91 | print(self.num2date(self.lines.datetime[0])) 92 | 93 | self.lines.open[0] = close 94 | self.lines.high[0] = close 95 | self.lines.low[0] = close 96 | self.lines.close[0] = close 97 | self.lines.volume[0] = 0 98 | 99 | self.n=self.n+1 100 | return True -------------------------------------------------------------------------------- /2_Strategies/model/algo_sim_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | # Based on this: https://towardsdatascience.com/simulating-stock-prices-in-python-using-geometric-brownian-motion-8dfd6e8c6b18 14 | 15 | class AlgoSimData(DataBase): 16 | def __init__(self,datafile): 17 | super(AlgoSimData, self).__init__() 18 | 19 | df = pd.read_csv(datafile,infer_datetime_format=True, parse_dates=['dt']) 20 | 21 | start_date = '2012-08-13' 22 | end_date = '2017-08-11' 23 | 24 | now = datetime.datetime.now() # current date and time 25 | pred_end_date = now.strftime("%Y-%m-%d") 26 | 27 | self.fromdate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.todate=pd.to_datetime(pred_end_date, format = "%Y-%m-%d") 29 | self.timeframe=bt.TimeFrame.Days 30 | 31 | S_eon = df[["dt","close"]] 32 | 33 | returns = (S_eon.loc[1:, 'close'] - \ 34 | S_eon.shift(1).loc[1:, 'close']) / \ 35 | S_eon.shift(1).loc[1:, 'close'] 36 | 37 | # Parameter Assignments 38 | So = S_eon.loc[S_eon.shape[0] - 1, "close"] 39 | dt = 1 # day # User input 40 | n_of_wkdays = pd.date_range(start = pd.to_datetime(end_date, 41 | format = "%Y-%m-%d") + pd.Timedelta('1 days'), 42 | end = pd.to_datetime(pred_end_date, 43 | format = "%Y-%m-%d")).to_series().map(lambda x: 44 | 1 if x.isoweekday() in range(1,6) else 0).sum() 45 | T = n_of_wkdays # days # User input -> follows from pred_end_date 46 | N = T / dt 47 | t = np.arange(1, int(N) + 1) 48 | mu = np.mean(returns) 49 | sigma = np.std(returns) 50 | scen_size = 1 # User input 51 | b = {str(scen): np.random.normal(0, 1, int(N)) for scen in range(1, scen_size + 1)} 52 | W = {str(scen): b[str(scen)].cumsum() for scen in range(1, scen_size + 1)} 53 | 54 | # Calculating drift and diffusion components 55 | drift = (mu - 0.5 * sigma**2) * t 56 | diffusion = {str(scen): sigma * W[str(scen)] for scen in range(1, scen_size + 1)} 57 | 58 | # Making the predictions 59 | S = np.array([So * np.exp(drift + diffusion[str(scen)]) for scen in range(1, scen_size + 1)]) 60 | S = np.hstack((np.array([[So] for scen in range(scen_size)]), S)) # add So to the beginning series 61 | 62 | # Dataframe format for predictions - first 10 scenarios only 63 | self.df = pd.DataFrame(S.swapaxes(0, 1)[:, :10]).set_index( 64 | pd.date_range(start = S_eon["dt"].max(), 65 | 
end = pred_end_date, freq = 'D').map(lambda x: 66 | x if x.isoweekday() in range(1, 6) else np.nan).dropna() 67 | ).reset_index(drop = False) 68 | print("SimData generated:from=%s,to=%s,count=%s" % (self.fromdate,self.todate,len(self.df))) 69 | self.n=0 70 | 71 | def start(self): 72 | print("start feed") 73 | 74 | def stop(self): 75 | print("stop feed") 76 | 77 | def _load(self): 78 | #print("load feed") 79 | if self.n>=len(self.df): 80 | return False 81 | 82 | v=self.df.values 83 | #print(v) 84 | #print(self.n) 85 | dt=v[self.n][0] 86 | close=v[self.n][1] 87 | #print("%s:%s:%s" % (self.n,dt,close)) 88 | 89 | self.lines.datetime[0] = date2num(dt) 90 | 91 | print(self.num2date(self.lines.datetime[0])) 92 | 93 | self.lines.open[0] = close 94 | self.lines.high[0] = close 95 | self.lines.low[0] = close 96 | self.lines.close[0] = close 97 | self.lines.volume[0] = 0 98 | 99 | self.n=self.n+1 100 | return True -------------------------------------------------------------------------------- /1_Data/Load_Hist_Data_Daily_Public.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Load Daily Data into S3 Bucket\n", 8 | "\n", 9 | "### Obtaining Data\n", 10 | "\n", 11 | "\n", 12 | "We use the dataset generated by [Chi Zhang](https://github.com/vermouth1992/drl-portfolio-management/tree/master/src/utils/datasets). It contains the historic price of 16 target stocks from NASDAQ100, including open, close, high and low prices from 2012-08-13 to 2017-08-11. Specifically, those stocks are: “AAPL”, “ATVI”, “CMCSA”, “COST”, “CSX”, “DISH”, “EA”, “EBAY”, “FB”, “GOOGL”, “HAS”, “ILMN”, “INTC”, “MAR”, “REGN” and “SBUX”.\n", 13 | "\n", 14 | "**This dataset is licensed under a MIT License**\n", 15 | "\n", 16 | "Copyright (c) 2017 Chi Zhang\n", 17 | "\n", 18 | "Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n", 19 | "\n", 20 | "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n", 21 | "\n", 22 | "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n", 23 | "\n", 24 | "### Output dataset \n", 25 | "\n", 26 | "- Contains 5 years of EOD data for one of the stocks\n", 27 | "- The data is saved into the specified S3 bucket as CSV.\n", 28 | "\n", 29 | "```\n", 30 | "hist_data_daily/{sym}.csv (columns: dt,sym,open,high,low,close,vol)\n", 31 | "```" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# get S3 bucket\n", 41 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 42 | "s3bucket=s3bucket[0]\n", 43 | "s3bucket" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# symbol\n", 53 | "sym='INTC'" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "%run data_prep.py {sym}" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "import pandas as pd\n", 72 | "\n", 73 | "df = pd.read_csv(\"INTC.csv\",infer_datetime_format=True, parse_dates=['dt'], index_col=['dt'])\n", 74 | "df.head()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "!aws s3 cp {sym}.csv s3://{s3bucket}/hist_data_daily/\n", 84 | "!rm {sym}.csv" 85 | ] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "conda_python3", 91 | "language": "python", 92 | "name": "conda_python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.6.10" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 2 109 | } -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/algo_sim_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | # Based on this: https://towardsdatascience.com/simulating-stock-prices-in-python-using-geometric-brownian-motion-8dfd6e8c6b18 14 | 15 | class AlgoSimData(DataBase): 16 | def __init__(self,datafile): 17 | super(AlgoSimData, self).__init__() 18 | 19 | df = pd.read_csv(datafile,infer_datetime_format=True, parse_dates=['dt']) 20 | 21 | start_date = '2012-08-13' 22 | end_date = '2017-08-11' 23 | 24 | now = datetime.datetime.now() # current date and time 25 | pred_end_date = now.strftime("%Y-%m-%d") 26 | 27 | self.fromdate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.todate=pd.to_datetime(pred_end_date, format = "%Y-%m-%d") 29 | self.timeframe=bt.TimeFrame.Days 30 | 31 | S_eon = df[["dt","close"]] 32 | 33 | returns = (S_eon.loc[1:, 'close'] - \ 34 | S_eon.shift(1).loc[1:, 'close']) / \ 35 | S_eon.shift(1).loc[1:, 'close'] 36 | 37 | # 
Parameter Assignments 38 | So = S_eon.loc[S_eon.shape[0] - 1, "close"] 39 | dt = 1 # day # User input 40 | n_of_wkdays = pd.date_range(start = pd.to_datetime(end_date, 41 | format = "%Y-%m-%d") + pd.Timedelta('1 days'), 42 | end = pd.to_datetime(pred_end_date, 43 | format = "%Y-%m-%d")).to_series().map(lambda x: 44 | 1 if x.isoweekday() in range(1,6) else 0).sum() 45 | T = n_of_wkdays # days # User input -> follows from pred_end_date 46 | N = T / dt 47 | t = np.arange(1, int(N) + 1) 48 | mu = np.mean(returns) 49 | sigma = np.std(returns) 50 | scen_size = 1 # User input 51 | b = {str(scen): np.random.normal(0, 1, int(N)) for scen in range(1, scen_size + 1)} 52 | W = {str(scen): b[str(scen)].cumsum() for scen in range(1, scen_size + 1)} 53 | 54 | # Calculating drift and diffusion components 55 | drift = (mu - 0.5 * sigma**2) * t 56 | diffusion = {str(scen): sigma * W[str(scen)] for scen in range(1, scen_size + 1)} 57 | 58 | # Making the predictions 59 | S = np.array([So * np.exp(drift + diffusion[str(scen)]) for scen in range(1, scen_size + 1)]) 60 | S = np.hstack((np.array([[So] for scen in range(scen_size)]), S)) # add So to the beginning series 61 | 62 | # Dataframe format for predictions - first 10 scenarios only 63 | self.df = pd.DataFrame(S.swapaxes(0, 1)[:, :10]).set_index( 64 | pd.date_range(start = S_eon["dt"].max(), 65 | end = pred_end_date, freq = 'D').map(lambda x: 66 | x if x.isoweekday() in range(1, 6) else np.nan).dropna() 67 | ).reset_index(drop = False) 68 | print("SimData generated:from=%s,to=%s,count=%s" % (self.fromdate,self.todate,len(self.df))) 69 | self.n=0 70 | 71 | def start(self): 72 | print("start feed") 73 | 74 | def stop(self): 75 | print("stop feed") 76 | 77 | def _load(self): 78 | #print("load feed") 79 | if self.n>=len(self.df): 80 | return False 81 | 82 | v=self.df.values 83 | #print(v) 84 | #print(self.n) 85 | dt=v[self.n][0] 86 | close=v[self.n][1] 87 | #print("%s:%s:%s" % (self.n,dt,close)) 88 | 89 | self.lines.datetime[0] = date2num(dt) 90 | 91 | print(self.num2date(self.lines.datetime[0])) 92 | 93 | self.lines.open[0] = close 94 | self.lines.high[0] = close 95 | self.lines.low[0] = close 96 | self.lines.close[0] = close 97 | self.lines.volume[0] = 0 98 | 99 | self.n=self.n+1 100 | return True -------------------------------------------------------------------------------- /5_SageMakerStudio/2_Load_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Load Daily Data into S3 Bucket\n", 8 | "\n", 9 | "### Obtaining Data\n", 10 | "\n", 11 | "\n", 12 | "We use the dataset generated by [Chi Zhang](https://github.com/vermouth1992/drl-portfolio-management/tree/master/src/utils/datasets). It contains the historic price of 16 target stocks from NASDAQ100, including open, close, high and low prices from 2012-08-13 to 2017-08-11. 
Specifically, those stocks are: “AAPL”, “ATVI”, “CMCSA”, “COST”, “CSX”, “DISH”, “EA”, “EBAY”, “FB”, “GOOGL”, “HAS”, “ILMN”, “INTC”, “MAR”, “REGN” and “SBUX”.\n", 13 | "\n", 14 | "**This dataset is licensed under a MIT License**\n", 15 | "\n", 16 | "Copyright (c) 2017 Chi Zhang\n", 17 | "\n", 18 | "Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n", 19 | "\n", 20 | "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n", 21 | "\n", 22 | "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n", 23 | "\n", 24 | "### Output dataset \n", 25 | "\n", 26 | "- Contains 5 years of EOD data for one of the stocks\n", 27 | "- The data is saved into the specified S3 bucket as CSV.\n", 28 | "\n", 29 | "```\n", 30 | "hist_data_daily/{sym}.csv (columns: dt,sym,open,high,low,close,vol)\n", 31 | "```" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "S3_BUCKET=!(aws cloudformation list-exports --query \"Exports[?Name=='algotrading-s3bucket'].Value\" --output text)\n", 41 | "S3_BUCKET=S3_BUCKET[0]\n", 42 | "S3_BUCKET" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# symbol\n", 52 | "sym='INTC'" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "%run data_prep.py {sym}" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "import pandas as pd\n", 71 | "\n", 72 | "df = pd.read_csv(\"INTC.csv\",infer_datetime_format=True, parse_dates=['dt'], index_col=['dt'])\n", 73 | "df.head()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "!aws s3 cp {sym}.csv s3://{S3_BUCKET}/hist_data_daily/" 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "instance_type": "ml.t3.medium", 88 | "kernelspec": { 89 | "display_name": "Python 3 (Data Science)", 90 | "language": "python", 91 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 92 | }, 93 | "language_info": { 94 | "codemirror_mode": { 95 | "name": "ipython", 96 | "version": 3 97 | }, 98 | "file_extension": ".py", 99 | "mimetype": "text/x-python", 100 | "name": "python", 101 | "nbconvert_exporter": "python", 102 | "pygments_lexer": "ipython3", 103 | "version": "3.7.10" 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 4 108 | } 109 | -------------------------------------------------------------------------------- 
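Once the daily CSV files are in s3://&lt;bucket&gt;/hist_data_daily/ and the schema stack below has been deployed, the data can be queried with Athena. A minimal sketch using boto3, assuming default credentials in a region where the stack exists and that the query completes successfully; the database (algo_data), table (hist_data_daily), and workgroup (MyWorkGroup) names are the ones defined in schema.yaml:

import time
import boto3

athena = boto3.client("athena")

# Start a small query against the Glue table defined in schema.yaml.
res = athena.start_query_execution(
    QueryString="SELECT dt, sym, close FROM hist_data_daily ORDER BY dt DESC LIMIT 10",
    QueryExecutionContext={"Database": "algo_data"},
    WorkGroup="MyWorkGroup",
)
qid = res["QueryExecutionId"]

# Naive polling loop; real code should also handle FAILED and CANCELLED states.
state = "RUNNING"
while state in ("QUEUED", "RUNNING"):
    time.sleep(1)
    state = athena.get_query_execution(QueryExecutionId=qid)["QueryExecution"]["Status"]["State"]

# Print the result rows (the first row returned by Athena contains the headers).
rows = athena.get_query_results(QueryExecutionId=qid)["ResultSet"]["Rows"]
for row in rows[1:]:
    print([col.get("VarCharValue") for col in row["Data"]])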
/5_SageMakerStudio/schema.yaml: -------------------------------------------------------------------------------- 1 | Resources: 2 | GlueDatabase: 3 | Type: AWS::Glue::Database 4 | Properties: 5 | CatalogId: !Ref 'AWS::AccountId' 6 | DatabaseInput: 7 | Name: algo_data 8 | 9 | GlueHistDataDaily: 10 | Type: AWS::Glue::Table 11 | Properties: 12 | CatalogId: !Ref 'AWS::AccountId' 13 | DatabaseName: !Ref 'GlueDatabase' 14 | TableInput: 15 | Description: Daily Price Data 16 | Name: hist_data_daily 17 | Parameters: 18 | classification: csv 19 | has_encrypted_data: false 20 | StorageDescriptor: 21 | Columns: 22 | - Name: dt 23 | Type: string 24 | - Name: sym 25 | Type: string 26 | - Name: open 27 | Type: double 28 | - Name: high 29 | Type: double 30 | - Name: low 31 | Type: double 32 | - Name: close 33 | Type: double 34 | - Name: vol 35 | Type: double 36 | Compressed: false 37 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 38 | Location: !Join 39 | - '' 40 | - - s3:// 41 | - !ImportValue algotrading-s3bucket 42 | - /hist_data_daily 43 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 44 | SerdeInfo: 45 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 46 | Parameters: 47 | field.delim: ',' 48 | skip.header.line.count: '1' 49 | StoredAsSubDirectories: false 50 | TableType: EXTERNAL_TABLE 51 | 52 | GlueHistDataIntraday: 53 | Type: AWS::Glue::Table 54 | Properties: 55 | CatalogId: !Ref 'AWS::AccountId' 56 | DatabaseName: !Ref 'GlueDatabase' 57 | TableInput: 58 | Description: Intraday Price Data 59 | Name: hist_data_intraday 60 | Parameters: 61 | classification: csv 62 | has_encrypted_data: false 63 | StorageDescriptor: 64 | Columns: 65 | - Name: dt 66 | Type: string 67 | - Name: sym 68 | Type: string 69 | - Name: open 70 | Type: double 71 | - Name: high 72 | Type: double 73 | - Name: low 74 | Type: double 75 | - Name: close 76 | Type: double 77 | - Name: vol 78 | Type: double 79 | Compressed: false 80 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 81 | Location: !Join 82 | - '' 83 | - - s3:// 84 | - !ImportValue algotrading-s3bucket 85 | - /hist_data_intraday 86 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 87 | SerdeInfo: 88 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 89 | Parameters: 90 | field.delim: ',' 91 | skip.header.line.count: '1' 92 | StoredAsSubDirectories: false 93 | TableType: EXTERNAL_TABLE 94 | 95 | AthenaWorkgroup: 96 | Type: AWS::Athena::WorkGroup 97 | Properties: 98 | Name: MyWorkGroup 99 | Description: AlgoWorkgroup 100 | RecursiveDeleteOption: true 101 | State: ENABLED 102 | WorkGroupConfiguration: 103 | RequesterPaysEnabled: true 104 | ResultConfiguration: 105 | OutputLocation: !Join 106 | - '' 107 | - - s3:// 108 | - !ImportValue algotrading-s3bucket 109 | - /results/ 110 | 111 | AlgoHistDataDaily: 112 | Type: AWS::Athena::NamedQuery 113 | Properties: 114 | Database: !Ref 'GlueDatabase' 115 | QueryString: !Join 116 | - '' 117 | - - select * from algo_data. 118 | - !Ref 'GlueHistDataDaily' 119 | - ' limit 10;' 120 | Name: HistDataDaily 121 | 122 | AlgoHistDataIntraday: 123 | Type: AWS::Athena::NamedQuery 124 | Properties: 125 | Database: !Ref 'GlueDatabase' 126 | QueryString: !Join 127 | - '' 128 | - - select * from algo_data. 
129 | - !Ref 'GlueHistDataIntraday' 130 | - ' limit 10;' 131 | Name: HistDataIntraday -------------------------------------------------------------------------------- /5_SageMakerStudio/1_Setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Setup" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "scrolled": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "!./init_s3_bucket.sh" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "!./init_schema.sh" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "!pip install sagemaker-studio-image-build" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "scrolled": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "!sm-docker build docker/. --repository algotrading:1.0" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "# Attach Container as SageMaker Kernel" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "import sagemaker as sage\n", 64 | "from sagemaker import get_execution_role\n", 65 | "\n", 66 | "role = get_execution_role()\n", 67 | "sess = sage.Session()\n", 68 | "\n", 69 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 70 | "region = sess.boto_session.region_name" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "!aws --region {region} sagemaker delete-image-version --image-name algotrading --version-number=1\n", 80 | "!aws --region {region} sagemaker list-image-versions --image-name algotrading\n", 81 | "!aws --region {region} sagemaker delete-image --image-name algotrading\n", 82 | "!aws --region {region} sagemaker list-images\n", 83 | "!aws --region {region} sagemaker delete-app-image-config --app-image-config-name=algotrading-config\n", 84 | "!aws --region {region} sagemaker list-app-image-configs" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "!aws --region {region} sagemaker create-image --image-name algotrading --role-arn {role}\n", 94 | "!aws --region {region} sagemaker list-images\n", 95 | "!aws --region {region} sagemaker create-image-version --image-name algotrading --base-image \"{account}.dkr.ecr.{region}.amazonaws.com/algotrading:1.0\"\n", 96 | "!aws --region {region} sagemaker list-image-versions --image-name algotrading\n", 97 | "!aws --region {region} sagemaker create-app-image-config --cli-input-json file://app-image-config-input.json\n", 98 | "!aws --region {region} sagemaker list-app-image-configs" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "domainid=!(aws --region {region} sagemaker list-domains --query 'Domains[0].DomainId' --output text)\n", 108 | "domainid=domainid[0]\n", 109 | "domainid" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "!aws 
--region {region} sagemaker update-domain --domain-id {domainid} --cli-input-json file://default-user-settings.json" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "instance_type": "ml.t3.medium", 124 | "kernelspec": { 125 | "display_name": "Python 3 (Data Science)", 126 | "language": "python", 127 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 128 | }, 129 | "language_info": { 130 | "codemirror_mode": { 131 | "name": "ipython", 132 | "version": 3 133 | }, 134 | "file_extension": ".py", 135 | "mimetype": "text/x-python", 136 | "name": "python", 137 | "nbconvert_exporter": "python", 138 | "pygments_lexer": "ipython3", 139 | "version": "3.7.10" 140 | } 141 | }, 142 | "nbformat": 4, 143 | "nbformat_minor": 4 144 | } 145 | -------------------------------------------------------------------------------- /2_Strategies/model/algo_base.py: -------------------------------------------------------------------------------- 1 | import backtrader as bt 2 | import backtrader.feeds as btfeeds 3 | import backtrader.analyzers as btanalyzers 4 | import backtrader.plot 5 | import os 6 | import pytz 7 | from pytz import timezone 8 | import requests 9 | import json 10 | import time 11 | from algo_sim_feed import AlgoSimData 12 | #from abc import classmethod 13 | 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | import matplotlib.pyplot as plt 17 | plt.rcParams["figure.figsize"] = [16,9] 18 | 19 | # More documentation about backtrader: https://www.backtrader.com/ 20 | 21 | class AlgoStrategy(): 22 | 23 | def __init__(self,config,strategy): 24 | self.config=config 25 | 26 | self.cerebro = bt.Cerebro() 27 | strategy.config=config 28 | strategy.init_broker(self.cerebro.broker) 29 | strategy.add_data(self.cerebro) 30 | self.cerebro.addstrategy(strategy) 31 | 32 | self.portfolioStartValue=self.cerebro.broker.getvalue() 33 | 34 | self.cerebro.addanalyzer(btanalyzers.DrawDown, _name='dd') 35 | self.cerebro.addanalyzer(btanalyzers.SharpeRatio_A, _name='sharpe') 36 | self.cerebro.addanalyzer(btanalyzers.SQN, _name='sqn') 37 | self.cerebro.addanalyzer(btanalyzers.TradeAnalyzer, _name='ta') 38 | 39 | def performance(self): 40 | analyzer=self.thestrat.analyzers.ta.get_analysis() 41 | dd_analyzer=self.thestrat.analyzers.dd.get_analysis() 42 | 43 | #Get the results we are interested in 44 | total_open = analyzer.total.open 45 | total_closed = analyzer.total.closed 46 | total_won = analyzer.won.total 47 | total_lost = analyzer.lost.total 48 | win_streak = analyzer.streak.won.longest 49 | lose_streak = analyzer.streak.lost.longest 50 | pnl_net = round(analyzer.pnl.net.total,2) 51 | strike_rate = (total_won / total_closed) * 100 52 | #Designate the rows 53 | h1 = ['Total Open', 'Total Closed', 'Total Won', 'Total Lost'] 54 | h2 = ['Strike Rate','Win Streak', 'Losing Streak', 'PnL Net'] 55 | h3 = ['DrawDown Pct','MoneyDown', '', ''] 56 | self.total_closed=total_closed 57 | self.strike_rate=strike_rate 58 | self.max_drawdown=dd_analyzer.max.drawdown 59 | r1 = [total_open, total_closed,total_won,total_lost] 60 | r2 = [('%.2f%%' %(strike_rate)), win_streak, lose_streak, pnl_net] 61 | r3 = [('%.2f%%' %(dd_analyzer.max.drawdown)), dd_analyzer.max.moneydown, '', ''] 62 | #Check which set of headers is the longest. 
63 | header_length = max(len(h1),len(h2),len(h3)) 64 | #Print the rows 65 | print_list = [h1,r1,h2,r2,h3,r3] 66 | row_format ="{:<15}" * (header_length + 1) 67 | print("Trade Analysis Results:") 68 | for row in print_list: 69 | print(row_format.format('',*row)) 70 | 71 | analyzer=self.thestrat.analyzers.sqn.get_analysis() 72 | sharpe_analyzer=self.thestrat.analyzers.sharpe.get_analysis() 73 | self.sqn = analyzer.sqn 74 | self.sharpe_ratio = sharpe_analyzer['sharperatio'] 75 | if self.sharpe_ratio is None: 76 | self.sharpe_ratio=0 77 | self.pnl = self.cerebro.broker.getvalue()-self.portfolioStartValue 78 | print('[SQN:%.2f, Sharpe Ratio:%.2f, Final Portfolio:%.2f, Total PnL:%.2f]' % (self.sqn,self.sharpe_ratio,self.cerebro.broker.getvalue(),self.pnl)) 79 | 80 | # plot 81 | fig=self.cerebro.plot() 82 | plt.savefig(os.path.join(StrategyTemplate.MODEL_PATH, 'chart.png')) 83 | 84 | def submit(self): 85 | try: 86 | if 'submitUrl' in self.config: 87 | name=self.config['user']+'@'+self.config['account'] 88 | algo=self.config['algo_name'] 89 | submitUrl=self.config['submitUrl'] 90 | 91 | URL = submitUrl 92 | ts=str(int(time.time())) 93 | PARAMS={'id': algo, 94 | 'name': name, 95 | 'trades': self.total_closed, 96 | 'strike_rate': self.strike_rate, 97 | 'max_drawdown': self.max_drawdown, 98 | 'pnl': self.pnl, 99 | 'sqn': self.sqn, 100 | 'sharpe_ratio': self.sharpe_ratio} 101 | print("submit:%s" % (json.dumps(PARAMS))) 102 | r = requests.get(url = URL, params = PARAMS, timeout=3) 103 | print("status=%s,res=%s" % (r.status_code,r.text)) 104 | if r.status_code == 200: 105 | print("performance submitted") 106 | else: 107 | print("error submitting performance:%s" % r.text) 108 | except Exception as e: 109 | print("error submitting performance:%s" % e) 110 | 111 | def run(self): 112 | thestrats = self.cerebro.run() 113 | self.thestrat = thestrats[0] 114 | self.performance() 115 | self.submit() 116 | 117 | class StrategyTemplate(bt.Strategy): 118 | 119 | PREFIX='/opt/ml/' 120 | TRAIN_FILE = os.path.join(PREFIX,'input/data/training/data.csv') 121 | CONFIG_FILE = os.path.join(PREFIX,'input/config/hyperparameters.json') 122 | MODEL_PATH = os.path.join(PREFIX,'model') 123 | 124 | def __init__(self): 125 | with open(StrategyTemplate.CONFIG_FILE, 'r') as f: 126 | self.config = json.load(f) 127 | print("[INIT]:config:%s=%s" % (StrategyTemplate.CONFIG_FILE,self.config)) 128 | 129 | self.lastDay=-1 130 | self.lastMonth=-1 131 | self.dataclose = self.datas[0].close 132 | 133 | @staticmethod 134 | def init_broker(broker): 135 | pass 136 | 137 | @staticmethod 138 | def add_data(cerebro): 139 | pass 140 | 141 | def notify_order(self, order): 142 | dt=self.datas[0].datetime.datetime(0) 143 | 144 | if order.status in [order.Completed]: 145 | if order.isbuy(): 146 | print( 147 | '[%s] BUY EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 148 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 149 | else: # Sell 150 | print('[%s] SELL EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 151 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 152 | 153 | def next(self): 154 | dt=self.datas[0].datetime.datetime(0) 155 | #print("[NEXT]:%s:close=%s" % (dt,self.dataclose[0])) 156 | 157 | #SOM 158 | if self.lastMonth!=dt.month: 159 | if self.lastMonth!=-1: 160 | chg=self.broker.getvalue()-self.monthCash 161 | print("[%s] SOM:chg=%.2f,cash=%.2f" % (dt,chg,self.broker.getvalue())) 162 | self.lastMonth=dt.month 163 | self.monthCash=self.broker.getvalue() 164 | 165 | #SOD 166 | if self.lastDay!=dt.day: 
167 | self.lastDay=dt.day 168 | print("[%s] SOD:cash=%.2f" % (dt,self.broker.getvalue())) 169 | -------------------------------------------------------------------------------- /4_Kinesis/model/algo_base.py: -------------------------------------------------------------------------------- 1 | import backtrader as bt 2 | import backtrader.feeds as btfeeds 3 | import backtrader.analyzers as btanalyzers 4 | import backtrader.plot 5 | import os 6 | import pytz 7 | from pytz import timezone 8 | import requests 9 | import json 10 | import time 11 | from algo_sim_feed import AlgoSimData 12 | #from abc import classmethod 13 | 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | import matplotlib.pyplot as plt 17 | plt.rcParams["figure.figsize"] = [16,9] 18 | 19 | # More documentation about backtrader: https://www.backtrader.com/ 20 | 21 | class AlgoStrategy(): 22 | 23 | def __init__(self,config,strategy): 24 | self.config=config 25 | 26 | self.cerebro = bt.Cerebro() 27 | strategy.config=config 28 | strategy.init_broker(self.cerebro.broker) 29 | strategy.add_data(self.cerebro) 30 | self.cerebro.addstrategy(strategy) 31 | 32 | self.portfolioStartValue=self.cerebro.broker.getvalue() 33 | 34 | self.cerebro.addanalyzer(btanalyzers.DrawDown, _name='dd') 35 | self.cerebro.addanalyzer(btanalyzers.SharpeRatio_A, _name='sharpe') 36 | self.cerebro.addanalyzer(btanalyzers.SQN, _name='sqn') 37 | self.cerebro.addanalyzer(btanalyzers.TradeAnalyzer, _name='ta') 38 | 39 | def performance(self): 40 | analyzer=self.thestrat.analyzers.ta.get_analysis() 41 | dd_analyzer=self.thestrat.analyzers.dd.get_analysis() 42 | 43 | #Get the results we are interested in 44 | total_open = analyzer.total.open 45 | total_closed = analyzer.total.closed 46 | total_won = analyzer.won.total 47 | total_lost = analyzer.lost.total 48 | win_streak = analyzer.streak.won.longest 49 | lose_streak = analyzer.streak.lost.longest 50 | pnl_net = round(analyzer.pnl.net.total,2) 51 | strike_rate = (total_won / total_closed) * 100 52 | #Designate the rows 53 | h1 = ['Total Open', 'Total Closed', 'Total Won', 'Total Lost'] 54 | h2 = ['Strike Rate','Win Streak', 'Losing Streak', 'PnL Net'] 55 | h3 = ['DrawDown Pct','MoneyDown', '', ''] 56 | self.total_closed=total_closed 57 | self.strike_rate=strike_rate 58 | self.max_drawdown=dd_analyzer.max.drawdown 59 | r1 = [total_open, total_closed,total_won,total_lost] 60 | r2 = [('%.2f%%' %(strike_rate)), win_streak, lose_streak, pnl_net] 61 | r3 = [('%.2f%%' %(dd_analyzer.max.drawdown)), dd_analyzer.max.moneydown, '', ''] 62 | #Check which set of headers is the longest. 
63 | header_length = max(len(h1),len(h2),len(h3)) 64 | #Print the rows 65 | print_list = [h1,r1,h2,r2,h3,r3] 66 | row_format ="{:<15}" * (header_length + 1) 67 | print("Trade Analysis Results:") 68 | for row in print_list: 69 | print(row_format.format('',*row)) 70 | 71 | analyzer=self.thestrat.analyzers.sqn.get_analysis() 72 | sharpe_analyzer=self.thestrat.analyzers.sharpe.get_analysis() 73 | self.sqn = analyzer.sqn 74 | self.sharpe_ratio = sharpe_analyzer['sharperatio'] 75 | if self.sharpe_ratio is None: 76 | self.sharpe_ratio=0 77 | self.pnl = self.cerebro.broker.getvalue()-self.portfolioStartValue 78 | print('[SQN:%.2f, Sharpe Ratio:%.2f, Final Portfolio:%.2f, Total PnL:%.2f]' % (self.sqn,self.sharpe_ratio,self.cerebro.broker.getvalue(),self.pnl)) 79 | 80 | # plot 81 | fig=self.cerebro.plot() 82 | plt.savefig(os.path.join(StrategyTemplate.MODEL_PATH, 'chart.png')) 83 | 84 | def submit(self): 85 | try: 86 | if 'submitUrl' in self.config: 87 | name=self.config['user']+'@'+self.config['account'] 88 | algo=self.config['algo_name'] 89 | submitUrl=self.config['submitUrl'] 90 | 91 | URL = submitUrl 92 | ts=str(int(time.time())) 93 | PARAMS={'id': algo, 94 | 'name': name, 95 | 'trades': self.total_closed, 96 | 'strike_rate': self.strike_rate, 97 | 'max_drawdown': self.max_drawdown, 98 | 'pnl': self.pnl, 99 | 'sqn': self.sqn, 100 | 'sharpe_ratio': self.sharpe_ratio} 101 | print("submit:%s" % (json.dumps(PARAMS))) 102 | r = requests.get(url = URL, params = PARAMS, timeout=3) 103 | print("status=%s,res=%s" % (r.status_code,r.text)) 104 | if r.status_code == 200: 105 | print("performance submitted") 106 | else: 107 | print("error submitting performance:%s" % r.text) 108 | except Exception as e: 109 | print("error submitting performance:%s" % e) 110 | 111 | def run(self): 112 | thestrats = self.cerebro.run() 113 | self.thestrat = thestrats[0] 114 | self.performance() 115 | self.submit() 116 | 117 | class StrategyTemplate(bt.Strategy): 118 | 119 | PREFIX='/opt/ml/' 120 | TRAIN_FILE = os.path.join(PREFIX,'input/data/training/data.csv') 121 | CONFIG_FILE = os.path.join(PREFIX,'input/config/hyperparameters.json') 122 | MODEL_PATH = os.path.join(PREFIX,'model') 123 | 124 | def __init__(self): 125 | with open(StrategyTemplate.CONFIG_FILE, 'r') as f: 126 | self.config = json.load(f) 127 | print("[INIT]:config:%s=%s" % (StrategyTemplate.CONFIG_FILE,self.config)) 128 | 129 | self.lastDay=-1 130 | self.lastMonth=-1 131 | self.dataclose = self.datas[0].close 132 | 133 | @staticmethod 134 | def init_broker(broker): 135 | pass 136 | 137 | @staticmethod 138 | def add_data(cerebro): 139 | pass 140 | 141 | def notify_order(self, order): 142 | dt=self.datas[0].datetime.datetime(0) 143 | 144 | if order.status in [order.Completed]: 145 | if order.isbuy(): 146 | print( 147 | '[%s] BUY EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 148 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 149 | else: # Sell 150 | print('[%s] SELL EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 151 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 152 | 153 | def next(self): 154 | dt=self.datas[0].datetime.datetime(0) 155 | #print("[NEXT]:%s:close=%s" % (dt,self.dataclose[0])) 156 | 157 | #SOM 158 | if self.lastMonth!=dt.month: 159 | if self.lastMonth!=-1: 160 | chg=self.broker.getvalue()-self.monthCash 161 | print("[%s] SOM:chg=%.2f,cash=%.2f" % (dt,chg,self.broker.getvalue())) 162 | self.lastMonth=dt.month 163 | self.monthCash=self.broker.getvalue() 164 | 165 | #SOD 166 | if self.lastDay!=dt.day: 
167 | self.lastDay=dt.day 168 | print("[%s] SOD:cash=%.2f" % (dt,self.broker.getvalue())) 169 | -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/algo_base.py: -------------------------------------------------------------------------------- 1 | import backtrader as bt 2 | import backtrader.feeds as btfeeds 3 | import backtrader.analyzers as btanalyzers 4 | import backtrader.plot 5 | import os 6 | import pytz 7 | from pytz import timezone 8 | import requests 9 | import json 10 | import time 11 | from algo_sim_feed import AlgoSimData 12 | 13 | import matplotlib 14 | import matplotlib.pyplot as plt 15 | 16 | # More documentation about backtrader: https://www.backtrader.com/ 17 | 18 | class AlgoStrategy(): 19 | 20 | def __init__(self,config,strategy): 21 | self.config=config 22 | 23 | self.cerebro = bt.Cerebro() 24 | strategy.config=config 25 | strategy.init_broker(self.cerebro.broker) 26 | strategy.add_data(self.cerebro) 27 | self.cerebro.addstrategy(strategy) 28 | 29 | self.portfolioStartValue=self.cerebro.broker.getvalue() 30 | 31 | self.cerebro.addanalyzer(btanalyzers.DrawDown, _name='dd') 32 | self.cerebro.addanalyzer(btanalyzers.SharpeRatio_A, _name='sharpe') 33 | self.cerebro.addanalyzer(btanalyzers.SQN, _name='sqn') 34 | self.cerebro.addanalyzer(btanalyzers.TradeAnalyzer, _name='ta') 35 | 36 | def performance(self): 37 | analyzer=self.thestrat.analyzers.ta.get_analysis() 38 | dd_analyzer=self.thestrat.analyzers.dd.get_analysis() 39 | 40 | #Get the results we are interested in 41 | total_open = analyzer.total.open 42 | total_closed = analyzer.total.closed 43 | total_won = analyzer.won.total 44 | total_lost = analyzer.lost.total 45 | win_streak = analyzer.streak.won.longest 46 | lose_streak = analyzer.streak.lost.longest 47 | pnl_net = round(analyzer.pnl.net.total,2) 48 | strike_rate = (total_won / total_closed) * 100 49 | #Designate the rows 50 | h1 = ['Total Open', 'Total Closed', 'Total Won', 'Total Lost'] 51 | h2 = ['Strike Rate','Win Streak', 'Losing Streak', 'PnL Net'] 52 | h3 = ['DrawDown Pct','MoneyDown', '', ''] 53 | self.total_closed=total_closed 54 | self.strike_rate=strike_rate 55 | self.max_drawdown=dd_analyzer.max.drawdown 56 | r1 = [total_open, total_closed,total_won,total_lost] 57 | r2 = [('%.2f%%' %(strike_rate)), win_streak, lose_streak, pnl_net] 58 | r3 = [('%.2f%%' %(dd_analyzer.max.drawdown)), dd_analyzer.max.moneydown, '', ''] 59 | #Check which set of headers is the longest. 
60 | header_length = max(len(h1),len(h2),len(h3)) 61 | #Print the rows 62 | print_list = [h1,r1,h2,r2,h3,r3] 63 | row_format ="{:<15}" * (header_length + 1) 64 | print("Trade Analysis Results:") 65 | for row in print_list: 66 | print(row_format.format('',*row)) 67 | 68 | analyzer=self.thestrat.analyzers.sqn.get_analysis() 69 | sharpe_analyzer=self.thestrat.analyzers.sharpe.get_analysis() 70 | self.sqn = analyzer.sqn 71 | self.sharpe_ratio = sharpe_analyzer['sharperatio'] 72 | if self.sharpe_ratio is None: 73 | self.sharpe_ratio=0 74 | self.pnl = self.cerebro.broker.getvalue()-self.portfolioStartValue 75 | print('[SQN:%.2f, Sharpe Ratio:%.2f, Final Portfolio:%.2f, Total PnL:%.2f]' % (self.sqn,self.sharpe_ratio,self.cerebro.broker.getvalue(),self.pnl)) 76 | 77 | # plot 78 | chart=False 79 | if 'chart' in self.config and self.config['chart']=='true': 80 | chart=True 81 | if chart: 82 | fig=self.cerebro.plot() 83 | plt.rcParams["figure.figsize"] = [16,9] 84 | plt.savefig(os.path.join(StrategyTemplate.MODEL_PATH, 'chart.png')) 85 | 86 | def submit(self): 87 | try: 88 | if 'submitUrl' in self.config: 89 | name=self.config['user']+'@'+self.config['account'] 90 | algo=self.config['algo_name'] 91 | submitUrl=self.config['submitUrl'] 92 | 93 | URL = submitUrl 94 | ts=str(int(time.time())) 95 | PARAMS={'id': algo, 96 | 'name': name, 97 | 'trades': self.total_closed, 98 | 'strike_rate': self.strike_rate, 99 | 'max_drawdown': self.max_drawdown, 100 | 'pnl': self.pnl, 101 | 'sqn': self.sqn, 102 | 'sharpe_ratio': self.sharpe_ratio} 103 | print("submit:%s" % (json.dumps(PARAMS))) 104 | r = requests.get(url = URL, params = PARAMS, timeout=3) 105 | print("status=%s,res=%s" % (r.status_code,r.text)) 106 | if r.status_code == 200: 107 | print("performance submitted") 108 | else: 109 | print("error submitting performance:%s" % r.text) 110 | except Exception as e: 111 | print("error submitting performance:%s" % e) 112 | 113 | def run(self): 114 | thestrats = self.cerebro.run() 115 | self.thestrat = thestrats[0] 116 | self.performance() 117 | self.submit() 118 | 119 | class StrategyTemplate(bt.Strategy): 120 | 121 | PREFIX='/opt/ml/' 122 | TRAIN_FILE = os.path.join(PREFIX,'input/data/training/data.csv') 123 | CONFIG_FILE = os.path.join(PREFIX,'input/config/hyperparameters.json') 124 | MODEL_PATH = os.path.join(PREFIX,'model') 125 | 126 | def __init__(self): 127 | with open(StrategyTemplate.CONFIG_FILE, 'r') as f: 128 | self.config = json.load(f) 129 | print("[INIT]:config:%s=%s" % (StrategyTemplate.CONFIG_FILE,self.config)) 130 | 131 | self.lastDay=-1 132 | self.lastMonth=-1 133 | self.dataclose = self.datas[0].close 134 | 135 | @staticmethod 136 | def init_broker(broker): 137 | pass 138 | 139 | @staticmethod 140 | def add_data(cerebro): 141 | pass 142 | 143 | def notify_order(self, order): 144 | dt=self.datas[0].datetime.datetime(0) 145 | 146 | if order.status in [order.Completed]: 147 | if order.isbuy(): 148 | print( 149 | '[%s] BUY EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 150 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 151 | else: # Sell 152 | print('[%s] SELL EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 153 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 154 | 155 | def next(self): 156 | dt=self.datas[0].datetime.datetime(0) 157 | #print("[NEXT]:%s:close=%s" % (dt,self.dataclose[0])) 158 | 159 | #SOM 160 | if self.lastMonth!=dt.month: 161 | if self.lastMonth!=-1: 162 | chg=self.broker.getvalue()-self.monthCash 163 | #print("[%s] 
SOM:chg=%.2f,cash=%.2f" % (dt,chg,self.broker.getvalue())) 164 | self.lastMonth=dt.month 165 | self.monthCash=self.broker.getvalue() 166 | 167 | #SOD 168 | if self.lastDay!=dt.day: 169 | self.lastDay=dt.day 170 | #print("[%s] SOD:cash=%.2f" % (dt,self.broker.getvalue())) 171 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Algorithmic Trading Workshop 2 | 3 | In this workshop, participants will learn how to load and store financial data on AWS from AWS Data Exchange and other external data sources and how to build and backtest algorithmic trading strategies with Amazon SageMaker that use technical indicators and advanced machine learning models. 4 | 5 | ![chart](assets/chart.png) 6 | 7 | _Time Commitment Expectation: This workshop was created to be completed in approximately 1h 30 min._ 8 | 9 | ## Regions 10 | 11 | This workshop has been tested in **us-east-1**. 12 | 13 | ## Considerations for Each Role 14 | As the team lead on this lean team of one, you'll need to wear multiple hats. Below are some things we'll cover from the perspective of each role: 15 | * Data Engineer - You'll modify scripts to load external market data to AWS. 16 | * Data Scientist - You'll need to load the data into your machine learning development environment. Once loaded, you'll understand the data, use a machine learning algorithm to train the model, and make predictions. 17 | * Trader - You will use different trading strategies based on data to maximize Profit & Loss while managing risk. 18 | 19 | ## Goals 20 | 21 | At minimum, at the end of this workshop, you will have an understanding of how to load historical price data from external market data sources like AWS Data Exchange into S3. You will become familiar with how to store price data in S3 and expose it via the Glue Data Catalog and Athena, how to backtest trading strategies using Amazon SageMaker, and how to train machine learning models that are used in trading strategies. You will also gain a basic understanding of how trading strategies based on trend following and machine learning are developed with Python and can be optimized for better performance. 22 | 23 | ## Architecture 24 | 25 | ![chart](assets/arch.png) 26 | 27 | ## License 28 | 29 | This library is licensed under the MIT-0 License. See the LICENSE file. 30 | 31 |
32 | 33 | External Dependencies 34 | 35 | This package depends on and may retrieve a number of third-party software packages (such as open source packages) from third-party servers at install-time or build-time ("External Dependencies"). The External Dependencies are subject to license terms that you must accept in order to use this package. If you do not accept all of the applicable license terms, you should not use this package. We recommend that you consult your company’s open source approval policy before proceeding. 36 | 37 | Provided below is a list of the External Dependencies and the applicable license terms as indicated by the documentation associated with the External Dependencies as of Amazon's most recent review of such documentation. 38 | THIS INFORMATION IS PROVIDED FOR CONVENIENCE ONLY. AMAZON DOES NOT PROMISE THAT THE LIST OR THE APPLICABLE TERMS AND CONDITIONS ARE COMPLETE, ACCURATE, OR UP-TO-DATE, AND AMAZON WILL HAVE NO LIABILITY FOR ANY INACCURACIES. YOU SHOULD CONSULT THE DOWNLOAD SITES FOR THE EXTERNAL DEPENDENCIES FOR THE MOST COMPLETE AND UP-TO-DATE LICENSING INFORMATION. 39 | YOUR USE OF THE EXTERNAL DEPENDENCIES IS AT YOUR SOLE RISK. IN NO EVENT WILL AMAZON BE LIABLE FOR ANY DAMAGES, INCLUDING WITHOUT LIMITATION ANY DIRECT, INDIRECT, CONSEQUENTIAL, SPECIAL, INCIDENTAL, OR PUNITIVE DAMAGES (INCLUDING FOR ANY LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, OR COMPUTER FAILURE OR MALFUNCTION) ARISING FROM OR RELATING TO THE EXTERNAL DEPENDENCIES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, EVEN IF AMAZON HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. THESE LIMITATIONS AND DISCLAIMERS APPLY EXCEPT TO THE EXTENT PROHIBITED BY APPLICABLE LAW. 40 | 41 | ** backtrader; version 1.9.74.123 -- https://www.backtrader.com/ 42 |
43 | 44 | ## References 45 | 46 | - Reference Architecture for Algorithmic Trading (Dec 2020): https://d1.awsstatic.com/architecture-diagrams/ArchitectureDiagrams/algorithmic-trading-ra.pdf 47 | - Blog Post (Feb 2021): Algorithmic Trading with SageMaker and AWS Data Exchange: https://aws.amazon.com/blogs/industries/algorithmic-trading-on-aws-with-amazon-sagemaker-and-aws-data-exchange/ 48 | - Blog Post (June 2021): How to run what-if scenarios for trading strategies with Amazon FinSpace: https://aws.amazon.com/blogs/industries/how-to-run-what-if-scenarios-for-trading-strategies-with-amazon-finspace/ 49 | - Blog Post (July 2021): Algorithmic Trading with SageMaker: https://aws.amazon.com/blogs/machine-learning/building-algorithmic-trading-strategies-with-amazon-sagemaker/ 50 | - Blog Post (Jan 2022): Backtest trading strategies with Amazon Kinesis Data Streams long-term retention and Amazon SageMaker: https://aws.amazon.com/blogs/big-data/backtest-trading-strategies-with-amazon-kinesis-data-streams-long-term-retention-and-amazon-sagemaker/ 51 | - YouTube (Dec 2020): Automated Analysis of Financial Data and Algorithmic Trading: https://www.youtube.com/watch?v=i0izMuiU12I 52 | 53 | --- 54 | 55 | ## Instructions using SageMaker Studio 56 | 57 | A newer version of this workshop has been developed for SageMaker Studio and is available in folder **5_SageMakerStudio**. 58 | 59 | 1. Set up SageMaker Studio with sufficient permissions. 60 | 1. Run Jupyter Notebook: **1_Setup.ipynb**: This will set up the S3 bucket, the Glue Data Catalog schema, and the Athena workgroup. For SageMaker Studio, a custom kernel is built and attached. 61 | 1. Run Jupyter Notebook: **2_Load_Data.ipynb**: This will load daily stock price data into the S3 bucket. 62 | 1. Run Jupyter Notebook: **3_Backtest_Strategy.ipynb**: Backtest the strategy in SageMaker Studio and find optimal parameters. 63 | 64 | --- 65 | 66 | ## Instructions using SageMaker Notebooks 67 | 68 | ## Step 0: Set up environment 69 | 70 | 1. Create a new unique S3 bucket whose name starts with "**algotrading-**" (e.g. "**algotrading-YYYY-MM-DD-XYZ**"); this bucket is used for storing external price data. 71 | 1. For the base infrastructure components (SageMaker Notebook, Athena, Glue Tables), deploy the following [CloudFormation template](https://github.com/aws-samples/algorithmic-trading/raw/master/0_Setup/algo-reference.yaml). Go to [CloudFormation](https://console.aws.amazon.com/cloudformation/home?#/stacks/new?stackName=algotrading) and upload the downloaded CloudFormation template. For the S3 bucket parameter, specify the previously created bucket name. Verify that the stack name is **algotrading** before creating the stack, and acknowledge that IAM changes will be made. 72 | 73 | This step will take about 5 minutes. 74 | 75 | ## Step 1: Load Historical Price Data 76 | 77 | Here are a few data source options for this workshop. The daily datasets can be downloaded and generated in a few minutes; for the intraday dataset, please plan for at least 15 minutes. 78 | 1. Sample Daily EOD Stock Price Data (from public data source or AWS Data Exchange) 79 | 80 | ### Option 1a: Sample Daily EOD Stock Price Data (from public data source) 81 | 82 | If you are not able to use AWS Data Exchange in your account, you can instead run the following Jupyter notebook, which generates some sample EOD price data from a public data source. Run all the cells in **1_Data/Load_Hist_Data_Daily_Public.ipynb**.
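Whichever data source option you choose, you can optionally sanity-check the loaded data before moving on. The following is a minimal sketch (not part of the workshop notebooks) that mirrors the PyAthena query used later in the strategy notebooks; it assumes the Step 0 stack has created the `algo_data.hist_data_daily` table, that the workshop runs in us-east-1, and that you replace the placeholder bucket name with your own:

```python
import pandas as pd
from pyathena import connect

# Placeholder bucket name -- replace with the bucket you created in Step 0.
s3bucket = "algotrading-YYYY-MM-DD-XYZ"

# Assumes the Glue table algo_data.hist_data_daily exists and the region is us-east-1.
conn = connect(s3_staging_dir="s3://" + s3bucket + "/results/", region_name="us-east-1")

# Pull a few rows to confirm the daily price data is queryable through Athena.
df = pd.read_sql("SELECT dt, open, high, low, close, vol FROM algo_data.hist_data_daily LIMIT 5;", conn)
print(df)
```

If this returns rows with the expected columns, the data load succeeded and you can continue with the backtesting steps.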
83 | 84 | ### Option 1b: Sample Daily EOD Stock Price Data (via AWS Data Exchange) 85 | 86 | If you want to use AWS Data Exchange, you can, for example, download the following [dataset](https://aws.amazon.com/marketplace/pp/prodview-e2aizdzkos266#overview). There are multiple options available, and we picked this one for demonstration purposes. 87 | 88 | To download this dataset, first complete a subscription request where you provide the required information for Company Name, Name, Email Address, and Intended Use Case. Once the provider confirms the subscription, you can navigate to [AWS Data Exchange/My subscriptions/Entitled data](https://console.aws.amazon.com/dataexchange/home?#/entitled-data). 89 | Then choose the latest revision for this subscription, select all assets, and click on **Export to Amazon S3**. In the new window, select the root folder of the S3 bucket that starts with "*algotrading-data-*". Then click on **Export** and wait until your export job is completed. 90 | 91 | In order to use this dataset for algorithmic trading, we want to standardize it to a CSV file with the following columns: **dt, sym, open, high, low, close, vol**. 92 | Once you have successfully exported the dataset, please run the following Jupyter notebook to format the dataset and store it in the ***hist_data_daily*** folder of your S3 bucket. Go to [Amazon SageMaker/Notebook/Notebook instances](https://console.aws.amazon.com/sagemaker/home?#/notebook-instances), then click on **Open Jupyter** for the provisioned notebook instance. Run all the cells in **1_Data/Load_Hist_Data_Daily.ipynb**. 93 | 94 | ## Step 2: Backtest a trend following strategy (or move directly to Step 3) 95 | 96 | In this module, we backtest a trend following strategy on daily price data with Amazon SageMaker. For these notebooks, please ensure that you have daily price data loaded. 97 | 98 | You can choose between the following trading strategies: 99 | 1. **Simple Moving Average Strategy**: **2_Strategies/Strategy_SMA.ipynb** 100 | 101 | 1. **Daily Breakout Strategy**: **2_Strategies/Strategy_Breakout.ipynb** 102 | 103 | Open the Jupyter notebook for your selected strategy in the folder **2_Strategies** and run it from your Amazon SageMaker Notebook instance. The instructions in the notebook provide guidance on how to optimize the strategy. 104 | 105 | ## Step 3: Backtest a machine-learning based strategy 106 | 107 | In this module, we backtest a machine-learning strategy with Amazon SageMaker on daily or intraday price data. Please ensure that you have daily or intraday price data loaded before running the corresponding notebooks. 108 | 109 | There are usually two parts: one for training the machine learning model and one for backtesting the strategy. You can run both notebooks or skip training the model, as a trained model is already available in the repository: 110 | 111 | **ML Long/Short Prediction Strategy** 112 | * Model Training (Daily Price Data) (Optional): **3_Models/Train_Model_Forecast.ipynb** 113 | * Strategy Backtesting (Daily Price Data): **2_Strategies/Strategy_ML_Forecast.ipynb** 114 | 115 | --- 116 | 117 | ## Instructions using Amazon FinSpace 118 | 119 | 1. Set up Amazon FinSpace 120 | 1. Run the following notebook: **2_Strategies/Strategy_WhatIfScenarios.ipynb** in Amazon FinSpace 121 | 122 | ### Congratulations! You have completed the workshop. Don't forget to clean up the resources if you used your own account for this workshop (a cleanup sketch follows below).
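If you ran the workshop in your own account, the following is a minimal cleanup sketch using boto3. It assumes the default stack name **algotrading** from Step 0 and uses a placeholder for the data bucket you created; both values are assumptions to adjust for your environment. Deleting the stack also removes the base infrastructure it provisioned (SageMaker notebook instance, Athena and Glue resources).

```python
import boto3

# Placeholders (assumed values) -- adjust to your own stack name and bucket name.
stack_name = "algotrading"                   # default stack name from Step 0
bucket_name = "algotrading-YYYY-MM-DD-XYZ"   # the data bucket you created manually

# Empty and delete the manually created data bucket.
s3 = boto3.resource("s3")
bucket = s3.Bucket(bucket_name)
bucket.objects.all().delete()
bucket.delete()

# Delete the CloudFormation stack that provisioned the base infrastructure.
cfn = boto3.client("cloudformation")
cfn.delete_stack(StackName=stack_name)
cfn.get_waiter("stack_delete_complete").wait(StackName=stack_name)
print("Cleanup complete.")
```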
-------------------------------------------------------------------------------- /2_Strategies/Strategy_SMA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%run init_model.py 'algo_simple_sma'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Step 1) Data Preparation" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# get S3 bucket\n", 26 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 27 | "s3bucket=s3bucket[0]\n", 28 | "s3bucket" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import sys\n", 38 | "!{sys.executable} -m pip install PyAthena" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import os\n", 48 | "import sagemaker as sage\n", 49 | "from sagemaker import get_execution_role\n", 50 | "import datetime\n", 51 | "from sagemaker.tensorflow import TensorFlow\n", 52 | "import json\n", 53 | "\n", 54 | "role = get_execution_role()\n", 55 | "sess = sage.Session()\n", 56 | "region = sess.boto_session.region_name" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "import pandas as pd\n", 66 | "from pyathena import connect\n", 67 | "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/',\n", 68 | " region_name=region)\n", 69 | "\n", 70 | "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n", 71 | "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n", 72 | "del df['dt']\n", 73 | "df.head()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "trainCount=int(len(df)*0.4)\n", 83 | "dfTrain = df.iloc[:trainCount]\n", 84 | "\n", 85 | "dfTest = df.iloc[trainCount:]\n", 86 | "dfTest.to_csv('local/'+algo_name+'/input/data/training/data.csv')\n", 87 | "dfTest.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "scrolled": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "%matplotlib notebook\n", 99 | "dfTest[\"close\"].plot()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# Step 2) Modify Strategy Configuration \n", 107 | "\n", 108 | "In the following cell, you can adjust the parameters for the strategy.\n", 109 | "\n", 110 | "* `user` = Name for Leaderboard (optional)\n", 111 | "* `fast_period` = Fast Period for Moving Average Indicator in min (e.g. 50)\n", 112 | "* `slow_period` = Slow Period for Moving Average Indicator in min (e.g. 200)\n", 113 | "* `size` = The number of shares for a transaction\n", 114 | "\n", 115 | "`Tip`: A good starting point for improving the strategy is to reduce the number of trades that get triggered by increasing the slow and fast period. Longer periods tend to perform better. 
" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "%%writefile local/{algo_name}/input/config/hyperparameters.json\n", 125 | "{ \"user\" : \"user\",\n", 126 | " \"fast_period\" : \"50\",\n", 127 | " \"slow_period\" : \"200\",\n", 128 | " \"size\" : \"100\"\n", 129 | "}" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "%run update_config.py $algo_name" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "# Step 3) Modify Strategy Code\n", 146 | "\n", 147 | "In the following cell, you can modify the strategy code. For the first backtests, you can leave it as is.\n", 148 | "\n", 149 | "`Tip`: A good starting point for improving the strategy is to try different indicators like ExponentialMovingAverage or delay when trades are triggered and check crossover again before placing a trade.\n", 150 | "\n", 151 | "Here are some helpful links:\n", 152 | "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n", 153 | "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n", 154 | "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "%%writefile model/{algo_name}.py\n", 164 | "import backtrader as bt\n", 165 | "from algo_base import *\n", 166 | "\n", 167 | "class MyStrategy(StrategyTemplate):\n", 168 | "\n", 169 | " def __init__(self): # Initiation\n", 170 | " super(MyStrategy, self).__init__()\n", 171 | " self.config[\"fast_period\"]=int(self.config[\"fast_period\"])\n", 172 | " self.config[\"slow_period\"]=int(self.config[\"slow_period\"])\n", 173 | " self.config[\"size\"]=int(self.config[\"size\"])\n", 174 | "\n", 175 | " self.smaFast = bt.ind.SimpleMovingAverage(period=self.config[\"fast_period\"])\n", 176 | " self.smaSlow = bt.ind.SimpleMovingAverage(period=self.config[\"slow_period\"])\n", 177 | " self.size = self.config[\"size\"]\n", 178 | "\n", 179 | " def init_broker(broker):\n", 180 | " broker.setcash(100000.0)\n", 181 | " broker.setcommission(commission=0.0) \n", 182 | " \n", 183 | " def add_data(cerebro):\n", 184 | " data = btfeeds.GenericCSVData(\n", 185 | " dataname=MyStrategy.TRAIN_FILE,\n", 186 | " dtformat=('%Y-%m-%d'),\n", 187 | " timeframe=bt.TimeFrame.Days,\n", 188 | " datetime=0,\n", 189 | " time=-1,\n", 190 | " high=2,\n", 191 | " low=3,\n", 192 | " open=1,\n", 193 | " close=4,\n", 194 | " volume=5,\n", 195 | " openinterest=-1\n", 196 | " )\n", 197 | " cerebro.adddata(data)\n", 198 | "\n", 199 | " def next(self): # Processing\n", 200 | " super(MyStrategy, self).next()\n", 201 | " dt=self.datas[0].datetime.datetime(0)\n", 202 | " if not self.position:\n", 203 | " if self.smaFast[0] > self.smaSlow[0]:\n", 204 | " self.buy(size=self.size) # Go long\n", 205 | " else:\n", 206 | " self.sell(size=self.size) # Go short\n", 207 | " elif self.position.size>0 and self.smaFast[0] < self.smaSlow[0]:\n", 208 | " self.sell(size=2*self.size) # Go short\n", 209 | " elif self.position.size<0 and self.smaFast[0] > self.smaSlow[0]: \n", 210 | " self.buy(size=2*self.size) # Go long" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "# Step 4) Backtest Locally\n", 218 | "\n", 219 | 
"**Please note that the initial docker build may take a few minutes. Subsequent runs are fast.**" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "#Build Local Algo Image\n", 229 | "!docker build -t $algo_name .\n", 230 | "!docker run -v $(pwd)/local/$algo_name:/opt/ml --rm $algo_name train" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "from IPython.display import Image\n", 240 | "Image(filename='local/'+algo_name+'/model/chart.png')" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "## Refine your trading strategy (step 2 to 4). Once you are ready, move on to the next step." 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "# Step 5) Backtest on SageMaker and submit performance" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "#Deploy Algo Image to ECS\n", 264 | "!./build_and_push.sh $algo_name" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "#Run Remote test via SageMaker\n", 274 | "import sagemaker as sage\n", 275 | "from sagemaker import get_execution_role\n", 276 | "from sagemaker.estimator import Estimator \n", 277 | "\n", 278 | "role = get_execution_role()\n", 279 | "sess = sage.Session()\n", 280 | "\n", 281 | "WORK_DIRECTORY = 'local/'+algo_name+'/input/data/training'\n", 282 | "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n", 283 | "print(data_location)\n", 284 | "\n", 285 | "conf_file='local/'+algo_name+'/input/config/hyperparameters.json'\n", 286 | "with open(conf_file, 'r') as f:\n", 287 | " config = json.load(f)\n", 288 | "#config['sim_data']='True'\n", 289 | "print(config)\n", 290 | "\n", 291 | "prefix=algo_name\n", 292 | "job_name=prefix.replace('_','-')\n", 293 | "\n", 294 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 295 | "region = sess.boto_session.region_name\n", 296 | "image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'\n", 297 | "\n", 298 | "algo = sage.estimator.Estimator(\n", 299 | " image_uri=image,\n", 300 | " role=role,\n", 301 | " instance_count=1,\n", 302 | " instance_type='ml.m4.xlarge',\n", 303 | " output_path=\"s3://{}/output\".format(sess.default_bucket()),\n", 304 | " sagemaker_session=sess,\n", 305 | " base_job_name=job_name,\n", 306 | " hyperparameters=config,\n", 307 | " metric_definitions=[\n", 308 | " {\n", 309 | " \"Name\": \"algo:pnl\",\n", 310 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 311 | " },\n", 312 | " {\n", 313 | " \"Name\": \"algo:sharpe_ratio\",\n", 314 | " \"Regex\": \"Sharpe Ratio:(.*?),\"\n", 315 | " }\n", 316 | " ])\n", 317 | "algo.fit(data_location)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "from sagemaker.analytics import TrainingJobAnalytics\n", 327 | "\n", 328 | "latest_job_name = algo.latest_training_job.job_name\n", 329 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n", 330 | "metrics_dataframe" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 
| "scrolled": true 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "#Get Algo Chart from S3\n", 342 | "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n", 343 | "import boto3\n", 344 | "s3 = boto3.resource('s3')\n", 345 | "my_bucket = s3.Bucket(sess.default_bucket())\n", 346 | "my_bucket.download_file(model_name,'model.tar.gz')\n", 347 | "!tar -xzf model.tar.gz\n", 348 | "!rm model.tar.gz\n", 349 | "from IPython.display import Image\n", 350 | "Image(filename='chart.png') " 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "### Congratulations! You've completed this strategy." 358 | ] 359 | } 360 | ], 361 | "metadata": { 362 | "kernelspec": { 363 | "display_name": "conda_tensorflow_p36", 364 | "language": "python", 365 | "name": "conda_tensorflow_p36" 366 | }, 367 | "language_info": { 368 | "codemirror_mode": { 369 | "name": "ipython", 370 | "version": 3 371 | }, 372 | "file_extension": ".py", 373 | "mimetype": "text/x-python", 374 | "name": "python", 375 | "nbconvert_exporter": "python", 376 | "pygments_lexer": "ipython3", 377 | "version": "3.6.10" 378 | } 379 | }, 380 | "nbformat": 4, 381 | "nbformat_minor": 2 382 | } 383 | -------------------------------------------------------------------------------- /2_Strategies/Strategy_Breakout.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%run init_model.py 'algo_daily_breakout'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Step 1) Data Preparation" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# get S3 bucket\n", 26 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 27 | "s3bucket=s3bucket[0]\n", 28 | "s3bucket" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import sys\n", 38 | "!{sys.executable} -m pip install PyAthena" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import os\n", 48 | "import sagemaker as sage\n", 49 | "from sagemaker import get_execution_role\n", 50 | "import datetime\n", 51 | "from sagemaker.tensorflow import TensorFlow\n", 52 | "import json\n", 53 | "\n", 54 | "role = get_execution_role()\n", 55 | "sess = sage.Session()\n", 56 | "region = sess.boto_session.region_name" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "import pandas as pd\n", 66 | "from pyathena import connect\n", 67 | "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/',\n", 68 | " region_name=region)\n", 69 | "\n", 70 | "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n", 71 | "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n", 72 | "del df['dt']\n", 73 | "df.head()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "trainCount=int(len(df)*0.4)\n", 83 | "dfTrain = df.iloc[:trainCount]\n", 84 | "\n", 85 | "dfTest = df.iloc[trainCount:]\n", 86 | 
"dfTest.to_csv('local/'+algo_name+'/input/data/training/data.csv')\n", 87 | "dfTest.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "%matplotlib notebook\n", 97 | "dfTest[\"close\"].plot()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "# Step 2) Modify Strategy Configuration \n", 105 | "\n", 106 | "In the following cell, you can adjust the parameters for the strategy.\n", 107 | "\n", 108 | "* `user` = Name for Leaderboard (optional)\n", 109 | "* `go_long` = Go Long for Breakout (true or false)\n", 110 | "* `go_short` = Go Short for Breakout (true or false)\n", 111 | "* `period` = Length of window for previous high and low\n", 112 | "* `size` = The number of shares for a transaction\n", 113 | "\n", 114 | "`Tip`: A good starting point for improving the strategy is to lengthen the period of the previous high and low. Equity Markets tend to have a long bias and if you only consider long trades this might improve the performance." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "%%writefile local/{algo_name}/input/config/hyperparameters.json\n", 124 | "{ \"user\" : \"user\",\n", 125 | " \"go_long\" : \"true\",\n", 126 | " \"go_short\" : \"true\",\n", 127 | " \"period\" : \"50\",\n", 128 | " \"size\" : \"100\"\n", 129 | "}" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "%run update_config.py $algo_name" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "# Step 3) Modify Strategy Code\n", 146 | "\n", 147 | "`Tip`: A good starting point for improving the strategy is to add additional indicators like ATR (Average True Range) before placing a trade. 
You want to avoid false signals if there is not enough volatility.\n", 148 | "\n", 149 | "Here are some helpful links:\n", 150 | "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n", 151 | "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n", 152 | "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "%%writefile model/{algo_name}.py\n", 162 | "import backtrader as bt\n", 163 | "from algo_base import *\n", 164 | "import pytz\n", 165 | "from pytz import timezone\n", 166 | "\n", 167 | "class MyStrategy(StrategyTemplate):\n", 168 | "\n", 169 | " def __init__(self): # Initiation\n", 170 | " super(MyStrategy, self).__init__()\n", 171 | " self.config[\"period\"]=int(self.config[\"period\"])\n", 172 | " self.config[\"size\"]=int(self.config[\"size\"])\n", 173 | " self.config[\"go_long\"]=(str(self.config[\"go_long\"]).lower()==\"true\")\n", 174 | " self.config[\"go_short\"]=(str(self.config[\"go_short\"]).lower()==\"true\")\n", 175 | "\n", 176 | " self.highest = bt.ind.Highest(period=self.config[\"period\"])\n", 177 | " self.lowest = bt.ind.Lowest(period=self.config[\"period\"])\n", 178 | " self.size = self.config[\"size\"]\n", 179 | " \n", 180 | " def init_broker(broker):\n", 181 | " broker.setcash(100000.0)\n", 182 | " broker.setcommission(commission=0.0) \n", 183 | " \n", 184 | " def add_data(cerebro):\n", 185 | " data = btfeeds.GenericCSVData(\n", 186 | " dataname=MyStrategy.TRAIN_FILE,\n", 187 | " dtformat=('%Y-%m-%d'),\n", 188 | " timeframe=bt.TimeFrame.Days,\n", 189 | " datetime=0,\n", 190 | " time=-1,\n", 191 | " high=2,\n", 192 | " low=3,\n", 193 | " open=1,\n", 194 | " close=4,\n", 195 | " volume=5,\n", 196 | " openinterest=-1\n", 197 | " )\n", 198 | " cerebro.adddata(data)\n", 199 | " \n", 200 | " def next(self): # Processing\n", 201 | " super(MyStrategy, self).next()\n", 202 | " dt=self.datas[0].datetime.datetime(0)\n", 203 | " if not self.position:\n", 204 | " if self.config[\"go_long\"] and self.datas[0] > self.highest[-1]:\n", 205 | " self.buy(size=self.size) # Go long\n", 206 | " elif self.config[\"go_short\"] and self.datas[0] < self.lowest[-1]:\n", 207 | " self.sell(size=self.size) # Go short\n", 208 | " elif self.position.size>0 and self.datas[0] < self.highest[-1]:\n", 209 | " self.close()\n", 210 | " elif self.position.size<0 and self.datas[0] > self.lowest[-1]: \n", 211 | " self.close()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "# Step 4) Backtest Locally\n", 219 | "\n", 220 | "**Please note that the initial docker build may take a few minutes. Subsequent runs are fast.**" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "#Build Local Algo Image\n", 230 | "!docker build -t $algo_name .\n", 231 | "!docker run -v $(pwd)/local/$algo_name:/opt/ml --rm $algo_name train" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "scrolled": false 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "from IPython.display import Image\n", 243 | "Image(filename='local/'+algo_name+'/model/chart.png')" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## Refine your trading strategy (step 2 to 4). 
Once you are ready, move on to the next step." 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "# Step 5) Backtest on SageMaker and submit performance" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "#Deploy Algo Image to ECS\n", 267 | "!./build_and_push.sh $algo_name" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "#Run Remote Forwardtest via SageMaker\n", 277 | "import sagemaker as sage\n", 278 | "from sagemaker import get_execution_role\n", 279 | "from sagemaker.estimator import Estimator \n", 280 | "\n", 281 | "role = get_execution_role()\n", 282 | "sess = sage.Session()\n", 283 | "\n", 284 | "WORK_DIRECTORY = 'local/'+algo_name+'/input/data/training'\n", 285 | "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n", 286 | "print(data_location)\n", 287 | "\n", 288 | "conf_file='local/'+algo_name+'/input/config/hyperparameters.json'\n", 289 | "with open(conf_file, 'r') as f:\n", 290 | " config = json.load(f)\n", 291 | "#config['sim_data']='True'\n", 292 | "print(config)\n", 293 | "\n", 294 | "prefix=algo_name\n", 295 | "job_name=prefix.replace('_','-')\n", 296 | "\n", 297 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 298 | "region = sess.boto_session.region_name\n", 299 | "image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'\n", 300 | "\n", 301 | "algo = sage.estimator.Estimator(\n", 302 | " image_uri=image,\n", 303 | " role=role,\n", 304 | " instance_count=1,\n", 305 | " instance_type='ml.m4.xlarge',\n", 306 | " output_path=\"s3://{}/output\".format(sess.default_bucket()),\n", 307 | " sagemaker_session=sess,\n", 308 | " base_job_name=job_name,\n", 309 | " hyperparameters=config,\n", 310 | " metric_definitions=[\n", 311 | " {\n", 312 | " \"Name\": \"algo:pnl\",\n", 313 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 314 | " },\n", 315 | " {\n", 316 | " \"Name\": \"algo:sharpe_ratio\",\n", 317 | " \"Regex\": \"Sharpe Ratio:(.*?),\"\n", 318 | " }\n", 319 | " ])\n", 320 | "algo.fit(data_location)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "#Get Algo Metrics\n", 330 | "from sagemaker.analytics import TrainingJobAnalytics\n", 331 | "\n", 332 | "latest_job_name = algo.latest_training_job.job_name\n", 333 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n", 334 | "metrics_dataframe" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": { 341 | "scrolled": true 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "#Get Algo Chart from S3\n", 346 | "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n", 347 | "import boto3\n", 348 | "s3 = boto3.resource('s3')\n", 349 | "my_bucket = s3.Bucket(sess.default_bucket())\n", 350 | "my_bucket.download_file(model_name,'model.tar.gz')\n", 351 | "!tar -xzf model.tar.gz\n", 352 | "!rm model.tar.gz\n", 353 | "from IPython.display import Image\n", 354 | "Image(filename='chart.png') " 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "### Congratulations! You've completed this strategy." 
362 | ] 363 | } 364 | ], 365 | "metadata": { 366 | "kernelspec": { 367 | "display_name": "conda_tensorflow_p36", 368 | "language": "python", 369 | "name": "conda_tensorflow_p36" 370 | }, 371 | "language_info": { 372 | "codemirror_mode": { 373 | "name": "ipython", 374 | "version": 3 375 | }, 376 | "file_extension": ".py", 377 | "mimetype": "text/x-python", 378 | "name": "python", 379 | "nbconvert_exporter": "python", 380 | "pygments_lexer": "ipython3", 381 | "version": "3.6.10" 382 | } 383 | }, 384 | "nbformat": 4, 385 | "nbformat_minor": 2 386 | } 387 | -------------------------------------------------------------------------------- /4_Kinesis/algo-kinesis.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: 2010-09-09 2 | Description: >- 3 | This CloudFormation sample template migrates Market data from S3 to Kinesis using 4 | DMS. 5 | This Template requires an existing source s3 bucket with full load of market data. 6 | Parameters: 7 | VpcCIDR: 8 | Description: Please enter the IP range (CIDR notation) for this VPC 9 | Type: String 10 | Default: 10.111.0.0/16 11 | 12 | PublicSubnet1CIDR: 13 | Description: Please enter the IP range (CIDR notation) for the public subnet in the first Availability Zone 14 | Type: String 15 | Default: 10.111.10.0/24 16 | 17 | PublicSubnet2CIDR: 18 | Description: Please enter the IP range (CIDR notation) for the public subnet in the second Availability Zone 19 | Type: String 20 | Default: 10.111.11.0/24 21 | 22 | MarketDataS3Bucket: 23 | Type: String 24 | Description: S3 Bucket where the market data will reside. 25 | 26 | Metadata: 27 | 'AWS::CloudFormation::Interface': 28 | ParameterGroups: 29 | - Label: 30 | default: Configuration 31 | Parameters: 32 | - VpcCIDR 33 | - PublicSubnet1CIDR 34 | - PublicSubnet2CIDR 35 | - MarketDataS3Bucket 36 | ParameterLabels: 37 | VpcCIDR: 38 | default: Provide VPC CIDR Range 39 | PublicSubnet1CIDR: 40 | default: Provide Public Subnet 1 CIDR Range 41 | PublicSubnet2CIDR: 42 | default: Provide Public Subnet 2 CIDR Range 43 | MarketDataS3Bucket: 44 | default: Name of the S3 Bucket where market Data resides 45 | 46 | Resources: 47 | VPC: 48 | Type: AWS::EC2::VPC 49 | Properties: 50 | CidrBlock: !Ref VpcCIDR 51 | EnableDnsHostnames: true 52 | Tags: 53 | - Key: Name 54 | Value: Kinesis Algo Trading 55 | 56 | InternetGateway: 57 | Type: AWS::EC2::InternetGateway 58 | Properties: 59 | Tags: 60 | - Key: Name 61 | Value: Kinesis Algo Trading 62 | 63 | InternetGatewayAttachment: 64 | Type: AWS::EC2::VPCGatewayAttachment 65 | Properties: 66 | InternetGatewayId: !Ref InternetGateway 67 | VpcId: !Ref VPC 68 | 69 | PublicSubnet1: 70 | Type: AWS::EC2::Subnet 71 | Properties: 72 | VpcId: !Ref VPC 73 | AvailabilityZone: !Select [0, !GetAZs ""] 74 | CidrBlock: !Ref PublicSubnet1CIDR 75 | MapPublicIpOnLaunch: true 76 | Tags: 77 | - Key: Name 78 | Value: Kinesis Algo Trading Public Subnet (AZ1) 79 | 80 | PublicSubnet2: 81 | Type: AWS::EC2::Subnet 82 | Properties: 83 | VpcId: !Ref VPC 84 | AvailabilityZone: !Select [1, !GetAZs ""] 85 | CidrBlock: !Ref PublicSubnet2CIDR 86 | MapPublicIpOnLaunch: true 87 | Tags: 88 | - Key: Name 89 | Value: Kinesis Algo Trading Public Subnet (AZ2) 90 | 91 | PublicRouteTable: 92 | Type: AWS::EC2::RouteTable 93 | Properties: 94 | VpcId: !Ref VPC 95 | Tags: 96 | - Key: Name 97 | Value: Kinesis Algo Trading Public Routes 98 | 99 | DefaultPublicRoute: 100 | Type: AWS::EC2::Route 101 | DependsOn: InternetGatewayAttachment 102 | Properties: 103 | RouteTableId: 
!Ref PublicRouteTable 104 | DestinationCidrBlock: 0.0.0.0/0 105 | GatewayId: !Ref InternetGateway 106 | 107 | PublicSubnet1RouteTableAssociation: 108 | Type: AWS::EC2::SubnetRouteTableAssociation 109 | Properties: 110 | RouteTableId: !Ref PublicRouteTable 111 | SubnetId: !Ref PublicSubnet1 112 | 113 | PublicSubnet2RouteTableAssociation: 114 | Type: AWS::EC2::SubnetRouteTableAssociation 115 | Properties: 116 | RouteTableId: !Ref PublicRouteTable 117 | SubnetId: !Ref PublicSubnet2 118 | 119 | KinesisDMSCloudwatchRole: 120 | Type: 'AWS::IAM::Role' 121 | Properties: 122 | RoleName: kinesis-dms-cloudwatch-logs-role 123 | AssumeRolePolicyDocument: 124 | Version: 2012-10-17 125 | Statement: 126 | - Effect: Allow 127 | Principal: 128 | Service: 129 | - dms.amazonaws.com 130 | Action: 131 | - 'sts:AssumeRole' 132 | ManagedPolicyArns: 133 | - 'arn:aws:iam::aws:policy/service-role/AmazonDMSCloudWatchLogsRole' 134 | Path: / 135 | 136 | KinesisDMSVpcRole: 137 | Type: 'AWS::IAM::Role' 138 | Properties: 139 | RoleName: kinesis-dms-vpc-role 140 | AssumeRolePolicyDocument: 141 | Version: 2012-10-17 142 | Statement: 143 | - Effect: Allow 144 | Principal: 145 | Service: 146 | - dms.amazonaws.com 147 | Action: 148 | - 'sts:AssumeRole' 149 | ManagedPolicyArns: 150 | - 'arn:aws:iam::aws:policy/service-role/AmazonDMSVPCManagementRole' 151 | Path: / 152 | 153 | DMSReplicationSubnetGroup: 154 | Type: 'AWS::DMS::ReplicationSubnetGroup' 155 | Properties: 156 | ReplicationSubnetGroupDescription: Subnets available for DMS 157 | SubnetIds: 158 | - !Ref PublicSubnet1 159 | - !Ref PublicSubnet2 160 | DependsOn: 161 | - KinesisDMSVpcRole 162 | - KinesisDMSCloudwatchRole 163 | 164 | KinesisStream: 165 | Type: 'AWS::Kinesis::Stream' 166 | Properties: 167 | Name: 'kinesis-algo-blog' 168 | RetentionPeriodHours: 8760 169 | ShardCount: 1 170 | StreamEncryption: 171 | EncryptionType: KMS 172 | KeyId: alias/aws/kinesis 173 | 174 | S3SourceDMSRole: 175 | Type: 'AWS::IAM::Role' 176 | Properties: 177 | AssumeRolePolicyDocument: 178 | Version: 2012-10-17 179 | Statement: 180 | - Effect: Allow 181 | Principal: 182 | Service: 183 | - dms.amazonaws.com 184 | Action: 185 | - 'sts:AssumeRole' 186 | Path: / 187 | Policies: 188 | - PolicyName: S3AccessForDMSPolicy 189 | PolicyDocument: 190 | Version: 2012-10-17 191 | Statement: 192 | - Effect: Allow 193 | Action: 194 | - 's3:GetObject' 195 | Resource: 196 | - !Join 197 | - '' 198 | - - 'arn:' 199 | - !Ref AWS::Partition 200 | - ':s3:::' 201 | - !Ref MarketDataS3Bucket 202 | - !Join 203 | - '' 204 | - - 'arn:' 205 | - !Ref AWS::Partition 206 | - ':s3:::' 207 | - !Ref MarketDataS3Bucket 208 | - '/*' 209 | - Effect: Allow 210 | Action: 's3:ListBucket' 211 | Resource: 212 | - !Join 213 | - '' 214 | - - 'arn:' 215 | - !Ref AWS::Partition 216 | - ':s3:::' 217 | - !Ref MarketDataS3Bucket 218 | DMSSecurityGroup: 219 | Type: 'AWS::EC2::SecurityGroup' 220 | Properties: 221 | GroupDescription: Security group for DMS Instance 222 | VpcId: !Ref VPC 223 | TargetKinesisRole: 224 | Type: 'AWS::IAM::Role' 225 | Properties: 226 | AssumeRolePolicyDocument: 227 | Version: 2012-10-17 228 | Statement: 229 | - Effect: Allow 230 | Principal: 231 | Service: 232 | - dms.amazonaws.com 233 | Action: 234 | - 'sts:AssumeRole' 235 | Path: / 236 | Policies: 237 | - PolicyName: KinesisAccessForDMSPolicy 238 | PolicyDocument: 239 | Version: 2012-10-17 240 | Statement: 241 | - Effect: Allow 242 | Action: 243 | - 'kinesis:DescribeStream' 244 | - 'kinesis:PutRecord' 245 | - 'kinesis:PutRecords' 246 | Resource: 247 | - !GetAtt 
248 | - KinesisStream 249 | - Arn 250 | DMSReplicationInstance: 251 | Type: 'AWS::DMS::ReplicationInstance' 252 | Properties: 253 | PubliclyAccessible: true 254 | ReplicationInstanceClass: dms.r4.xlarge 255 | ReplicationSubnetGroupIdentifier: !Ref DMSReplicationSubnetGroup 256 | VpcSecurityGroupIds: 257 | - !Ref DMSSecurityGroup 258 | 259 | S3SourceEndpoint: 260 | Type: 'AWS::DMS::Endpoint' 261 | Properties: 262 | EndpointType: source 263 | EngineName: "s3" 264 | S3Settings: 265 | BucketName: !Ref MarketDataS3Bucket 266 | ExternalTableDefinition: >- 267 | {"TableCount": "1", "Tables": [{"TableName":"intc","TablePath":"marketData/intc/","TableOwner":"marketData","TableColumns":[{"ColumnName": "dt","ColumnType":"TIMESTAMP","ColumnNullable": "false","ColumnIsPk":"true"},{"ColumnName": "sym","ColumnType": "STRING","ColumnLength": "10"},{"ColumnName": "open","ColumnType": "NUMERIC","ColumnPrecision": "5","ColumnScale": "2"},{"ColumnName": "high","ColumnType": "NUMERIC","ColumnPrecision": "5","ColumnScale": "2"},{"ColumnName": "low","ColumnType": "NUMERIC","ColumnPrecision": "5","ColumnScale": "2"}, {"ColumnName": "close","ColumnType": "NUMERIC","ColumnPrecision": "5","ColumnScale": "2"},{"ColumnName": "vol","ColumnType": "NUMERIC","ColumnPrecision": "12","ColumnScale": "2"}],"TableColumnsTotal": "7"}]} 268 | ServiceAccessRoleArn: !GetAtt 269 | - S3SourceDMSRole 270 | - Arn 271 | DependsOn: 272 | - DMSReplicationInstance 273 | 274 | KinesisTargetEndpoint: 275 | Type: 'AWS::DMS::Endpoint' 276 | Properties: 277 | EndpointType: target 278 | EngineName: "kinesis" 279 | KinesisSettings: 280 | MessageFormat: json 281 | StreamArn: !GetAtt 282 | - KinesisStream 283 | - Arn 284 | ServiceAccessRoleArn: !GetAtt 285 | - TargetKinesisRole 286 | - Arn 287 | DependsOn: 288 | - DMSReplicationInstance 289 | 290 | DMSReplicationTask: 291 | Type: 'AWS::DMS::ReplicationTask' 292 | Properties: 293 | MigrationType: full-load 294 | ReplicationInstanceArn: !Ref DMSReplicationInstance 295 | ReplicationTaskSettings: >- 296 | { "Logging" : { "EnableLogging" : true, "LogComponents": [ { "Id" : 297 | "SOURCE_UNLOAD", "Severity" : "LOGGER_SEVERITY_DEFAULT" }, { "Id" : 298 | "SOURCE_CAPTURE", "Severity" : "LOGGER_SEVERITY_DEFAULT" }, { "Id" : 299 | "TARGET_LOAD", "Severity" : "LOGGER_SEVERITY_DEFAULT" }, { "Id" : 300 | "TARGET_APPLY", "Severity" : "LOGGER_SEVERITY_DEFAULT" } ] } } 301 | SourceEndpointArn: !Ref S3SourceEndpoint 302 | TableMappings: >- 303 | { "rules": [ { "rule-type" : "selection", "rule-id" : "1", "rule-name" : 304 | "1", "object-locator" : { "schema-name" : "%", "table-name" : "%" }, 305 | "rule-action" : "include" } ] } 306 | TargetEndpointArn: !Ref KinesisTargetEndpoint 307 | 308 | AlgorithmicTradingInstance: 309 | Type: AWS::SageMaker::NotebookInstance 310 | Properties: 311 | InstanceType: ml.t3.medium 312 | DefaultCodeRepository: https://github.com/aws-samples/algorithmic-trading 313 | RoleArn: !GetAtt 'SageMakerExecutionRole.Arn' 314 | 315 | SageMakerExecutionRole: 316 | Type: AWS::IAM::Role 317 | Properties: 318 | AssumeRolePolicyDocument: 319 | Version: '2012-10-17' 320 | Statement: 321 | - Effect: Allow 322 | Principal: 323 | Service: 324 | - sagemaker.amazonaws.com 325 | Action: 326 | - sts:AssumeRole 327 | Path: / 328 | ManagedPolicyArns: 329 | - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess 330 | - arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess 331 | - arn:aws:iam::aws:policy/AmazonKinesisReadOnlyAccess 332 | - arn:aws:iam::aws:policy/AmazonECS_FullAccess 333 | - 
arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess 334 | - !Ref 'S3Policy' 335 | 336 | S3Policy: 337 | Type: AWS::IAM::ManagedPolicy 338 | Properties: 339 | Description: S3 Permission 340 | Path: / 341 | PolicyDocument: 342 | Version: '2012-10-17' 343 | Statement: 344 | - Effect: Allow 345 | Action: 346 | - s3:GetObject 347 | - s3:PutObject 348 | - s3:DeleteObject 349 | - s3:ListBucket 350 | Resource: 351 | - !Sub 352 | - arn:aws:s3:::${S3Bucket}/* 353 | - S3Bucket: !Ref 'MarketDataS3Bucket' 354 | 355 | Outputs: 356 | 01StackName: 357 | Value: !Ref 'AWS::StackName' 358 | 02RegionName: 359 | Value: !Ref 'AWS::Region' 360 | 03TargetKinesisStream: 361 | Value: !Ref KinesisStream 362 | 04DMSReplicationInstance: 363 | Value: !Ref DMSReplicationInstance 364 | 05SourceEndpoint: 365 | Value: !Ref S3SourceEndpoint 366 | 06TargetEndpoint: 367 | Value: !Ref KinesisTargetEndpoint 368 | 07DMSReplicationTask: 369 | Value: !Ref DMSReplicationTask 370 | 08KinesisStreamName: 371 | Value: !Ref KinesisStream 372 | 373 | -------------------------------------------------------------------------------- /5_SageMakerStudio/3_Backtest_Strategy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Step 1) Data Preparation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "algo_name='algo_sma'" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "s3bucket=!(aws cloudformation list-exports --query \"Exports[?Name=='algotrading-s3bucket'].Value\" --output text)\n", 26 | "s3bucket=s3bucket[0]\n", 27 | "s3bucket" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import pandas as pd\n", 37 | "from pyathena import connect\n", 38 | "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/')\n", 39 | "\n", 40 | "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n", 41 | "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n", 42 | "del df['dt']\n", 43 | "df.head()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from pathlib import Path\n", 53 | "trainCount=int(len(df)*0.4)\n", 54 | "dfTrain = df.iloc[:trainCount]\n", 55 | "dfTest = df.iloc[trainCount:]\n", 56 | "\n", 57 | "dfTest.to_csv('/opt/ml/input/data/training/data.csv')\n", 58 | "dfTest.head()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "%matplotlib inline\n", 68 | "dfTest[\"close\"].plot()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "# Step 2) Modify Strategy Configuration \n", 76 | "\n", 77 | "In the following cell, you can adjust the parameters for the strategy.\n", 78 | "\n", 79 | "* `fast_period` = Fast Period for Moving Average Indicator in min (e.g. 8)\n", 80 | "* `slow_period` = Slow Period for Moving Average Indicator in min (e.g. 21)\n", 81 | "* `size` = The number of shares for a transaction (e.g. 
100)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "%%writefile /opt/ml/input/config/hyperparameters.json\n", 91 | "{ \"fast_period\" : \"8\",\n", 92 | " \"slow_period\" : \"21\",\n", 93 | " \"size\" : \"100\"\n", 94 | "}" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "%run /opt/program/update_config.py $algo_name $s3bucket" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "# Step 3) Modify Strategy Code\n", 111 | "\n", 112 | "Here are some helpful links:\n", 113 | "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n", 114 | "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n", 115 | "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "%%writefile /opt/program/{algo_name}.py\n", 125 | "import backtrader as bt\n", 126 | "from algo_base import *\n", 127 | "import pytz\n", 128 | "from pytz import timezone\n", 129 | "\n", 130 | "class MyStrategy(StrategyTemplate):\n", 131 | "\n", 132 | " def __init__(self): # Initiation\n", 133 | " super(MyStrategy, self).__init__()\n", 134 | " self.config[\"fast_period\"]=int(self.config[\"fast_period\"])\n", 135 | " self.config[\"slow_period\"]=int(self.config[\"slow_period\"])\n", 136 | " self.config[\"size\"]=int(self.config[\"size\"])\n", 137 | " print(self.config)\n", 138 | " self.emaFast = bt.ind.ExponentialMovingAverage(period=self.config[\"fast_period\"])\n", 139 | " self.emaSlow = bt.ind.ExponentialMovingAverage(period=self.config[\"slow_period\"])\n", 140 | " self.size = self.config[\"size\"]\n", 141 | "\n", 142 | " def init_broker(broker):\n", 143 | " broker.setcash(100000.0)\n", 144 | " broker.setcommission(commission=0.0) \n", 145 | " \n", 146 | " def add_data(cerebro):\n", 147 | " data = btfeeds.GenericCSVData(\n", 148 | " dataname=MyStrategy.TRAIN_FILE,\n", 149 | " dtformat=('%Y-%m-%d'),\n", 150 | " timeframe=bt.TimeFrame.Days,\n", 151 | " datetime=0,\n", 152 | " time=-1,\n", 153 | " high=2,\n", 154 | " low=3,\n", 155 | " open=1,\n", 156 | " close=4,\n", 157 | " volume=5,\n", 158 | " openinterest=-1\n", 159 | " )\n", 160 | " cerebro.adddata(data)\n", 161 | "\n", 162 | " def next(self): # Processing\n", 163 | " super(MyStrategy, self).next()\n", 164 | " dt=self.datas[0].datetime.datetime(0)\n", 165 | " if not self.position:\n", 166 | " if self.emaFast[0] > self.emaSlow[0]:\n", 167 | " self.buy(size=self.size) # Go long\n", 168 | " else:\n", 169 | " self.sell(size=self.size) # Go short\n", 170 | " elif self.position.size>0 and self.emaFast[0] < self.emaSlow[0]:\n", 171 | " self.sell(size=2*self.size) # Go short\n", 172 | " elif self.position.size<0 and self.emaFast[0] > self.emaSlow[0]: \n", 173 | " self.buy(size=2*self.size) # Go long" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "# Step 4) Backtest Locally" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "scrolled": true 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "%run /opt/program/train" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "# Step 5) 
Backtest Remotely with SageMaker" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "!aws s3 cp \"/opt/program/\" \"s3://{s3bucket}/{algo_name}/\" --recursive --exclude \"*\" --include \"{algo_name}*.*\"" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "conf_file='/opt/ml/input/config/hyperparameters.json'\n", 217 | "with open(conf_file, 'r') as f:\n", 218 | " config = json.load(f)\n", 219 | "config['s3']=s3_bucket \n", 220 | "config['chart']='true'\n", 221 | "print(config)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "scrolled": true 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "#Run Remote Backtest via SageMaker\n", 233 | "import sagemaker as sage\n", 234 | "from sagemaker import get_execution_role\n", 235 | "from sagemaker.estimator import Estimator \n", 236 | "\n", 237 | "role = get_execution_role()\n", 238 | "sess = sage.Session()\n", 239 | "\n", 240 | "WORK_DIRECTORY = '/opt/ml/input/data/training'\n", 241 | "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n", 242 | "print(data_location)\n", 243 | "\n", 244 | "prefix=algo_name\n", 245 | "job_name=prefix.replace('_','-')\n", 246 | "\n", 247 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 248 | "region = sess.boto_session.region_name\n", 249 | "image = f'{account}.dkr.ecr.{region}.amazonaws.com/algotrading:1.0'\n", 250 | "\n", 251 | "algo = sage.estimator.Estimator(\n", 252 | " image_uri=image,\n", 253 | " role=role,\n", 254 | " instance_count=1,\n", 255 | " instance_type='ml.m4.xlarge',\n", 256 | " output_path=\"s3://{}/output\".format(sess.default_bucket()),\n", 257 | " sagemaker_session=sess,\n", 258 | " base_job_name=job_name,\n", 259 | " hyperparameters=config,\n", 260 | " metric_definitions=[\n", 261 | " {\n", 262 | " \"Name\": \"algo:pnl\",\n", 263 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 264 | " },\n", 265 | " {\n", 266 | " \"Name\": \"algo:sharpe_ratio\",\n", 267 | " \"Regex\": \"Sharpe Ratio:(.*?),\"\n", 268 | " }\n", 269 | " ])" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "algo.fit(data_location)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "#Get Algo Metrics\n", 288 | "from sagemaker.analytics import TrainingJobAnalytics\n", 289 | "\n", 290 | "latest_job_name = algo.latest_training_job.job_name\n", 291 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n", 292 | "metrics_dataframe" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "#Get Algo Chart from S3\n", 302 | "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n", 303 | "import boto3\n", 304 | "s3 = boto3.resource('s3')\n", 305 | "my_bucket = s3.Bucket(sess.default_bucket())\n", 306 | "my_bucket.download_file(model_name,'model.tar.gz')\n", 307 | "!tar -xzf model.tar.gz\n", 308 | "!rm model.tar.gz\n", 309 | "from IPython.display import Image\n", 310 | "Image(filename='chart.png') " 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 
| "# Step 6) Run Hyperparameter Optimization with SageMaker" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "from sagemaker.tuner import (\n", 327 | " IntegerParameter,\n", 328 | " CategoricalParameter,\n", 329 | " ContinuousParameter,\n", 330 | " HyperparameterTuner,\n", 331 | ")\n", 332 | "\n", 333 | "hyperparameter_ranges = {\n", 334 | " \"fast_period\": IntegerParameter(5, 10),\n", 335 | " \"slow_period\": IntegerParameter(21, 31)\n", 336 | "}\n", 337 | "objective_metric_name= \"algo:pnl\"\n", 338 | "tuner = HyperparameterTuner(algo,\n", 339 | " objective_metric_name,\n", 340 | " hyperparameter_ranges,\n", 341 | " max_jobs=6,\n", 342 | " max_parallel_jobs=3,\n", 343 | " metric_definitions=[\n", 344 | " {\n", 345 | " \"Name\": \"algo:pnl\",\n", 346 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 347 | " }\n", 348 | " ]\n", 349 | " )" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "tuner.fit(data_location)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "best_params=boto3.client('sagemaker').describe_hyper_parameter_tuning_job(\n", 368 | "HyperParameterTuningJobName=tuner.latest_tuning_job.job_name)['BestTrainingJob']['TunedHyperParameters']\n", 369 | "best_params" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "from sagemaker.analytics import TrainingJobAnalytics\n", 379 | "bestjob=tuner.best_training_job()\n", 380 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=bestjob).dataframe()\n", 381 | "metrics_dataframe" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "# Step 7) Backtest Locally with Optimal Parameters" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "# Use optimal hyperparameter and test data\n", 398 | "conf_file='/opt/ml/input/config/hyperparameters.json'\n", 399 | "with open(conf_file, 'r') as f:\n", 400 | " config = json.load(f)\n", 401 | "config['fast_period']=best_params['fast_period']\n", 402 | "config['slow_period']=best_params['slow_period']\n", 403 | "config['chart']='false'\n", 404 | "print(config)" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "%run /opt/program/train" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "### Congratulations! You've completed this strategy." 
421 | ] 422 | } 423 | ], 424 | "metadata": { 425 | "instance_type": "ml.t3.medium", 426 | "kernelspec": { 427 | "display_name": "Python 3 (Data Science)", 428 | "language": "python", 429 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 430 | }, 431 | "language_info": { 432 | "codemirror_mode": { 433 | "name": "ipython", 434 | "version": 3 435 | }, 436 | "file_extension": ".py", 437 | "mimetype": "text/x-python", 438 | "name": "python", 439 | "nbconvert_exporter": "python", 440 | "pygments_lexer": "ipython3", 441 | "version": "3.7.10" 442 | } 443 | }, 444 | "nbformat": 4, 445 | "nbformat_minor": 4 446 | } 447 | -------------------------------------------------------------------------------- /2_Strategies/Strategy_ML_Forecast.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%run init_model.py 'algo_ml_long_short_predict'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Step 1) Data Preparation" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# get S3 bucket\n", 26 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 27 | "s3bucket=s3bucket[0]\n", 28 | "s3bucket" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import sys\n", 38 | "!{sys.executable} -m pip install PyAthena" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import os\n", 48 | "import sagemaker as sage\n", 49 | "from sagemaker import get_execution_role\n", 50 | "import datetime\n", 51 | "from sagemaker.tensorflow import TensorFlow\n", 52 | "import json\n", 53 | "\n", 54 | "role = get_execution_role()\n", 55 | "sess = sage.Session()\n", 56 | "region = sess.boto_session.region_name" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "import pandas as pd\n", 66 | "from pyathena import connect\n", 67 | "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/',\n", 68 | " region_name=region)\n", 69 | "\n", 70 | "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n", 71 | "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n", 72 | "del df['dt']\n", 73 | "df.head()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "trainCount=int(len(df)*0.4)\n", 83 | "dfTrain = df.iloc[:trainCount]\n", 84 | "\n", 85 | "dfTest = df.iloc[trainCount:]\n", 86 | "dfTest.to_csv('local/'+algo_name+'/input/data/training/data.csv')\n", 87 | "dfTest.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "scrolled": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "%matplotlib notebook\n", 99 | "dfTest[\"close\"].plot()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# Step 2) Modify Strategy Configuration \n", 107 | "\n", 108 | "In the following cell, you can adjust the parameters for the strategy.\n", 109 | "\n", 110 | "* `user` = Name for 
Leaderboard (optional)\n", 111 | "* `long_threshold` = Threshold for Long Trade (0 to 1)\n", 112 | "* `short_threshold` = Threshold for Short Trade (0 to 1)\n", 113 | "* `profit_target_pct` = Profit Target Percentage \n", 114 | "* `stop_target_pct` = Stop Target Percentage\n", 115 | "* `size` = The number of shares for a transaction\n", 116 | "\n", 117 | "`Tip`: A good starting point for improving the strategy is modify the profit / stop target and the risk/reward ratio. Another option is to reduce the number of signals by increasing the threshold." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "%%writefile local/{algo_name}/input/config/hyperparameters.json\n", 127 | "{ \"user\" : \"user\",\n", 128 | " \"long_threshold\" : \"0.5\",\n", 129 | " \"short_threshold\" : \"0.5\",\n", 130 | " \"profit_target_pct\" : \"2.00\",\n", 131 | " \"stop_target_pct\" : \"1.50\",\n", 132 | " \"size\" : \"100\"\n", 133 | "}" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "%run update_config.py $algo_name" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "# Step 3) Modify Strategy Code\n", 150 | "\n", 151 | "In the following cell, you can modify the strategy code. For the first backtests, you can leave it as is.\n", 152 | "\n", 153 | "`Tip`: A good starting point for improving the strategy is to combine the signal from the model with traditional trend indicators (e.g. moving average). This will likely improve the performance. To improve the strategy further, you could increase the accuracy of the machine learning model by including more indicators (e.g. ATR) or modify the input and forecast window. This requires to re-train the machine learning model as this needs to match your strategy. For timeseries forecasting, you could compare the performance with more advanced ML networks (e.g. 
CNN, LTSM, RNN) and pick the model with the best predictions.\n", 154 | "\n", 155 | "You can also checkout other AWS-provided options for timeseries forecasting and formulate a strategy that uses price predictions and integrate them in your strategy:\n", 156 | "* https://docs.aws.amazon.com/sagemaker/latest/dg/deepar.html \n", 157 | "* https://aws.amazon.com/forecast/\n", 158 | "\n", 159 | "Here are some helpful links:\n", 160 | "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n", 161 | "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n", 162 | "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "%%writefile model/{algo_name}.py\n", 172 | "import backtrader as bt\n", 173 | "from algo_base import *\n", 174 | "import math\n", 175 | "import numpy as np\n", 176 | "import pandas as pd\n", 177 | "import tensorflow as tf\n", 178 | "import keras\n", 179 | "from keras import backend as K\n", 180 | "from keras.models import load_model\n", 181 | "\n", 182 | "class MyStrategy(StrategyTemplate):\n", 183 | "\n", 184 | " def __init__(self):\n", 185 | " super(MyStrategy, self).__init__()\n", 186 | " self.config[\"long_threshold\"]=float(self.config[\"long_threshold\"])\n", 187 | " self.config[\"short_threshold\"]=float(self.config[\"short_threshold\"])\n", 188 | " self.config[\"size\"]=int(self.config[\"size\"])\n", 189 | " self.config[\"profit_target_pct\"]=float(self.config[\"profit_target_pct\"])\n", 190 | " self.config[\"stop_target_pct\"]=float(self.config[\"stop_target_pct\"])\n", 191 | "\n", 192 | " self.order=None\n", 193 | " self.orderPlaced=False\n", 194 | " \n", 195 | " self.model = load_model('model_long_short_predict.h5')\n", 196 | " \n", 197 | " # input / indicators\n", 198 | " self.repeatCount=15\n", 199 | " self.repeatStep=1\n", 200 | " \n", 201 | " self.profitTarget=self.config[\"profit_target_pct\"]/100.0\n", 202 | " self.stopTarget=self.config[\"stop_target_pct\"]/100.0\n", 203 | " self.size=self.config[\"size\"]\n", 204 | " \n", 205 | " self.sma=[]\n", 206 | " self.roc=[]\n", 207 | " \n", 208 | " self.hData=[\"dt\"]\n", 209 | " self.hData.append(\"close\") \n", 210 | " for a in range(0,self.repeatCount):\n", 211 | " tp=(a+1)*self.repeatStep+1\n", 212 | " self.hData.append(\"sma\"+str(tp))\n", 213 | " self.sma.append(bt.talib.SMA(self.data, timeperiod=tp, plot=False))\n", 214 | " for a in range(0,self.repeatCount):\n", 215 | " tp=(a+1)*self.repeatStep+1\n", 216 | " self.hData.append(\"roc\"+str(tp))\n", 217 | " self.roc.append(bt.talib.ROC(self.data, timeperiod=tp, plot=False))\n", 218 | "\n", 219 | " def init_broker(broker):\n", 220 | " broker.setcash(100000.0)\n", 221 | " broker.setcommission(commission=0.0) \n", 222 | " \n", 223 | " def add_data(cerebro):\n", 224 | " data = btfeeds.GenericCSVData(\n", 225 | " dataname=MyStrategy.TRAIN_FILE,\n", 226 | " dtformat=('%Y-%m-%d'),\n", 227 | " timeframe=bt.TimeFrame.Days,\n", 228 | " datetime=0,\n", 229 | " time=-1,\n", 230 | " high=2,\n", 231 | " low=3,\n", 232 | " open=1,\n", 233 | " close=4,\n", 234 | " volume=5,\n", 235 | " openinterest=-1\n", 236 | " )\n", 237 | " cerebro.adddata(data)\n", 238 | "\n", 239 | " def next(self):\n", 240 | " super(MyStrategy, self).next()\n", 241 | " \n", 242 | " dt=self.datas[0].datetime.datetime(0)\n", 243 | " cl=self.dataclose[0]\n", 244 | " inputRec=[] \n", 245 | "\n", 246 | " 
#open\n", 247 | " inputRec0=[]\n", 248 | " inputRec0.append(cl)\n", 249 | "\n", 250 | " #sma\n", 251 | " for a in range(0,self.repeatCount):\n", 252 | " if math.isnan(self.sma[a][0]):\n", 253 | " inputRec0.append(cl)\n", 254 | " else:\n", 255 | " inputRec0.append(self.sma[a][0])\n", 256 | "\n", 257 | " m1=min(inputRec0)\n", 258 | " m2=max(inputRec0)\n", 259 | " for a in inputRec0:\n", 260 | " if m2-m1==0:\n", 261 | " inputRec.append(0)\n", 262 | " else:\n", 263 | " inputRec.append((a-m1)/(m2-m1))\n", 264 | "\n", 265 | " #roc\n", 266 | " for a in range(0,self.repeatCount):\n", 267 | " if math.isnan(self.roc[a][0]):\n", 268 | " inputRec.append(0)\n", 269 | " else:\n", 270 | " inputRec.append(self.roc[a][0])\n", 271 | "\n", 272 | " mX=[]\n", 273 | " mX.append(np.array(inputRec))\n", 274 | " dataX=np.array(mX)\n", 275 | " #print(\"dataX=%s\" % dataX)\n", 276 | "\n", 277 | " # *** ML prediction ***\n", 278 | " mY=self.model.predict(dataX)\n", 279 | " #print(\"mY=%s\" % mY)\n", 280 | " tLong=mY[0][0]\n", 281 | " tShort=mY[0][1]\n", 282 | " #print(\"[%s]:long=%s,short=%s\" % (dt,tLong,tShort))\n", 283 | " if not self.position:\n", 284 | " fLong=(tLong>self.config[\"long_threshold\"]) \n", 285 | " fShort=(tShort>self.config[\"short_threshold\"])\n", 286 | " if fLong:\n", 287 | " self.order=self.buy(size=self.size)\n", 288 | " self.limitPrice=cl+self.profitTarget*cl\n", 289 | " self.stopPrice=cl-self.stopTarget*cl\n", 290 | " elif fShort:\n", 291 | " self.order=self.sell(size=self.size) \n", 292 | " self.limitPrice=cl-self.profitTarget*cl\n", 293 | " self.stopPrice=cl+self.stopTarget*cl\n", 294 | "\n", 295 | " if self.position:\n", 296 | " if self.position.size>0:\n", 297 | " if cl>=self.limitPrice or cl<=self.stopPrice:\n", 298 | " self.order=self.sell(size=self.size)\n", 299 | " elif self.position.size<0:\n", 300 | " if cl<=self.limitPrice or cl>=self.stopPrice:\n", 301 | " self.order=self.buy(size=self.size)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "# Step 4) Backtest Locally\n", 309 | "\n", 310 | "**Please note that the initial docker build may take a few minutes. Subsequent runs are fast.**" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "#Build Local Algo Image\n", 320 | "!docker build -t $algo_name .\n", 321 | "!docker run -v $(pwd)/local/$algo_name:/opt/ml --rm $algo_name train" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": { 328 | "scrolled": false 329 | }, 330 | "outputs": [], 331 | "source": [ 332 | "from IPython.display import Image\n", 333 | "Image(filename='local/'+algo_name+'/model/chart.png')" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## Refine your trading strategy (step 2 to 4). Once you are ready, move on to the next step." 
341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "# Step 5) Backtest on SageMaker and submit performance" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "#Deploy Algo Image to ECS\n", 357 | "!./build_and_push.sh $algo_name" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "#Run Remote Forwardtest via SageMaker\n", 367 | "import sagemaker as sage\n", 368 | "from sagemaker import get_execution_role\n", 369 | "from sagemaker.estimator import Estimator \n", 370 | "\n", 371 | "role = get_execution_role()\n", 372 | "sess = sage.Session()\n", 373 | "\n", 374 | "WORK_DIRECTORY = 'local/'+algo_name+'/input/data/training'\n", 375 | "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n", 376 | "print(data_location)\n", 377 | "\n", 378 | "conf_file='local/'+algo_name+'/input/config/hyperparameters.json'\n", 379 | "with open(conf_file, 'r') as f:\n", 380 | " config = json.load(f)\n", 381 | "#config['sim_data']='True'\n", 382 | "print(config)\n", 383 | "\n", 384 | "prefix=algo_name\n", 385 | "job_name=prefix.replace('_','-')\n", 386 | "\n", 387 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 388 | "region = sess.boto_session.region_name\n", 389 | "image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'\n", 390 | "\n", 391 | "algo = sage.estimator.Estimator(\n", 392 | " image_uri=image,\n", 393 | " role=role,\n", 394 | " instance_count=1,\n", 395 | " instance_type='ml.m4.xlarge',\n", 396 | " output_path=\"s3://{}/output\".format(sess.default_bucket()),\n", 397 | " sagemaker_session=sess,\n", 398 | " base_job_name=job_name,\n", 399 | " hyperparameters=config,\n", 400 | " metric_definitions=[\n", 401 | " {\n", 402 | " \"Name\": \"algo:pnl\",\n", 403 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 404 | " },\n", 405 | " {\n", 406 | " \"Name\": \"algo:sharpe_ratio\",\n", 407 | " \"Regex\": \"Sharpe Ratio:(.*?),\"\n", 408 | " }\n", 409 | " ])\n", 410 | "algo.fit(data_location)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "#Get Algo Metrics\n", 420 | "from sagemaker.analytics import TrainingJobAnalytics\n", 421 | "\n", 422 | "latest_job_name = algo.latest_training_job.job_name\n", 423 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n", 424 | "metrics_dataframe" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": { 431 | "scrolled": true 432 | }, 433 | "outputs": [], 434 | "source": [ 435 | "#Get Algo Chart from S3\n", 436 | "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n", 437 | "import boto3\n", 438 | "s3 = boto3.resource('s3')\n", 439 | "my_bucket = s3.Bucket(sess.default_bucket())\n", 440 | "my_bucket.download_file(model_name,'model.tar.gz')\n", 441 | "!tar -xzf model.tar.gz\n", 442 | "!rm model.tar.gz\n", 443 | "from IPython.display import Image\n", 444 | "Image(filename='chart.png') " 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "### Congratulations! You've completed this strategy." 
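When you iterate on thresholds or profit/stop targets you end up with several remote backtests, and it helps to line up their final metrics side by side. A small sketch building on the `TrainingJobAnalytics` call above; the job name below is a placeholder, and the `timestamp`, `metric_name` and `value` columns are the ones the analytics dataframe normally contains:

```python
import pandas as pd
from sagemaker.analytics import TrainingJobAnalytics

# Placeholder job names; use the names printed by algo.fit() for your own runs.
job_names = ["algo-ml-long-short-predict-2021-01-01-00-00-00-000"]

rows = []
for job in job_names:
    df = TrainingJobAnalytics(training_job_name=job).dataframe()
    # Keep the last reported value of each metric for this job.
    final = df.sort_values("timestamp").groupby("metric_name")["value"].last()
    final["job_name"] = job
    rows.append(final)

summary = pd.DataFrame(rows).set_index("job_name")
summary  # one row per backtest, columns such as algo:pnl and algo:sharpe_ratio
```

This keeps a simple record of which configuration produced which PnL and Sharpe ratio before you submit a result.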
452 | ] 453 | } 454 | ], 455 | "metadata": { 456 | "kernelspec": { 457 | "display_name": "conda_tensorflow_p36", 458 | "language": "python", 459 | "name": "conda_tensorflow_p36" 460 | }, 461 | "language_info": { 462 | "codemirror_mode": { 463 | "name": "ipython", 464 | "version": 3 465 | }, 466 | "file_extension": ".py", 467 | "mimetype": "text/x-python", 468 | "name": "python", 469 | "nbconvert_exporter": "python", 470 | "pygments_lexer": "ipython3", 471 | "version": "3.6.10" 472 | } 473 | }, 474 | "nbformat": 4, 475 | "nbformat_minor": 2 476 | } 477 | -------------------------------------------------------------------------------- /0_Setup/algo-reference.yaml: -------------------------------------------------------------------------------- 1 | Description: > 2 | This template deploys the algorithmic trading reference architecture 3 | 4 | Parameters: 5 | EnvironmentName: 6 | Description: An environment name that will be prefixed to resource names 7 | Type: String 8 | Default: algo 9 | 10 | VpcCIDR: 11 | Description: Please enter the IP range (CIDR notation) for this VPC 12 | Type: String 13 | Default: 10.111.0.0/16 14 | 15 | PublicSubnet1CIDR: 16 | Description: Please enter the IP range (CIDR notation) for the public subnet in the first Availability Zone 17 | Type: String 18 | Default: 10.111.10.0/24 19 | 20 | PublicSubnet2CIDR: 21 | Description: Please enter the IP range (CIDR notation) for the public subnet in the second Availability Zone 22 | Type: String 23 | Default: 10.111.11.0/24 24 | 25 | PrivateSubnet1CIDR: 26 | Description: Please enter the IP range (CIDR notation) for the private subnet in the first Availability Zone 27 | Type: String 28 | Default: 10.111.20.0/24 29 | 30 | PrivateSubnet2CIDR: 31 | Description: Please enter the IP range (CIDR notation) for the private subnet in the second Availability Zone 32 | Type: String 33 | Default: 10.111.21.0/24 34 | 35 | S3Bucket: 36 | Description: Please specify your S3 bucket 37 | Type: String 38 | 39 | 40 | Resources: 41 | VPC: 42 | Type: AWS::EC2::VPC 43 | Properties: 44 | CidrBlock: !Ref VpcCIDR 45 | EnableDnsHostnames: true 46 | Tags: 47 | - Key: Name 48 | Value: !Ref EnvironmentName 49 | 50 | InternetGateway: 51 | Type: AWS::EC2::InternetGateway 52 | Properties: 53 | Tags: 54 | - Key: Name 55 | Value: !Ref EnvironmentName 56 | 57 | InternetGatewayAttachment: 58 | Type: AWS::EC2::VPCGatewayAttachment 59 | Properties: 60 | InternetGatewayId: !Ref InternetGateway 61 | VpcId: !Ref VPC 62 | 63 | PublicSubnet1: 64 | Type: AWS::EC2::Subnet 65 | Properties: 66 | VpcId: !Ref VPC 67 | AvailabilityZone: !Select [0, !GetAZs ""] 68 | CidrBlock: !Ref PublicSubnet1CIDR 69 | MapPublicIpOnLaunch: true 70 | Tags: 71 | - Key: Name 72 | Value: !Sub ${EnvironmentName} Public Subnet (AZ1) 73 | 74 | PublicSubnet2: 75 | Type: AWS::EC2::Subnet 76 | Properties: 77 | VpcId: !Ref VPC 78 | AvailabilityZone: !Select [1, !GetAZs ""] 79 | CidrBlock: !Ref PublicSubnet2CIDR 80 | MapPublicIpOnLaunch: true 81 | Tags: 82 | - Key: Name 83 | Value: !Sub ${EnvironmentName} Public Subnet (AZ2) 84 | 85 | PrivateSubnet1: 86 | Type: AWS::EC2::Subnet 87 | Properties: 88 | VpcId: !Ref VPC 89 | AvailabilityZone: !Select [0, !GetAZs ""] 90 | CidrBlock: !Ref PrivateSubnet1CIDR 91 | MapPublicIpOnLaunch: false 92 | Tags: 93 | - Key: Name 94 | Value: !Sub ${EnvironmentName} Private Subnet (AZ1) 95 | 96 | PrivateSubnet2: 97 | Type: AWS::EC2::Subnet 98 | Properties: 99 | VpcId: !Ref VPC 100 | AvailabilityZone: !Select [1, !GetAZs ""] 101 | CidrBlock: !Ref PrivateSubnet2CIDR 102 | 
MapPublicIpOnLaunch: false 103 | Tags: 104 | - Key: Name 105 | Value: !Sub ${EnvironmentName} Private Subnet (AZ2) 106 | 107 | NatGateway1EIP: 108 | Type: AWS::EC2::EIP 109 | DependsOn: InternetGatewayAttachment 110 | Properties: 111 | Domain: vpc 112 | 113 | NatGateway2EIP: 114 | Type: AWS::EC2::EIP 115 | DependsOn: InternetGatewayAttachment 116 | Properties: 117 | Domain: vpc 118 | 119 | NatGateway1: 120 | Type: AWS::EC2::NatGateway 121 | Properties: 122 | AllocationId: !GetAtt NatGateway1EIP.AllocationId 123 | SubnetId: !Ref PublicSubnet1 124 | 125 | NatGateway2: 126 | Type: AWS::EC2::NatGateway 127 | Properties: 128 | AllocationId: !GetAtt NatGateway2EIP.AllocationId 129 | SubnetId: !Ref PublicSubnet2 130 | 131 | PublicRouteTable: 132 | Type: AWS::EC2::RouteTable 133 | Properties: 134 | VpcId: !Ref VPC 135 | Tags: 136 | - Key: Name 137 | Value: !Sub ${EnvironmentName} Public Routes 138 | 139 | DefaultPublicRoute: 140 | Type: AWS::EC2::Route 141 | DependsOn: InternetGatewayAttachment 142 | Properties: 143 | RouteTableId: !Ref PublicRouteTable 144 | DestinationCidrBlock: 0.0.0.0/0 145 | GatewayId: !Ref InternetGateway 146 | 147 | PublicSubnet1RouteTableAssociation: 148 | Type: AWS::EC2::SubnetRouteTableAssociation 149 | Properties: 150 | RouteTableId: !Ref PublicRouteTable 151 | SubnetId: !Ref PublicSubnet1 152 | 153 | PublicSubnet2RouteTableAssociation: 154 | Type: AWS::EC2::SubnetRouteTableAssociation 155 | Properties: 156 | RouteTableId: !Ref PublicRouteTable 157 | SubnetId: !Ref PublicSubnet2 158 | 159 | PrivateRouteTable1: 160 | Type: AWS::EC2::RouteTable 161 | Properties: 162 | VpcId: !Ref VPC 163 | Tags: 164 | - Key: Name 165 | Value: !Sub ${EnvironmentName} Private Routes (AZ1) 166 | 167 | DefaultPrivateRoute1: 168 | Type: AWS::EC2::Route 169 | Properties: 170 | RouteTableId: !Ref PrivateRouteTable1 171 | DestinationCidrBlock: 0.0.0.0/0 172 | NatGatewayId: !Ref NatGateway1 173 | 174 | PrivateSubnet1RouteTableAssociation: 175 | Type: AWS::EC2::SubnetRouteTableAssociation 176 | Properties: 177 | RouteTableId: !Ref PrivateRouteTable1 178 | SubnetId: !Ref PrivateSubnet1 179 | 180 | PrivateRouteTable2: 181 | Type: AWS::EC2::RouteTable 182 | Properties: 183 | VpcId: !Ref VPC 184 | Tags: 185 | - Key: Name 186 | Value: !Sub ${EnvironmentName} Private Routes (AZ2) 187 | 188 | DefaultPrivateRoute2: 189 | Type: AWS::EC2::Route 190 | Properties: 191 | RouteTableId: !Ref PrivateRouteTable2 192 | DestinationCidrBlock: 0.0.0.0/0 193 | NatGatewayId: !Ref NatGateway2 194 | 195 | PrivateSubnet2RouteTableAssociation: 196 | Type: AWS::EC2::SubnetRouteTableAssociation 197 | Properties: 198 | RouteTableId: !Ref PrivateRouteTable2 199 | SubnetId: !Ref PrivateSubnet2 200 | 201 | ECR: 202 | Type: AWS::ECR::Repository 203 | Properties: 204 | RepositoryName: !Sub ${EnvironmentName}_ecr 205 | 206 | AlgorithmicTradingInstance: 207 | Type: AWS::SageMaker::NotebookInstance 208 | Properties: 209 | InstanceType: ml.t2.large 210 | DefaultCodeRepository: https://github.com/aws-samples/algorithmic-trading 211 | RoleArn: !GetAtt 'SageMakerExecutionRole.Arn' 212 | 213 | S3Policy: 214 | Type: AWS::IAM::ManagedPolicy 215 | Properties: 216 | Description: S3 Permission 217 | Path: / 218 | PolicyDocument: 219 | Version: '2012-10-17' 220 | Statement: 221 | - Effect: Allow 222 | Action: 223 | - s3:GetObject 224 | - s3:PutObject 225 | - s3:DeleteObject 226 | - s3:ListBucket 227 | Resource: 228 | - !Sub 229 | - arn:aws:s3:::${S3Bucket}/* 230 | - S3Bucket: !Ref 'S3Bucket' 231 | 232 | SageMakerExecutionRole: 233 | Type: 
AWS::IAM::Role 234 | Properties: 235 | AssumeRolePolicyDocument: 236 | Version: '2012-10-17' 237 | Statement: 238 | - Effect: Allow 239 | Principal: 240 | Service: 241 | - sagemaker.amazonaws.com 242 | Action: 243 | - sts:AssumeRole 244 | Path: / 245 | ManagedPolicyArns: 246 | - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess 247 | - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess 248 | - arn:aws:iam::aws:policy/AmazonAthenaFullAccess 249 | - arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess 250 | - arn:aws:iam::aws:policy/AmazonECS_FullAccess 251 | - arn:aws:iam::aws:policy/AmazonKinesisReadOnlyAccess 252 | - !Ref 'S3Policy' 253 | 254 | ECSHostSecurityGroup: 255 | Type: AWS::EC2::SecurityGroup 256 | Properties: 257 | VpcId: !Ref 'VPC' 258 | GroupDescription: Access to the ECS hosts and the tasks/containers that run on them 259 | SecurityGroupIngress: 260 | # Allow access from anywhere to our ECS services 261 | - CidrIp: 0.0.0.0/0 262 | IpProtocol: -1 263 | Tags: 264 | - Key: Name 265 | Value: !Sub ${EnvironmentName}-ECS 266 | 267 | ECSTaskExecutionRole: 268 | Type: AWS::IAM::Role 269 | Properties: 270 | AssumeRolePolicyDocument: 271 | Version: '2012-10-17' 272 | Statement: 273 | - Effect: Allow 274 | Principal: 275 | Service: 276 | - ecs-tasks.amazonaws.com 277 | Action: 278 | - sts:AssumeRole 279 | Path: / 280 | ManagedPolicyArns: 281 | - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy 282 | 283 | AlgoExecutionRole: 284 | Type: AWS::IAM::Role 285 | Properties: 286 | AssumeRolePolicyDocument: 287 | Version: '2012-10-17' 288 | Statement: 289 | - Effect: Allow 290 | Principal: 291 | Service: 292 | - ecs-tasks.amazonaws.com 293 | Action: 294 | - sts:AssumeRole 295 | Path: / 296 | ManagedPolicyArns: 297 | - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy 298 | 299 | ECSCluster: 300 | Type: AWS::ECS::Cluster 301 | Properties: 302 | ClusterName: !Ref EnvironmentName 303 | 304 | GlueDatabase: 305 | Type: AWS::Glue::Database 306 | Properties: 307 | CatalogId: !Ref 'AWS::AccountId' 308 | DatabaseInput: 309 | Name: algo_data 310 | 311 | GlueHistDataDaily: 312 | Type: AWS::Glue::Table 313 | Properties: 314 | CatalogId: !Ref 'AWS::AccountId' 315 | DatabaseName: !Ref 'GlueDatabase' 316 | TableInput: 317 | Description: Daily Price Data 318 | Name: hist_data_daily 319 | Parameters: 320 | classification: csv 321 | has_encrypted_data: false 322 | StorageDescriptor: 323 | Columns: 324 | - Name: dt 325 | Type: string 326 | - Name: sym 327 | Type: string 328 | - Name: open 329 | Type: double 330 | - Name: high 331 | Type: double 332 | - Name: low 333 | Type: double 334 | - Name: close 335 | Type: double 336 | - Name: vol 337 | Type: double 338 | Compressed: false 339 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 340 | Location: !Join 341 | - '' 342 | - - s3:// 343 | - !Ref 'S3Bucket' 344 | - /hist_data_daily 345 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 346 | SerdeInfo: 347 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 348 | Parameters: 349 | field.delim: ',' 350 | skip.header.line.count: '1' 351 | StoredAsSubDirectories: false 352 | TableType: EXTERNAL_TABLE 353 | 354 | GlueHistDataIntraday: 355 | Type: AWS::Glue::Table 356 | Properties: 357 | CatalogId: !Ref 'AWS::AccountId' 358 | DatabaseName: !Ref 'GlueDatabase' 359 | TableInput: 360 | Description: Intraday Price Data 361 | Name: hist_data_intraday 362 | Parameters: 363 | classification: csv 364 | has_encrypted_data: 
false 365 | StorageDescriptor: 366 | Columns: 367 | - Name: dt 368 | Type: string 369 | - Name: sym 370 | Type: string 371 | - Name: open 372 | Type: double 373 | - Name: high 374 | Type: double 375 | - Name: low 376 | Type: double 377 | - Name: close 378 | Type: double 379 | - Name: vol 380 | Type: double 381 | Compressed: false 382 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 383 | Location: !Join 384 | - '' 385 | - - s3:// 386 | - !Ref 'S3Bucket' 387 | - /hist_data_intraday 388 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 389 | SerdeInfo: 390 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 391 | Parameters: 392 | field.delim: ',' 393 | skip.header.line.count: '1' 394 | StoredAsSubDirectories: false 395 | TableType: EXTERNAL_TABLE 396 | 397 | AthenaWorkgroup: 398 | Type: AWS::Athena::WorkGroup 399 | Properties: 400 | Name: MyWorkGroup 401 | Description: AlgoWorkgroup 402 | RecursiveDeleteOption: true 403 | State: ENABLED 404 | WorkGroupConfiguration: 405 | RequesterPaysEnabled: true 406 | ResultConfiguration: 407 | OutputLocation: !Join 408 | - '' 409 | - - s3:// 410 | - !Ref 'S3Bucket' 411 | - /results/ 412 | 413 | AlgoHistDataDaily: 414 | Type: AWS::Athena::NamedQuery 415 | Properties: 416 | Database: !Ref 'GlueDatabase' 417 | QueryString: !Join 418 | - '' 419 | - - select * from algo_data. 420 | - !Ref 'GlueHistDataDaily' 421 | - ' limit 10;' 422 | Name: HistDataDaily 423 | 424 | AlgoHistDataIntraday: 425 | Type: AWS::Athena::NamedQuery 426 | Properties: 427 | Database: !Ref 'GlueDatabase' 428 | QueryString: !Join 429 | - '' 430 | - - select * from algo_data. 431 | - !Ref 'GlueHistDataIntraday' 432 | - ' limit 10;' 433 | Name: HistDataIntraday 434 | 435 | GlueTableFeedDB: 436 | Type: AWS::Glue::Table 437 | Properties: 438 | CatalogId: !Ref 'AWS::AccountId' 439 | DatabaseName: !Ref 'GlueDatabase' 440 | TableInput: 441 | Description: Deutsche Boerse Xetra PDS 442 | Name: market_feed_deutsche_boerse 443 | Parameters: 444 | classification: csv 445 | has_encrypted_data: false 446 | StorageDescriptor: 447 | Columns: 448 | - Name: isin 449 | Type: string 450 | - Name: mnemonic 451 | Type: string 452 | - Name: securitydesc 453 | Type: string 454 | - Name: securitytype 455 | Type: string 456 | - Name: currency 457 | Type: string 458 | - Name: securityid 459 | Type: bigint 460 | - Name: date 461 | Type: string 462 | - Name: time 463 | Type: string 464 | - Name: startprice 465 | Type: double 466 | - Name: maxprice 467 | Type: double 468 | - Name: minprice 469 | Type: double 470 | - Name: endprice 471 | Type: double 472 | - Name: tradedvolume 473 | Type: bigint 474 | - Name: numberoftrades 475 | Type: bigint 476 | Compressed: false 477 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 478 | Location: !Join 479 | - '' 480 | - - s3:// 481 | - !Ref 'S3Bucket' 482 | - /feed/deutsche-boerse-xetra-pds 483 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 484 | SerdeInfo: 485 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 486 | Parameters: 487 | field.delim: ',' 488 | skip.header.line.count: '1' 489 | StoredAsSubDirectories: false 490 | PartitionKeys: 491 | - Name: year 492 | Type: bigint 493 | - Name: month 494 | Type: bigint 495 | - Name: day 496 | Type: bigint 497 | TableType: EXTERNAL_TABLE 498 | 499 | LogGroup: 500 | Type: AWS::Logs::LogGroup 501 | Properties: 502 | LogGroupName: algo 503 | RetentionInDays: 7 504 | 505 | Outputs: 506 | VPC: 507 | Description: A reference to 
the created VPC 508 | Value: !Ref VPC 509 | Export: 510 | Name: AlgorithmicTrading-VPC 511 | 512 | PublicSubnets: 513 | Description: A list of the public subnets 514 | Value: !Join [",", [!Ref PublicSubnet1, !Ref PublicSubnet2]] 515 | Export: 516 | Name: AlgorithmicTrading-PublicSubnets 517 | 518 | PrivateSubnets: 519 | Description: A list of the private subnets 520 | Value: !Join [",", [!Ref PrivateSubnet1, !Ref PrivateSubnet2]] 521 | Export: 522 | Name: AlgorithmicTrading-PrivateSubnets 523 | 524 | PublicSubnet1: 525 | Description: A reference to the public subnet in the 1st Availability Zone 526 | Value: !Ref PublicSubnet1 527 | Export: 528 | Name: AlgorithmicTrading-PublicSubnet1 529 | 530 | PublicSubnet2: 531 | Description: A reference to the public subnet in the 2nd Availability Zone 532 | Value: !Ref PublicSubnet2 533 | Export: 534 | Name: AlgorithmicTrading-PublicSubnet2 535 | 536 | PrivateSubnet1: 537 | Description: A reference to the private subnet in the 1st Availability Zone 538 | Value: !Ref PrivateSubnet1 539 | Export: 540 | Name: AlgorithmicTrading-PrivateSubnet1 541 | 542 | PrivateSubnet2: 543 | Description: A reference to the private subnet in the 2nd Availability Zone 544 | Value: !Ref PrivateSubnet2 545 | Export: 546 | Name: AlgorithmicTrading-PrivateSubnet2 547 | 548 | ECR: 549 | Description: A reference to ECR 550 | Value: !Ref ECR 551 | Export: 552 | Name: AlgorithmicTrading-ECR 553 | 554 | S3Bucket: 555 | Description: A reference to S3 Bucket 556 | Value: !Ref S3Bucket 557 | Export: 558 | Name: AlgorithmicTrading-S3Bucket 559 | 560 | ECSHostSecurityGroup: 561 | Description: ECSHostSecurityGroup 562 | Value: !Ref ECSHostSecurityGroup 563 | Export: 564 | Name: AlgorithmicTrading-ECSHostSecurityGroup 565 | 566 | ECSTaskExecutionRole: 567 | Description: ECSTaskExecutionRole 568 | Value: !Ref ECSTaskExecutionRole 569 | Export: 570 | Name: AlgorithmicTrading-ECSTaskExecutionRole 571 | 572 | AlgoExecutionRole: 573 | Description: AlgoExecutionRole ARN 574 | Value: !GetAtt 'AlgoExecutionRole.Arn' 575 | Export: 576 | Name: AlgorithmicTrading-AlgoExecutionRole-ARN 577 | 578 | Cluster: 579 | Description: A reference to the ECS cluster 580 | Value: !Ref ECSCluster 581 | Export: 582 | Name: AlgorithmicTrading-ECSCluster --------------------------------------------------------------------------------
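The reference template above can be deployed from the command line. Because it creates IAM roles and a managed policy, the deployment has to acknowledge IAM capabilities; the stack and bucket names below are placeholders, and the S3 bucket passed as a parameter must already exist in your account:

```sh
# Placeholder stack name and bucket name; adjust to your environment.
aws cloudformation deploy \
  --template-file algo-reference.yaml \
  --stack-name algotrading-reference \
  --parameter-overrides S3Bucket=my-algotrading-data-bucket \
  --capabilities CAPABILITY_IAM

# The exported values (VPC, subnets, ECR repository, ECS cluster, ...) can be read back with:
aws cloudformation list-exports \
  --query "Exports[?starts_with(Name,'AlgorithmicTrading')].[Name,Value]" \
  --output table
```

The exports make the shared resources discoverable to the other stacks and notebooks in this sample.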