├── assets
│   ├── arch.png
│   └── chart.png
├── 1_Data
│   ├── stocks_history_target.h5
│   ├── data_prep.py
│   ├── Load_Hist_Data_Daily.ipynb
│   └── Load_Hist_Data_Daily_Public.ipynb
├── 5_SageMakerStudio
│   ├── init_schema.sh
│   ├── stocks_history_target.h5
│   ├── init_s3_bucket.sh
│   ├── docker
│   │   ├── model
│   │   │   ├── wsgi.py
│   │   │   ├── train
│   │   │   ├── nginx.conf
│   │   │   ├── predictor.py
│   │   │   ├── algo_live_feed.py
│   │   │   ├── serve
│   │   │   ├── algo_sim_feed.py
│   │   │   └── algo_base.py
│   │   ├── default-user-settings.json
│   │   ├── app-image-config-input.json
│   │   └── Dockerfile
│   ├── s3_bucket.yaml
│   ├── data_prep.py
│   ├── 2_Load_Data.ipynb
│   ├── schema.yaml
│   ├── 1_Setup.ipynb
│   └── 3_Backtest_Strategy.ipynb
├── 2_Strategies
│   ├── model
│   │   ├── model_long_short_predict.h5
│   │   ├── wsgi.py
│   │   ├── train
│   │   ├── nginx.conf
│   │   ├── predictor.py
│   │   ├── algo_live_feed.py
│   │   ├── serve
│   │   ├── algo_sim_feed.py
│   │   └── algo_base.py
│   ├── leaderboard.py
│   ├── docker-compose.yml.template
│   ├── init_model.py
│   ├── ecs-params.yml.template
│   ├── update_config.py
│   ├── Dockerfile
│   ├── build_and_push.sh
│   ├── Run_Strategy_ECS.ipynb
│   ├── deploy.sh
│   ├── Strategy_SMA.ipynb
│   ├── Strategy_Breakout.ipynb
│   └── Strategy_ML_Forecast.ipynb
├── 3_Models
│   ├── model
│   │   ├── wsgi.py
│   │   ├── nginx.conf
│   │   ├── serve
│   │   └── predictor.py
│   ├── Dockerfile
│   └── build_and_push.sh
├── 4_Kinesis
│   ├── model
│   │   ├── wsgi.py
│   │   ├── train
│   │   ├── nginx.conf
│   │   ├── predictor.py
│   │   ├── algo_live_feed.py
│   │   ├── serve
│   │   ├── algo_sim_feed.py
│   │   └── algo_base.py
│   ├── init_model.py
│   ├── update_config_kinesis.py
│   ├── Dockerfile
│   ├── build_and_push.sh
│   └── algo-kinesis.yaml
├── CODE_OF_CONDUCT.md
├── LICENSE
├── CONTRIBUTING.md
├── README.md
└── 0_Setup
    └── algo-reference.yaml
/assets/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/assets/arch.png
--------------------------------------------------------------------------------
/assets/chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/assets/chart.png
--------------------------------------------------------------------------------
/1_Data/stocks_history_target.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/1_Data/stocks_history_target.h5
--------------------------------------------------------------------------------
/5_SageMakerStudio/init_schema.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | aws cloudformation deploy --template-file=schema.yaml --stack-name=algotrading-schema
--------------------------------------------------------------------------------
/5_SageMakerStudio/stocks_history_target.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/5_SageMakerStudio/stocks_history_target.h5
--------------------------------------------------------------------------------
/2_Strategies/model/model_long_short_predict.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/algorithmic-trading/master/2_Strategies/model/model_long_short_predict.h5
--------------------------------------------------------------------------------
/5_SageMakerStudio/init_s3_bucket.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | aws
cloudformation deploy --template-file=s3_bucket.yaml --stack-name=algotrading-s3bucket --capabilities=CAPABILITY_IAM
--------------------------------------------------------------------------------
/3_Models/model/wsgi.py:
--------------------------------------------------------------------------------
1 | import predictor as myapp
2 | 
3 | # This is just a simple wrapper for gunicorn to find your app.
4 | # If you want to change the algorithm file, simply change "predictor" above to the
5 | # new file.
6 | 
7 | app = myapp.app
8 | 
--------------------------------------------------------------------------------
/4_Kinesis/model/wsgi.py:
--------------------------------------------------------------------------------
1 | import predictor as myapp
2 | 
3 | # This is just a simple wrapper for gunicorn to find your app.
4 | # If you want to change the algorithm file, simply change "predictor" above to the
5 | # new file.
6 | 
7 | app = myapp.app
8 | 
--------------------------------------------------------------------------------
/2_Strategies/model/wsgi.py:
--------------------------------------------------------------------------------
1 | import predictor as myapp
2 | 
3 | # This is just a simple wrapper for gunicorn to find your app.
4 | # If you want to change the algorithm file, simply change "predictor" above to the
5 | # new file.
6 | 
7 | app = myapp.app
8 | 
--------------------------------------------------------------------------------
/5_SageMakerStudio/docker/model/wsgi.py:
--------------------------------------------------------------------------------
1 | import predictor as myapp
2 | 
3 | # This is just a simple wrapper for gunicorn to find your app.
4 | # If you want to change the algorithm file, simply change "predictor" above to the
5 | # new file.
6 | 
7 | app = myapp.app
8 | 
--------------------------------------------------------------------------------
/2_Strategies/leaderboard.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | 
4 | try:
5 |     with open('algo_event.config', 'r') as f:
6 |         event_config = json.load(f)
7 |         print(event_config['leaderboard'])
8 | except:
9 |     print("No leaderboard for this event")
10 | 
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | -------------------------------------------------------------------------------- /2_Strategies/docker-compose.yml.template: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | $SERVICE: 4 | image: $IMAGE 5 | environment: 6 | - AWS_DEFAULT_REGION=$REGION 7 | - ALGO_CMD=train 8 | logging: 9 | driver: awslogs 10 | options: 11 | awslogs-group: algo 12 | awslogs-region: $REGION 13 | awslogs-stream-prefix: $SERVICE -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/default-user-settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "DefaultUserSettings": { 3 | "KernelGatewayAppSettings": { 4 | "CustomImages": [ 5 | { 6 | "ImageName": "algotrading", 7 | "AppImageConfigName": "algotrading-config" 8 | } 9 | ] 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /2_Strategies/init_model.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | algo_name=sys.argv[1] 5 | 6 | Path('local/'+algo_name+'/input/data/training').mkdir(parents=True, exist_ok=True) 7 | Path('local/'+algo_name+'/input/config').mkdir(parents=True, exist_ok=True) 8 | Path('local/'+algo_name+'/model').mkdir(parents=True, exist_ok=True) 9 | 10 | model_name=algo_name 11 | print(algo_name) -------------------------------------------------------------------------------- /4_Kinesis/init_model.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | algo_name=sys.argv[1] 5 | 6 | Path('local/'+algo_name+'/input/data/training').mkdir(parents=True, exist_ok=True) 7 | Path('local/'+algo_name+'/input/config').mkdir(parents=True, exist_ok=True) 8 | Path('local/'+algo_name+'/model').mkdir(parents=True, exist_ok=True) 9 | 10 | model_name=algo_name 11 | print(algo_name) -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/app-image-config-input.json: -------------------------------------------------------------------------------- 1 | { 2 | "AppImageConfigName": "algotrading-config", 3 | "KernelGatewayImageConfig": { 4 | "KernelSpecs": [ 5 | { 6 | "Name": "python3", 7 | "DisplayName": "tensorflow 2.9.1-py3.9-cpu" 8 | } 9 | ], 10 | "FileSystemConfig": { 11 | "MountPath": "/root", 12 | "DefaultUid": 0, 13 | "DefaultGid": 0 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /4_Kinesis/model/train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import backtrader as bt 4 | from algo_base import * 5 | import importlib 6 | import json 7 | 8 | prefix = '/opt/ml/' 9 | hyper_params_path = os.path.join(prefix, 'input/config/hyperparameters.json') 10 | with open(hyper_params_path, 'r') as f: 11 | config = json.load(f) 12 | 13 | algo_name=config['algo_name'] 14 | print("import:%s" % algo_name) 15 | 16 | cls = getattr(importlib.import_module(algo_name), 'MyStrategy') 17 | print(cls) 18 | 19 | algo=AlgoStrategy(config,cls) 20 | algo.run() -------------------------------------------------------------------------------- /2_Strategies/model/train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import backtrader as bt 4 | from algo_base import * 5 | 
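# Illustrative note (not part of the original file): this entrypoint reads
# /opt/ml/input/config/hyperparameters.json below. The keys shown here mirror what
# update_config.py writes; a strategy notebook may add its own parameters, and the
# values are placeholders only:
#
#   {"algo_name": "algo_sma", "user": "user", "account": "123456789012", "region": "us-east-1"}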
import importlib 6 | import json 7 | 8 | prefix = '/opt/ml/' 9 | hyper_params_path = os.path.join(prefix, 'input/config/hyperparameters.json') 10 | with open(hyper_params_path, 'r') as f: 11 | config = json.load(f) 12 | 13 | algo_name=config['algo_name'] 14 | print("import:%s" % algo_name) 15 | 16 | cls = getattr(importlib.import_module(algo_name), 'MyStrategy') 17 | print(cls) 18 | 19 | algo=AlgoStrategy(config,cls) 20 | algo.run() -------------------------------------------------------------------------------- /2_Strategies/ecs-params.yml.template: -------------------------------------------------------------------------------- 1 | version: 1 2 | task_definition: 3 | task_execution_role: $TASK_ROLE 4 | task_role_arn: $ROLE_ARN 5 | ecs_network_mode: awsvpc 6 | task_size: 7 | mem_limit: 512 8 | cpu_limit: 256 9 | run_params: 10 | network_configuration: 11 | awsvpc_configuration: 12 | subnets: 13 | - $SUBNET1 14 | - $SUBNET2 15 | security_groups: 16 | - $SG 17 | assign_public_ip: DISABLED 18 | service_discovery: 19 | private_dns_namespace: 20 | vpc: $VPC 21 | name: algo 22 | -------------------------------------------------------------------------------- /3_Models/model/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | 27 | location ~ ^/(ping|invocations) { 28 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 29 | proxy_set_header Host $http_host; 30 | proxy_redirect off; 31 | proxy_pass http://gunicorn; 32 | } 33 | 34 | location / { 35 | return 404 "{}"; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /2_Strategies/model/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | 27 | location ~ ^/(ping|invocations) { 28 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 29 | proxy_set_header Host $http_host; 30 | proxy_redirect off; 31 | proxy_pass http://gunicorn; 32 | } 33 | 34 | location / { 35 | return 404 "{}"; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /4_Kinesis/model/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 
| 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | 27 | location ~ ^/(ping|invocations) { 28 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 29 | proxy_set_header Host $http_host; 30 | proxy_redirect off; 31 | proxy_pass http://gunicorn; 32 | } 33 | 34 | location / { 35 | return 404 "{}"; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import backtrader as bt 4 | from algo_base import * 5 | import importlib 6 | import json 7 | import boto3 8 | 9 | prefix = '/opt/ml/' 10 | hyper_params_path = os.path.join(prefix, 'input/config/hyperparameters.json') 11 | with open(hyper_params_path, 'r') as f: 12 | config = json.load(f) 13 | 14 | algo_name=config['algo_name'] 15 | print("import:%s" % algo_name) 16 | 17 | if 's3' in config: 18 | try: 19 | s3_bucket=config['s3'] 20 | print("s3_bucket=%s" % s3_bucket) 21 | s3 = boto3.client('s3') 22 | s3.download_file(s3_bucket, algo_name+'/'+algo_name+'.py', '/opt/program/'+algo_name+'.py') 23 | except Exception as e: 24 | print(e) 25 | 26 | cls = getattr(importlib.import_module(algo_name), 'MyStrategy') 27 | print(cls) 28 | 29 | algo=AlgoStrategy(config,cls) 30 | algo.run() -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | 27 | location ~ ^/(ping|invocations) { 28 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 29 | proxy_set_header Host $http_host; 30 | proxy_redirect off; 31 | proxy_pass http://gunicorn; 32 | } 33 | 34 | location / { 35 | return 404 "{}"; 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /2_Strategies/update_config.py: -------------------------------------------------------------------------------- 1 | # Update configurations 2 | import json 3 | import sys 4 | import boto3 5 | import sagemaker as sage 6 | 7 | sess = sage.Session() 8 | 9 | algo_name=sys.argv[1] 10 | conf_file='local/'+algo_name+'/input/config/hyperparameters.json' 11 | with open(conf_file, 'r') as f: 12 | config = json.load(f) 13 | 14 | config["algo_name"]=algo_name 15 | 16 | account=boto3.client('sts').get_caller_identity().get('Account') 17 | 18 | if 'user' not in config: 19 | config['user']='user' 20 | config["account"] = account 21 | config["region"]=sess.boto_session.region_name 22 | 23 | #try: 24 | # s3 = boto3.client('s3') 25 | # s3.download_file('', 'algo_event.config', 'algo_event.config') 26 | # with open('algo_event.config', 'r') as f: 27 | # event_config = json.load(f) 28 | # config['submitUrl']=event_config['submitUrl'] 29 | #except: 30 | # print("Skipped event config") 31 | 32 | with open(conf_file, "w") as text_file: 33 | text_file.write(json.dumps(config)) 34 | 35 | print("config=%s" % json.dumps(config)) -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM 763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:2.9.1-cpu-py39-ubuntu20.04-sagemaker 2 | 3 | RUN pip install ipykernel && \ 4 | python -m ipykernel install --sys-prefix 5 | 6 | RUN wget https://sourceforge.net/projects/ta-lib/files/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz && tar -xzf ta-lib-0.4.0-src.tar.gz && cd ta-lib/ && ./configure --prefix=/usr && make && make install && cd ../ && rm -rf ta-lib && rm ta-lib-0.4.0-src.tar.gz 7 | 8 | # Install all of the packages 9 | RUN pip install pandas 10 | RUN pip install flask 11 | RUN pip install gevent 12 | RUN pip install backtrader 13 | RUN pip install matplotlib==3.2.2 14 | RUN pip install ta-lib 15 | RUN pip install pyathena 16 | 17 | RUN mkdir -p /opt/ml/input/data/training 18 | RUN mkdir -p /opt/ml/input/config 19 | RUN mkdir -p /opt/ml/model 20 | 21 | # Env Variables 22 | ENV PYTHONUNBUFFERED=TRUE 23 | ENV PYTHONDONTWRITEBYTECODE=TRUE 24 | ENV PATH="/opt/program:${PATH}" 25 | 26 | # Set up the program in the image 27 | COPY model /opt/program 28 | WORKDIR /opt/program -------------------------------------------------------------------------------- /4_Kinesis/update_config_kinesis.py: -------------------------------------------------------------------------------- 1 | # Update configurations 2 | import json 3 | import sys 4 | import boto3 5 | import sagemaker as sage 6 | 7 | sess = sage.Session() 8 | 9 | algo_name=sys.argv[1] 10 | kinesis_stream=sys.argv[2] 11 | sym=sys.argv[3] 12 | 13 | conf_file='local/'+algo_name+'/input/config/hyperparameters.json' 14 | with open(conf_file, 'r') as f: 15 | config = json.load(f) 16 | 17 | config["algo_name"]=algo_name 18 | config["kinesis_stream"]=kinesis_stream 19 | config["sym"]=sym 20 | 21 | account=boto3.client('sts').get_caller_identity().get('Account') 22 | 23 | if 'user' not in config: 24 | config['user']='user' 25 | 
config["account"] = account 26 | config["region"]=sess.boto_session.region_name 27 | 28 | #try: 29 | # s3 = boto3.client('s3') 30 | # s3.download_file('', 'algo_event.config', 'algo_event.config') 31 | # with open('algo_event.config', 'r') as f: 32 | # event_config = json.load(f) 33 | # config['submitUrl']=event_config['submitUrl'] 34 | #except: 35 | # print("Skipped event config") 36 | 37 | with open(conf_file, "w") as text_file: 38 | text_file.write(json.dumps(config)) 39 | 40 | print("config=%s" % json.dumps(config)) -------------------------------------------------------------------------------- /4_Kinesis/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.1.0rc2-py3 2 | 3 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 4 | wget \ 5 | python3 \ 6 | nginx \ 7 | ca-certificates \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | RUN wget https://sourceforge.net/projects/ta-lib/files/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz && tar -xzf ta-lib-0.4.0-src.tar.gz && cd ta-lib/ && ./configure --prefix=/usr && make && make install && cd ../ && rm -rf ta-lib && rm ta-lib-0.4.0-src.tar.gz 11 | 12 | # Install all of the packages 13 | RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py 14 | RUN pip install numpy 15 | RUN pip install scipy 16 | RUN pip install scikit-learn 17 | RUN pip install pandas 18 | RUN pip install flask 19 | RUN pip install gevent 20 | RUN pip install gunicorn 21 | RUN pip install tensorflow==2.2.0 22 | RUN pip install keras 23 | RUN pip install backtrader 24 | RUN pip install matplotlib==3.2.2 25 | RUN pip install ta-lib 26 | RUN pip install boto3 27 | 28 | # Env Variables 29 | ENV PYTHONUNBUFFERED=TRUE 30 | ENV PYTHONDONTWRITEBYTECODE=TRUE 31 | ENV PATH="/opt/program:${PATH}" 32 | 33 | # Set up the program in the image 34 | COPY model /opt/program 35 | WORKDIR /opt/program -------------------------------------------------------------------------------- /2_Strategies/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.1.0rc2-py3 2 | 3 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 4 | wget \ 5 | python3 \ 6 | nginx \ 7 | ca-certificates \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | RUN wget https://sourceforge.net/projects/ta-lib/files/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz && tar -xzf ta-lib-0.4.0-src.tar.gz && cd ta-lib/ && ./configure --prefix=/usr && make && make install && cd ../ && rm -rf ta-lib && rm ta-lib-0.4.0-src.tar.gz 11 | 12 | # Install all of the packages 13 | # RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py 14 | RUN pip install numpy 15 | RUN pip install scipy 16 | RUN pip install scikit-learn 17 | RUN pip install pandas 18 | RUN pip install flask 19 | RUN pip install gevent 20 | RUN pip install gunicorn 21 | RUN pip install tensorflow==2.2.0 22 | RUN pip install keras 23 | RUN pip install backtrader 24 | RUN pip install matplotlib==3.2.2 25 | RUN pip install ta-lib 26 | RUN pip install boto3 27 | 28 | # Env Variables 29 | ENV PYTHONUNBUFFERED=TRUE 30 | ENV PYTHONDONTWRITEBYTECODE=TRUE 31 | ENV PATH="/opt/program:${PATH}" 32 | 33 | # Set up the program in the image 34 | COPY model /opt/program 35 | WORKDIR /opt/program 36 | -------------------------------------------------------------------------------- /3_Models/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM 
tensorflow/tensorflow:2.1.0rc2-py3 2 | 3 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 4 | wget \ 5 | python3 \ 6 | nginx \ 7 | ca-certificates \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | RUN wget https://sourceforge.net/projects/ta-lib/files/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz && tar -xzf ta-lib-0.4.0-src.tar.gz && cd ta-lib/ && ./configure --prefix=/usr && make && make install && cd ../ && rm -rf ta-lib && rm ta-lib-0.4.0-src.tar.gz 11 | 12 | # Install all of the packages 13 | # RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py 14 | RUN pip install numpy 15 | RUN pip install scipy 16 | RUN pip install scikit-learn 17 | RUN pip install pandas 18 | RUN pip install flask 19 | RUN pip install gevent 20 | RUN pip install gunicorn 21 | RUN pip install tensorflow==2.2.0 22 | RUN pip install keras 23 | RUN pip install backtrader 24 | RUN pip install matplotlib==3.2.2 25 | RUN pip install ta-lib 26 | RUN pip install boto3 27 | 28 | # Env Variables 29 | ENV PYTHONUNBUFFERED=TRUE 30 | ENV PYTHONDONTWRITEBYTECODE=TRUE 31 | ENV PATH="/opt/program:${PATH}" 32 | 33 | # Set up the program in the image 34 | COPY model /opt/program 35 | WORKDIR /opt/program 36 | -------------------------------------------------------------------------------- /3_Models/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script shows how to build the Docker image and push it to ECR to be ready for use 4 | # by SageMaker. 5 | 6 | image=$1 7 | 8 | chmod +x model/train 9 | chmod +x model/serve 10 | 11 | # Get the account number associated with the current IAM credentials 12 | account=$(aws sts get-caller-identity --query Account --output text) 13 | 14 | if [ $? -ne 0 ] 15 | then 16 | exit 255 17 | fi 18 | 19 | 20 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 21 | region=$(aws configure get region) 22 | region=${region:-us-east-1} 23 | 24 | 25 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image}:latest" 26 | 27 | # If the repository doesn't exist in ECR, create it. 28 | 29 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 30 | 31 | if [ $? -ne 0 ] 32 | then 33 | aws ecr create-repository --repository-name "${image}" > /dev/null 34 | fi 35 | 36 | # Get the login command from ECR and execute it directly 37 | $(aws ecr get-login --region ${region} --no-include-email) 38 | 39 | # Build the docker image locally with the image name and then push it to ECR 40 | # with the full name. 41 | 42 | docker build -t ${image} . 43 | docker tag ${image} ${fullname} 44 | 45 | docker push ${fullname} -------------------------------------------------------------------------------- /2_Strategies/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script shows how to build the Docker image and push it to ECR to be ready for use 4 | # by SageMaker. 5 | 6 | image=$1 7 | 8 | chmod +x model/train 9 | chmod +x model/serve 10 | 11 | # Get the account number associated with the current IAM credentials 12 | account=$(aws sts get-caller-identity --query Account --output text) 13 | 14 | if [ $? 
-ne 0 ] 15 | then 16 | exit 255 17 | fi 18 | 19 | 20 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 21 | region=$(aws configure get region) 22 | region=${region:-us-east-1} 23 | 24 | 25 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image}:latest" 26 | 27 | # If the repository doesn't exist in ECR, create it. 28 | 29 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 30 | 31 | if [ $? -ne 0 ] 32 | then 33 | aws ecr create-repository --repository-name "${image}" > /dev/null 34 | fi 35 | 36 | # Get the login command from ECR and execute it directly 37 | $(aws ecr get-login --region ${region} --no-include-email) 38 | 39 | # Build the docker image locally with the image name and then push it to ECR 40 | # with the full name. 41 | 42 | docker build -t ${image} . 43 | docker tag ${image} ${fullname} 44 | 45 | docker push ${fullname} -------------------------------------------------------------------------------- /4_Kinesis/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script shows how to build the Docker image and push it to ECR to be ready for use 4 | # by SageMaker. 5 | 6 | image=$1 7 | 8 | chmod +x model/train 9 | chmod +x model/serve 10 | 11 | # Get the account number associated with the current IAM credentials 12 | account=$(aws sts get-caller-identity --query Account --output text) 13 | 14 | if [ $? -ne 0 ] 15 | then 16 | exit 255 17 | fi 18 | 19 | 20 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 21 | region=$(aws configure get region) 22 | region=${region:-us-east-1} 23 | 24 | 25 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image}:latest" 26 | 27 | # If the repository doesn't exist in ECR, create it. 28 | 29 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 30 | 31 | if [ $? -ne 0 ] 32 | then 33 | aws ecr create-repository --repository-name "${image}" > /dev/null 34 | fi 35 | 36 | # Get the login command from ECR and execute it directly 37 | $(aws ecr get-login --region ${region} --no-include-email) 38 | 39 | # Build the docker image locally with the image name and then push it to ECR 40 | # with the full name. 41 | 42 | docker build -t ${image} . 43 | docker tag ${image} ${fullname} 44 | 45 | docker push ${fullname} -------------------------------------------------------------------------------- /2_Strategies/Run_Strategy_ECS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Please run through the strategy notebook first. This will create the container image." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "strategy_image=\"algo_daily_breakout\"" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "!./deploy.sh $strategy_image" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "!./ecs-cli compose up" 35 | ] 36 | } 37 | ], 38 | "metadata": { 39 | "kernelspec": { 40 | "display_name": "conda_python3", 41 | "language": "python", 42 | "name": "conda_python3" 43 | }, 44 | "language_info": { 45 | "codemirror_mode": { 46 | "name": "ipython", 47 | "version": 3 48 | }, 49 | "file_extension": ".py", 50 | "mimetype": "text/x-python", 51 | "name": "python", 52 | "nbconvert_exporter": "python", 53 | "pygments_lexer": "ipython3", 54 | "version": "3.6.10" 55 | } 56 | }, 57 | "nbformat": 4, 58 | "nbformat_minor": 4 59 | } 60 | -------------------------------------------------------------------------------- /5_SageMakerStudio/s3_bucket.yaml: -------------------------------------------------------------------------------- 1 | Resources: 2 | dataBucket: 3 | Metadata: 4 | 'aws:copilot:description': 'An Amazon S3 bucket to store and retrieve objects' 5 | Type: AWS::S3::Bucket 6 | DeletionPolicy: Retain 7 | Properties: 8 | VersioningConfiguration: 9 | Status: Enabled 10 | AccessControl: Private 11 | BucketEncryption: 12 | ServerSideEncryptionConfiguration: 13 | - ServerSideEncryptionByDefault: 14 | SSEAlgorithm: AES256 15 | PublicAccessBlockConfiguration: 16 | BlockPublicAcls: true 17 | BlockPublicPolicy: true 18 | 19 | dataBucketPolicy: 20 | Metadata: 21 | 'aws:copilot:description': 'A bucket policy to deny unencrypted access to the bucket and its contents' 22 | Type: AWS::S3::BucketPolicy 23 | DeletionPolicy: Retain 24 | Properties: 25 | PolicyDocument: 26 | Version: 2012-10-17 27 | Statement: 28 | - Sid: ForceHTTPS 29 | Effect: Deny 30 | Principal: '*' 31 | Action: 's3:*' 32 | Resource: 33 | - !Sub ${ dataBucket.Arn}/* 34 | - !Sub ${ dataBucket.Arn} 35 | Condition: 36 | Bool: 37 | "aws:SecureTransport": false 38 | Bucket: !Ref dataBucket 39 | 40 | Outputs: 41 | databucket: 42 | Description: "The name of a user-defined bucket." 
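# Illustrative note (not part of the original template): other stacks can consume
# this export with Fn::ImportValue "algotrading-s3bucket", and scripts can look the
# bucket name up with `aws cloudformation list-exports`.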
43 | Value: !Ref dataBucket 44 | Export: 45 | Name: "algotrading-s3bucket" -------------------------------------------------------------------------------- /5_SageMakerStudio/data_prep.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import datetime 3 | import pandas as pd 4 | import sys 5 | 6 | START_DATE = '2012-08-13' 7 | END_DATE = '2017-08-11' 8 | DATE_FORMAT = '%Y-%m-%d' 9 | START_DATETIME = datetime.datetime.strptime(START_DATE, DATE_FORMAT) 10 | 11 | def read_stock_history(filepath): 12 | """ Read data from extracted h5 13 | Args: 14 | filepath: path of file 15 | Returns: 16 | history: 17 | abbreviation: 18 | """ 19 | with h5py.File(filepath, 'r') as f: 20 | history = f['history'][:] 21 | abbreviation = f['abbreviation'][:].tolist() 22 | abbreviation = [abbr.decode('utf-8') for abbr in abbreviation] 23 | return history, abbreviation 24 | 25 | def index_to_date(index): 26 | return (START_DATETIME + datetime.timedelta(index)).strftime(DATE_FORMAT) 27 | 28 | def save_stock_data(stk,history,abbreviation): 29 | p=abbreviation.index(stk) 30 | h=history[p] 31 | tData=[] 32 | hData=['dt','sym','open','high','low','close','vol'] 33 | for x in range(0,h.shape[0]): 34 | row=[] 35 | row.append(index_to_date(x)) 36 | row.append(stk) 37 | v=h[x] 38 | for y in range(0,len(v)): 39 | row.append(v[y]) 40 | tData.append(row) 41 | df=pd.DataFrame(tData,columns=hData) 42 | df.set_index(pd.DatetimeIndex(df['dt']), inplace=True) 43 | del df['dt'] 44 | df.to_csv(stk+".csv") 45 | print("store:"+stk) 46 | return df 47 | 48 | stk=sys.argv[1] 49 | history,abbreviation=read_stock_history('stocks_history_target.h5') 50 | save_stock_data(stk,history,abbreviation) -------------------------------------------------------------------------------- /1_Data/data_prep.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import datetime 3 | import pandas as pd 4 | import sys 5 | 6 | START_DATE = '2012-08-13' 7 | END_DATE = '2017-08-11' 8 | DATE_FORMAT = '%Y-%m-%d' 9 | START_DATETIME = datetime.datetime.strptime(START_DATE, DATE_FORMAT) 10 | 11 | def read_stock_history(filepath): 12 | """ Read data from extracted h5 13 | Args: 14 | filepath: path of file 15 | Returns: 16 | history: 17 | abbreviation: 18 | """ 19 | with h5py.File(filepath, 'r') as f: 20 | history = f['history'][:] 21 | abbreviation = f['abbreviation'][:].tolist() 22 | abbreviation = [abbr.decode('utf-8') for abbr in abbreviation] 23 | return history, abbreviation 24 | 25 | def index_to_date(index): 26 | return (START_DATETIME + datetime.timedelta(index)).strftime(DATE_FORMAT) 27 | 28 | def save_stock_data(stk,history,abbreviation): 29 | p=abbreviation.index(stk) 30 | h=history[p] 31 | tData=[] 32 | hData=['dt','sym','open','high','low','close','vol'] 33 | for x in range(0,h.shape[0]): 34 | row=[] 35 | row.append(index_to_date(x)) 36 | row.append(stk) 37 | v=h[x] 38 | for y in range(0,len(v)): 39 | row.append(v[y]) 40 | tData.append(row) 41 | df=pd.DataFrame(tData,columns=hData) 42 | df.set_index(pd.DatetimeIndex(df['dt']), inplace=True) 43 | del df['dt'] 44 | df.to_csv("../1_Data/"+stk+".csv") 45 | print("store:"+stk) 46 | return df 47 | 48 | stk=sys.argv[1] 49 | history,abbreviation=read_stock_history('../1_Data/stocks_history_target.h5') 50 | save_stock_data(stk,history,abbreviation) -------------------------------------------------------------------------------- /2_Strategies/deploy.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | image=$1 3 | service=$1 4 | 5 | # Get the account number associated with the current IAM credentials 6 | account=$(aws sts get-caller-identity --query Account --output text) 7 | 8 | if [ $? -ne 0 ] 9 | then 10 | exit 255 11 | fi 12 | 13 | # Get the region defined in the current configuration 14 | region=$(aws configure get region) 15 | region=${region:-us-east-1} 16 | 17 | echo "create docker-compose.yml" 18 | 19 | cp docker-compose.yml.template docker-compose.yml 20 | sed -i "s/\$ENV/${env}/g" docker-compose.yml 21 | sed -i "s/\$REGION/${region}/g" docker-compose.yml 22 | sed -i "s/\$IMAGE/${account}.dkr.ecr.${region}.amazonaws.com\/${image}/g" docker-compose.yml 23 | sed -i "s/\$SERVICE/${service}/g" docker-compose.yml 24 | 25 | AWS_EXPORTS=`aws cloudformation list-exports` 26 | 27 | VPC=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-VPC") | .Value'` 28 | SUBNET1=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-PrivateSubnet1") | .Value'` 29 | SUBNET2=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-PrivateSubnet2") | .Value'` 30 | SG=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-ECSHostSecurityGroup") | .Value'` 31 | TASK_ROLE=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-ECSTaskExecutionRole") | .Value'` 32 | ROLE_ARN=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-AlgoExecutionRole-ARN") | .Value'` 33 | CLUSTER=`echo $AWS_EXPORTS | jq -r '.Exports[] | select (.Name=="AlgorithmicTrading-ECSCluster") | .Value'` 34 | 35 | echo "create ecs-params.yml" 36 | 37 | cp ecs-params.yml.template ecs-params.yml 38 | sed -i "s/\$VPC/${VPC}/g" ecs-params.yml 39 | sed -i "s/\$SUBNET1/${SUBNET1}/g" ecs-params.yml 40 | sed -i "s/\$SUBNET2/${SUBNET2}/g" ecs-params.yml 41 | sed -i "s/\$SG/${SG}/g" ecs-params.yml 42 | sed -i "s/\$TASK_ROLE/${TASK_ROLE}/g" ecs-params.yml 43 | sed -i "s@\$ROLE_ARN@${ROLE_ARN}@g" ecs-params.yml 44 | 45 | if [ ! -f "ecs-cli" ] ; then 46 | curl -Lo ecs-cli https://amazon-ecs-cli.s3.amazonaws.com/ecs-cli-linux-amd64-latest 47 | chmod 777 ecs-cli 48 | echo "ecs-cli installed" 49 | fi 50 | 51 | ./ecs-cli configure --region ${region} --cluster ${CLUSTER} --default-launch-type FARGATE -------------------------------------------------------------------------------- /2_Strategies/model/predictor.py: -------------------------------------------------------------------------------- 1 | # This is the file that implements a flask server to do inferences. It's the 2 | # file that you will modify to implement the scoring for your own algorithm. 3 | from __future__ import print_function 4 | 5 | import os 6 | from io import StringIO 7 | #import StringIO 8 | import flask 9 | 10 | import tensorflow as tf 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from keras import backend as K 15 | from keras.models import load_model 16 | from sklearn.preprocessing import StandardScaler 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | 19 | prefix = '/opt/ml/' 20 | model_path = os.path.join(prefix, 'model') 21 | 22 | # A singleton for holding the model. This simply loads the model and holds it. 23 | # It has a predict function that does a prediction based on the model and the 24 | # input data. 
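# Minimal usage sketch (illustrative, not part of the original sample): the model is
# loaded lazily on the first call, and predict() returns one prediction per row of
# the supplied DataFrame, e.g.
#
#   features = pd.DataFrame([[0.1, -0.2, 0.3]])   # placeholder feature values
#   predictions = ScoringService.predict(features)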
25 | class ScoringService(object): 26 | model = None # Where we keep the model when it's loaded 27 | 28 | @classmethod 29 | def get_model(cls): 30 | """ 31 | Get the model object for this instance, 32 | loading it if it's not already loaded. 33 | """ 34 | if cls.model is None: 35 | cls.model = load_model( 36 | os.path.join(model_path, 'model.h5')) 37 | return cls.model 38 | 39 | @classmethod 40 | def predict(cls, input): 41 | """For the input, do the predictions and return them. 42 | 43 | Args: 44 | input (a pandas dataframe): The data on which to do the 45 | predictions. 46 | 47 | There will be one prediction per row in the dataframe 48 | """ 49 | sess = K.get_session() 50 | with sess.graph.as_default(): 51 | clf = cls.get_model() 52 | return clf.predict(input) 53 | 54 | # The flask app for serving predictions 55 | app = flask.Flask(__name__) 56 | 57 | 58 | @app.route('/ping', methods=['GET']) 59 | def ping(): 60 | """ 61 | Determine if the container is working and healthy. 62 | In this sample container, we declare it healthy if we can load the model 63 | successfully. 64 | """ 65 | # Health check -- You can insert a health check here 66 | health = True 67 | status = 200 if health else 404 68 | return flask.Response( 69 | response='{"status":"ok"}', 70 | status=status, 71 | mimetype='application/json') 72 | 73 | 74 | @app.route('/invocations', methods=['POST']) 75 | def transformation(): 76 | return flask.Response( 77 | response='{"status":"ok"}', 78 | status=status, 79 | mimetype='application/json') 80 | -------------------------------------------------------------------------------- /4_Kinesis/model/predictor.py: -------------------------------------------------------------------------------- 1 | # This is the file that implements a flask server to do inferences. It's the 2 | # file that you will modify to implement the scoring for your own algorithm. 3 | from __future__ import print_function 4 | 5 | import os 6 | from io import StringIO 7 | #import StringIO 8 | import flask 9 | 10 | import tensorflow as tf 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from keras import backend as K 15 | from keras.models import load_model 16 | from sklearn.preprocessing import StandardScaler 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | 19 | prefix = '/opt/ml/' 20 | model_path = os.path.join(prefix, 'model') 21 | 22 | # A singleton for holding the model. This simply loads the model and holds it. 23 | # It has a predict function that does a prediction based on the model and the 24 | # input data. 25 | class ScoringService(object): 26 | model = None # Where we keep the model when it's loaded 27 | 28 | @classmethod 29 | def get_model(cls): 30 | """ 31 | Get the model object for this instance, 32 | loading it if it's not already loaded. 33 | """ 34 | if cls.model is None: 35 | cls.model = load_model( 36 | os.path.join(model_path, 'model.h5')) 37 | return cls.model 38 | 39 | @classmethod 40 | def predict(cls, input): 41 | """For the input, do the predictions and return them. 42 | 43 | Args: 44 | input (a pandas dataframe): The data on which to do the 45 | predictions. 
46 | 47 | There will be one prediction per row in the dataframe 48 | """ 49 | sess = K.get_session() 50 | with sess.graph.as_default(): 51 | clf = cls.get_model() 52 | return clf.predict(input) 53 | 54 | # The flask app for serving predictions 55 | app = flask.Flask(__name__) 56 | 57 | 58 | @app.route('/ping', methods=['GET']) 59 | def ping(): 60 | """ 61 | Determine if the container is working and healthy. 62 | In this sample container, we declare it healthy if we can load the model 63 | successfully. 64 | """ 65 | # Health check -- You can insert a health check here 66 | health = True 67 | status = 200 if health else 404 68 | return flask.Response( 69 | response='{"status":"ok"}', 70 | status=status, 71 | mimetype='application/json') 72 | 73 | 74 | @app.route('/invocations', methods=['POST']) 75 | def transformation(): 76 | return flask.Response( 77 | response='{"status":"ok"}', 78 | status=status, 79 | mimetype='application/json') 80 | -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/predictor.py: -------------------------------------------------------------------------------- 1 | # This is the file that implements a flask server to do inferences. It's the 2 | # file that you will modify to implement the scoring for your own algorithm. 3 | from __future__ import print_function 4 | 5 | import os 6 | from io import StringIO 7 | #import StringIO 8 | import flask 9 | 10 | import tensorflow as tf 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from keras import backend as K 15 | from keras.models import load_model 16 | from sklearn.preprocessing import StandardScaler 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | 19 | prefix = '/opt/ml/' 20 | model_path = os.path.join(prefix, 'model') 21 | 22 | # A singleton for holding the model. This simply loads the model and holds it. 23 | # It has a predict function that does a prediction based on the model and the 24 | # input data. 25 | class ScoringService(object): 26 | model = None # Where we keep the model when it's loaded 27 | 28 | @classmethod 29 | def get_model(cls): 30 | """ 31 | Get the model object for this instance, 32 | loading it if it's not already loaded. 33 | """ 34 | if cls.model is None: 35 | cls.model = load_model( 36 | os.path.join(model_path, 'model.h5')) 37 | return cls.model 38 | 39 | @classmethod 40 | def predict(cls, input): 41 | """For the input, do the predictions and return them. 42 | 43 | Args: 44 | input (a pandas dataframe): The data on which to do the 45 | predictions. 46 | 47 | There will be one prediction per row in the dataframe 48 | """ 49 | sess = K.get_session() 50 | with sess.graph.as_default(): 51 | clf = cls.get_model() 52 | return clf.predict(input) 53 | 54 | # The flask app for serving predictions 55 | app = flask.Flask(__name__) 56 | 57 | 58 | @app.route('/ping', methods=['GET']) 59 | def ping(): 60 | """ 61 | Determine if the container is working and healthy. 62 | In this sample container, we declare it healthy if we can load the model 63 | successfully. 
64 | """ 65 | # Health check -- You can insert a health check here 66 | health = True 67 | status = 200 if health else 404 68 | return flask.Response( 69 | response='{"status":"ok"}', 70 | status=status, 71 | mimetype='application/json') 72 | 73 | 74 | @app.route('/invocations', methods=['POST']) 75 | def transformation(): 76 | return flask.Response( 77 | response='{"status":"ok"}', 78 | status=status, 79 | mimetype='application/json') 80 | -------------------------------------------------------------------------------- /4_Kinesis/model/algo_live_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | import math 10 | import numpy as np 11 | import pandas as pd 12 | import json 13 | import boto3 14 | 15 | class AlgoLiveData(DataBase): 16 | def __init__(self,region): 17 | super(AlgoLiveData, self).__init__() 18 | self.region=region 19 | self.lambda_client = boto3.client('lambda',region_name=self.region) 20 | self.connected=False 21 | 22 | #start_date = '2017-08-11' 23 | #now = datetime.datetime.now() # current date and time 24 | #end_date = now.strftime("%Y-%m-%d") 25 | 26 | #self.fromdate=pd.to_datetime(start_date, format = "%Y-%m-%d") 27 | #self.todate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.timeframe=bt.TimeFrame.Ticks 29 | print(self.lines.datetime.array) 30 | 31 | def start(self): 32 | print("start feed") 33 | print(self.lines.datetime.array) 34 | 35 | def stop(self): 36 | print("stop feed") 37 | 38 | def islive(self): 39 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 40 | should be deactivated''' 41 | return True 42 | 43 | def haslivedata(self): 44 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 45 | should be deactivated''' 46 | return self.connected 47 | 48 | def _load(self): 49 | #print("A:%s" % self.lines.datetime.array) 50 | if not self.connected: 51 | while not self.connected: 52 | self.pull() 53 | else: 54 | self.pull() 55 | return True 56 | 57 | def pull(self): 58 | #print("B:%s" % self.lines.datetime.array) 59 | if math.isnan(self.lines.datetime[0]): 60 | now = datetime.datetime.now() 61 | self.lines.datetime[0]=date2num(now) 62 | now=datetime.datetime.now() 63 | try: 64 | item={} 65 | res=self.lambda_client.invoke( 66 | FunctionName='algo_market_data', 67 | InvocationType='RequestResponse', 68 | Payload=json.dumps(item) 69 | ) 70 | t=res['Payload'] 71 | l=json.loads(t.read().decode('utf-8')) 72 | print("load:%s" % l) 73 | 74 | #print(self.lines.datetime.array) 75 | #print(self.lines.close.array) 76 | 77 | for x in l: 78 | dt=pd.to_datetime(x['date'], format = "%Y-%m-%d") 79 | #print(dt) 80 | close=x['close'] 81 | 82 | self.lines.datetime[0] = date2num(datetime.datetime.now()) 83 | self.lines.open[0] = close 84 | self.lines.high[0] = close 85 | self.lines.low[0] = close 86 | self.lines.close[0] = close 87 | self.lines.volume[0] = 0 88 | 89 | self.connected=True 90 | self._laststatus=self.LIVE 91 | #print("connected") 92 | except Exception as e: 93 | print("err:%s" % e) 94 | time.sleep(5) -------------------------------------------------------------------------------- /2_Strategies/model/algo_live_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import 
DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | import math 10 | import numpy as np 11 | import pandas as pd 12 | import json 13 | import boto3 14 | 15 | class AlgoLiveData(DataBase): 16 | def __init__(self,region): 17 | super(AlgoLiveData, self).__init__() 18 | self.region=region 19 | self.lambda_client = boto3.client('lambda',region_name=self.region) 20 | self.connected=False 21 | 22 | #start_date = '2017-08-11' 23 | #now = datetime.datetime.now() # current date and time 24 | #end_date = now.strftime("%Y-%m-%d") 25 | 26 | #self.fromdate=pd.to_datetime(start_date, format = "%Y-%m-%d") 27 | #self.todate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.timeframe=bt.TimeFrame.Ticks 29 | print(self.lines.datetime.array) 30 | 31 | def start(self): 32 | print("start feed") 33 | print(self.lines.datetime.array) 34 | 35 | def stop(self): 36 | print("stop feed") 37 | 38 | def islive(self): 39 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 40 | should be deactivated''' 41 | return True 42 | 43 | def haslivedata(self): 44 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 45 | should be deactivated''' 46 | return self.connected 47 | 48 | def _load(self): 49 | #print("A:%s" % self.lines.datetime.array) 50 | if not self.connected: 51 | while not self.connected: 52 | self.pull() 53 | else: 54 | self.pull() 55 | return True 56 | 57 | def pull(self): 58 | #print("B:%s" % self.lines.datetime.array) 59 | if math.isnan(self.lines.datetime[0]): 60 | now = datetime.datetime.now() 61 | self.lines.datetime[0]=date2num(now) 62 | now=datetime.datetime.now() 63 | try: 64 | item={} 65 | res=self.lambda_client.invoke( 66 | FunctionName='algo_market_data', 67 | InvocationType='RequestResponse', 68 | Payload=json.dumps(item) 69 | ) 70 | t=res['Payload'] 71 | l=json.loads(t.read().decode('utf-8')) 72 | print("load:%s" % l) 73 | 74 | #print(self.lines.datetime.array) 75 | #print(self.lines.close.array) 76 | 77 | for x in l: 78 | dt=pd.to_datetime(x['date'], format = "%Y-%m-%d") 79 | #print(dt) 80 | close=x['close'] 81 | 82 | self.lines.datetime[0] = date2num(datetime.datetime.now()) 83 | self.lines.open[0] = close 84 | self.lines.high[0] = close 85 | self.lines.low[0] = close 86 | self.lines.close[0] = close 87 | self.lines.volume[0] = 0 88 | 89 | self.connected=True 90 | self._laststatus=self.LIVE 91 | #print("connected") 92 | except Exception as e: 93 | print("err:%s" % e) 94 | time.sleep(5) -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/algo_live_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | import math 10 | import numpy as np 11 | import pandas as pd 12 | import json 13 | import boto3 14 | 15 | class AlgoLiveData(DataBase): 16 | def __init__(self,region): 17 | super(AlgoLiveData, self).__init__() 18 | self.region=region 19 | self.lambda_client = boto3.client('lambda',region_name=self.region) 20 | self.connected=False 21 | 22 | #start_date = '2017-08-11' 23 | #now = datetime.datetime.now() # current date and time 24 | #end_date = now.strftime("%Y-%m-%d") 25 | 26 | #self.fromdate=pd.to_datetime(start_date, format = "%Y-%m-%d") 27 | #self.todate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 
| self.timeframe=bt.TimeFrame.Ticks 29 | print(self.lines.datetime.array) 30 | 31 | def start(self): 32 | print("start feed") 33 | print(self.lines.datetime.array) 34 | 35 | def stop(self): 36 | print("stop feed") 37 | 38 | def islive(self): 39 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 40 | should be deactivated''' 41 | return True 42 | 43 | def haslivedata(self): 44 | '''Returns ``True`` to notify ``Cerebro`` that preloading and runonce 45 | should be deactivated''' 46 | return self.connected 47 | 48 | def _load(self): 49 | #print("A:%s" % self.lines.datetime.array) 50 | if not self.connected: 51 | while not self.connected: 52 | self.pull() 53 | else: 54 | self.pull() 55 | return True 56 | 57 | def pull(self): 58 | #print("B:%s" % self.lines.datetime.array) 59 | if math.isnan(self.lines.datetime[0]): 60 | now = datetime.datetime.now() 61 | self.lines.datetime[0]=date2num(now) 62 | now=datetime.datetime.now() 63 | try: 64 | item={} 65 | res=self.lambda_client.invoke( 66 | FunctionName='algo_market_data', 67 | InvocationType='RequestResponse', 68 | Payload=json.dumps(item) 69 | ) 70 | t=res['Payload'] 71 | l=json.loads(t.read().decode('utf-8')) 72 | print("load:%s" % l) 73 | 74 | #print(self.lines.datetime.array) 75 | #print(self.lines.close.array) 76 | 77 | for x in l: 78 | dt=pd.to_datetime(x['date'], format = "%Y-%m-%d") 79 | #print(dt) 80 | close=x['close'] 81 | 82 | self.lines.datetime[0] = date2num(datetime.datetime.now()) 83 | self.lines.open[0] = close 84 | self.lines.high[0] = close 85 | self.lines.low[0] = close 86 | self.lines.close[0] = close 87 | self.lines.volume[0] = 0 88 | 89 | self.connected=True 90 | self._laststatus=self.LIVE 91 | #print("connected") 92 | except Exception as e: 93 | print("err:%s" % e) 94 | time.sleep(5) -------------------------------------------------------------------------------- /2_Strategies/model/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
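# Illustrative override of the tunables listed below when starting the container
# (image name and values are placeholders, not part of the original sample):
#
#   docker run -e MODEL_SERVER_WORKERS=2 -e MODEL_SERVER_TIMEOUT=120 <image> serve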
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | print("python:%s" % sys.version) 24 | 25 | import backtrader as bt 26 | from algo_base import * 27 | import importlib 28 | from algo_live_feed import AlgoLiveData 29 | 30 | algo_name='' 31 | with open('algo_name', 'r') as file: 32 | algo_name = file.read().replace('\n', '') 33 | algo_package='algo_'+algo_name 34 | print("import:%s" % algo_package) 35 | 36 | cls = getattr(importlib.import_module(algo_package), 'MyStrategy') 37 | print(cls) 38 | 39 | cpu_count = multiprocessing.cpu_count() 40 | 41 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 42 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 43 | 44 | 45 | def sigterm_handler(nginx_pid, gunicorn_pid): 46 | try: 47 | os.kill(nginx_pid, signal.SIGQUIT) 48 | except OSError: 49 | pass 50 | try: 51 | os.kill(gunicorn_pid, signal.SIGTERM) 52 | except OSError: 53 | pass 54 | 55 | sys.exit(0) 56 | 57 | 58 | def start_server(): 59 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 60 | 61 | # link the log streams to stdout/err so they will be logged to the container logs 62 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 63 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 64 | 65 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 66 | gunicorn = subprocess.Popen(['gunicorn', 67 | '--timeout', str(model_server_timeout), 68 | '-k', 'gevent', 69 | '-b', 'unix:/tmp/gunicorn.sock', 70 | '-w', str(model_server_workers), 71 | 'wsgi:app']) 72 | 73 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 74 | 75 | with open('algo_config', 'r') as f: 76 | config = json.load(f) 77 | print("config=%s" % (config)) 78 | 79 | data=AlgoLiveData(config["region"]) 80 | algo=AlgoStrategy(cls,data) 81 | algo.run() 82 | 83 | # If either subprocess exits, so do we. 84 | pids = set([nginx.pid, gunicorn.pid]) 85 | while True: 86 | pid, _ = os.wait() 87 | if pid in pids: 88 | break 89 | 90 | sigterm_handler(nginx.pid, gunicorn.pid) 91 | print('Inference server exiting') 92 | 93 | 94 | # The main routine just invokes the start function. 95 | if __name__ == '__main__': 96 | start_server() 97 | -------------------------------------------------------------------------------- /4_Kinesis/model/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | print("python:%s" % sys.version) 24 | 25 | import backtrader as bt 26 | from algo_base import * 27 | import importlib 28 | from algo_live_feed import AlgoLiveData 29 | 30 | algo_name='' 31 | with open('algo_name', 'r') as file: 32 | algo_name = file.read().replace('\n', '') 33 | algo_package='algo_'+algo_name 34 | print("import:%s" % algo_package) 35 | 36 | cls = getattr(importlib.import_module(algo_package), 'MyStrategy') 37 | print(cls) 38 | 39 | cpu_count = multiprocessing.cpu_count() 40 | 41 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 42 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 43 | 44 | 45 | def sigterm_handler(nginx_pid, gunicorn_pid): 46 | try: 47 | os.kill(nginx_pid, signal.SIGQUIT) 48 | except OSError: 49 | pass 50 | try: 51 | os.kill(gunicorn_pid, signal.SIGTERM) 52 | except OSError: 53 | pass 54 | 55 | sys.exit(0) 56 | 57 | 58 | def start_server(): 59 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 60 | 61 | # link the log streams to stdout/err so they will be logged to the container logs 62 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 63 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 64 | 65 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 66 | gunicorn = subprocess.Popen(['gunicorn', 67 | '--timeout', str(model_server_timeout), 68 | '-k', 'gevent', 69 | '-b', 'unix:/tmp/gunicorn.sock', 70 | '-w', str(model_server_workers), 71 | 'wsgi:app']) 72 | 73 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 74 | 75 | with open('algo_config', 'r') as f: 76 | config = json.load(f) 77 | print("config=%s" % (config)) 78 | 79 | data=AlgoLiveData(config["region"]) 80 | algo=AlgoStrategy(cls,data) 81 | algo.run() 82 | 83 | # If either subprocess exits, so do we. 84 | pids = set([nginx.pid, gunicorn.pid]) 85 | while True: 86 | pid, _ = os.wait() 87 | if pid in pids: 88 | break 89 | 90 | sigterm_handler(nginx.pid, gunicorn.pid) 91 | print('Inference server exiting') 92 | 93 | 94 | # The main routine just invokes the start function. 95 | if __name__ == '__main__': 96 | start_server() 97 | -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | print("python:%s" % sys.version) 24 | 25 | import backtrader as bt 26 | from algo_base import * 27 | import importlib 28 | from algo_live_feed import AlgoLiveData 29 | 30 | algo_name='' 31 | with open('algo_name', 'r') as file: 32 | algo_name = file.read().replace('\n', '') 33 | algo_package='algo_'+algo_name 34 | print("import:%s" % algo_package) 35 | 36 | cls = getattr(importlib.import_module(algo_package), 'MyStrategy') 37 | print(cls) 38 | 39 | cpu_count = multiprocessing.cpu_count() 40 | 41 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 42 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 43 | 44 | 45 | def sigterm_handler(nginx_pid, gunicorn_pid): 46 | try: 47 | os.kill(nginx_pid, signal.SIGQUIT) 48 | except OSError: 49 | pass 50 | try: 51 | os.kill(gunicorn_pid, signal.SIGTERM) 52 | except OSError: 53 | pass 54 | 55 | sys.exit(0) 56 | 57 | 58 | def start_server(): 59 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 60 | 61 | # link the log streams to stdout/err so they will be logged to the container logs 62 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 63 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 64 | 65 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 66 | gunicorn = subprocess.Popen(['gunicorn', 67 | '--timeout', str(model_server_timeout), 68 | '-k', 'gevent', 69 | '-b', 'unix:/tmp/gunicorn.sock', 70 | '-w', str(model_server_workers), 71 | 'wsgi:app']) 72 | 73 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 74 | 75 | with open('algo_config', 'r') as f: 76 | config = json.load(f) 77 | print("config=%s" % (config)) 78 | 79 | data=AlgoLiveData(config["region"]) 80 | algo=AlgoStrategy(cls,data) 81 | algo.run() 82 | 83 | # If either subprocess exits, so do we. 84 | pids = set([nginx.pid, gunicorn.pid]) 85 | while True: 86 | pid, _ = os.wait() 87 | if pid in pids: 88 | break 89 | 90 | sigterm_handler(nginx.pid, gunicorn.pid) 91 | print('Inference server exiting') 92 | 93 | 94 | # The main routine just invokes the start function. 95 | if __name__ == '__main__': 96 | start_server() 97 | -------------------------------------------------------------------------------- /1_Data/Load_Hist_Data_Daily.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Load Daily Data from AWS Data Exchange into S3 Bucket\n", 8 | "\n", 9 | "### Obtaining Data\n", 10 | "\n", 11 | "We obtain EOD stock data from AWS Data Exchange and export it to a S3 bucket. Then we format the data for our daily dataset. 
In this example we use the following data: https://aws.amazon.com/marketplace/pp/prodview-e2aizdzkos266\n", 12 | "\n", 13 | "### Output dataset \n", 14 | "\n", 15 | "- Contains 20 years of EOD data for one of the top 10 US companies\n", 16 | "- The data is saved into the specified S3 bucket as CSV.\n", 17 | "\n", 18 | "```\n", 19 | "hist_data_daily/{sym}.csv (columns: dt, sym,open,high,low,close,vol)\n", 20 | "```" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# get S3 bucket\n", 30 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 31 | "s3bucket=s3bucket[0]\n", 32 | "s3bucket" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# symbol\n", 42 | "sym='JNJ'" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# copy daily eod data to local\n", 52 | "! aws s3 cp s3://{s3bucket}/daily_adjusted_{sym}.csv ./" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "import pandas as pd\n", 62 | "\n", 63 | "df = pd.read_csv(\"daily_adjusted_\"+sym+\".csv\",infer_datetime_format=True, parse_dates=['timestamp'], index_col=['timestamp'])\n", 64 | "del df[\"split_coefficient\"]\n", 65 | "del df[\"dividend_amount\"]\n", 66 | "del df[\"adjusted_close\"]\n", 67 | "df.rename(inplace=True,columns={'volume':'vol'})\n", 68 | "df.index=df.index.rename('dt')\n", 69 | "df['sym']=sym\n", 70 | "df = df[['sym', 'open', 'high', 'low', 'close','vol']]\n", 71 | "df.sort_index(inplace=True)\n", 72 | "df.head()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "df.to_csv(sym+'.csv')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "!aws s3 cp {sym}.csv s3://{s3bucket}/hist_data_daily/\n", 91 | "!rm daily_adjusted_{sym}.csv\n", 92 | "!rm {sym}.csv" 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "conda_python3", 99 | "language": "python", 100 | "name": "conda_python3" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.6.10" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 2 117 | } 118 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 
13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /3_Models/model/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | print("python:%s" % sys.version) 24 | 25 | cpu_count = multiprocessing.cpu_count() 26 | 27 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 28 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 29 | 30 | 31 | def sigterm_handler(nginx_pid, gunicorn_pid): 32 | try: 33 | os.kill(nginx_pid, signal.SIGQUIT) 34 | except OSError: 35 | pass 36 | try: 37 | os.kill(gunicorn_pid, signal.SIGTERM) 38 | except OSError: 39 | pass 40 | 41 | sys.exit(0) 42 | 43 | 44 | def start_server(): 45 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 46 | 47 | # link the log streams to stdout/err so they will be logged to the container logs 48 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 49 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 50 | 51 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 52 | gunicorn = subprocess.Popen(['gunicorn', 53 | '--timeout', str(model_server_timeout), 54 | '-k', 'gevent', 55 | '-b', 'unix:/tmp/gunicorn.sock', 56 | '-w', str(model_server_workers), 57 | 'wsgi:app']) 58 | 59 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 60 | 61 | 62 | from threading import Timer, Thread, Event 63 | from datetime import datetime 64 | 65 | class PT(): 66 | def __init__(self, t, hFunction): 67 | self.t = t 68 | self.hFunction = hFunction 69 | self.thread = Timer(self.t, self.handle_function) 70 | 71 | def handle_function(self): 72 | self.hFunction() 73 | self.thread = Timer(self.t, self.handle_function) 74 | self.thread.start() 75 | 76 | def start(self): 77 | self.thread.start() 78 | 79 | def printer(): 80 | tempo = datetime.today() 81 | h,m,s = tempo.hour, tempo.minute, tempo.second 82 | print(f"{h}:{m}:{s}") 83 | 84 | t = PT(5, printer) 85 | t.start() 86 | 87 | # If either subprocess exits, so do we. 88 | pids = set([nginx.pid, gunicorn.pid]) 89 | while True: 90 | pid, _ = os.wait() 91 | if pid in pids: 92 | break 93 | 94 | sigterm_handler(nginx.pid, gunicorn.pid) 95 | print('Inference server exiting') 96 | 97 | 98 | # The main routine just invokes the start function. 99 | if __name__ == '__main__': 100 | start_server() 101 | -------------------------------------------------------------------------------- /3_Models/model/predictor.py: -------------------------------------------------------------------------------- 1 | # This is the file that implements a flask server to do inferences. It's the 2 | # file that you will modify to implement the scoring for your own algorithm. 
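#
# The server defined here exposes two routes: GET /ping for health checks and
# POST /invocations for scoring. A minimal client sketch (the host/port and
# the column names are illustrative assumptions; the CSV columns must match
# the features the model was trained on):
#
#   import requests
#   payload = "f1,f2,f3\n0.1,0.2,0.3\n"
#   r = requests.post("http://localhost:8080/invocations",
#                     data=payload, headers={"Content-Type": "text/csv"})
#   print(r.text)  # one prediction per line, returned as CSV
#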
3 | from __future__ import print_function 4 | 5 | import os 6 | from io import StringIO 7 | #import StringIO 8 | import flask 9 | 10 | import tensorflow as tf 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from keras import backend as K 15 | from keras.models import load_model 16 | from sklearn.preprocessing import StandardScaler 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | 19 | prefix = '/opt/ml/' 20 | model_path = os.path.join(prefix, 'model') 21 | 22 | # A singleton for holding the model. This simply loads the model and holds it. 23 | # It has a predict function that does a prediction based on the model and the 24 | # input data. 25 | class ScoringService(object): 26 | model = None # Where we keep the model when it's loaded 27 | 28 | @classmethod 29 | def get_model(cls): 30 | """ 31 | Get the model object for this instance, 32 | loading it if it's not already loaded. 33 | """ 34 | if cls.model is None: 35 | cls.model = load_model( 36 | os.path.join(model_path, 'model.h5')) 37 | return cls.model 38 | 39 | @classmethod 40 | def predict(cls, input): 41 | """For the input, do the predictions and return them. 42 | 43 | Args: 44 | input (a pandas dataframe): The data on which to do the 45 | predictions. 46 | 47 | There will be one prediction per row in the dataframe 48 | """ 49 | sess = K.get_session() 50 | with sess.graph.as_default(): 51 | clf = cls.get_model() 52 | return clf.predict(input) 53 | 54 | # The flask app for serving predictions 55 | app = flask.Flask(__name__) 56 | 57 | 58 | @app.route('/ping', methods=['GET']) 59 | def ping(): 60 | """ 61 | Determine if the container is working and healthy. 62 | In this sample container, we declare it healthy if we can load the model 63 | successfully. 64 | """ 65 | 66 | # Health check -- You can insert a health check here 67 | health = ScoringService.get_model() is not None 68 | status = 200 if health else 404 69 | return flask.Response( 70 | response='\n', 71 | status=status, 72 | mimetype='application/json') 73 | 74 | 75 | @app.route('/invocations', methods=['POST']) 76 | def transformation(): 77 | """ 78 | Do an inference on a single batch of data. In this sample server, we take 79 | data as CSV, convert it to a pandas data frame for internal use and then 80 | convert the predictions back to CSV (which really just means one prediction 81 | per line, since there's a single column. 
82 | """ 83 | data = None 84 | 85 | # Convert from CSV to pandas 86 | if flask.request.content_type == 'text/csv': 87 | data = flask.request.data.decode('utf-8') 88 | f = StringIO(data) 89 | data = pd.read_csv(f) 90 | else: 91 | return flask.Response( 92 | response='This predictor only supports CSV data', 93 | status=415, 94 | mimetype='text/plain') 95 | 96 | print('Invoked with {} records'.format(data.shape[0])) 97 | 98 | # Do the prediction 99 | print("data=%s" % data) 100 | predictions = ScoringService.predict(data) 101 | print("predictions=%s" % predictions) 102 | 103 | # Convert from numpy back to CSV 104 | 105 | result = pd.DataFrame(predictions).to_csv(header=False, index=False) 106 | print("result=%s" % result) 107 | #result = out.getvalue() 108 | 109 | return flask.Response(response=result, status=200, mimetype='text/csv') 110 | -------------------------------------------------------------------------------- /4_Kinesis/model/algo_sim_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | # Based on this: https://towardsdatascience.com/simulating-stock-prices-in-python-using-geometric-brownian-motion-8dfd6e8c6b18 14 | 15 | class AlgoSimData(DataBase): 16 | def __init__(self,datafile): 17 | super(AlgoSimData, self).__init__() 18 | 19 | df = pd.read_csv(datafile,infer_datetime_format=True, parse_dates=['dt']) 20 | 21 | start_date = '2012-08-13' 22 | end_date = '2017-08-11' 23 | 24 | now = datetime.datetime.now() # current date and time 25 | pred_end_date = now.strftime("%Y-%m-%d") 26 | 27 | self.fromdate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.todate=pd.to_datetime(pred_end_date, format = "%Y-%m-%d") 29 | self.timeframe=bt.TimeFrame.Days 30 | 31 | S_eon = df[["dt","close"]] 32 | 33 | returns = (S_eon.loc[1:, 'close'] - \ 34 | S_eon.shift(1).loc[1:, 'close']) / \ 35 | S_eon.shift(1).loc[1:, 'close'] 36 | 37 | # Parameter Assignments 38 | So = S_eon.loc[S_eon.shape[0] - 1, "close"] 39 | dt = 1 # day # User input 40 | n_of_wkdays = pd.date_range(start = pd.to_datetime(end_date, 41 | format = "%Y-%m-%d") + pd.Timedelta('1 days'), 42 | end = pd.to_datetime(pred_end_date, 43 | format = "%Y-%m-%d")).to_series().map(lambda x: 44 | 1 if x.isoweekday() in range(1,6) else 0).sum() 45 | T = n_of_wkdays # days # User input -> follows from pred_end_date 46 | N = T / dt 47 | t = np.arange(1, int(N) + 1) 48 | mu = np.mean(returns) 49 | sigma = np.std(returns) 50 | scen_size = 1 # User input 51 | b = {str(scen): np.random.normal(0, 1, int(N)) for scen in range(1, scen_size + 1)} 52 | W = {str(scen): b[str(scen)].cumsum() for scen in range(1, scen_size + 1)} 53 | 54 | # Calculating drift and diffusion components 55 | drift = (mu - 0.5 * sigma**2) * t 56 | diffusion = {str(scen): sigma * W[str(scen)] for scen in range(1, scen_size + 1)} 57 | 58 | # Making the predictions 59 | S = np.array([So * np.exp(drift + diffusion[str(scen)]) for scen in range(1, scen_size + 1)]) 60 | S = np.hstack((np.array([[So] for scen in range(scen_size)]), S)) # add So to the beginning series 61 | 62 | # Dataframe format for predictions - first 10 scenarios only 63 | self.df = pd.DataFrame(S.swapaxes(0, 1)[:, :10]).set_index( 64 | pd.date_range(start = S_eon["dt"].max(), 65 | end = pred_end_date, freq = 
'D').map(lambda x: 66 | x if x.isoweekday() in range(1, 6) else np.nan).dropna() 67 | ).reset_index(drop = False) 68 | print("SimData generated:from=%s,to=%s,count=%s" % (self.fromdate,self.todate,len(self.df))) 69 | self.n=0 70 | 71 | def start(self): 72 | print("start feed") 73 | 74 | def stop(self): 75 | print("stop feed") 76 | 77 | def _load(self): 78 | #print("load feed") 79 | if self.n>=len(self.df): 80 | return False 81 | 82 | v=self.df.values 83 | #print(v) 84 | #print(self.n) 85 | dt=v[self.n][0] 86 | close=v[self.n][1] 87 | #print("%s:%s:%s" % (self.n,dt,close)) 88 | 89 | self.lines.datetime[0] = date2num(dt) 90 | 91 | print(self.num2date(self.lines.datetime[0])) 92 | 93 | self.lines.open[0] = close 94 | self.lines.high[0] = close 95 | self.lines.low[0] = close 96 | self.lines.close[0] = close 97 | self.lines.volume[0] = 0 98 | 99 | self.n=self.n+1 100 | return True -------------------------------------------------------------------------------- /2_Strategies/model/algo_sim_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | # Based on this: https://towardsdatascience.com/simulating-stock-prices-in-python-using-geometric-brownian-motion-8dfd6e8c6b18 14 | 15 | class AlgoSimData(DataBase): 16 | def __init__(self,datafile): 17 | super(AlgoSimData, self).__init__() 18 | 19 | df = pd.read_csv(datafile,infer_datetime_format=True, parse_dates=['dt']) 20 | 21 | start_date = '2012-08-13' 22 | end_date = '2017-08-11' 23 | 24 | now = datetime.datetime.now() # current date and time 25 | pred_end_date = now.strftime("%Y-%m-%d") 26 | 27 | self.fromdate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.todate=pd.to_datetime(pred_end_date, format = "%Y-%m-%d") 29 | self.timeframe=bt.TimeFrame.Days 30 | 31 | S_eon = df[["dt","close"]] 32 | 33 | returns = (S_eon.loc[1:, 'close'] - \ 34 | S_eon.shift(1).loc[1:, 'close']) / \ 35 | S_eon.shift(1).loc[1:, 'close'] 36 | 37 | # Parameter Assignments 38 | So = S_eon.loc[S_eon.shape[0] - 1, "close"] 39 | dt = 1 # day # User input 40 | n_of_wkdays = pd.date_range(start = pd.to_datetime(end_date, 41 | format = "%Y-%m-%d") + pd.Timedelta('1 days'), 42 | end = pd.to_datetime(pred_end_date, 43 | format = "%Y-%m-%d")).to_series().map(lambda x: 44 | 1 if x.isoweekday() in range(1,6) else 0).sum() 45 | T = n_of_wkdays # days # User input -> follows from pred_end_date 46 | N = T / dt 47 | t = np.arange(1, int(N) + 1) 48 | mu = np.mean(returns) 49 | sigma = np.std(returns) 50 | scen_size = 1 # User input 51 | b = {str(scen): np.random.normal(0, 1, int(N)) for scen in range(1, scen_size + 1)} 52 | W = {str(scen): b[str(scen)].cumsum() for scen in range(1, scen_size + 1)} 53 | 54 | # Calculating drift and diffusion components 55 | drift = (mu - 0.5 * sigma**2) * t 56 | diffusion = {str(scen): sigma * W[str(scen)] for scen in range(1, scen_size + 1)} 57 | 58 | # Making the predictions 59 | S = np.array([So * np.exp(drift + diffusion[str(scen)]) for scen in range(1, scen_size + 1)]) 60 | S = np.hstack((np.array([[So] for scen in range(scen_size)]), S)) # add So to the beginning series 61 | 62 | # Dataframe format for predictions - first 10 scenarios only 63 | self.df = pd.DataFrame(S.swapaxes(0, 1)[:, :10]).set_index( 64 | pd.date_range(start = S_eon["dt"].max(), 65 | 
end = pred_end_date, freq = 'D').map(lambda x: 66 | x if x.isoweekday() in range(1, 6) else np.nan).dropna() 67 | ).reset_index(drop = False) 68 | print("SimData generated:from=%s,to=%s,count=%s" % (self.fromdate,self.todate,len(self.df))) 69 | self.n=0 70 | 71 | def start(self): 72 | print("start feed") 73 | 74 | def stop(self): 75 | print("stop feed") 76 | 77 | def _load(self): 78 | #print("load feed") 79 | if self.n>=len(self.df): 80 | return False 81 | 82 | v=self.df.values 83 | #print(v) 84 | #print(self.n) 85 | dt=v[self.n][0] 86 | close=v[self.n][1] 87 | #print("%s:%s:%s" % (self.n,dt,close)) 88 | 89 | self.lines.datetime[0] = date2num(dt) 90 | 91 | print(self.num2date(self.lines.datetime[0])) 92 | 93 | self.lines.open[0] = close 94 | self.lines.high[0] = close 95 | self.lines.low[0] = close 96 | self.lines.close[0] = close 97 | self.lines.volume[0] = 0 98 | 99 | self.n=self.n+1 100 | return True -------------------------------------------------------------------------------- /1_Data/Load_Hist_Data_Daily_Public.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Load Daily Data into S3 Bucket\n", 8 | "\n", 9 | "### Obtaining Data\n", 10 | "\n", 11 | "\n", 12 | "We use the dataset generated by [Chi Zhang](https://github.com/vermouth1992/drl-portfolio-management/tree/master/src/utils/datasets). It contains the historic price of 16 target stocks from NASDAQ100, including open, close, high and low prices from 2012-08-13 to 2017-08-11. Specifically, those stocks are: “AAPL”, “ATVI”, “CMCSA”, “COST”, “CSX”, “DISH”, “EA”, “EBAY”, “FB”, “GOOGL”, “HAS”, “ILMN”, “INTC”, “MAR”, “REGN” and “SBUX”.\n", 13 | "\n", 14 | "**This dataset is licensed under a MIT License**\n", 15 | "\n", 16 | "Copyright (c) 2017 Chi Zhang\n", 17 | "\n", 18 | "Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n", 19 | "\n", 20 | "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n", 21 | "\n", 22 | "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n", 23 | "\n", 24 | "### Output dataset \n", 25 | "\n", 26 | "- Contains 5 years of EOD data for one of the stocks\n", 27 | "- The data is saved into the specified S3 bucket as CSV.\n", 28 | "\n", 29 | "```\n", 30 | "hist_data_daily/{sym}.csv (columns: dt,sym,open,high,low,close,vol)\n", 31 | "```" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# get S3 bucket\n", 41 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 42 | "s3bucket=s3bucket[0]\n", 43 | "s3bucket" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# symbol\n", 53 | "sym='INTC'" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "%run data_prep.py {sym}" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "import pandas as pd\n", 72 | "\n", 73 | "df = pd.read_csv(\"INTC.csv\",infer_datetime_format=True, parse_dates=['dt'], index_col=['dt'])\n", 74 | "df.head()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "!aws s3 cp {sym}.csv s3://{s3bucket}/hist_data_daily/\n", 84 | "!rm {sym}.csv" 85 | ] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "conda_python3", 91 | "language": "python", 92 | "name": "conda_python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.6.10" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 2 109 | } -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/algo_sim_feed.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | import time 4 | 5 | from backtrader.feed import DataBase 6 | from backtrader import date2num 7 | from backtrader import TimeFrame 8 | import backtrader as bt 9 | 10 | import numpy as np 11 | import pandas as pd 12 | 13 | # Based on this: https://towardsdatascience.com/simulating-stock-prices-in-python-using-geometric-brownian-motion-8dfd6e8c6b18 14 | 15 | class AlgoSimData(DataBase): 16 | def __init__(self,datafile): 17 | super(AlgoSimData, self).__init__() 18 | 19 | df = pd.read_csv(datafile,infer_datetime_format=True, parse_dates=['dt']) 20 | 21 | start_date = '2012-08-13' 22 | end_date = '2017-08-11' 23 | 24 | now = datetime.datetime.now() # current date and time 25 | pred_end_date = now.strftime("%Y-%m-%d") 26 | 27 | self.fromdate=pd.to_datetime(end_date, format = "%Y-%m-%d") 28 | self.todate=pd.to_datetime(pred_end_date, format = "%Y-%m-%d") 29 | self.timeframe=bt.TimeFrame.Days 30 | 31 | S_eon = df[["dt","close"]] 32 | 33 | returns = (S_eon.loc[1:, 'close'] - \ 34 | S_eon.shift(1).loc[1:, 'close']) / \ 35 | S_eon.shift(1).loc[1:, 'close'] 36 | 37 | # 
Parameter Assignments 38 | So = S_eon.loc[S_eon.shape[0] - 1, "close"] 39 | dt = 1 # day # User input 40 | n_of_wkdays = pd.date_range(start = pd.to_datetime(end_date, 41 | format = "%Y-%m-%d") + pd.Timedelta('1 days'), 42 | end = pd.to_datetime(pred_end_date, 43 | format = "%Y-%m-%d")).to_series().map(lambda x: 44 | 1 if x.isoweekday() in range(1,6) else 0).sum() 45 | T = n_of_wkdays # days # User input -> follows from pred_end_date 46 | N = T / dt 47 | t = np.arange(1, int(N) + 1) 48 | mu = np.mean(returns) 49 | sigma = np.std(returns) 50 | scen_size = 1 # User input 51 | b = {str(scen): np.random.normal(0, 1, int(N)) for scen in range(1, scen_size + 1)} 52 | W = {str(scen): b[str(scen)].cumsum() for scen in range(1, scen_size + 1)} 53 | 54 | # Calculating drift and diffusion components 55 | drift = (mu - 0.5 * sigma**2) * t 56 | diffusion = {str(scen): sigma * W[str(scen)] for scen in range(1, scen_size + 1)} 57 | 58 | # Making the predictions 59 | S = np.array([So * np.exp(drift + diffusion[str(scen)]) for scen in range(1, scen_size + 1)]) 60 | S = np.hstack((np.array([[So] for scen in range(scen_size)]), S)) # add So to the beginning series 61 | 62 | # Dataframe format for predictions - first 10 scenarios only 63 | self.df = pd.DataFrame(S.swapaxes(0, 1)[:, :10]).set_index( 64 | pd.date_range(start = S_eon["dt"].max(), 65 | end = pred_end_date, freq = 'D').map(lambda x: 66 | x if x.isoweekday() in range(1, 6) else np.nan).dropna() 67 | ).reset_index(drop = False) 68 | print("SimData generated:from=%s,to=%s,count=%s" % (self.fromdate,self.todate,len(self.df))) 69 | self.n=0 70 | 71 | def start(self): 72 | print("start feed") 73 | 74 | def stop(self): 75 | print("stop feed") 76 | 77 | def _load(self): 78 | #print("load feed") 79 | if self.n>=len(self.df): 80 | return False 81 | 82 | v=self.df.values 83 | #print(v) 84 | #print(self.n) 85 | dt=v[self.n][0] 86 | close=v[self.n][1] 87 | #print("%s:%s:%s" % (self.n,dt,close)) 88 | 89 | self.lines.datetime[0] = date2num(dt) 90 | 91 | print(self.num2date(self.lines.datetime[0])) 92 | 93 | self.lines.open[0] = close 94 | self.lines.high[0] = close 95 | self.lines.low[0] = close 96 | self.lines.close[0] = close 97 | self.lines.volume[0] = 0 98 | 99 | self.n=self.n+1 100 | return True -------------------------------------------------------------------------------- /5_SageMakerStudio/2_Load_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Load Daily Data into S3 Bucket\n", 8 | "\n", 9 | "### Obtaining Data\n", 10 | "\n", 11 | "\n", 12 | "We use the dataset generated by [Chi Zhang](https://github.com/vermouth1992/drl-portfolio-management/tree/master/src/utils/datasets). It contains the historic price of 16 target stocks from NASDAQ100, including open, close, high and low prices from 2012-08-13 to 2017-08-11. 
Specifically, those stocks are: “AAPL”, “ATVI”, “CMCSA”, “COST”, “CSX”, “DISH”, “EA”, “EBAY”, “FB”, “GOOGL”, “HAS”, “ILMN”, “INTC”, “MAR”, “REGN” and “SBUX”.\n", 13 | "\n", 14 | "**This dataset is licensed under a MIT License**\n", 15 | "\n", 16 | "Copyright (c) 2017 Chi Zhang\n", 17 | "\n", 18 | "Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n", 19 | "\n", 20 | "The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n", 21 | "\n", 22 | "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n", 23 | "\n", 24 | "### Output dataset \n", 25 | "\n", 26 | "- Contains 5 years of EOD data for one of the stocks\n", 27 | "- The data is saved into the specified S3 bucket as CSV.\n", 28 | "\n", 29 | "```\n", 30 | "hist_data_daily/{sym}.csv (columns: dt,sym,open,high,low,close,vol)\n", 31 | "```" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "S3_BUCKET=!(aws cloudformation list-exports --query \"Exports[?Name=='algotrading-s3bucket'].Value\" --output text)\n", 41 | "S3_BUCKET=S3_BUCKET[0]\n", 42 | "S3_BUCKET" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# symbol\n", 52 | "sym='INTC'" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "%run data_prep.py {sym}" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "import pandas as pd\n", 71 | "\n", 72 | "df = pd.read_csv(\"INTC.csv\",infer_datetime_format=True, parse_dates=['dt'], index_col=['dt'])\n", 73 | "df.head()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "!aws s3 cp {sym}.csv s3://{S3_BUCKET}/hist_data_daily/" 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "instance_type": "ml.t3.medium", 88 | "kernelspec": { 89 | "display_name": "Python 3 (Data Science)", 90 | "language": "python", 91 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 92 | }, 93 | "language_info": { 94 | "codemirror_mode": { 95 | "name": "ipython", 96 | "version": 3 97 | }, 98 | "file_extension": ".py", 99 | "mimetype": "text/x-python", 100 | "name": "python", 101 | "nbconvert_exporter": "python", 102 | "pygments_lexer": "ipython3", 103 | "version": "3.7.10" 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 4 108 | } 109 | -------------------------------------------------------------------------------- 
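Once the daily CSV files are in s3://&lt;bucket&gt;/hist_data_daily/ and the schema stack below has been deployed, the data can be queried with Athena. A minimal sketch using boto3, assuming default credentials in a region where the stack exists and that the query completes successfully; the database (algo_data), table (hist_data_daily), and workgroup (MyWorkGroup) names are the ones defined in schema.yaml:

import time
import boto3

athena = boto3.client("athena")

# Start a small query against the Glue table defined in schema.yaml.
res = athena.start_query_execution(
    QueryString="SELECT dt, sym, close FROM hist_data_daily ORDER BY dt DESC LIMIT 10",
    QueryExecutionContext={"Database": "algo_data"},
    WorkGroup="MyWorkGroup",
)
qid = res["QueryExecutionId"]

# Naive polling loop; real code should also handle FAILED and CANCELLED states.
state = "RUNNING"
while state in ("QUEUED", "RUNNING"):
    time.sleep(1)
    state = athena.get_query_execution(QueryExecutionId=qid)["QueryExecution"]["Status"]["State"]

# Print the result rows (the first row returned by Athena contains the headers).
rows = athena.get_query_results(QueryExecutionId=qid)["ResultSet"]["Rows"]
for row in rows[1:]:
    print([col.get("VarCharValue") for col in row["Data"]])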
/5_SageMakerStudio/schema.yaml: -------------------------------------------------------------------------------- 1 | Resources: 2 | GlueDatabase: 3 | Type: AWS::Glue::Database 4 | Properties: 5 | CatalogId: !Ref 'AWS::AccountId' 6 | DatabaseInput: 7 | Name: algo_data 8 | 9 | GlueHistDataDaily: 10 | Type: AWS::Glue::Table 11 | Properties: 12 | CatalogId: !Ref 'AWS::AccountId' 13 | DatabaseName: !Ref 'GlueDatabase' 14 | TableInput: 15 | Description: Daily Price Data 16 | Name: hist_data_daily 17 | Parameters: 18 | classification: csv 19 | has_encrypted_data: false 20 | StorageDescriptor: 21 | Columns: 22 | - Name: dt 23 | Type: string 24 | - Name: sym 25 | Type: string 26 | - Name: open 27 | Type: double 28 | - Name: high 29 | Type: double 30 | - Name: low 31 | Type: double 32 | - Name: close 33 | Type: double 34 | - Name: vol 35 | Type: double 36 | Compressed: false 37 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 38 | Location: !Join 39 | - '' 40 | - - s3:// 41 | - !ImportValue algotrading-s3bucket 42 | - /hist_data_daily 43 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 44 | SerdeInfo: 45 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 46 | Parameters: 47 | field.delim: ',' 48 | skip.header.line.count: '1' 49 | StoredAsSubDirectories: false 50 | TableType: EXTERNAL_TABLE 51 | 52 | GlueHistDataIntraday: 53 | Type: AWS::Glue::Table 54 | Properties: 55 | CatalogId: !Ref 'AWS::AccountId' 56 | DatabaseName: !Ref 'GlueDatabase' 57 | TableInput: 58 | Description: Intraday Price Data 59 | Name: hist_data_intraday 60 | Parameters: 61 | classification: csv 62 | has_encrypted_data: false 63 | StorageDescriptor: 64 | Columns: 65 | - Name: dt 66 | Type: string 67 | - Name: sym 68 | Type: string 69 | - Name: open 70 | Type: double 71 | - Name: high 72 | Type: double 73 | - Name: low 74 | Type: double 75 | - Name: close 76 | Type: double 77 | - Name: vol 78 | Type: double 79 | Compressed: false 80 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 81 | Location: !Join 82 | - '' 83 | - - s3:// 84 | - !ImportValue algotrading-s3bucket 85 | - /hist_data_intraday 86 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 87 | SerdeInfo: 88 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 89 | Parameters: 90 | field.delim: ',' 91 | skip.header.line.count: '1' 92 | StoredAsSubDirectories: false 93 | TableType: EXTERNAL_TABLE 94 | 95 | AthenaWorkgroup: 96 | Type: AWS::Athena::WorkGroup 97 | Properties: 98 | Name: MyWorkGroup 99 | Description: AlgoWorkgroup 100 | RecursiveDeleteOption: true 101 | State: ENABLED 102 | WorkGroupConfiguration: 103 | RequesterPaysEnabled: true 104 | ResultConfiguration: 105 | OutputLocation: !Join 106 | - '' 107 | - - s3:// 108 | - !ImportValue algotrading-s3bucket 109 | - /results/ 110 | 111 | AlgoHistDataDaily: 112 | Type: AWS::Athena::NamedQuery 113 | Properties: 114 | Database: !Ref 'GlueDatabase' 115 | QueryString: !Join 116 | - '' 117 | - - select * from algo_data. 118 | - !Ref 'GlueHistDataDaily' 119 | - ' limit 10;' 120 | Name: HistDataDaily 121 | 122 | AlgoHistDataIntraday: 123 | Type: AWS::Athena::NamedQuery 124 | Properties: 125 | Database: !Ref 'GlueDatabase' 126 | QueryString: !Join 127 | - '' 128 | - - select * from algo_data. 
129 | - !Ref 'GlueHistDataIntraday' 130 | - ' limit 10;' 131 | Name: HistDataIntraday -------------------------------------------------------------------------------- /5_SageMakerStudio/1_Setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Setup" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "scrolled": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "!./init_s3_bucket.sh" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "!./init_schema.sh" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "!pip install sagemaker-studio-image-build" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "scrolled": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "!sm-docker build docker/. --repository algotrading:1.0" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "# Attach Container as SageMaker Kernel" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "import sagemaker as sage\n", 64 | "from sagemaker import get_execution_role\n", 65 | "\n", 66 | "role = get_execution_role()\n", 67 | "sess = sage.Session()\n", 68 | "\n", 69 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 70 | "region = sess.boto_session.region_name" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "!aws --region {region} sagemaker delete-image-version --image-name algotrading --version-number=1\n", 80 | "!aws --region {region} sagemaker list-image-versions --image-name algotrading\n", 81 | "!aws --region {region} sagemaker delete-image --image-name algotrading\n", 82 | "!aws --region {region} sagemaker list-images\n", 83 | "!aws --region {region} sagemaker delete-app-image-config --app-image-config-name=algotrading-config\n", 84 | "!aws --region {region} sagemaker list-app-image-configs" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "!aws --region {region} sagemaker create-image --image-name algotrading --role-arn {role}\n", 94 | "!aws --region {region} sagemaker list-images\n", 95 | "!aws --region {region} sagemaker create-image-version --image-name algotrading --base-image \"{account}.dkr.ecr.{region}.amazonaws.com/algotrading:1.0\"\n", 96 | "!aws --region {region} sagemaker list-image-versions --image-name algotrading\n", 97 | "!aws --region {region} sagemaker create-app-image-config --cli-input-json file://app-image-config-input.json\n", 98 | "!aws --region {region} sagemaker list-app-image-configs" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "domainid=!(aws --region {region} sagemaker list-domains --query 'Domains[0].DomainId' --output text)\n", 108 | "domainid=domainid[0]\n", 109 | "domainid" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "!aws 
--region {region} sagemaker update-domain --domain-id {domainid} --cli-input-json file://default-user-settings.json" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "instance_type": "ml.t3.medium", 124 | "kernelspec": { 125 | "display_name": "Python 3 (Data Science)", 126 | "language": "python", 127 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 128 | }, 129 | "language_info": { 130 | "codemirror_mode": { 131 | "name": "ipython", 132 | "version": 3 133 | }, 134 | "file_extension": ".py", 135 | "mimetype": "text/x-python", 136 | "name": "python", 137 | "nbconvert_exporter": "python", 138 | "pygments_lexer": "ipython3", 139 | "version": "3.7.10" 140 | } 141 | }, 142 | "nbformat": 4, 143 | "nbformat_minor": 4 144 | } 145 | -------------------------------------------------------------------------------- /2_Strategies/model/algo_base.py: -------------------------------------------------------------------------------- 1 | import backtrader as bt 2 | import backtrader.feeds as btfeeds 3 | import backtrader.analyzers as btanalyzers 4 | import backtrader.plot 5 | import os 6 | import pytz 7 | from pytz import timezone 8 | import requests 9 | import json 10 | import time 11 | from algo_sim_feed import AlgoSimData 12 | #from abc import classmethod 13 | 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | import matplotlib.pyplot as plt 17 | plt.rcParams["figure.figsize"] = [16,9] 18 | 19 | # More documentation about backtrader: https://www.backtrader.com/ 20 | 21 | class AlgoStrategy(): 22 | 23 | def __init__(self,config,strategy): 24 | self.config=config 25 | 26 | self.cerebro = bt.Cerebro() 27 | strategy.config=config 28 | strategy.init_broker(self.cerebro.broker) 29 | strategy.add_data(self.cerebro) 30 | self.cerebro.addstrategy(strategy) 31 | 32 | self.portfolioStartValue=self.cerebro.broker.getvalue() 33 | 34 | self.cerebro.addanalyzer(btanalyzers.DrawDown, _name='dd') 35 | self.cerebro.addanalyzer(btanalyzers.SharpeRatio_A, _name='sharpe') 36 | self.cerebro.addanalyzer(btanalyzers.SQN, _name='sqn') 37 | self.cerebro.addanalyzer(btanalyzers.TradeAnalyzer, _name='ta') 38 | 39 | def performance(self): 40 | analyzer=self.thestrat.analyzers.ta.get_analysis() 41 | dd_analyzer=self.thestrat.analyzers.dd.get_analysis() 42 | 43 | #Get the results we are interested in 44 | total_open = analyzer.total.open 45 | total_closed = analyzer.total.closed 46 | total_won = analyzer.won.total 47 | total_lost = analyzer.lost.total 48 | win_streak = analyzer.streak.won.longest 49 | lose_streak = analyzer.streak.lost.longest 50 | pnl_net = round(analyzer.pnl.net.total,2) 51 | strike_rate = (total_won / total_closed) * 100 52 | #Designate the rows 53 | h1 = ['Total Open', 'Total Closed', 'Total Won', 'Total Lost'] 54 | h2 = ['Strike Rate','Win Streak', 'Losing Streak', 'PnL Net'] 55 | h3 = ['DrawDown Pct','MoneyDown', '', ''] 56 | self.total_closed=total_closed 57 | self.strike_rate=strike_rate 58 | self.max_drawdown=dd_analyzer.max.drawdown 59 | r1 = [total_open, total_closed,total_won,total_lost] 60 | r2 = [('%.2f%%' %(strike_rate)), win_streak, lose_streak, pnl_net] 61 | r3 = [('%.2f%%' %(dd_analyzer.max.drawdown)), dd_analyzer.max.moneydown, '', ''] 62 | #Check which set of headers is the longest. 
63 | header_length = max(len(h1),len(h2),len(h3)) 64 | #Print the rows 65 | print_list = [h1,r1,h2,r2,h3,r3] 66 | row_format ="{:<15}" * (header_length + 1) 67 | print("Trade Analysis Results:") 68 | for row in print_list: 69 | print(row_format.format('',*row)) 70 | 71 | analyzer=self.thestrat.analyzers.sqn.get_analysis() 72 | sharpe_analyzer=self.thestrat.analyzers.sharpe.get_analysis() 73 | self.sqn = analyzer.sqn 74 | self.sharpe_ratio = sharpe_analyzer['sharperatio'] 75 | if self.sharpe_ratio is None: 76 | self.sharpe_ratio=0 77 | self.pnl = self.cerebro.broker.getvalue()-self.portfolioStartValue 78 | print('[SQN:%.2f, Sharpe Ratio:%.2f, Final Portfolio:%.2f, Total PnL:%.2f]' % (self.sqn,self.sharpe_ratio,self.cerebro.broker.getvalue(),self.pnl)) 79 | 80 | # plot 81 | fig=self.cerebro.plot() 82 | plt.savefig(os.path.join(StrategyTemplate.MODEL_PATH, 'chart.png')) 83 | 84 | def submit(self): 85 | try: 86 | if 'submitUrl' in self.config: 87 | name=self.config['user']+'@'+self.config['account'] 88 | algo=self.config['algo_name'] 89 | submitUrl=self.config['submitUrl'] 90 | 91 | URL = submitUrl 92 | ts=str(int(time.time())) 93 | PARAMS={'id': algo, 94 | 'name': name, 95 | 'trades': self.total_closed, 96 | 'strike_rate': self.strike_rate, 97 | 'max_drawdown': self.max_drawdown, 98 | 'pnl': self.pnl, 99 | 'sqn': self.sqn, 100 | 'sharpe_ratio': self.sharpe_ratio} 101 | print("submit:%s" % (json.dumps(PARAMS))) 102 | r = requests.get(url = URL, params = PARAMS, timeout=3) 103 | print("status=%s,res=%s" % (r.status_code,r.text)) 104 | if r.status_code == 200: 105 | print("performance submitted") 106 | else: 107 | print("error submitting performance:%s" % r.text) 108 | except Exception as e: 109 | print("error submitting performance:%s" % e) 110 | 111 | def run(self): 112 | thestrats = self.cerebro.run() 113 | self.thestrat = thestrats[0] 114 | self.performance() 115 | self.submit() 116 | 117 | class StrategyTemplate(bt.Strategy): 118 | 119 | PREFIX='/opt/ml/' 120 | TRAIN_FILE = os.path.join(PREFIX,'input/data/training/data.csv') 121 | CONFIG_FILE = os.path.join(PREFIX,'input/config/hyperparameters.json') 122 | MODEL_PATH = os.path.join(PREFIX,'model') 123 | 124 | def __init__(self): 125 | with open(StrategyTemplate.CONFIG_FILE, 'r') as f: 126 | self.config = json.load(f) 127 | print("[INIT]:config:%s=%s" % (StrategyTemplate.CONFIG_FILE,self.config)) 128 | 129 | self.lastDay=-1 130 | self.lastMonth=-1 131 | self.dataclose = self.datas[0].close 132 | 133 | @staticmethod 134 | def init_broker(broker): 135 | pass 136 | 137 | @staticmethod 138 | def add_data(cerebro): 139 | pass 140 | 141 | def notify_order(self, order): 142 | dt=self.datas[0].datetime.datetime(0) 143 | 144 | if order.status in [order.Completed]: 145 | if order.isbuy(): 146 | print( 147 | '[%s] BUY EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 148 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 149 | else: # Sell 150 | print('[%s] SELL EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 151 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 152 | 153 | def next(self): 154 | dt=self.datas[0].datetime.datetime(0) 155 | #print("[NEXT]:%s:close=%s" % (dt,self.dataclose[0])) 156 | 157 | #SOM 158 | if self.lastMonth!=dt.month: 159 | if self.lastMonth!=-1: 160 | chg=self.broker.getvalue()-self.monthCash 161 | print("[%s] SOM:chg=%.2f,cash=%.2f" % (dt,chg,self.broker.getvalue())) 162 | self.lastMonth=dt.month 163 | self.monthCash=self.broker.getvalue() 164 | 165 | #SOD 166 | if self.lastDay!=dt.day: 
167 | self.lastDay=dt.day 168 | print("[%s] SOD:cash=%.2f" % (dt,self.broker.getvalue())) 169 | -------------------------------------------------------------------------------- /4_Kinesis/model/algo_base.py: -------------------------------------------------------------------------------- 1 | import backtrader as bt 2 | import backtrader.feeds as btfeeds 3 | import backtrader.analyzers as btanalyzers 4 | import backtrader.plot 5 | import os 6 | import pytz 7 | from pytz import timezone 8 | import requests 9 | import json 10 | import time 11 | from algo_sim_feed import AlgoSimData 12 | #from abc import classmethod 13 | 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | import matplotlib.pyplot as plt 17 | plt.rcParams["figure.figsize"] = [16,9] 18 | 19 | # More documentation about backtrader: https://www.backtrader.com/ 20 | 21 | class AlgoStrategy(): 22 | 23 | def __init__(self,config,strategy): 24 | self.config=config 25 | 26 | self.cerebro = bt.Cerebro() 27 | strategy.config=config 28 | strategy.init_broker(self.cerebro.broker) 29 | strategy.add_data(self.cerebro) 30 | self.cerebro.addstrategy(strategy) 31 | 32 | self.portfolioStartValue=self.cerebro.broker.getvalue() 33 | 34 | self.cerebro.addanalyzer(btanalyzers.DrawDown, _name='dd') 35 | self.cerebro.addanalyzer(btanalyzers.SharpeRatio_A, _name='sharpe') 36 | self.cerebro.addanalyzer(btanalyzers.SQN, _name='sqn') 37 | self.cerebro.addanalyzer(btanalyzers.TradeAnalyzer, _name='ta') 38 | 39 | def performance(self): 40 | analyzer=self.thestrat.analyzers.ta.get_analysis() 41 | dd_analyzer=self.thestrat.analyzers.dd.get_analysis() 42 | 43 | #Get the results we are interested in 44 | total_open = analyzer.total.open 45 | total_closed = analyzer.total.closed 46 | total_won = analyzer.won.total 47 | total_lost = analyzer.lost.total 48 | win_streak = analyzer.streak.won.longest 49 | lose_streak = analyzer.streak.lost.longest 50 | pnl_net = round(analyzer.pnl.net.total,2) 51 | strike_rate = (total_won / total_closed) * 100 52 | #Designate the rows 53 | h1 = ['Total Open', 'Total Closed', 'Total Won', 'Total Lost'] 54 | h2 = ['Strike Rate','Win Streak', 'Losing Streak', 'PnL Net'] 55 | h3 = ['DrawDown Pct','MoneyDown', '', ''] 56 | self.total_closed=total_closed 57 | self.strike_rate=strike_rate 58 | self.max_drawdown=dd_analyzer.max.drawdown 59 | r1 = [total_open, total_closed,total_won,total_lost] 60 | r2 = [('%.2f%%' %(strike_rate)), win_streak, lose_streak, pnl_net] 61 | r3 = [('%.2f%%' %(dd_analyzer.max.drawdown)), dd_analyzer.max.moneydown, '', ''] 62 | #Check which set of headers is the longest. 
63 | header_length = max(len(h1),len(h2),len(h3)) 64 | #Print the rows 65 | print_list = [h1,r1,h2,r2,h3,r3] 66 | row_format ="{:<15}" * (header_length + 1) 67 | print("Trade Analysis Results:") 68 | for row in print_list: 69 | print(row_format.format('',*row)) 70 | 71 | analyzer=self.thestrat.analyzers.sqn.get_analysis() 72 | sharpe_analyzer=self.thestrat.analyzers.sharpe.get_analysis() 73 | self.sqn = analyzer.sqn 74 | self.sharpe_ratio = sharpe_analyzer['sharperatio'] 75 | if self.sharpe_ratio is None: 76 | self.sharpe_ratio=0 77 | self.pnl = self.cerebro.broker.getvalue()-self.portfolioStartValue 78 | print('[SQN:%.2f, Sharpe Ratio:%.2f, Final Portfolio:%.2f, Total PnL:%.2f]' % (self.sqn,self.sharpe_ratio,self.cerebro.broker.getvalue(),self.pnl)) 79 | 80 | # plot 81 | fig=self.cerebro.plot() 82 | plt.savefig(os.path.join(StrategyTemplate.MODEL_PATH, 'chart.png')) 83 | 84 | def submit(self): 85 | try: 86 | if 'submitUrl' in self.config: 87 | name=self.config['user']+'@'+self.config['account'] 88 | algo=self.config['algo_name'] 89 | submitUrl=self.config['submitUrl'] 90 | 91 | URL = submitUrl 92 | ts=str(int(time.time())) 93 | PARAMS={'id': algo, 94 | 'name': name, 95 | 'trades': self.total_closed, 96 | 'strike_rate': self.strike_rate, 97 | 'max_drawdown': self.max_drawdown, 98 | 'pnl': self.pnl, 99 | 'sqn': self.sqn, 100 | 'sharpe_ratio': self.sharpe_ratio} 101 | print("submit:%s" % (json.dumps(PARAMS))) 102 | r = requests.get(url = URL, params = PARAMS, timeout=3) 103 | print("status=%s,res=%s" % (r.status_code,r.text)) 104 | if r.status_code == 200: 105 | print("performance submitted") 106 | else: 107 | print("error submitting performance:%s" % r.text) 108 | except Exception as e: 109 | print("error submitting performance:%s" % e) 110 | 111 | def run(self): 112 | thestrats = self.cerebro.run() 113 | self.thestrat = thestrats[0] 114 | self.performance() 115 | self.submit() 116 | 117 | class StrategyTemplate(bt.Strategy): 118 | 119 | PREFIX='/opt/ml/' 120 | TRAIN_FILE = os.path.join(PREFIX,'input/data/training/data.csv') 121 | CONFIG_FILE = os.path.join(PREFIX,'input/config/hyperparameters.json') 122 | MODEL_PATH = os.path.join(PREFIX,'model') 123 | 124 | def __init__(self): 125 | with open(StrategyTemplate.CONFIG_FILE, 'r') as f: 126 | self.config = json.load(f) 127 | print("[INIT]:config:%s=%s" % (StrategyTemplate.CONFIG_FILE,self.config)) 128 | 129 | self.lastDay=-1 130 | self.lastMonth=-1 131 | self.dataclose = self.datas[0].close 132 | 133 | @staticmethod 134 | def init_broker(broker): 135 | pass 136 | 137 | @staticmethod 138 | def add_data(cerebro): 139 | pass 140 | 141 | def notify_order(self, order): 142 | dt=self.datas[0].datetime.datetime(0) 143 | 144 | if order.status in [order.Completed]: 145 | if order.isbuy(): 146 | print( 147 | '[%s] BUY EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 148 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 149 | else: # Sell 150 | print('[%s] SELL EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 151 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 152 | 153 | def next(self): 154 | dt=self.datas[0].datetime.datetime(0) 155 | #print("[NEXT]:%s:close=%s" % (dt,self.dataclose[0])) 156 | 157 | #SOM 158 | if self.lastMonth!=dt.month: 159 | if self.lastMonth!=-1: 160 | chg=self.broker.getvalue()-self.monthCash 161 | print("[%s] SOM:chg=%.2f,cash=%.2f" % (dt,chg,self.broker.getvalue())) 162 | self.lastMonth=dt.month 163 | self.monthCash=self.broker.getvalue() 164 | 165 | #SOD 166 | if self.lastDay!=dt.day: 
167 | self.lastDay=dt.day 168 | print("[%s] SOD:cash=%.2f" % (dt,self.broker.getvalue())) 169 | -------------------------------------------------------------------------------- /5_SageMakerStudio/docker/model/algo_base.py: -------------------------------------------------------------------------------- 1 | import backtrader as bt 2 | import backtrader.feeds as btfeeds 3 | import backtrader.analyzers as btanalyzers 4 | import backtrader.plot 5 | import os 6 | import pytz 7 | from pytz import timezone 8 | import requests 9 | import json 10 | import time 11 | from algo_sim_feed import AlgoSimData 12 | 13 | import matplotlib 14 | import matplotlib.pyplot as plt 15 | 16 | # More documentation about backtrader: https://www.backtrader.com/ 17 | 18 | class AlgoStrategy(): 19 | 20 | def __init__(self,config,strategy): 21 | self.config=config 22 | 23 | self.cerebro = bt.Cerebro() 24 | strategy.config=config 25 | strategy.init_broker(self.cerebro.broker) 26 | strategy.add_data(self.cerebro) 27 | self.cerebro.addstrategy(strategy) 28 | 29 | self.portfolioStartValue=self.cerebro.broker.getvalue() 30 | 31 | self.cerebro.addanalyzer(btanalyzers.DrawDown, _name='dd') 32 | self.cerebro.addanalyzer(btanalyzers.SharpeRatio_A, _name='sharpe') 33 | self.cerebro.addanalyzer(btanalyzers.SQN, _name='sqn') 34 | self.cerebro.addanalyzer(btanalyzers.TradeAnalyzer, _name='ta') 35 | 36 | def performance(self): 37 | analyzer=self.thestrat.analyzers.ta.get_analysis() 38 | dd_analyzer=self.thestrat.analyzers.dd.get_analysis() 39 | 40 | #Get the results we are interested in 41 | total_open = analyzer.total.open 42 | total_closed = analyzer.total.closed 43 | total_won = analyzer.won.total 44 | total_lost = analyzer.lost.total 45 | win_streak = analyzer.streak.won.longest 46 | lose_streak = analyzer.streak.lost.longest 47 | pnl_net = round(analyzer.pnl.net.total,2) 48 | strike_rate = (total_won / total_closed) * 100 49 | #Designate the rows 50 | h1 = ['Total Open', 'Total Closed', 'Total Won', 'Total Lost'] 51 | h2 = ['Strike Rate','Win Streak', 'Losing Streak', 'PnL Net'] 52 | h3 = ['DrawDown Pct','MoneyDown', '', ''] 53 | self.total_closed=total_closed 54 | self.strike_rate=strike_rate 55 | self.max_drawdown=dd_analyzer.max.drawdown 56 | r1 = [total_open, total_closed,total_won,total_lost] 57 | r2 = [('%.2f%%' %(strike_rate)), win_streak, lose_streak, pnl_net] 58 | r3 = [('%.2f%%' %(dd_analyzer.max.drawdown)), dd_analyzer.max.moneydown, '', ''] 59 | #Check which set of headers is the longest. 
60 | header_length = max(len(h1),len(h2),len(h3)) 61 | #Print the rows 62 | print_list = [h1,r1,h2,r2,h3,r3] 63 | row_format ="{:<15}" * (header_length + 1) 64 | print("Trade Analysis Results:") 65 | for row in print_list: 66 | print(row_format.format('',*row)) 67 | 68 | analyzer=self.thestrat.analyzers.sqn.get_analysis() 69 | sharpe_analyzer=self.thestrat.analyzers.sharpe.get_analysis() 70 | self.sqn = analyzer.sqn 71 | self.sharpe_ratio = sharpe_analyzer['sharperatio'] 72 | if self.sharpe_ratio is None: 73 | self.sharpe_ratio=0 74 | self.pnl = self.cerebro.broker.getvalue()-self.portfolioStartValue 75 | print('[SQN:%.2f, Sharpe Ratio:%.2f, Final Portfolio:%.2f, Total PnL:%.2f]' % (self.sqn,self.sharpe_ratio,self.cerebro.broker.getvalue(),self.pnl)) 76 | 77 | # plot 78 | chart=False 79 | if 'chart' in self.config and self.config['chart']=='true': 80 | chart=True 81 | if chart: 82 | fig=self.cerebro.plot() 83 | plt.rcParams["figure.figsize"] = [16,9] 84 | plt.savefig(os.path.join(StrategyTemplate.MODEL_PATH, 'chart.png')) 85 | 86 | def submit(self): 87 | try: 88 | if 'submitUrl' in self.config: 89 | name=self.config['user']+'@'+self.config['account'] 90 | algo=self.config['algo_name'] 91 | submitUrl=self.config['submitUrl'] 92 | 93 | URL = submitUrl 94 | ts=str(int(time.time())) 95 | PARAMS={'id': algo, 96 | 'name': name, 97 | 'trades': self.total_closed, 98 | 'strike_rate': self.strike_rate, 99 | 'max_drawdown': self.max_drawdown, 100 | 'pnl': self.pnl, 101 | 'sqn': self.sqn, 102 | 'sharpe_ratio': self.sharpe_ratio} 103 | print("submit:%s" % (json.dumps(PARAMS))) 104 | r = requests.get(url = URL, params = PARAMS, timeout=3) 105 | print("status=%s,res=%s" % (r.status_code,r.text)) 106 | if r.status_code == 200: 107 | print("performance submitted") 108 | else: 109 | print("error submitting performance:%s" % r.text) 110 | except Exception as e: 111 | print("error submitting performance:%s" % e) 112 | 113 | def run(self): 114 | thestrats = self.cerebro.run() 115 | self.thestrat = thestrats[0] 116 | self.performance() 117 | self.submit() 118 | 119 | class StrategyTemplate(bt.Strategy): 120 | 121 | PREFIX='/opt/ml/' 122 | TRAIN_FILE = os.path.join(PREFIX,'input/data/training/data.csv') 123 | CONFIG_FILE = os.path.join(PREFIX,'input/config/hyperparameters.json') 124 | MODEL_PATH = os.path.join(PREFIX,'model') 125 | 126 | def __init__(self): 127 | with open(StrategyTemplate.CONFIG_FILE, 'r') as f: 128 | self.config = json.load(f) 129 | print("[INIT]:config:%s=%s" % (StrategyTemplate.CONFIG_FILE,self.config)) 130 | 131 | self.lastDay=-1 132 | self.lastMonth=-1 133 | self.dataclose = self.datas[0].close 134 | 135 | @staticmethod 136 | def init_broker(broker): 137 | pass 138 | 139 | @staticmethod 140 | def add_data(cerebro): 141 | pass 142 | 143 | def notify_order(self, order): 144 | dt=self.datas[0].datetime.datetime(0) 145 | 146 | if order.status in [order.Completed]: 147 | if order.isbuy(): 148 | print( 149 | '[%s] BUY EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 150 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 151 | else: # Sell 152 | print('[%s] SELL EXECUTED, Price: %.2f, PNL: %.2f, Cash: %.2f' % 153 | (dt,order.executed.price,order.executed.pnl,self.broker.getvalue())) 154 | 155 | def next(self): 156 | dt=self.datas[0].datetime.datetime(0) 157 | #print("[NEXT]:%s:close=%s" % (dt,self.dataclose[0])) 158 | 159 | #SOM 160 | if self.lastMonth!=dt.month: 161 | if self.lastMonth!=-1: 162 | chg=self.broker.getvalue()-self.monthCash 163 | #print("[%s] 
SOM:chg=%.2f,cash=%.2f" % (dt,chg,self.broker.getvalue())) 164 | self.lastMonth=dt.month 165 | self.monthCash=self.broker.getvalue() 166 | 167 | #SOD 168 | if self.lastDay!=dt.day: 169 | self.lastDay=dt.day 170 | #print("[%s] SOD:cash=%.2f" % (dt,self.broker.getvalue())) 171 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Algorithmic Trading Workshop 2 | 3 | In this workshop, participants will learn how to load and store financial data on AWS from AWS Data Exchange and other external data sources and how to build and backtest algorithmic trading strategies with Amazon SageMaker that use technical indicators and advanced machine learning models. 4 | 5 | ![chart](assets/chart.png) 6 | 7 | _Time Commitment Expectation: This workshop was created to be completed in approximately 1h 30 min._ 8 | 9 | ## Regions 10 | 11 | This workshop has been tested in **us-east-1**. 12 | 13 | ## Considerations for Each Role 14 | As the team lead on this lean team of one, you'll need to wear multiple hats. Below are some things we'll cover from the perspective of each role: 15 | * Data Engineer - You'll modify scripts to load external market data to AWS. 16 | * Data Scientist - You'll need to load the data into your machine learning development environment. Once loaded, you'll understand the data, use a machine learning algorithm to train the model, and make predictions. 17 | * Trader - You will use different trading strategies based on data to maximize Profit & Loss while managing risk. 18 | 19 | ## Goals 20 | 21 | At minimum, at the end of this workshop, you will have an understanding of how to load historical price data from external market data sources like AWS Data Exchange into S3. You will become familiar with how to store price data in S3 and expose it via the Glue Data Catalog and Athena, how to backtest trading strategies using Amazon SageMaker, and how to train machine learning models that are used in trading strategies. You will also gain a basic understanding of how trading strategies based on trend following and machine learning are developed with Python and can be optimized for better performance. 22 | 23 | ## Architecture 24 | 25 | ![chart](assets/arch.png) 26 | 27 | ## License 28 | 29 | This library is licensed under the MIT-0 License. See the LICENSE file. 30 | 31 |
32 | 33 | External Dependencies 34 | 35 | This package depends on and may retrieve a number of third-party software packages (such as open source packages) from third-party servers at install-time or build-time ("External Dependencies"). The External Dependencies are subject to license terms that you must accept in order to use this package. If you do not accept all of the applicable license terms, you should not use this package. We recommend that you consult your company’s open source approval policy before proceeding. 36 | 37 | Provided below is a list of the External Dependencies and the applicable license terms as indicated by the documentation associated with the External Dependencies as of Amazon's most recent review of such documentation. 38 | THIS INFORMATION IS PROVIDED FOR CONVENIENCE ONLY. AMAZON DOES NOT PROMISE THAT THE LIST OR THE APPLICABLE TERMS AND CONDITIONS ARE COMPLETE, ACCURATE, OR UP-TO-DATE, AND AMAZON WILL HAVE NO LIABILITY FOR ANY INACCURACIES. YOU SHOULD CONSULT THE DOWNLOAD SITES FOR THE EXTERNAL DEPENDENCIES FOR THE MOST COMPLETE AND UP-TO-DATE LICENSING INFORMATION. 39 | YOUR USE OF THE EXTERNAL DEPENDENCIES IS AT YOUR SOLE RISK. IN NO EVENT WILL AMAZON BE LIABLE FOR ANY DAMAGES, INCLUDING WITHOUT LIMITATION ANY DIRECT, INDIRECT, CONSEQUENTIAL, SPECIAL, INCIDENTAL, OR PUNITIVE DAMAGES (INCLUDING FOR ANY LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, OR COMPUTER FAILURE OR MALFUNCTION) ARISING FROM OR RELATING TO THE EXTERNAL DEPENDENCIES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, EVEN IF AMAZON HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. THESE LIMITATIONS AND DISCLAIMERS APPLY EXCEPT TO THE EXTENT PROHIBITED BY APPLICABLE LAW. 40 | 41 | ** backtrader; version 1.9.74.123 -- https://www.backtrader.com/ 42 |
43 | 44 | ## References 45 | 46 | - Reference Architecture for Algorithmic Trading (Dec 2020): https://d1.awsstatic.com/architecture-diagrams/ArchitectureDiagrams/algorithmic-trading-ra.pdf 47 | - Blog Post (Feb 2021): Algorithmic Trading with SageMaker and AWS Data Exchange: https://aws.amazon.com/blogs/industries/algorithmic-trading-on-aws-with-amazon-sagemaker-and-aws-data-exchange/ 48 | - Blog Post (June 2021): How to run what-if scenarios for trading strategies with Amazon FinSpace: https://aws.amazon.com/blogs/industries/how-to-run-what-if-scenarios-for-trading-strategies-with-amazon-finspace/ 49 | - Blog Post (July 2021): Algorithmic Trading with SageMaker: https://aws.amazon.com/blogs/machine-learning/building-algorithmic-trading-strategies-with-amazon-sagemaker/ 50 | - Blog Post (Jan 2022): Backtest trading strategies with Amazon Kinesis Data Streams long-term retention and Amazon SageMaker: https://aws.amazon.com/blogs/big-data/backtest-trading-strategies-with-amazon-kinesis-data-streams-long-term-retention-and-amazon-sagemaker/ 51 | - YouTube (Dec 2020): Automated Analysis of Financial Data and Algorithmic Trading: https://www.youtube.com/watch?v=i0izMuiU12I 52 | 53 | --- 54 | 55 | ## Instructions using SageMaker Studio 56 | 57 | A newer version of this workshop has been developed for SageMaker Studio and is available in folder **5_SageMakerStudio**. 58 | 59 | 1. Set up SageMaker Studio with sufficient permissions. 60 | 1. Run Jupyter Notebook: **1_Setup.ipynb**: This will set up the S3 bucket, the Glue Data Catalog schema, and the Athena workgroup. For SageMaker Studio, a custom kernel is built and attached. 61 | 1. Run Jupyter Notebook: **2_Load_Data.ipynb**: This will load daily stock price data into the S3 bucket. 62 | 1. Run Jupyter Notebook: **3_Backtest_Strategy.ipynb**: Backtest the strategy in SageMaker Studio and find optimal parameters. 63 | 64 | --- 65 | 66 | ## Instructions using SageMaker Notebooks 67 | 68 | ## Step 0: Set up environment 69 | 70 | 1. Create a new unique S3 bucket whose name starts with "**algotrading-**" (e.g. "**algotrading-YYYY-MM-DD-XYZ**"); this bucket is used for storing external price data. 71 | 1. For the base infrastructure components (SageMaker Notebook, Athena, Glue Tables), deploy the following [CloudFormation template](https://github.com/aws-samples/algorithmic-trading/raw/master/0_Setup/algo-reference.yaml). Go to [CloudFormation](https://console.aws.amazon.com/cloudformation/home?#/stacks/new?stackName=algotrading) and upload the downloaded CloudFormation template. For the S3 bucket parameter, specify the previously created bucket name. Verify that the stack name is **algotrading** before creating the stack, and acknowledge that IAM changes will be made. 72 | 73 | This step will take about 5 minutes. 74 | 75 | ## Step 1: Load Historical Price Data 76 | 77 | Here are a few data source options for this workshop. The daily datasets can be downloaded and generated in a few minutes; for the intraday dataset, please plan for at least 15 minutes. 78 | 1. Sample Daily EOD Stock Price Data (from public data source or AWS Data Exchange) 79 | 80 | ### Option 1a: Sample Daily EOD Stock Price Data (from public data source) 81 | 82 | If you are not able to use AWS Data Exchange in your account, you can instead run the following Jupyter notebook, which generates some sample EOD price data from a public data source. Run all the cells in **1_Data/Load_Hist_Data_Daily_Public.ipynb**.
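Whichever data source option you choose, you can optionally sanity-check the loaded data before moving on. The following is a minimal sketch (not part of the workshop notebooks) that mirrors the PyAthena query used later in the strategy notebooks; it assumes the Step 0 stack has created the `algo_data.hist_data_daily` table, that the workshop runs in us-east-1, and that you replace the placeholder bucket name with your own:

```python
import pandas as pd
from pyathena import connect

# Placeholder bucket name -- replace with the bucket you created in Step 0.
s3bucket = "algotrading-YYYY-MM-DD-XYZ"

# Assumes the Glue table algo_data.hist_data_daily exists and the region is us-east-1.
conn = connect(s3_staging_dir="s3://" + s3bucket + "/results/", region_name="us-east-1")

# Pull a few rows to confirm the daily price data is queryable through Athena.
df = pd.read_sql("SELECT dt, open, high, low, close, vol FROM algo_data.hist_data_daily LIMIT 5;", conn)
print(df)
```

If this returns rows with the expected columns, the data load succeeded and you can continue with the backtesting steps.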
83 | 84 | ### Option 1b: Sample Daily EOD Stock Price Data (via AWS Data Exchange) 85 | 86 | If you want to use AWS Data Exchange, you can, for example, download the following [dataset](https://aws.amazon.com/marketplace/pp/prodview-e2aizdzkos266#overview). There are multiple options available, and we picked this one for demonstration purposes. 87 | 88 | To download this dataset, first complete a subscription request where you provide the required information for Company Name, Name, Email Address, and Intended Use Case. Once the provider confirms the subscription, you can navigate to [AWS Data Exchange/My subscriptions/Entitled data](https://console.aws.amazon.com/dataexchange/home?#/entitled-data). 89 | Then choose the latest revision for this subscription, select all assets, and click on **Export to Amazon S3**. In the new window, select the root folder of the S3 bucket that starts with "*algotrading-data-*". Then click on **Export** and wait until your export job is completed. 90 | 91 | In order to use this dataset for algorithmic trading, we want to standardize it to a CSV file with the following columns: **dt, sym, open, high, low, close, vol**. 92 | Once you have successfully exported the dataset, please run the following Jupyter notebook to format the dataset and store it in the ***hist_data_daily*** folder of your S3 bucket. Go to [Amazon SageMaker/Notebook/Notebook instances](https://console.aws.amazon.com/sagemaker/home?#/notebook-instances), then click on **Open Jupyter** for the provisioned notebook instance. Run all the cells in **1_Data/Load_Hist_Data_Daily.ipynb**. 93 | 94 | ## Step 2: Backtest a trend following strategy (or move directly to Step 3) 95 | 96 | In this module, we backtest a trend following strategy on daily price data with Amazon SageMaker. For these notebooks, please ensure that you have daily price data loaded. 97 | 98 | You can choose between the following trading strategies: 99 | 1. **Simple Moving Average Strategy**: **2_Strategies/Strategy_SMA.ipynb** 100 | 101 | 1. **Daily Breakout Strategy**: **2_Strategies/Strategy_Breakout.ipynb** 102 | 103 | Open the Jupyter notebook for your selected strategy in the folder **2_Strategies** and run it from your Amazon SageMaker Notebook instance. The instructions in the notebook provide guidance on how to optimize the strategy. 104 | 105 | ## Step 3: Backtest a machine-learning based strategy 106 | 107 | In this module, we backtest a machine-learning strategy with Amazon SageMaker on daily or intraday price data. Please ensure that you have daily or intraday price data loaded before running the corresponding notebooks. 108 | 109 | There are usually two parts: one for training the machine learning model and one for backtesting the strategy. You can run both notebooks or skip training the model, as a trained model is already available in the repository: 110 | 111 | **ML Long/Short Prediction Strategy** 112 | * Model Training (Daily Price Data) (Optional): **3_Models/Train_Model_Forecast.ipynb** 113 | * Strategy Backtesting (Daily Price Data): **2_Strategies/Strategy_ML_Forecast.ipynb** 114 | 115 | --- 116 | 117 | ## Instructions using Amazon FinSpace 118 | 119 | 1. Set up Amazon FinSpace 120 | 1. Run the following notebook: **2_Strategies/Strategy_WhatIfScenarios.ipynb** in Amazon FinSpace 121 | 122 | ### Congratulations! You have completed the workshop. Don't forget to clean up the resources if you used your own account for this workshop (a cleanup sketch follows below).
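If you ran the workshop in your own account, the following is a minimal cleanup sketch using boto3. It assumes the default stack name **algotrading** from Step 0 and uses a placeholder for the data bucket you created; both values are assumptions to adjust for your environment. Deleting the stack also removes the base infrastructure it provisioned (SageMaker notebook instance, Athena and Glue resources).

```python
import boto3

# Placeholders (assumed values) -- adjust to your own stack name and bucket name.
stack_name = "algotrading"                   # default stack name from Step 0
bucket_name = "algotrading-YYYY-MM-DD-XYZ"   # the data bucket you created manually

# Empty and delete the manually created data bucket.
s3 = boto3.resource("s3")
bucket = s3.Bucket(bucket_name)
bucket.objects.all().delete()
bucket.delete()

# Delete the CloudFormation stack that provisioned the base infrastructure.
cfn = boto3.client("cloudformation")
cfn.delete_stack(StackName=stack_name)
cfn.get_waiter("stack_delete_complete").wait(StackName=stack_name)
print("Cleanup complete.")
```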
-------------------------------------------------------------------------------- /2_Strategies/Strategy_SMA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%run init_model.py 'algo_simple_sma'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Step 1) Data Preparation" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# get S3 bucket\n", 26 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 27 | "s3bucket=s3bucket[0]\n", 28 | "s3bucket" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import sys\n", 38 | "!{sys.executable} -m pip install PyAthena" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import os\n", 48 | "import sagemaker as sage\n", 49 | "from sagemaker import get_execution_role\n", 50 | "import datetime\n", 51 | "from sagemaker.tensorflow import TensorFlow\n", 52 | "import json\n", 53 | "\n", 54 | "role = get_execution_role()\n", 55 | "sess = sage.Session()\n", 56 | "region = sess.boto_session.region_name" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "import pandas as pd\n", 66 | "from pyathena import connect\n", 67 | "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/',\n", 68 | " region_name=region)\n", 69 | "\n", 70 | "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n", 71 | "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n", 72 | "del df['dt']\n", 73 | "df.head()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "trainCount=int(len(df)*0.4)\n", 83 | "dfTrain = df.iloc[:trainCount]\n", 84 | "\n", 85 | "dfTest = df.iloc[trainCount:]\n", 86 | "dfTest.to_csv('local/'+algo_name+'/input/data/training/data.csv')\n", 87 | "dfTest.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "scrolled": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "%matplotlib notebook\n", 99 | "dfTest[\"close\"].plot()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# Step 2) Modify Strategy Configuration \n", 107 | "\n", 108 | "In the following cell, you can adjust the parameters for the strategy.\n", 109 | "\n", 110 | "* `user` = Name for Leaderboard (optional)\n", 111 | "* `fast_period` = Fast Period for Moving Average Indicator in min (e.g. 50)\n", 112 | "* `slow_period` = Slow Period for Moving Average Indicator in min (e.g. 200)\n", 113 | "* `size` = The number of shares for a transaction\n", 114 | "\n", 115 | "`Tip`: A good starting point for improving the strategy is to reduce the number of trades that get triggered by increasing the slow and fast period. Longer periods tend to perform better. 
" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "%%writefile local/{algo_name}/input/config/hyperparameters.json\n", 125 | "{ \"user\" : \"user\",\n", 126 | " \"fast_period\" : \"50\",\n", 127 | " \"slow_period\" : \"200\",\n", 128 | " \"size\" : \"100\"\n", 129 | "}" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "%run update_config.py $algo_name" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "# Step 3) Modify Strategy Code\n", 146 | "\n", 147 | "In the following cell, you can modify the strategy code. For the first backtests, you can leave it as is.\n", 148 | "\n", 149 | "`Tip`: A good starting point for improving the strategy is to try different indicators like ExponentialMovingAverage or delay when trades are triggered and check crossover again before placing a trade.\n", 150 | "\n", 151 | "Here are some helpful links:\n", 152 | "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n", 153 | "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n", 154 | "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "%%writefile model/{algo_name}.py\n", 164 | "import backtrader as bt\n", 165 | "from algo_base import *\n", 166 | "\n", 167 | "class MyStrategy(StrategyTemplate):\n", 168 | "\n", 169 | " def __init__(self): # Initiation\n", 170 | " super(MyStrategy, self).__init__()\n", 171 | " self.config[\"fast_period\"]=int(self.config[\"fast_period\"])\n", 172 | " self.config[\"slow_period\"]=int(self.config[\"slow_period\"])\n", 173 | " self.config[\"size\"]=int(self.config[\"size\"])\n", 174 | "\n", 175 | " self.smaFast = bt.ind.SimpleMovingAverage(period=self.config[\"fast_period\"])\n", 176 | " self.smaSlow = bt.ind.SimpleMovingAverage(period=self.config[\"slow_period\"])\n", 177 | " self.size = self.config[\"size\"]\n", 178 | "\n", 179 | " def init_broker(broker):\n", 180 | " broker.setcash(100000.0)\n", 181 | " broker.setcommission(commission=0.0) \n", 182 | " \n", 183 | " def add_data(cerebro):\n", 184 | " data = btfeeds.GenericCSVData(\n", 185 | " dataname=MyStrategy.TRAIN_FILE,\n", 186 | " dtformat=('%Y-%m-%d'),\n", 187 | " timeframe=bt.TimeFrame.Days,\n", 188 | " datetime=0,\n", 189 | " time=-1,\n", 190 | " high=2,\n", 191 | " low=3,\n", 192 | " open=1,\n", 193 | " close=4,\n", 194 | " volume=5,\n", 195 | " openinterest=-1\n", 196 | " )\n", 197 | " cerebro.adddata(data)\n", 198 | "\n", 199 | " def next(self): # Processing\n", 200 | " super(MyStrategy, self).next()\n", 201 | " dt=self.datas[0].datetime.datetime(0)\n", 202 | " if not self.position:\n", 203 | " if self.smaFast[0] > self.smaSlow[0]:\n", 204 | " self.buy(size=self.size) # Go long\n", 205 | " else:\n", 206 | " self.sell(size=self.size) # Go short\n", 207 | " elif self.position.size>0 and self.smaFast[0] < self.smaSlow[0]:\n", 208 | " self.sell(size=2*self.size) # Go short\n", 209 | " elif self.position.size<0 and self.smaFast[0] > self.smaSlow[0]: \n", 210 | " self.buy(size=2*self.size) # Go long" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "# Step 4) Backtest Locally\n", 218 | "\n", 219 | 
"**Please note that the initial docker build may take a few minutes. Subsequent runs are fast.**" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "#Build Local Algo Image\n", 229 | "!docker build -t $algo_name .\n", 230 | "!docker run -v $(pwd)/local/$algo_name:/opt/ml --rm $algo_name train" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "from IPython.display import Image\n", 240 | "Image(filename='local/'+algo_name+'/model/chart.png')" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "## Refine your trading strategy (step 2 to 4). Once you are ready, move on to the next step." 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "# Step 5) Backtest on SageMaker and submit performance" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "#Deploy Algo Image to ECS\n", 264 | "!./build_and_push.sh $algo_name" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "#Run Remote test via SageMaker\n", 274 | "import sagemaker as sage\n", 275 | "from sagemaker import get_execution_role\n", 276 | "from sagemaker.estimator import Estimator \n", 277 | "\n", 278 | "role = get_execution_role()\n", 279 | "sess = sage.Session()\n", 280 | "\n", 281 | "WORK_DIRECTORY = 'local/'+algo_name+'/input/data/training'\n", 282 | "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n", 283 | "print(data_location)\n", 284 | "\n", 285 | "conf_file='local/'+algo_name+'/input/config/hyperparameters.json'\n", 286 | "with open(conf_file, 'r') as f:\n", 287 | " config = json.load(f)\n", 288 | "#config['sim_data']='True'\n", 289 | "print(config)\n", 290 | "\n", 291 | "prefix=algo_name\n", 292 | "job_name=prefix.replace('_','-')\n", 293 | "\n", 294 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 295 | "region = sess.boto_session.region_name\n", 296 | "image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'\n", 297 | "\n", 298 | "algo = sage.estimator.Estimator(\n", 299 | " image_uri=image,\n", 300 | " role=role,\n", 301 | " instance_count=1,\n", 302 | " instance_type='ml.m4.xlarge',\n", 303 | " output_path=\"s3://{}/output\".format(sess.default_bucket()),\n", 304 | " sagemaker_session=sess,\n", 305 | " base_job_name=job_name,\n", 306 | " hyperparameters=config,\n", 307 | " metric_definitions=[\n", 308 | " {\n", 309 | " \"Name\": \"algo:pnl\",\n", 310 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 311 | " },\n", 312 | " {\n", 313 | " \"Name\": \"algo:sharpe_ratio\",\n", 314 | " \"Regex\": \"Sharpe Ratio:(.*?),\"\n", 315 | " }\n", 316 | " ])\n", 317 | "algo.fit(data_location)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "from sagemaker.analytics import TrainingJobAnalytics\n", 327 | "\n", 328 | "latest_job_name = algo.latest_training_job.job_name\n", 329 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n", 330 | "metrics_dataframe" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 
| "scrolled": true 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "#Get Algo Chart from S3\n", 342 | "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n", 343 | "import boto3\n", 344 | "s3 = boto3.resource('s3')\n", 345 | "my_bucket = s3.Bucket(sess.default_bucket())\n", 346 | "my_bucket.download_file(model_name,'model.tar.gz')\n", 347 | "!tar -xzf model.tar.gz\n", 348 | "!rm model.tar.gz\n", 349 | "from IPython.display import Image\n", 350 | "Image(filename='chart.png') " 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "### Congratulations! You've completed this strategy." 358 | ] 359 | } 360 | ], 361 | "metadata": { 362 | "kernelspec": { 363 | "display_name": "conda_tensorflow_p36", 364 | "language": "python", 365 | "name": "conda_tensorflow_p36" 366 | }, 367 | "language_info": { 368 | "codemirror_mode": { 369 | "name": "ipython", 370 | "version": 3 371 | }, 372 | "file_extension": ".py", 373 | "mimetype": "text/x-python", 374 | "name": "python", 375 | "nbconvert_exporter": "python", 376 | "pygments_lexer": "ipython3", 377 | "version": "3.6.10" 378 | } 379 | }, 380 | "nbformat": 4, 381 | "nbformat_minor": 2 382 | } 383 | -------------------------------------------------------------------------------- /2_Strategies/Strategy_Breakout.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%run init_model.py 'algo_daily_breakout'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Step 1) Data Preparation" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# get S3 bucket\n", 26 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 27 | "s3bucket=s3bucket[0]\n", 28 | "s3bucket" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import sys\n", 38 | "!{sys.executable} -m pip install PyAthena" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import os\n", 48 | "import sagemaker as sage\n", 49 | "from sagemaker import get_execution_role\n", 50 | "import datetime\n", 51 | "from sagemaker.tensorflow import TensorFlow\n", 52 | "import json\n", 53 | "\n", 54 | "role = get_execution_role()\n", 55 | "sess = sage.Session()\n", 56 | "region = sess.boto_session.region_name" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "import pandas as pd\n", 66 | "from pyathena import connect\n", 67 | "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/',\n", 68 | " region_name=region)\n", 69 | "\n", 70 | "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n", 71 | "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n", 72 | "del df['dt']\n", 73 | "df.head()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "trainCount=int(len(df)*0.4)\n", 83 | "dfTrain = df.iloc[:trainCount]\n", 84 | "\n", 85 | "dfTest = df.iloc[trainCount:]\n", 86 | 
"dfTest.to_csv('local/'+algo_name+'/input/data/training/data.csv')\n", 87 | "dfTest.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "%matplotlib notebook\n", 97 | "dfTest[\"close\"].plot()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "# Step 2) Modify Strategy Configuration \n", 105 | "\n", 106 | "In the following cell, you can adjust the parameters for the strategy.\n", 107 | "\n", 108 | "* `user` = Name for Leaderboard (optional)\n", 109 | "* `go_long` = Go Long for Breakout (true or false)\n", 110 | "* `go_short` = Go Short for Breakout (true or false)\n", 111 | "* `period` = Length of window for previous high and low\n", 112 | "* `size` = The number of shares for a transaction\n", 113 | "\n", 114 | "`Tip`: A good starting point for improving the strategy is to lengthen the period of the previous high and low. Equity Markets tend to have a long bias and if you only consider long trades this might improve the performance." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "%%writefile local/{algo_name}/input/config/hyperparameters.json\n", 124 | "{ \"user\" : \"user\",\n", 125 | " \"go_long\" : \"true\",\n", 126 | " \"go_short\" : \"true\",\n", 127 | " \"period\" : \"50\",\n", 128 | " \"size\" : \"100\"\n", 129 | "}" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "%run update_config.py $algo_name" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "# Step 3) Modify Strategy Code\n", 146 | "\n", 147 | "`Tip`: A good starting point for improving the strategy is to add additional indicators like ATR (Average True Range) before placing a trade. 
You want to avoid false signals if there is not enough volatility.\n", 148 | "\n", 149 | "Here are some helpful links:\n", 150 | "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n", 151 | "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n", 152 | "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "%%writefile model/{algo_name}.py\n", 162 | "import backtrader as bt\n", 163 | "from algo_base import *\n", 164 | "import pytz\n", 165 | "from pytz import timezone\n", 166 | "\n", 167 | "class MyStrategy(StrategyTemplate):\n", 168 | "\n", 169 | " def __init__(self): # Initiation\n", 170 | " super(MyStrategy, self).__init__()\n", 171 | " self.config[\"period\"]=int(self.config[\"period\"])\n", 172 | " self.config[\"size\"]=int(self.config[\"size\"])\n", 173 | " self.config[\"go_long\"]=(str(self.config[\"go_long\"]).lower()==\"true\")\n", 174 | " self.config[\"go_short\"]=(str(self.config[\"go_short\"]).lower()==\"true\")\n", 175 | "\n", 176 | " self.highest = bt.ind.Highest(period=self.config[\"period\"])\n", 177 | " self.lowest = bt.ind.Lowest(period=self.config[\"period\"])\n", 178 | " self.size = self.config[\"size\"]\n", 179 | " \n", 180 | " def init_broker(broker):\n", 181 | " broker.setcash(100000.0)\n", 182 | " broker.setcommission(commission=0.0) \n", 183 | " \n", 184 | " def add_data(cerebro):\n", 185 | " data = btfeeds.GenericCSVData(\n", 186 | " dataname=MyStrategy.TRAIN_FILE,\n", 187 | " dtformat=('%Y-%m-%d'),\n", 188 | " timeframe=bt.TimeFrame.Days,\n", 189 | " datetime=0,\n", 190 | " time=-1,\n", 191 | " high=2,\n", 192 | " low=3,\n", 193 | " open=1,\n", 194 | " close=4,\n", 195 | " volume=5,\n", 196 | " openinterest=-1\n", 197 | " )\n", 198 | " cerebro.adddata(data)\n", 199 | " \n", 200 | " def next(self): # Processing\n", 201 | " super(MyStrategy, self).next()\n", 202 | " dt=self.datas[0].datetime.datetime(0)\n", 203 | " if not self.position:\n", 204 | " if self.config[\"go_long\"] and self.datas[0] > self.highest[-1]:\n", 205 | " self.buy(size=self.size) # Go long\n", 206 | " elif self.config[\"go_short\"] and self.datas[0] < self.lowest[-1]:\n", 207 | " self.sell(size=self.size) # Go short\n", 208 | " elif self.position.size>0 and self.datas[0] < self.highest[-1]:\n", 209 | " self.close()\n", 210 | " elif self.position.size<0 and self.datas[0] > self.lowest[-1]: \n", 211 | " self.close()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "# Step 4) Backtest Locally\n", 219 | "\n", 220 | "**Please note that the initial docker build may take a few minutes. Subsequent runs are fast.**" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "#Build Local Algo Image\n", 230 | "!docker build -t $algo_name .\n", 231 | "!docker run -v $(pwd)/local/$algo_name:/opt/ml --rm $algo_name train" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "scrolled": false 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "from IPython.display import Image\n", 243 | "Image(filename='local/'+algo_name+'/model/chart.png')" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## Refine your trading strategy (step 2 to 4). 
Once you are ready, move on to the next step." 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "# Step 5) Backtest on SageMaker and submit performance" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "#Deploy Algo Image to ECS\n", 267 | "!./build_and_push.sh $algo_name" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "#Run Remote Forwardtest via SageMaker\n", 277 | "import sagemaker as sage\n", 278 | "from sagemaker import get_execution_role\n", 279 | "from sagemaker.estimator import Estimator \n", 280 | "\n", 281 | "role = get_execution_role()\n", 282 | "sess = sage.Session()\n", 283 | "\n", 284 | "WORK_DIRECTORY = 'local/'+algo_name+'/input/data/training'\n", 285 | "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n", 286 | "print(data_location)\n", 287 | "\n", 288 | "conf_file='local/'+algo_name+'/input/config/hyperparameters.json'\n", 289 | "with open(conf_file, 'r') as f:\n", 290 | " config = json.load(f)\n", 291 | "#config['sim_data']='True'\n", 292 | "print(config)\n", 293 | "\n", 294 | "prefix=algo_name\n", 295 | "job_name=prefix.replace('_','-')\n", 296 | "\n", 297 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 298 | "region = sess.boto_session.region_name\n", 299 | "image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'\n", 300 | "\n", 301 | "algo = sage.estimator.Estimator(\n", 302 | " image_uri=image,\n", 303 | " role=role,\n", 304 | " instance_count=1,\n", 305 | " instance_type='ml.m4.xlarge',\n", 306 | " output_path=\"s3://{}/output\".format(sess.default_bucket()),\n", 307 | " sagemaker_session=sess,\n", 308 | " base_job_name=job_name,\n", 309 | " hyperparameters=config,\n", 310 | " metric_definitions=[\n", 311 | " {\n", 312 | " \"Name\": \"algo:pnl\",\n", 313 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 314 | " },\n", 315 | " {\n", 316 | " \"Name\": \"algo:sharpe_ratio\",\n", 317 | " \"Regex\": \"Sharpe Ratio:(.*?),\"\n", 318 | " }\n", 319 | " ])\n", 320 | "algo.fit(data_location)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "#Get Algo Metrics\n", 330 | "from sagemaker.analytics import TrainingJobAnalytics\n", 331 | "\n", 332 | "latest_job_name = algo.latest_training_job.job_name\n", 333 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n", 334 | "metrics_dataframe" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": { 341 | "scrolled": true 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "#Get Algo Chart from S3\n", 346 | "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n", 347 | "import boto3\n", 348 | "s3 = boto3.resource('s3')\n", 349 | "my_bucket = s3.Bucket(sess.default_bucket())\n", 350 | "my_bucket.download_file(model_name,'model.tar.gz')\n", 351 | "!tar -xzf model.tar.gz\n", 352 | "!rm model.tar.gz\n", 353 | "from IPython.display import Image\n", 354 | "Image(filename='chart.png') " 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "### Congratulations! You've completed this strategy." 
362 | ] 363 | } 364 | ], 365 | "metadata": { 366 | "kernelspec": { 367 | "display_name": "conda_tensorflow_p36", 368 | "language": "python", 369 | "name": "conda_tensorflow_p36" 370 | }, 371 | "language_info": { 372 | "codemirror_mode": { 373 | "name": "ipython", 374 | "version": 3 375 | }, 376 | "file_extension": ".py", 377 | "mimetype": "text/x-python", 378 | "name": "python", 379 | "nbconvert_exporter": "python", 380 | "pygments_lexer": "ipython3", 381 | "version": "3.6.10" 382 | } 383 | }, 384 | "nbformat": 4, 385 | "nbformat_minor": 2 386 | } 387 | -------------------------------------------------------------------------------- /4_Kinesis/algo-kinesis.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: 2010-09-09 2 | Description: >- 3 | This CloudFormation sample template migrates Market data from S3 to Kinesis using 4 | DMS. 5 | This Template requires an existing source s3 bucket with full load of market data. 6 | Parameters: 7 | VpcCIDR: 8 | Description: Please enter the IP range (CIDR notation) for this VPC 9 | Type: String 10 | Default: 10.111.0.0/16 11 | 12 | PublicSubnet1CIDR: 13 | Description: Please enter the IP range (CIDR notation) for the public subnet in the first Availability Zone 14 | Type: String 15 | Default: 10.111.10.0/24 16 | 17 | PublicSubnet2CIDR: 18 | Description: Please enter the IP range (CIDR notation) for the public subnet in the second Availability Zone 19 | Type: String 20 | Default: 10.111.11.0/24 21 | 22 | MarketDataS3Bucket: 23 | Type: String 24 | Description: S3 Bucket where the market data will reside. 25 | 26 | Metadata: 27 | 'AWS::CloudFormation::Interface': 28 | ParameterGroups: 29 | - Label: 30 | default: Configuration 31 | Parameters: 32 | - VpcCIDR 33 | - PublicSubnet1CIDR 34 | - PublicSubnet2CIDR 35 | - MarketDataS3Bucket 36 | ParameterLabels: 37 | VpcCIDR: 38 | default: Provide VPC CIDR Range 39 | PublicSubnet1CIDR: 40 | default: Provide Public Subnet 1 CIDR Range 41 | PublicSubnet2CIDR: 42 | default: Provide Public Subnet 2 CIDR Range 43 | MarketDataS3Bucket: 44 | default: Name of the S3 Bucket where market Data resides 45 | 46 | Resources: 47 | VPC: 48 | Type: AWS::EC2::VPC 49 | Properties: 50 | CidrBlock: !Ref VpcCIDR 51 | EnableDnsHostnames: true 52 | Tags: 53 | - Key: Name 54 | Value: Kinesis Algo Trading 55 | 56 | InternetGateway: 57 | Type: AWS::EC2::InternetGateway 58 | Properties: 59 | Tags: 60 | - Key: Name 61 | Value: Kinesis Algo Trading 62 | 63 | InternetGatewayAttachment: 64 | Type: AWS::EC2::VPCGatewayAttachment 65 | Properties: 66 | InternetGatewayId: !Ref InternetGateway 67 | VpcId: !Ref VPC 68 | 69 | PublicSubnet1: 70 | Type: AWS::EC2::Subnet 71 | Properties: 72 | VpcId: !Ref VPC 73 | AvailabilityZone: !Select [0, !GetAZs ""] 74 | CidrBlock: !Ref PublicSubnet1CIDR 75 | MapPublicIpOnLaunch: true 76 | Tags: 77 | - Key: Name 78 | Value: Kinesis Algo Trading Public Subnet (AZ1) 79 | 80 | PublicSubnet2: 81 | Type: AWS::EC2::Subnet 82 | Properties: 83 | VpcId: !Ref VPC 84 | AvailabilityZone: !Select [1, !GetAZs ""] 85 | CidrBlock: !Ref PublicSubnet2CIDR 86 | MapPublicIpOnLaunch: true 87 | Tags: 88 | - Key: Name 89 | Value: Kinesis Algo Trading Public Subnet (AZ2) 90 | 91 | PublicRouteTable: 92 | Type: AWS::EC2::RouteTable 93 | Properties: 94 | VpcId: !Ref VPC 95 | Tags: 96 | - Key: Name 97 | Value: Kinesis Algo Trading Public Routes 98 | 99 | DefaultPublicRoute: 100 | Type: AWS::EC2::Route 101 | DependsOn: InternetGatewayAttachment 102 | Properties: 103 | RouteTableId: 
!Ref PublicRouteTable 104 | DestinationCidrBlock: 0.0.0.0/0 105 | GatewayId: !Ref InternetGateway 106 | 107 | PublicSubnet1RouteTableAssociation: 108 | Type: AWS::EC2::SubnetRouteTableAssociation 109 | Properties: 110 | RouteTableId: !Ref PublicRouteTable 111 | SubnetId: !Ref PublicSubnet1 112 | 113 | PublicSubnet2RouteTableAssociation: 114 | Type: AWS::EC2::SubnetRouteTableAssociation 115 | Properties: 116 | RouteTableId: !Ref PublicRouteTable 117 | SubnetId: !Ref PublicSubnet2 118 | 119 | KinesisDMSCloudwatchRole: 120 | Type: 'AWS::IAM::Role' 121 | Properties: 122 | RoleName: kinesis-dms-cloudwatch-logs-role 123 | AssumeRolePolicyDocument: 124 | Version: 2012-10-17 125 | Statement: 126 | - Effect: Allow 127 | Principal: 128 | Service: 129 | - dms.amazonaws.com 130 | Action: 131 | - 'sts:AssumeRole' 132 | ManagedPolicyArns: 133 | - 'arn:aws:iam::aws:policy/service-role/AmazonDMSCloudWatchLogsRole' 134 | Path: / 135 | 136 | KinesisDMSVpcRole: 137 | Type: 'AWS::IAM::Role' 138 | Properties: 139 | RoleName: kinesis-dms-vpc-role 140 | AssumeRolePolicyDocument: 141 | Version: 2012-10-17 142 | Statement: 143 | - Effect: Allow 144 | Principal: 145 | Service: 146 | - dms.amazonaws.com 147 | Action: 148 | - 'sts:AssumeRole' 149 | ManagedPolicyArns: 150 | - 'arn:aws:iam::aws:policy/service-role/AmazonDMSVPCManagementRole' 151 | Path: / 152 | 153 | DMSReplicationSubnetGroup: 154 | Type: 'AWS::DMS::ReplicationSubnetGroup' 155 | Properties: 156 | ReplicationSubnetGroupDescription: Subnets available for DMS 157 | SubnetIds: 158 | - !Ref PublicSubnet1 159 | - !Ref PublicSubnet2 160 | DependsOn: 161 | - KinesisDMSVpcRole 162 | - KinesisDMSCloudwatchRole 163 | 164 | KinesisStream: 165 | Type: 'AWS::Kinesis::Stream' 166 | Properties: 167 | Name: 'kinesis-algo-blog' 168 | RetentionPeriodHours: 8760 169 | ShardCount: 1 170 | StreamEncryption: 171 | EncryptionType: KMS 172 | KeyId: alias/aws/kinesis 173 | 174 | S3SourceDMSRole: 175 | Type: 'AWS::IAM::Role' 176 | Properties: 177 | AssumeRolePolicyDocument: 178 | Version: 2012-10-17 179 | Statement: 180 | - Effect: Allow 181 | Principal: 182 | Service: 183 | - dms.amazonaws.com 184 | Action: 185 | - 'sts:AssumeRole' 186 | Path: / 187 | Policies: 188 | - PolicyName: S3AccessForDMSPolicy 189 | PolicyDocument: 190 | Version: 2012-10-17 191 | Statement: 192 | - Effect: Allow 193 | Action: 194 | - 's3:GetObject' 195 | Resource: 196 | - !Join 197 | - '' 198 | - - 'arn:' 199 | - !Ref AWS::Partition 200 | - ':s3:::' 201 | - !Ref MarketDataS3Bucket 202 | - !Join 203 | - '' 204 | - - 'arn:' 205 | - !Ref AWS::Partition 206 | - ':s3:::' 207 | - !Ref MarketDataS3Bucket 208 | - '/*' 209 | - Effect: Allow 210 | Action: 's3:ListBucket' 211 | Resource: 212 | - !Join 213 | - '' 214 | - - 'arn:' 215 | - !Ref AWS::Partition 216 | - ':s3:::' 217 | - !Ref MarketDataS3Bucket 218 | DMSSecurityGroup: 219 | Type: 'AWS::EC2::SecurityGroup' 220 | Properties: 221 | GroupDescription: Security group for DMS Instance 222 | VpcId: !Ref VPC 223 | TargetKinesisRole: 224 | Type: 'AWS::IAM::Role' 225 | Properties: 226 | AssumeRolePolicyDocument: 227 | Version: 2012-10-17 228 | Statement: 229 | - Effect: Allow 230 | Principal: 231 | Service: 232 | - dms.amazonaws.com 233 | Action: 234 | - 'sts:AssumeRole' 235 | Path: / 236 | Policies: 237 | - PolicyName: KinesisAccessForDMSPolicy 238 | PolicyDocument: 239 | Version: 2012-10-17 240 | Statement: 241 | - Effect: Allow 242 | Action: 243 | - 'kinesis:DescribeStream' 244 | - 'kinesis:PutRecord' 245 | - 'kinesis:PutRecords' 246 | Resource: 247 | - !GetAtt 
248 | - KinesisStream 249 | - Arn 250 | DMSReplicationInstance: 251 | Type: 'AWS::DMS::ReplicationInstance' 252 | Properties: 253 | PubliclyAccessible: true 254 | ReplicationInstanceClass: dms.r4.xlarge 255 | ReplicationSubnetGroupIdentifier: !Ref DMSReplicationSubnetGroup 256 | VpcSecurityGroupIds: 257 | - !Ref DMSSecurityGroup 258 | 259 | S3SourceEndpoint: 260 | Type: 'AWS::DMS::Endpoint' 261 | Properties: 262 | EndpointType: source 263 | EngineName: "s3" 264 | S3Settings: 265 | BucketName: !Ref MarketDataS3Bucket 266 | ExternalTableDefinition: >- 267 | {"TableCount": "1", "Tables": [{"TableName":"intc","TablePath":"marketData/intc/","TableOwner":"marketData","TableColumns":[{"ColumnName": "dt","ColumnType":"TIMESTAMP","ColumnNullable": "false","ColumnIsPk":"true"},{"ColumnName": "sym","ColumnType": "STRING","ColumnLength": "10"},{"ColumnName": "open","ColumnType": "NUMERIC","ColumnPrecision": "5","ColumnScale": "2"},{"ColumnName": "high","ColumnType": "NUMERIC","ColumnPrecision": "5","ColumnScale": "2"},{"ColumnName": "low","ColumnType": "NUMERIC","ColumnPrecision": "5","ColumnScale": "2"}, {"ColumnName": "close","ColumnType": "NUMERIC","ColumnPrecision": "5","ColumnScale": "2"},{"ColumnName": "vol","ColumnType": "NUMERIC","ColumnPrecision": "12","ColumnScale": "2"}],"TableColumnsTotal": "7"}]} 268 | ServiceAccessRoleArn: !GetAtt 269 | - S3SourceDMSRole 270 | - Arn 271 | DependsOn: 272 | - DMSReplicationInstance 273 | 274 | KinesisTargetEndpoint: 275 | Type: 'AWS::DMS::Endpoint' 276 | Properties: 277 | EndpointType: target 278 | EngineName: "kinesis" 279 | KinesisSettings: 280 | MessageFormat: json 281 | StreamArn: !GetAtt 282 | - KinesisStream 283 | - Arn 284 | ServiceAccessRoleArn: !GetAtt 285 | - TargetKinesisRole 286 | - Arn 287 | DependsOn: 288 | - DMSReplicationInstance 289 | 290 | DMSReplicationTask: 291 | Type: 'AWS::DMS::ReplicationTask' 292 | Properties: 293 | MigrationType: full-load 294 | ReplicationInstanceArn: !Ref DMSReplicationInstance 295 | ReplicationTaskSettings: >- 296 | { "Logging" : { "EnableLogging" : true, "LogComponents": [ { "Id" : 297 | "SOURCE_UNLOAD", "Severity" : "LOGGER_SEVERITY_DEFAULT" }, { "Id" : 298 | "SOURCE_CAPTURE", "Severity" : "LOGGER_SEVERITY_DEFAULT" }, { "Id" : 299 | "TARGET_LOAD", "Severity" : "LOGGER_SEVERITY_DEFAULT" }, { "Id" : 300 | "TARGET_APPLY", "Severity" : "LOGGER_SEVERITY_DEFAULT" } ] } } 301 | SourceEndpointArn: !Ref S3SourceEndpoint 302 | TableMappings: >- 303 | { "rules": [ { "rule-type" : "selection", "rule-id" : "1", "rule-name" : 304 | "1", "object-locator" : { "schema-name" : "%", "table-name" : "%" }, 305 | "rule-action" : "include" } ] } 306 | TargetEndpointArn: !Ref KinesisTargetEndpoint 307 | 308 | AlgorithmicTradingInstance: 309 | Type: AWS::SageMaker::NotebookInstance 310 | Properties: 311 | InstanceType: ml.t3.medium 312 | DefaultCodeRepository: https://github.com/aws-samples/algorithmic-trading 313 | RoleArn: !GetAtt 'SageMakerExecutionRole.Arn' 314 | 315 | SageMakerExecutionRole: 316 | Type: AWS::IAM::Role 317 | Properties: 318 | AssumeRolePolicyDocument: 319 | Version: '2012-10-17' 320 | Statement: 321 | - Effect: Allow 322 | Principal: 323 | Service: 324 | - sagemaker.amazonaws.com 325 | Action: 326 | - sts:AssumeRole 327 | Path: / 328 | ManagedPolicyArns: 329 | - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess 330 | - arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess 331 | - arn:aws:iam::aws:policy/AmazonKinesisReadOnlyAccess 332 | - arn:aws:iam::aws:policy/AmazonECS_FullAccess 333 | - 
arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess 334 | - !Ref 'S3Policy' 335 | 336 | S3Policy: 337 | Type: AWS::IAM::ManagedPolicy 338 | Properties: 339 | Description: S3 Permission 340 | Path: / 341 | PolicyDocument: 342 | Version: '2012-10-17' 343 | Statement: 344 | - Effect: Allow 345 | Action: 346 | - s3:GetObject 347 | - s3:PutObject 348 | - s3:DeleteObject 349 | - s3:ListBucket 350 | Resource: 351 | - !Sub 352 | - arn:aws:s3:::${S3Bucket}/* 353 | - S3Bucket: !Ref 'MarketDataS3Bucket' 354 | 355 | Outputs: 356 | 01StackName: 357 | Value: !Ref 'AWS::StackName' 358 | 02RegionName: 359 | Value: !Ref 'AWS::Region' 360 | 03TargetKinesisStream: 361 | Value: !Ref KinesisStream 362 | 04DMSReplicationInstance: 363 | Value: !Ref DMSReplicationInstance 364 | 05SourceEndpoint: 365 | Value: !Ref S3SourceEndpoint 366 | 06TargetEndpoint: 367 | Value: !Ref KinesisTargetEndpoint 368 | 07DMSReplicationTask: 369 | Value: !Ref DMSReplicationTask 370 | 08KinesisStreamName: 371 | Value: !Ref KinesisStream 372 | 373 | -------------------------------------------------------------------------------- /5_SageMakerStudio/3_Backtest_Strategy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Step 1) Data Preparation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "algo_name='algo_sma'" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "s3bucket=!(aws cloudformation list-exports --query \"Exports[?Name=='algotrading-s3bucket'].Value\" --output text)\n", 26 | "s3bucket=s3bucket[0]\n", 27 | "s3bucket" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import pandas as pd\n", 37 | "from pyathena import connect\n", 38 | "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/')\n", 39 | "\n", 40 | "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n", 41 | "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n", 42 | "del df['dt']\n", 43 | "df.head()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from pathlib import Path\n", 53 | "trainCount=int(len(df)*0.4)\n", 54 | "dfTrain = df.iloc[:trainCount]\n", 55 | "dfTest = df.iloc[trainCount:]\n", 56 | "\n", 57 | "dfTest.to_csv('/opt/ml/input/data/training/data.csv')\n", 58 | "dfTest.head()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "%matplotlib inline\n", 68 | "dfTest[\"close\"].plot()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "# Step 2) Modify Strategy Configuration \n", 76 | "\n", 77 | "In the following cell, you can adjust the parameters for the strategy.\n", 78 | "\n", 79 | "* `fast_period` = Fast Period for Moving Average Indicator in min (e.g. 8)\n", 80 | "* `slow_period` = Slow Period for Moving Average Indicator in min (e.g. 21)\n", 81 | "* `size` = The number of shares for a transaction (e.g. 
100)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "%%writefile /opt/ml/input/config/hyperparameters.json\n", 91 | "{ \"fast_period\" : \"8\",\n", 92 | " \"slow_period\" : \"21\",\n", 93 | " \"size\" : \"100\"\n", 94 | "}" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "%run /opt/program/update_config.py $algo_name $s3bucket" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "# Step 3) Modify Strategy Code\n", 111 | "\n", 112 | "Here are some helpful links:\n", 113 | "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n", 114 | "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n", 115 | "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "%%writefile /opt/program/{algo_name}.py\n", 125 | "import backtrader as bt\n", 126 | "from algo_base import *\n", 127 | "import pytz\n", 128 | "from pytz import timezone\n", 129 | "\n", 130 | "class MyStrategy(StrategyTemplate):\n", 131 | "\n", 132 | " def __init__(self): # Initiation\n", 133 | " super(MyStrategy, self).__init__()\n", 134 | " self.config[\"fast_period\"]=int(self.config[\"fast_period\"])\n", 135 | " self.config[\"slow_period\"]=int(self.config[\"slow_period\"])\n", 136 | " self.config[\"size\"]=int(self.config[\"size\"])\n", 137 | " print(self.config)\n", 138 | " self.emaFast = bt.ind.ExponentialMovingAverage(period=self.config[\"fast_period\"])\n", 139 | " self.emaSlow = bt.ind.ExponentialMovingAverage(period=self.config[\"slow_period\"])\n", 140 | " self.size = self.config[\"size\"]\n", 141 | "\n", 142 | " def init_broker(broker):\n", 143 | " broker.setcash(100000.0)\n", 144 | " broker.setcommission(commission=0.0) \n", 145 | " \n", 146 | " def add_data(cerebro):\n", 147 | " data = btfeeds.GenericCSVData(\n", 148 | " dataname=MyStrategy.TRAIN_FILE,\n", 149 | " dtformat=('%Y-%m-%d'),\n", 150 | " timeframe=bt.TimeFrame.Days,\n", 151 | " datetime=0,\n", 152 | " time=-1,\n", 153 | " high=2,\n", 154 | " low=3,\n", 155 | " open=1,\n", 156 | " close=4,\n", 157 | " volume=5,\n", 158 | " openinterest=-1\n", 159 | " )\n", 160 | " cerebro.adddata(data)\n", 161 | "\n", 162 | " def next(self): # Processing\n", 163 | " super(MyStrategy, self).next()\n", 164 | " dt=self.datas[0].datetime.datetime(0)\n", 165 | " if not self.position:\n", 166 | " if self.emaFast[0] > self.emaSlow[0]:\n", 167 | " self.buy(size=self.size) # Go long\n", 168 | " else:\n", 169 | " self.sell(size=self.size) # Go short\n", 170 | " elif self.position.size>0 and self.emaFast[0] < self.emaSlow[0]:\n", 171 | " self.sell(size=2*self.size) # Go short\n", 172 | " elif self.position.size<0 and self.emaFast[0] > self.emaSlow[0]: \n", 173 | " self.buy(size=2*self.size) # Go long" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "# Step 4) Backtest Locally" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "scrolled": true 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "%run /opt/program/train" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "# Step 5) 
Backtest Remotely with SageMaker" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "!aws s3 cp \"/opt/program/\" \"s3://{s3bucket}/{algo_name}/\" --recursive --exclude \"*\" --include \"{algo_name}*.*\"" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "conf_file='/opt/ml/input/config/hyperparameters.json'\n", 217 | "with open(conf_file, 'r') as f:\n", 218 | " config = json.load(f)\n", 219 | "config['s3']=s3_bucket \n", 220 | "config['chart']='true'\n", 221 | "print(config)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "scrolled": true 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "#Run Remote Backtest via SageMaker\n", 233 | "import sagemaker as sage\n", 234 | "from sagemaker import get_execution_role\n", 235 | "from sagemaker.estimator import Estimator \n", 236 | "\n", 237 | "role = get_execution_role()\n", 238 | "sess = sage.Session()\n", 239 | "\n", 240 | "WORK_DIRECTORY = '/opt/ml/input/data/training'\n", 241 | "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n", 242 | "print(data_location)\n", 243 | "\n", 244 | "prefix=algo_name\n", 245 | "job_name=prefix.replace('_','-')\n", 246 | "\n", 247 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 248 | "region = sess.boto_session.region_name\n", 249 | "image = f'{account}.dkr.ecr.{region}.amazonaws.com/algotrading:1.0'\n", 250 | "\n", 251 | "algo = sage.estimator.Estimator(\n", 252 | " image_uri=image,\n", 253 | " role=role,\n", 254 | " instance_count=1,\n", 255 | " instance_type='ml.m4.xlarge',\n", 256 | " output_path=\"s3://{}/output\".format(sess.default_bucket()),\n", 257 | " sagemaker_session=sess,\n", 258 | " base_job_name=job_name,\n", 259 | " hyperparameters=config,\n", 260 | " metric_definitions=[\n", 261 | " {\n", 262 | " \"Name\": \"algo:pnl\",\n", 263 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 264 | " },\n", 265 | " {\n", 266 | " \"Name\": \"algo:sharpe_ratio\",\n", 267 | " \"Regex\": \"Sharpe Ratio:(.*?),\"\n", 268 | " }\n", 269 | " ])" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "algo.fit(data_location)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "#Get Algo Metrics\n", 288 | "from sagemaker.analytics import TrainingJobAnalytics\n", 289 | "\n", 290 | "latest_job_name = algo.latest_training_job.job_name\n", 291 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n", 292 | "metrics_dataframe" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [ 301 | "#Get Algo Chart from S3\n", 302 | "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n", 303 | "import boto3\n", 304 | "s3 = boto3.resource('s3')\n", 305 | "my_bucket = s3.Bucket(sess.default_bucket())\n", 306 | "my_bucket.download_file(model_name,'model.tar.gz')\n", 307 | "!tar -xzf model.tar.gz\n", 308 | "!rm model.tar.gz\n", 309 | "from IPython.display import Image\n", 310 | "Image(filename='chart.png') " 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 
| "# Step 6) Run Hyperparameter Optimization with SageMaker" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "from sagemaker.tuner import (\n", 327 | " IntegerParameter,\n", 328 | " CategoricalParameter,\n", 329 | " ContinuousParameter,\n", 330 | " HyperparameterTuner,\n", 331 | ")\n", 332 | "\n", 333 | "hyperparameter_ranges = {\n", 334 | " \"fast_period\": IntegerParameter(5, 10),\n", 335 | " \"slow_period\": IntegerParameter(21, 31)\n", 336 | "}\n", 337 | "objective_metric_name= \"algo:pnl\"\n", 338 | "tuner = HyperparameterTuner(algo,\n", 339 | " objective_metric_name,\n", 340 | " hyperparameter_ranges,\n", 341 | " max_jobs=6,\n", 342 | " max_parallel_jobs=3,\n", 343 | " metric_definitions=[\n", 344 | " {\n", 345 | " \"Name\": \"algo:pnl\",\n", 346 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 347 | " }\n", 348 | " ]\n", 349 | " )" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "tuner.fit(data_location)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "best_params=boto3.client('sagemaker').describe_hyper_parameter_tuning_job(\n", 368 | "HyperParameterTuningJobName=tuner.latest_tuning_job.job_name)['BestTrainingJob']['TunedHyperParameters']\n", 369 | "best_params" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "from sagemaker.analytics import TrainingJobAnalytics\n", 379 | "bestjob=tuner.best_training_job()\n", 380 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=bestjob).dataframe()\n", 381 | "metrics_dataframe" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "# Step 7) Backtest Locally with Optimal Parameters" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "# Use optimal hyperparameter and test data\n", 398 | "conf_file='/opt/ml/input/config/hyperparameters.json'\n", 399 | "with open(conf_file, 'r') as f:\n", 400 | " config = json.load(f)\n", 401 | "config['fast_period']=best_params['fast_period']\n", 402 | "config['slow_period']=best_params['slow_period']\n", 403 | "config['chart']='false'\n", 404 | "print(config)" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "%run /opt/program/train" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "### Congratulations! You've completed this strategy." 
421 | ] 422 | } 423 | ], 424 | "metadata": { 425 | "instance_type": "ml.t3.medium", 426 | "kernelspec": { 427 | "display_name": "Python 3 (Data Science)", 428 | "language": "python", 429 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 430 | }, 431 | "language_info": { 432 | "codemirror_mode": { 433 | "name": "ipython", 434 | "version": 3 435 | }, 436 | "file_extension": ".py", 437 | "mimetype": "text/x-python", 438 | "name": "python", 439 | "nbconvert_exporter": "python", 440 | "pygments_lexer": "ipython3", 441 | "version": "3.7.10" 442 | } 443 | }, 444 | "nbformat": 4, 445 | "nbformat_minor": 4 446 | } 447 | -------------------------------------------------------------------------------- /2_Strategies/Strategy_ML_Forecast.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%run init_model.py 'algo_ml_long_short_predict'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Step 1) Data Preparation" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# get S3 bucket\n", 26 | "s3bucket=!(aws s3 ls | grep algotrading- | awk '{print $3}')\n", 27 | "s3bucket=s3bucket[0]\n", 28 | "s3bucket" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import sys\n", 38 | "!{sys.executable} -m pip install PyAthena" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import os\n", 48 | "import sagemaker as sage\n", 49 | "from sagemaker import get_execution_role\n", 50 | "import datetime\n", 51 | "from sagemaker.tensorflow import TensorFlow\n", 52 | "import json\n", 53 | "\n", 54 | "role = get_execution_role()\n", 55 | "sess = sage.Session()\n", 56 | "region = sess.boto_session.region_name" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "import pandas as pd\n", 66 | "from pyathena import connect\n", 67 | "conn = connect(s3_staging_dir='s3://'+s3bucket+'/results/',\n", 68 | " region_name=region)\n", 69 | "\n", 70 | "df = pd.read_sql(\"SELECT dt,open,high,low,close,vol FROM algo_data.hist_data_daily;\", conn)\n", 71 | "df.set_index(pd.DatetimeIndex(df['dt']),inplace=True)\n", 72 | "del df['dt']\n", 73 | "df.head()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "trainCount=int(len(df)*0.4)\n", 83 | "dfTrain = df.iloc[:trainCount]\n", 84 | "\n", 85 | "dfTest = df.iloc[trainCount:]\n", 86 | "dfTest.to_csv('local/'+algo_name+'/input/data/training/data.csv')\n", 87 | "dfTest.head()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "scrolled": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "%matplotlib notebook\n", 99 | "dfTest[\"close\"].plot()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# Step 2) Modify Strategy Configuration \n", 107 | "\n", 108 | "In the following cell, you can adjust the parameters for the strategy.\n", 109 | "\n", 110 | "* `user` = Name for 
Leaderboard (optional)\n", 111 | "* `long_threshold` = Threshold for Long Trade (0 to 1)\n", 112 | "* `short_threshold` = Threshold for Short Trade (0 to 1)\n", 113 | "* `profit_target_pct` = Profit Target Percentage \n", 114 | "* `stop_target_pct` = Stop Target Percentage\n", 115 | "* `size` = The number of shares for a transaction\n", 116 | "\n", 117 | "`Tip`: A good starting point for improving the strategy is modify the profit / stop target and the risk/reward ratio. Another option is to reduce the number of signals by increasing the threshold." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "%%writefile local/{algo_name}/input/config/hyperparameters.json\n", 127 | "{ \"user\" : \"user\",\n", 128 | " \"long_threshold\" : \"0.5\",\n", 129 | " \"short_threshold\" : \"0.5\",\n", 130 | " \"profit_target_pct\" : \"2.00\",\n", 131 | " \"stop_target_pct\" : \"1.50\",\n", 132 | " \"size\" : \"100\"\n", 133 | "}" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "%run update_config.py $algo_name" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "# Step 3) Modify Strategy Code\n", 150 | "\n", 151 | "In the following cell, you can modify the strategy code. For the first backtests, you can leave it as is.\n", 152 | "\n", 153 | "`Tip`: A good starting point for improving the strategy is to combine the signal from the model with traditional trend indicators (e.g. moving average). This will likely improve the performance. To improve the strategy further, you could increase the accuracy of the machine learning model by including more indicators (e.g. ATR) or modify the input and forecast window. This requires to re-train the machine learning model as this needs to match your strategy. For timeseries forecasting, you could compare the performance with more advanced ML networks (e.g. 
CNN, LTSM, RNN) and pick the model with the best predictions.\n", 154 | "\n", 155 | "You can also checkout other AWS-provided options for timeseries forecasting and formulate a strategy that uses price predictions and integrate them in your strategy:\n", 156 | "* https://docs.aws.amazon.com/sagemaker/latest/dg/deepar.html \n", 157 | "* https://aws.amazon.com/forecast/\n", 158 | "\n", 159 | "Here are some helpful links:\n", 160 | "* Backtrader Documentation: https://www.backtrader.com/docu/strategy/\n", 161 | "* TA-Lib Indicator Reference: https://www.backtrader.com/docu/talibindautoref/\n", 162 | "* Backtrader Indicator Reference: https://www.backtrader.com/docu/indautoref/" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "%%writefile model/{algo_name}.py\n", 172 | "import backtrader as bt\n", 173 | "from algo_base import *\n", 174 | "import math\n", 175 | "import numpy as np\n", 176 | "import pandas as pd\n", 177 | "import tensorflow as tf\n", 178 | "import keras\n", 179 | "from keras import backend as K\n", 180 | "from keras.models import load_model\n", 181 | "\n", 182 | "class MyStrategy(StrategyTemplate):\n", 183 | "\n", 184 | " def __init__(self):\n", 185 | " super(MyStrategy, self).__init__()\n", 186 | " self.config[\"long_threshold\"]=float(self.config[\"long_threshold\"])\n", 187 | " self.config[\"short_threshold\"]=float(self.config[\"short_threshold\"])\n", 188 | " self.config[\"size\"]=int(self.config[\"size\"])\n", 189 | " self.config[\"profit_target_pct\"]=float(self.config[\"profit_target_pct\"])\n", 190 | " self.config[\"stop_target_pct\"]=float(self.config[\"stop_target_pct\"])\n", 191 | "\n", 192 | " self.order=None\n", 193 | " self.orderPlaced=False\n", 194 | " \n", 195 | " self.model = load_model('model_long_short_predict.h5')\n", 196 | " \n", 197 | " # input / indicators\n", 198 | " self.repeatCount=15\n", 199 | " self.repeatStep=1\n", 200 | " \n", 201 | " self.profitTarget=self.config[\"profit_target_pct\"]/100.0\n", 202 | " self.stopTarget=self.config[\"stop_target_pct\"]/100.0\n", 203 | " self.size=self.config[\"size\"]\n", 204 | " \n", 205 | " self.sma=[]\n", 206 | " self.roc=[]\n", 207 | " \n", 208 | " self.hData=[\"dt\"]\n", 209 | " self.hData.append(\"close\") \n", 210 | " for a in range(0,self.repeatCount):\n", 211 | " tp=(a+1)*self.repeatStep+1\n", 212 | " self.hData.append(\"sma\"+str(tp))\n", 213 | " self.sma.append(bt.talib.SMA(self.data, timeperiod=tp, plot=False))\n", 214 | " for a in range(0,self.repeatCount):\n", 215 | " tp=(a+1)*self.repeatStep+1\n", 216 | " self.hData.append(\"roc\"+str(tp))\n", 217 | " self.roc.append(bt.talib.ROC(self.data, timeperiod=tp, plot=False))\n", 218 | "\n", 219 | " def init_broker(broker):\n", 220 | " broker.setcash(100000.0)\n", 221 | " broker.setcommission(commission=0.0) \n", 222 | " \n", 223 | " def add_data(cerebro):\n", 224 | " data = btfeeds.GenericCSVData(\n", 225 | " dataname=MyStrategy.TRAIN_FILE,\n", 226 | " dtformat=('%Y-%m-%d'),\n", 227 | " timeframe=bt.TimeFrame.Days,\n", 228 | " datetime=0,\n", 229 | " time=-1,\n", 230 | " high=2,\n", 231 | " low=3,\n", 232 | " open=1,\n", 233 | " close=4,\n", 234 | " volume=5,\n", 235 | " openinterest=-1\n", 236 | " )\n", 237 | " cerebro.adddata(data)\n", 238 | "\n", 239 | " def next(self):\n", 240 | " super(MyStrategy, self).next()\n", 241 | " \n", 242 | " dt=self.datas[0].datetime.datetime(0)\n", 243 | " cl=self.dataclose[0]\n", 244 | " inputRec=[] \n", 245 | "\n", 246 | " 
#open\n", 247 | " inputRec0=[]\n", 248 | " inputRec0.append(cl)\n", 249 | "\n", 250 | " #sma\n", 251 | " for a in range(0,self.repeatCount):\n", 252 | " if math.isnan(self.sma[a][0]):\n", 253 | " inputRec0.append(cl)\n", 254 | " else:\n", 255 | " inputRec0.append(self.sma[a][0])\n", 256 | "\n", 257 | " m1=min(inputRec0)\n", 258 | " m2=max(inputRec0)\n", 259 | " for a in inputRec0:\n", 260 | " if m2-m1==0:\n", 261 | " inputRec.append(0)\n", 262 | " else:\n", 263 | " inputRec.append((a-m1)/(m2-m1))\n", 264 | "\n", 265 | " #roc\n", 266 | " for a in range(0,self.repeatCount):\n", 267 | " if math.isnan(self.roc[a][0]):\n", 268 | " inputRec.append(0)\n", 269 | " else:\n", 270 | " inputRec.append(self.roc[a][0])\n", 271 | "\n", 272 | " mX=[]\n", 273 | " mX.append(np.array(inputRec))\n", 274 | " dataX=np.array(mX)\n", 275 | " #print(\"dataX=%s\" % dataX)\n", 276 | "\n", 277 | " # *** ML prediction ***\n", 278 | " mY=self.model.predict(dataX)\n", 279 | " #print(\"mY=%s\" % mY)\n", 280 | " tLong=mY[0][0]\n", 281 | " tShort=mY[0][1]\n", 282 | " #print(\"[%s]:long=%s,short=%s\" % (dt,tLong,tShort))\n", 283 | " if not self.position:\n", 284 | " fLong=(tLong>self.config[\"long_threshold\"]) \n", 285 | " fShort=(tShort>self.config[\"short_threshold\"])\n", 286 | " if fLong:\n", 287 | " self.order=self.buy(size=self.size)\n", 288 | " self.limitPrice=cl+self.profitTarget*cl\n", 289 | " self.stopPrice=cl-self.stopTarget*cl\n", 290 | " elif fShort:\n", 291 | " self.order=self.sell(size=self.size) \n", 292 | " self.limitPrice=cl-self.profitTarget*cl\n", 293 | " self.stopPrice=cl+self.stopTarget*cl\n", 294 | "\n", 295 | " if self.position:\n", 296 | " if self.position.size>0:\n", 297 | " if cl>=self.limitPrice or cl<=self.stopPrice:\n", 298 | " self.order=self.sell(size=self.size)\n", 299 | " elif self.position.size<0:\n", 300 | " if cl<=self.limitPrice or cl>=self.stopPrice:\n", 301 | " self.order=self.buy(size=self.size)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "# Step 4) Backtest Locally\n", 309 | "\n", 310 | "**Please note that the initial docker build may take a few minutes. Subsequent runs are fast.**" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "#Build Local Algo Image\n", 320 | "!docker build -t $algo_name .\n", 321 | "!docker run -v $(pwd)/local/$algo_name:/opt/ml --rm $algo_name train" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": { 328 | "scrolled": false 329 | }, 330 | "outputs": [], 331 | "source": [ 332 | "from IPython.display import Image\n", 333 | "Image(filename='local/'+algo_name+'/model/chart.png')" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## Refine your trading strategy (step 2 to 4). Once you are ready, move on to the next step." 
341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "# Step 5) Backtest on SageMaker and submit performance" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "#Deploy Algo Image to ECS\n", 357 | "!./build_and_push.sh $algo_name" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "#Run Remote Forwardtest via SageMaker\n", 367 | "import sagemaker as sage\n", 368 | "from sagemaker import get_execution_role\n", 369 | "from sagemaker.estimator import Estimator \n", 370 | "\n", 371 | "role = get_execution_role()\n", 372 | "sess = sage.Session()\n", 373 | "\n", 374 | "WORK_DIRECTORY = 'local/'+algo_name+'/input/data/training'\n", 375 | "data_location = sess.upload_data(WORK_DIRECTORY, key_prefix='data')\n", 376 | "print(data_location)\n", 377 | "\n", 378 | "conf_file='local/'+algo_name+'/input/config/hyperparameters.json'\n", 379 | "with open(conf_file, 'r') as f:\n", 380 | " config = json.load(f)\n", 381 | "#config['sim_data']='True'\n", 382 | "print(config)\n", 383 | "\n", 384 | "prefix=algo_name\n", 385 | "job_name=prefix.replace('_','-')\n", 386 | "\n", 387 | "account = sess.boto_session.client('sts').get_caller_identity()['Account']\n", 388 | "region = sess.boto_session.region_name\n", 389 | "image = f'{account}.dkr.ecr.{region}.amazonaws.com/{prefix}:latest'\n", 390 | "\n", 391 | "algo = sage.estimator.Estimator(\n", 392 | " image_uri=image,\n", 393 | " role=role,\n", 394 | " instance_count=1,\n", 395 | " instance_type='ml.m4.xlarge',\n", 396 | " output_path=\"s3://{}/output\".format(sess.default_bucket()),\n", 397 | " sagemaker_session=sess,\n", 398 | " base_job_name=job_name,\n", 399 | " hyperparameters=config,\n", 400 | " metric_definitions=[\n", 401 | " {\n", 402 | " \"Name\": \"algo:pnl\",\n", 403 | " \"Regex\": \"Total PnL:(.*?)]\"\n", 404 | " },\n", 405 | " {\n", 406 | " \"Name\": \"algo:sharpe_ratio\",\n", 407 | " \"Regex\": \"Sharpe Ratio:(.*?),\"\n", 408 | " }\n", 409 | " ])\n", 410 | "algo.fit(data_location)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "#Get Algo Metrics\n", 420 | "from sagemaker.analytics import TrainingJobAnalytics\n", 421 | "\n", 422 | "latest_job_name = algo.latest_training_job.job_name\n", 423 | "metrics_dataframe = TrainingJobAnalytics(training_job_name=latest_job_name).dataframe()\n", 424 | "metrics_dataframe" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": { 431 | "scrolled": true 432 | }, 433 | "outputs": [], 434 | "source": [ 435 | "#Get Algo Chart from S3\n", 436 | "model_name=algo.model_data.replace('s3://'+sess.default_bucket()+'/','')\n", 437 | "import boto3\n", 438 | "s3 = boto3.resource('s3')\n", 439 | "my_bucket = s3.Bucket(sess.default_bucket())\n", 440 | "my_bucket.download_file(model_name,'model.tar.gz')\n", 441 | "!tar -xzf model.tar.gz\n", 442 | "!rm model.tar.gz\n", 443 | "from IPython.display import Image\n", 444 | "Image(filename='chart.png') " 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "### Congratulations! You've completed this strategy." 
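When you iterate on thresholds or profit/stop targets you end up with several remote backtests, and it helps to line up their final metrics side by side. A small sketch building on the `TrainingJobAnalytics` call above; the job name below is a placeholder, and the `timestamp`, `metric_name` and `value` columns are the ones the analytics dataframe normally contains:

```python
import pandas as pd
from sagemaker.analytics import TrainingJobAnalytics

# Placeholder job names; use the names printed by algo.fit() for your own runs.
job_names = ["algo-ml-long-short-predict-2021-01-01-00-00-00-000"]

rows = []
for job in job_names:
    df = TrainingJobAnalytics(training_job_name=job).dataframe()
    # Keep the last reported value of each metric for this job.
    final = df.sort_values("timestamp").groupby("metric_name")["value"].last()
    final["job_name"] = job
    rows.append(final)

summary = pd.DataFrame(rows).set_index("job_name")
summary  # one row per backtest, columns such as algo:pnl and algo:sharpe_ratio
```

This keeps a simple record of which configuration produced which PnL and Sharpe ratio before you submit a result.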
452 | ] 453 | } 454 | ], 455 | "metadata": { 456 | "kernelspec": { 457 | "display_name": "conda_tensorflow_p36", 458 | "language": "python", 459 | "name": "conda_tensorflow_p36" 460 | }, 461 | "language_info": { 462 | "codemirror_mode": { 463 | "name": "ipython", 464 | "version": 3 465 | }, 466 | "file_extension": ".py", 467 | "mimetype": "text/x-python", 468 | "name": "python", 469 | "nbconvert_exporter": "python", 470 | "pygments_lexer": "ipython3", 471 | "version": "3.6.10" 472 | } 473 | }, 474 | "nbformat": 4, 475 | "nbformat_minor": 2 476 | } 477 | -------------------------------------------------------------------------------- /0_Setup/algo-reference.yaml: -------------------------------------------------------------------------------- 1 | Description: > 2 | This template deploys the algorithmic trading reference architecture 3 | 4 | Parameters: 5 | EnvironmentName: 6 | Description: An environment name that will be prefixed to resource names 7 | Type: String 8 | Default: algo 9 | 10 | VpcCIDR: 11 | Description: Please enter the IP range (CIDR notation) for this VPC 12 | Type: String 13 | Default: 10.111.0.0/16 14 | 15 | PublicSubnet1CIDR: 16 | Description: Please enter the IP range (CIDR notation) for the public subnet in the first Availability Zone 17 | Type: String 18 | Default: 10.111.10.0/24 19 | 20 | PublicSubnet2CIDR: 21 | Description: Please enter the IP range (CIDR notation) for the public subnet in the second Availability Zone 22 | Type: String 23 | Default: 10.111.11.0/24 24 | 25 | PrivateSubnet1CIDR: 26 | Description: Please enter the IP range (CIDR notation) for the private subnet in the first Availability Zone 27 | Type: String 28 | Default: 10.111.20.0/24 29 | 30 | PrivateSubnet2CIDR: 31 | Description: Please enter the IP range (CIDR notation) for the private subnet in the second Availability Zone 32 | Type: String 33 | Default: 10.111.21.0/24 34 | 35 | S3Bucket: 36 | Description: Please specify your S3 bucket 37 | Type: String 38 | 39 | 40 | Resources: 41 | VPC: 42 | Type: AWS::EC2::VPC 43 | Properties: 44 | CidrBlock: !Ref VpcCIDR 45 | EnableDnsHostnames: true 46 | Tags: 47 | - Key: Name 48 | Value: !Ref EnvironmentName 49 | 50 | InternetGateway: 51 | Type: AWS::EC2::InternetGateway 52 | Properties: 53 | Tags: 54 | - Key: Name 55 | Value: !Ref EnvironmentName 56 | 57 | InternetGatewayAttachment: 58 | Type: AWS::EC2::VPCGatewayAttachment 59 | Properties: 60 | InternetGatewayId: !Ref InternetGateway 61 | VpcId: !Ref VPC 62 | 63 | PublicSubnet1: 64 | Type: AWS::EC2::Subnet 65 | Properties: 66 | VpcId: !Ref VPC 67 | AvailabilityZone: !Select [0, !GetAZs ""] 68 | CidrBlock: !Ref PublicSubnet1CIDR 69 | MapPublicIpOnLaunch: true 70 | Tags: 71 | - Key: Name 72 | Value: !Sub ${EnvironmentName} Public Subnet (AZ1) 73 | 74 | PublicSubnet2: 75 | Type: AWS::EC2::Subnet 76 | Properties: 77 | VpcId: !Ref VPC 78 | AvailabilityZone: !Select [1, !GetAZs ""] 79 | CidrBlock: !Ref PublicSubnet2CIDR 80 | MapPublicIpOnLaunch: true 81 | Tags: 82 | - Key: Name 83 | Value: !Sub ${EnvironmentName} Public Subnet (AZ2) 84 | 85 | PrivateSubnet1: 86 | Type: AWS::EC2::Subnet 87 | Properties: 88 | VpcId: !Ref VPC 89 | AvailabilityZone: !Select [0, !GetAZs ""] 90 | CidrBlock: !Ref PrivateSubnet1CIDR 91 | MapPublicIpOnLaunch: false 92 | Tags: 93 | - Key: Name 94 | Value: !Sub ${EnvironmentName} Private Subnet (AZ1) 95 | 96 | PrivateSubnet2: 97 | Type: AWS::EC2::Subnet 98 | Properties: 99 | VpcId: !Ref VPC 100 | AvailabilityZone: !Select [1, !GetAZs ""] 101 | CidrBlock: !Ref PrivateSubnet2CIDR 102 | 
MapPublicIpOnLaunch: false 103 | Tags: 104 | - Key: Name 105 | Value: !Sub ${EnvironmentName} Private Subnet (AZ2) 106 | 107 | NatGateway1EIP: 108 | Type: AWS::EC2::EIP 109 | DependsOn: InternetGatewayAttachment 110 | Properties: 111 | Domain: vpc 112 | 113 | NatGateway2EIP: 114 | Type: AWS::EC2::EIP 115 | DependsOn: InternetGatewayAttachment 116 | Properties: 117 | Domain: vpc 118 | 119 | NatGateway1: 120 | Type: AWS::EC2::NatGateway 121 | Properties: 122 | AllocationId: !GetAtt NatGateway1EIP.AllocationId 123 | SubnetId: !Ref PublicSubnet1 124 | 125 | NatGateway2: 126 | Type: AWS::EC2::NatGateway 127 | Properties: 128 | AllocationId: !GetAtt NatGateway2EIP.AllocationId 129 | SubnetId: !Ref PublicSubnet2 130 | 131 | PublicRouteTable: 132 | Type: AWS::EC2::RouteTable 133 | Properties: 134 | VpcId: !Ref VPC 135 | Tags: 136 | - Key: Name 137 | Value: !Sub ${EnvironmentName} Public Routes 138 | 139 | DefaultPublicRoute: 140 | Type: AWS::EC2::Route 141 | DependsOn: InternetGatewayAttachment 142 | Properties: 143 | RouteTableId: !Ref PublicRouteTable 144 | DestinationCidrBlock: 0.0.0.0/0 145 | GatewayId: !Ref InternetGateway 146 | 147 | PublicSubnet1RouteTableAssociation: 148 | Type: AWS::EC2::SubnetRouteTableAssociation 149 | Properties: 150 | RouteTableId: !Ref PublicRouteTable 151 | SubnetId: !Ref PublicSubnet1 152 | 153 | PublicSubnet2RouteTableAssociation: 154 | Type: AWS::EC2::SubnetRouteTableAssociation 155 | Properties: 156 | RouteTableId: !Ref PublicRouteTable 157 | SubnetId: !Ref PublicSubnet2 158 | 159 | PrivateRouteTable1: 160 | Type: AWS::EC2::RouteTable 161 | Properties: 162 | VpcId: !Ref VPC 163 | Tags: 164 | - Key: Name 165 | Value: !Sub ${EnvironmentName} Private Routes (AZ1) 166 | 167 | DefaultPrivateRoute1: 168 | Type: AWS::EC2::Route 169 | Properties: 170 | RouteTableId: !Ref PrivateRouteTable1 171 | DestinationCidrBlock: 0.0.0.0/0 172 | NatGatewayId: !Ref NatGateway1 173 | 174 | PrivateSubnet1RouteTableAssociation: 175 | Type: AWS::EC2::SubnetRouteTableAssociation 176 | Properties: 177 | RouteTableId: !Ref PrivateRouteTable1 178 | SubnetId: !Ref PrivateSubnet1 179 | 180 | PrivateRouteTable2: 181 | Type: AWS::EC2::RouteTable 182 | Properties: 183 | VpcId: !Ref VPC 184 | Tags: 185 | - Key: Name 186 | Value: !Sub ${EnvironmentName} Private Routes (AZ2) 187 | 188 | DefaultPrivateRoute2: 189 | Type: AWS::EC2::Route 190 | Properties: 191 | RouteTableId: !Ref PrivateRouteTable2 192 | DestinationCidrBlock: 0.0.0.0/0 193 | NatGatewayId: !Ref NatGateway2 194 | 195 | PrivateSubnet2RouteTableAssociation: 196 | Type: AWS::EC2::SubnetRouteTableAssociation 197 | Properties: 198 | RouteTableId: !Ref PrivateRouteTable2 199 | SubnetId: !Ref PrivateSubnet2 200 | 201 | ECR: 202 | Type: AWS::ECR::Repository 203 | Properties: 204 | RepositoryName: !Sub ${EnvironmentName}_ecr 205 | 206 | AlgorithmicTradingInstance: 207 | Type: AWS::SageMaker::NotebookInstance 208 | Properties: 209 | InstanceType: ml.t2.large 210 | DefaultCodeRepository: https://github.com/aws-samples/algorithmic-trading 211 | RoleArn: !GetAtt 'SageMakerExecutionRole.Arn' 212 | 213 | S3Policy: 214 | Type: AWS::IAM::ManagedPolicy 215 | Properties: 216 | Description: S3 Permission 217 | Path: / 218 | PolicyDocument: 219 | Version: '2012-10-17' 220 | Statement: 221 | - Effect: Allow 222 | Action: 223 | - s3:GetObject 224 | - s3:PutObject 225 | - s3:DeleteObject 226 | - s3:ListBucket 227 | Resource: 228 | - !Sub 229 | - arn:aws:s3:::${S3Bucket}/* 230 | - S3Bucket: !Ref 'S3Bucket' 231 | 232 | SageMakerExecutionRole: 233 | Type: 
AWS::IAM::Role 234 | Properties: 235 | AssumeRolePolicyDocument: 236 | Version: '2012-10-17' 237 | Statement: 238 | - Effect: Allow 239 | Principal: 240 | Service: 241 | - sagemaker.amazonaws.com 242 | Action: 243 | - sts:AssumeRole 244 | Path: / 245 | ManagedPolicyArns: 246 | - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess 247 | - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess 248 | - arn:aws:iam::aws:policy/AmazonAthenaFullAccess 249 | - arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess 250 | - arn:aws:iam::aws:policy/AmazonECS_FullAccess 251 | - arn:aws:iam::aws:policy/AmazonKinesisReadOnlyAccess 252 | - !Ref 'S3Policy' 253 | 254 | ECSHostSecurityGroup: 255 | Type: AWS::EC2::SecurityGroup 256 | Properties: 257 | VpcId: !Ref 'VPC' 258 | GroupDescription: Access to the ECS hosts and the tasks/containers that run on them 259 | SecurityGroupIngress: 260 | # Allow access from anywhere to our ECS services 261 | - CidrIp: 0.0.0.0/0 262 | IpProtocol: -1 263 | Tags: 264 | - Key: Name 265 | Value: !Sub ${EnvironmentName}-ECS 266 | 267 | ECSTaskExecutionRole: 268 | Type: AWS::IAM::Role 269 | Properties: 270 | AssumeRolePolicyDocument: 271 | Version: '2012-10-17' 272 | Statement: 273 | - Effect: Allow 274 | Principal: 275 | Service: 276 | - ecs-tasks.amazonaws.com 277 | Action: 278 | - sts:AssumeRole 279 | Path: / 280 | ManagedPolicyArns: 281 | - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy 282 | 283 | AlgoExecutionRole: 284 | Type: AWS::IAM::Role 285 | Properties: 286 | AssumeRolePolicyDocument: 287 | Version: '2012-10-17' 288 | Statement: 289 | - Effect: Allow 290 | Principal: 291 | Service: 292 | - ecs-tasks.amazonaws.com 293 | Action: 294 | - sts:AssumeRole 295 | Path: / 296 | ManagedPolicyArns: 297 | - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy 298 | 299 | ECSCluster: 300 | Type: AWS::ECS::Cluster 301 | Properties: 302 | ClusterName: !Ref EnvironmentName 303 | 304 | GlueDatabase: 305 | Type: AWS::Glue::Database 306 | Properties: 307 | CatalogId: !Ref 'AWS::AccountId' 308 | DatabaseInput: 309 | Name: algo_data 310 | 311 | GlueHistDataDaily: 312 | Type: AWS::Glue::Table 313 | Properties: 314 | CatalogId: !Ref 'AWS::AccountId' 315 | DatabaseName: !Ref 'GlueDatabase' 316 | TableInput: 317 | Description: Daily Price Data 318 | Name: hist_data_daily 319 | Parameters: 320 | classification: csv 321 | has_encrypted_data: false 322 | StorageDescriptor: 323 | Columns: 324 | - Name: dt 325 | Type: string 326 | - Name: sym 327 | Type: string 328 | - Name: open 329 | Type: double 330 | - Name: high 331 | Type: double 332 | - Name: low 333 | Type: double 334 | - Name: close 335 | Type: double 336 | - Name: vol 337 | Type: double 338 | Compressed: false 339 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 340 | Location: !Join 341 | - '' 342 | - - s3:// 343 | - !Ref 'S3Bucket' 344 | - /hist_data_daily 345 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 346 | SerdeInfo: 347 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 348 | Parameters: 349 | field.delim: ',' 350 | skip.header.line.count: '1' 351 | StoredAsSubDirectories: false 352 | TableType: EXTERNAL_TABLE 353 | 354 | GlueHistDataIntraday: 355 | Type: AWS::Glue::Table 356 | Properties: 357 | CatalogId: !Ref 'AWS::AccountId' 358 | DatabaseName: !Ref 'GlueDatabase' 359 | TableInput: 360 | Description: Intraday Price Data 361 | Name: hist_data_intraday 362 | Parameters: 363 | classification: csv 364 | has_encrypted_data: 
false 365 | StorageDescriptor: 366 | Columns: 367 | - Name: dt 368 | Type: string 369 | - Name: sym 370 | Type: string 371 | - Name: open 372 | Type: double 373 | - Name: high 374 | Type: double 375 | - Name: low 376 | Type: double 377 | - Name: close 378 | Type: double 379 | - Name: vol 380 | Type: double 381 | Compressed: false 382 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 383 | Location: !Join 384 | - '' 385 | - - s3:// 386 | - !Ref 'S3Bucket' 387 | - /hist_data_intraday 388 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 389 | SerdeInfo: 390 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 391 | Parameters: 392 | field.delim: ',' 393 | skip.header.line.count: '1' 394 | StoredAsSubDirectories: false 395 | TableType: EXTERNAL_TABLE 396 | 397 | AthenaWorkgroup: 398 | Type: AWS::Athena::WorkGroup 399 | Properties: 400 | Name: MyWorkGroup 401 | Description: AlgoWorkgroup 402 | RecursiveDeleteOption: true 403 | State: ENABLED 404 | WorkGroupConfiguration: 405 | RequesterPaysEnabled: true 406 | ResultConfiguration: 407 | OutputLocation: !Join 408 | - '' 409 | - - s3:// 410 | - !Ref 'S3Bucket' 411 | - /results/ 412 | 413 | AlgoHistDataDaily: 414 | Type: AWS::Athena::NamedQuery 415 | Properties: 416 | Database: !Ref 'GlueDatabase' 417 | QueryString: !Join 418 | - '' 419 | - - select * from algo_data. 420 | - !Ref 'GlueHistDataDaily' 421 | - ' limit 10;' 422 | Name: HistDataDaily 423 | 424 | AlgoHistDataIntraday: 425 | Type: AWS::Athena::NamedQuery 426 | Properties: 427 | Database: !Ref 'GlueDatabase' 428 | QueryString: !Join 429 | - '' 430 | - - select * from algo_data. 431 | - !Ref 'GlueHistDataIntraday' 432 | - ' limit 10;' 433 | Name: HistDataIntraday 434 | 435 | GlueTableFeedDB: 436 | Type: AWS::Glue::Table 437 | Properties: 438 | CatalogId: !Ref 'AWS::AccountId' 439 | DatabaseName: !Ref 'GlueDatabase' 440 | TableInput: 441 | Description: Deutsche Boerse Xetra PDS 442 | Name: market_feed_deutsche_boerse 443 | Parameters: 444 | classification: csv 445 | has_encrypted_data: false 446 | StorageDescriptor: 447 | Columns: 448 | - Name: isin 449 | Type: string 450 | - Name: mnemonic 451 | Type: string 452 | - Name: securitydesc 453 | Type: string 454 | - Name: securitytype 455 | Type: string 456 | - Name: currency 457 | Type: string 458 | - Name: securityid 459 | Type: bigint 460 | - Name: date 461 | Type: string 462 | - Name: time 463 | Type: string 464 | - Name: startprice 465 | Type: double 466 | - Name: maxprice 467 | Type: double 468 | - Name: minprice 469 | Type: double 470 | - Name: endprice 471 | Type: double 472 | - Name: tradedvolume 473 | Type: bigint 474 | - Name: numberoftrades 475 | Type: bigint 476 | Compressed: false 477 | InputFormat: org.apache.hadoop.mapred.TextInputFormat 478 | Location: !Join 479 | - '' 480 | - - s3:// 481 | - !Ref 'S3Bucket' 482 | - /feed/deutsche-boerse-xetra-pds 483 | OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 484 | SerdeInfo: 485 | SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 486 | Parameters: 487 | field.delim: ',' 488 | skip.header.line.count: '1' 489 | StoredAsSubDirectories: false 490 | PartitionKeys: 491 | - Name: year 492 | Type: bigint 493 | - Name: month 494 | Type: bigint 495 | - Name: day 496 | Type: bigint 497 | TableType: EXTERNAL_TABLE 498 | 499 | LogGroup: 500 | Type: AWS::Logs::LogGroup 501 | Properties: 502 | LogGroupName: algo 503 | RetentionInDays: 7 504 | 505 | Outputs: 506 | VPC: 507 | Description: A reference to 
the created VPC 508 | Value: !Ref VPC 509 | Export: 510 | Name: AlgorithmicTrading-VPC 511 | 512 | PublicSubnets: 513 | Description: A list of the public subnets 514 | Value: !Join [",", [!Ref PublicSubnet1, !Ref PublicSubnet2]] 515 | Export: 516 | Name: AlgorithmicTrading-PublicSubnets 517 | 518 | PrivateSubnets: 519 | Description: A list of the private subnets 520 | Value: !Join [",", [!Ref PrivateSubnet1, !Ref PrivateSubnet2]] 521 | Export: 522 | Name: AlgorithmicTrading-PrivateSubnets 523 | 524 | PublicSubnet1: 525 | Description: A reference to the public subnet in the 1st Availability Zone 526 | Value: !Ref PublicSubnet1 527 | Export: 528 | Name: AlgorithmicTrading-PublicSubnet1 529 | 530 | PublicSubnet2: 531 | Description: A reference to the public subnet in the 2nd Availability Zone 532 | Value: !Ref PublicSubnet2 533 | Export: 534 | Name: AlgorithmicTrading-PublicSubnet2 535 | 536 | PrivateSubnet1: 537 | Description: A reference to the private subnet in the 1st Availability Zone 538 | Value: !Ref PrivateSubnet1 539 | Export: 540 | Name: AlgorithmicTrading-PrivateSubnet1 541 | 542 | PrivateSubnet2: 543 | Description: A reference to the private subnet in the 2nd Availability Zone 544 | Value: !Ref PrivateSubnet2 545 | Export: 546 | Name: AlgorithmicTrading-PrivateSubnet2 547 | 548 | ECR: 549 | Description: A reference to ECR 550 | Value: !Ref ECR 551 | Export: 552 | Name: AlgorithmicTrading-ECR 553 | 554 | S3Bucket: 555 | Description: A reference to S3 Bucket 556 | Value: !Ref S3Bucket 557 | Export: 558 | Name: AlgorithmicTrading-S3Bucket 559 | 560 | ECSHostSecurityGroup: 561 | Description: ECSHostSecurityGroup 562 | Value: !Ref ECSHostSecurityGroup 563 | Export: 564 | Name: AlgorithmicTrading-ECSHostSecurityGroup 565 | 566 | ECSTaskExecutionRole: 567 | Description: ECSTaskExecutionRole 568 | Value: !Ref ECSTaskExecutionRole 569 | Export: 570 | Name: AlgorithmicTrading-ECSTaskExecutionRole 571 | 572 | AlgoExecutionRole: 573 | Description: AlgoExecutionRole ARN 574 | Value: !GetAtt 'AlgoExecutionRole.Arn' 575 | Export: 576 | Name: AlgorithmicTrading-AlgoExecutionRole-ARN 577 | 578 | Cluster: 579 | Description: A reference to the ECS cluster 580 | Value: !Ref ECSCluster 581 | Export: 582 | Name: AlgorithmicTrading-ECSCluster --------------------------------------------------------------------------------
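The reference template above can be deployed from the command line. Because it creates IAM roles and a managed policy, the deployment has to acknowledge IAM capabilities; the stack and bucket names below are placeholders, and the S3 bucket passed as a parameter must already exist in your account:

```sh
# Placeholder stack name and bucket name; adjust to your environment.
aws cloudformation deploy \
  --template-file algo-reference.yaml \
  --stack-name algotrading-reference \
  --parameter-overrides S3Bucket=my-algotrading-data-bucket \
  --capabilities CAPABILITY_IAM

# The exported values (VPC, subnets, ECR repository, ECS cluster, ...) can be read back with:
aws cloudformation list-exports \
  --query "Exports[?starts_with(Name,'AlgorithmicTrading')].[Name,Value]" \
  --output table
```

The exports make the shared resources discoverable to the other stacks and notebooks in this sample.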