├── gym_cryptotrading ├── spaces │ ├── __init__.py │ ├── observation.py │ └── action.py ├── envs │ ├── __init__.py │ ├── basicenv.py │ ├── unrealizedPnL.py │ ├── realizedPnL.py │ ├── cryptoenv.py │ └── weightedPnL.py ├── errors.py ├── strings.py ├── __init__.py └── generator.py ├── setup.py ├── LICENSE ├── .gitignore └── README.md /gym_cryptotrading/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_cryptotrading.spaces.action import ActionSpace 2 | from gym_cryptotrading.spaces.observation import ObservationSpace -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='gym_cryptotrading', 4 | version='0.0.1', 5 | install_requires=['gym>=0.2.3', 6 | 'numpy', 7 | 'pandas'] 8 | ) -------------------------------------------------------------------------------- /gym_cryptotrading/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_cryptotrading.envs.realizedPnL import RealizedPnLEnv 2 | from gym_cryptotrading.envs.unrealizedPnL import UnRealizedPnLEnv 3 | from gym_cryptotrading.envs.weightedPnL import WeightedPnLEnv -------------------------------------------------------------------------------- /gym_cryptotrading/errors.py: -------------------------------------------------------------------------------- 1 | from gym.error import Error 2 | 3 | class EnvironmentAlreadyLoaded(Error): 4 | ''' 5 | Raised when user tries to set the parameters of the environment that is 6 | already loaded. 
class ObservationSpace(Space):
    '''
    Observation space of the trading environments.

    An observation is a vector of 4 non-negative values, each bounded above
    by `max_ratio`.
    '''
    # upper bound applied to every entry by `sample` and `contains`
    max_ratio = 3.0

    def __init__(self):
        super(ObservationSpace, self).__init__()

    def sample(self):
        '''
        Draw 4 values uniformly between 0 and `max_ratio`.
        '''
        return np.random.uniform(0, ObservationSpace.max_ratio, 4)

    def contains(self, obs):
        '''
        Tell whether `obs` has 4 entries that all lie in [0, max_ratio].

        `obs` is coerced with `np.asarray`, so lists and tuples are accepted
        as well as numpy arrays.
        '''
        obs = np.asarray(obs)
        # the upper-bound test previously referenced an undefined name `x`
        # and raised NameError for every observation that passed the first
        # two checks
        return bool(
            len(obs) == 4
            and (obs >= 0.0).all()
            and (obs <= ObservationSpace.max_ratio).all()
        )

    def to_jsonable(self, sample_n):
        '''
        Convert a batch of observations into nested plain Python lists.
        '''
        # ndarray has no `to_list` method; the correct name is `tolist`
        return np.array(sample_n).tolist()

    def from_jsonable(self, sample_n):
        '''
        Convert a batch produced by `to_jsonable` back into numpy arrays.
        '''
        return [np.asarray(sample) for sample in sample_n]
class ActionSpace(gym.Space):
    '''
    Discrete action space with three actions.

    `lookup` maps the integer action value to its name:
    0 -> NEUTRAL, 1 -> LONG, 2 -> SHORT.
    '''
    lookup = {
        0: NEUTRAL,
        1: LONG,
        2: SHORT
    }

    def __init__(self):
        super(ActionSpace, self).__init__()

    def sample(self):
        '''
        Draw one of the action values 0, 1 or 2 at random.
        '''
        return np.random.randint(0, 3)

    def contains(self, action):
        '''
        Tell whether `action` is one of the keys of `lookup`.
        '''
        return action in ActionSpace.lookup

    def to_jsonable(self, sample_n):
        '''
        Delegate to the parent space and hand back its result.
        '''
        # the result of the parent call used to be dropped, so callers
        # always received None
        return super(ActionSpace, self).to_jsonable(sample_n)

    def from_jsonable(self, sample_n):
        '''
        Delegate to the parent space and hand back its result.
        '''
        # same missing `return` as in `to_jsonable`
        return super(ActionSpace, self).from_jsonable(sample_n)

    @staticmethod
    def get_action_name(action):
        '''
        Return the name registered for `action` in `lookup`.

        Raises:
            gym.error.InvalidAction: if `action` is not a key of `lookup`.
        '''
        if action in ActionSpace.lookup:
            return ActionSpace.lookup[action]
        raise error.InvalidAction()
class BaseEnv:
    '''
    Abstract Base Class for CryptoTrading Environments.

    Holds the settings every environment shares (history length, horizon,
    trade unit, episode counter, logger) and declares the hooks that a
    concrete environment has to override.
    '''

    # Python 2 style metaclass declaration
    __metaclass__ = ABCMeta

    def __init__(self):
        # default configuration
        self.unit = 5e-4
        self.horizon = 5
        self.history_length = 100

        self.logger = logger
        self.episode_number = 0

    @abstractmethod
    def _get_new_state(self):
        '''To be overridden by subclasses.'''
        raise NotImplementedError

    @abstractmethod
    def _get_reward(self):
        '''To be overridden by subclasses.'''
        raise NotImplementedError

    @abstractmethod
    def _new_random_episode(self):
        '''To be overridden by subclasses.'''
        raise NotImplementedError

    @abstractmethod
    def _reset_params(self):
        '''To be overridden by subclasses.'''
        raise NotImplementedError

    @abstractmethod
    def _set_env_specific_params(self, **kwargs):
        '''To be overridden by subclasses.'''
        raise NotImplementedError

    @abstractmethod
    def _take_action(self, action):
        '''To be overridden by subclasses.'''
        raise NotImplementedError

    @abstractmethod
    def set_params(self, history_length, horizon, unit, **kwargs):
        '''To be overridden by subclasses.'''
        raise NotImplementedError

    def set_logger(self, custom_logger):
        '''
        Replace the logger used by the environment.

        A falsy `custom_logger` (for example `None`) leaves the current
        logger in place.
        '''
        if not custom_logger:
            return
        self.logger = custom_logger
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | .vscode/* 104 | -------------------------------------------------------------------------------- /gym_cryptotrading/envs/unrealizedPnL.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import error 4 
class UnRealizedPnLEnv(CryptoEnv):
    '''
    Trading environment that pays out the unrealized PnL on every step.

    The reward of a step is `(long - short) * unit * diffs[current]`, where
    `long` and `short` count the LONG and SHORT actions taken so far in the
    episode.
    '''
    def __init__(self):
        super(UnRealizedPnLEnv, self).__init__()

    def _reset_params(self):
        '''Clear the position counters and the step counter.'''
        self.long, self.short = 0, 0
        self.timesteps = 0

    def _take_action(self, action):
        '''
        Update the position counters for `action`.

        Raises:
            gym.error.InvalidAction: if `action` is not a known action value.
        '''
        lookup = CryptoEnv.action_space.lookup
        if action not in lookup.keys():
            raise error.InvalidAction()

        # compare by value: `is` only worked because the string constants
        # happened to be the same objects
        action_name = lookup[action]
        if action_name == LONG:
            self.long = self.long + 1
        elif action_name == SHORT:
            self.short = self.short + 1

    def _get_reward(self):
        '''Return the PnL of the current net position for this step.'''
        return (self.long - self.short) * self.unit * self.diffs[self.current]

    def step(self, action):
        '''
        Advance the episode by one trade.

        Returns:
            tuple: `(state, reward, is_terminal, remaining)`, where
            `remaining` is a one-element numpy array holding the fraction of
            trades left in the episode.

        Raises:
            gym.error.ResetNeeded: if the environment was never reset or the
                episode has already used up its horizon.
            gym.error.InvalidAction: if `action` is not a known action value.
        '''
        # value comparison: `is` compares object identity, which stops being
        # true for equal ints outside CPython's small-int cache, so episodes
        # with a large horizon never terminated
        if not self.episode_number or self.timesteps >= self.horizon:
            raise error.ResetNeeded()

        state = self._get_new_state()
        self._take_action(action)
        reward = self._get_reward()

        message = "Timestep {}:==: Action: {} ; Reward: {}".format(
            self.timesteps, CryptoEnv.action_space.lookup[action], reward
        )
        self.logger.debug(message)

        self.timesteps = self.timesteps + 1
        if self.timesteps < self.horizon:
            self.current = self.current + 1
            remaining = float(self.horizon - self.timesteps) / self.horizon
            return state, reward, False, np.array([remaining])

        return state, reward, True, np.array([0.0])
class RealizedPnLEnv(CryptoEnv):
    '''
    Trading environment that pays out the accumulated PnL at episode end.

    Every step adds `(long - short) * unit * diffs[current]` to a running
    total; the total is returned as reward on the last step of the episode
    and `0.0` is returned on every other step.
    '''
    def __init__(self):
        super(RealizedPnLEnv, self).__init__()

    def _reset_params(self):
        '''Clear the position counters, the step counter and the total.'''
        self.long, self.short = 0, 0
        self.timesteps = 0
        self.reward = 0.0

    def _take_action(self, action):
        '''
        Update the position counters for `action`.

        Raises:
            gym.error.InvalidAction: if `action` is not a known action value.
        '''
        lookup = CryptoEnv.action_space.lookup
        if action not in lookup.keys():
            raise error.InvalidAction()

        # compare by value: `is` only worked because the string constants
        # happened to be the same objects
        action_name = lookup[action]
        if action_name == LONG:
            self.long = self.long + 1
        elif action_name == SHORT:
            self.short = self.short + 1

    def _get_reward(self):
        '''
        Accumulate this step's PnL and release the total on the last step.
        '''
        self.reward = self.reward + \
            (self.long - self.short) * self.unit * self.diffs[self.current]
        if self.timesteps == (self.horizon - 1):
            return self.reward
        return 0.0

    def step(self, action):
        '''
        Advance the episode by one trade.

        Returns:
            tuple: `(state, reward, is_terminal, remaining)`, where
            `remaining` is a one-element numpy array holding the fraction of
            trades left in the episode.

        Raises:
            gym.error.ResetNeeded: if the environment was never reset or the
                episode has already used up its horizon.
            gym.error.InvalidAction: if `action` is not a known action value.
        '''
        # value comparison: `is` compares object identity, which stops being
        # true for equal ints outside CPython's small-int cache, so episodes
        # with a large horizon never terminated
        if not self.episode_number or self.timesteps >= self.horizon:
            raise error.ResetNeeded()

        state = self._get_new_state()
        self._take_action(action)
        reward = self._get_reward()

        message = "Timestep {}:==: Action: {} ; Reward: {}".format(
            self.timesteps, CryptoEnv.action_space.lookup[action], reward
        )
        self.logger.debug(message)

        self.timesteps = self.timesteps + 1
        if self.timesteps < self.horizon:
            self.current = self.current + 1
            remaining = float(self.horizon - self.timesteps) / self.horizon
            return state, reward, False, np.array([remaining])

        return state, reward, True, np.array([0.0])
class CryptoEnv(gym.Env, BaseEnv):
    '''
    Shared implementation of the trading environments.

    Owns the data `Generator`, selects a random episode on `reset()` and
    validates the basic parameters. Reward handling (`_take_action`,
    `_get_reward`, `step`) is left to the subclasses.
    '''
    action_space = ActionSpace()
    observation_space = ObservationSpace()
    metadata = {'render.modes': []}

    def __init__(self):
        super(CryptoEnv, self).__init__()
        # created lazily on the first reset, see `_load_gen`
        self.generator = None

    def _get_new_state(self):
        '''Return the observation stored at the current index.'''
        return self.historical_prices[self.current]

    def _load_gen(self):
        '''Create the data generator on first use.'''
        if not self.generator:
            self.generator = Generator(self.history_length, self.horizon)

    def _new_random_episode(self):
        '''
        Start a new episode at a random index of a randomly chosen block.

        Returns:
            tuple: the `history_length` observations that precede the chosen
            start index, and `np.array([1.0])`.

        TODO: In the current setting, the selection of an episode does not follow pure uniform process.
        Need to index every episode and then generate a random index rather than going on multiple levels
        of selection.
        '''
        self._load_gen()
        self._reset_params()
        message_list = []
        self.episode_number = self.episode_number + 1
        message_list.append("Starting a new episode numbered {}".format(self.episode_number))

        # `np.random.randint` excludes its upper bound. Passing `len - 1`
        # meant the last block was never picked and a dataset with a single
        # usable block raised ValueError.
        block_index = np.random.randint(0, len(self.generator.price_blocks))
        message_list.append("Block index selected for episode number {} is {}".format(
                self.episode_number, block_index
            )
        )

        self.diffs = self.generator.diff_blocks[block_index]
        self.historical_prices = self.generator.price_blocks[block_index]

        self.current = np.random.randint(self.history_length,
                                         len(self.historical_prices) - self.horizon)
        message_list.append(
            "Starting index and timestamp point selected for episode number {} is {}:==:{}".format(
                self.episode_number,
                self.current,
                self.generator.timestamp_blocks[block_index][self.current]
            )
        )

        # explicit loop: `map` is meant for building values, and where it
        # is lazy the messages would never be logged
        for message in message_list:
            self.logger.debug(message)

        return self.historical_prices[self.current - self.history_length:self.current], np.array([1.0])

    def _reset_params(self):
        pass

    def _set_env_specific_params(self, **kwargs):
        pass

    def reset(self):
        '''Start a new random episode; see `_new_random_episode`.'''
        return self._new_random_episode()

    def set_params(self, history_length, horizon, unit, **kwargs):
        '''
        Configure the environment before its first reset.

        Args:
            history_length: number of past observations returned by `reset()`.
            horizon: number of trades per episode.
            unit: units of Bitcoin traded each time.
            **kwargs: forwarded to `_set_env_specific_params`.

        Raises:
            EnvironmentAlreadyLoaded: if the generator has already been
                created, that is, after the first reset.
            ValueError: if `history_length < 0`, `horizon < 1` or `unit < 0`.
        '''
        if self.generator:
            raise EnvironmentAlreadyLoaded()

        if history_length < 0 or horizon < 1 or unit < 0:
            raise ValueError(
                'history_length must be >= 0, horizon must be >= 1 and '
                'unit must be >= 0'
            )

        self.history_length = history_length
        self.horizon = horizon
        self.unit = unit  # units of Bitcoin traded each time

        self._set_env_specific_params(**kwargs)
class ExponentiallyWeightedReward:
    '''
    Sliding window over the last `lag` rewards with exponential weights.

    The most recent reward has weight 1 and a reward that is `k` insertions
    old has weight `exp(-k * decay_rate)`. The `reward` property is the
    weighted sum divided by the sum of all `lag` weights.
    '''
    def __init__(self, lag, decay_rate):
        '''
        Args:
            lag: number of rewards kept in the window; must be at least 1.
            decay_rate: exponent applied per step of age.

        Raises:
            ValueError: if `lag` is smaller than 1.
        '''
        # an empty window used to surface later as IndexError in `insert`
        # or as ZeroDivisionError in `reward`
        if lag < 1:
            raise ValueError('lag must be at least 1, got {!r}'.format(lag))

        self.lag = lag
        self.decay_rate = decay_rate

        # `self.rewards`: deque containing unrealized PnL rewards in order of
        #     occurrence, pre-filled with zeros
        # `self.sum`: sum of all the rewards in the deque weighted by their
        #     position
        # `self.denominator`: sum of all the weights assigned to rewards,
        #     used for normalization of the weighted reward
        self.rewards = deque(np.zeros(self.lag, dtype=float))
        self.sum = 0.0
        self.denominator = 0.0
        for i in range(self.lag):
            self.denominator = self.denominator + np.exp(-1 * i * self.decay_rate)

        # both factors are constant for the lifetime of the object
        self._step_decay = np.exp(-1 * self.decay_rate)
        self._oldest_weight = np.exp(-1 * (self.lag - 1) * self.decay_rate)

    def insert(self, reward):
        '''
        Push `reward` into the window and evict the oldest entry.
        '''
        stale_reward = self.rewards.popleft()
        # remove the contribution of the evicted reward, age everything that
        # is left by one step, then add the new reward with weight 1
        self.sum = self.sum - self._oldest_weight * stale_reward
        self.sum = self.sum * self._step_decay
        self.sum = self.sum + reward
        self.rewards.append(reward)

    @property
    def reward(self):
        '''Weighted sum of the window normalized by the sum of the weights.'''
        return self.sum / self.denominator
class WeightedPnLEnv(CryptoEnv):
    '''
    Trading environment rewarding an exponentially weighted unrealized PnL.

    Each step's unrealized PnL is pushed into an
    `ExponentiallyWeightedReward` window and the normalized weighted value
    is returned as the reward.
    '''
    def __init__(self):
        super(WeightedPnLEnv, self).__init__()

        self.decay_rate = 1e-2
        # NOTE(review): the default lag is taken from the horizon at
        # construction time and is not refreshed when `set_params` changes
        # the horizon without passing `lag` - confirm this is intended.
        self.lag = self.horizon

    def _set_env_specific_params(self, **kwargs):
        '''
        Apply the optional `decay_rate` and `lag` keyword parameters.

        Raises:
            ValueError: if `decay_rate` is not positive, or if `lag` is not
                in the range `(0, horizon]`.
        '''
        if DECAY_RATE in kwargs:
            if kwargs[DECAY_RATE] > 0:
                self.decay_rate = kwargs[DECAY_RATE]
            else:
                raise ValueError(INVALID_DECAY_RATE)

        if LAG in kwargs:
            if 0 < kwargs[LAG] <= self.horizon:
                self.lag = kwargs[LAG]
            else:
                raise ValueError(INVALID_LAG)

    def _reset_params(self):
        '''Clear the counters and start a fresh reward window.'''
        self.long, self.short = 0, 0
        self.timesteps = 0

        self.reward = ExponentiallyWeightedReward(self.lag, self.decay_rate)

    def _take_action(self, action):
        '''
        Update the position counters for `action`.

        Raises:
            gym.error.InvalidAction: if `action` is not a known action value.
        '''
        lookup = CryptoEnv.action_space.lookup
        if action not in lookup.keys():
            raise error.InvalidAction()

        # compare by value: `is` only worked because the string constants
        # happened to be the same objects
        action_name = lookup[action]
        if action_name == LONG:
            self.long = self.long + 1
        elif action_name == SHORT:
            self.short = self.short + 1

    def _get_reward(self):
        '''Insert this step's unrealized PnL and return the weighted value.'''
        reward = (self.long - self.short) * self.unit * self.diffs[self.current]
        self.reward.insert(reward)
        return self.reward.reward

    def step(self, action):
        '''
        Advance the episode by one trade.

        Returns:
            tuple: `(state, reward, is_terminal, remaining)`, where
            `remaining` is a one-element numpy array holding the fraction of
            trades left in the episode.

        Raises:
            gym.error.ResetNeeded: if the environment was never reset or the
                episode has already used up its horizon.
            gym.error.InvalidAction: if `action` is not a known action value.
        '''
        # value comparison: `is` compares object identity, which stops being
        # true for equal ints outside CPython's small-int cache, so episodes
        # with a large horizon never terminated
        if not self.episode_number or self.timesteps >= self.horizon:
            raise error.ResetNeeded()

        state = self._get_new_state()
        self._take_action(action)
        reward = self._get_reward()

        message = "Timestep {}:==: Action: {} ; Reward: {}".format(
            self.timesteps, CryptoEnv.action_space.lookup[action], reward
        )
        self.logger.debug(message)

        self.timesteps = self.timesteps + 1
        if self.timesteps < self.horizon:
            self.current = self.current + 1
            remaining = float(self.horizon - self.timesteps) / self.horizon
            return state, reward, False, np.array([remaining])

        return state, reward, True, np.array([0.0])
class Generator:
    '''
    Downloads, caches and slices the per-minute price dataset.

    The sampled dataset is cached as `btc.csv` in the system temporary
    directory. On construction it is split into blocks of consecutive
    minutes, from which the price differences, normalized prices and
    timestamps used by the environments are derived.
    '''
    dataset_path = None
    temp_dir = None

    def __init__(self, history_length, horizon):
        Generator.load_gen()

        self.history_length = history_length
        self.horizon = horizon

        self._load_data()

    @property
    def diff_blocks(self):
        '''Per block: closing price of the next minute minus the current one.'''
        return self._diff_blocks

    @property
    def price_blocks(self):
        '''Per block: each row divided by the row of the previous minute.'''
        return self._price_blocks

    @property
    def timestamp_blocks(self):
        '''Per block: `DateTime_UTC` values aligned with `price_blocks`.'''
        return self._timestamp_blocks

    def _preprocess(self):
        '''
        Read the cached dataset and keep the blocks that are long enough.
        '''
        data = pd.read_csv(Generator.dataset_path)
        message = 'Columns found in the dataset {}'.format(data.columns)
        logger.info(message)
        data = data.dropna()
        # positional access: the row labelled 0 may have been removed by
        # `dropna()`, in which case a label lookup raises KeyError
        start_time_stamp = data['Timestamp'].iloc[0]
        # minutes since the first row minus the row position is constant
        # within a run of consecutive minutes, so it identifies the block
        timestamps = data['Timestamp'].apply(lambda x: (x - start_time_stamp) / 60)
        timestamps = timestamps - range(timestamps.shape[0])
        data.insert(0, 'blocks', timestamps)
        blocks = data.groupby('blocks')
        message = 'Number of blocks of continuous prices found are {}'.format(len(blocks))
        logger.info(message)

        self._data_blocks = []
        distinct_episodes = 0

        for name, indices in blocks.indices.items():
            # Length of the block should exceed the history length and
            # horizon by 1. Extra 1 is required to normalize each price
            # block by the previous time stamp.
            if len(indices) > (self.history_length + self.horizon + 1):
                self._data_blocks.append(blocks.get_group(name))
                # NOTE(review): the original comment says an extra 1 is
                # subtracted here, but the expression adds it. The value is
                # only logged; confirm the intended count.
                distinct_episodes = distinct_episodes + (len(indices) - (self.history_length + self.horizon) + 1 + 1)

        message_list = [
            'Number of usable blocks obtained from the dataset are {}'.format(len(self._data_blocks)),
            'Number of distinct episodes for the current configuration are {}'.format(distinct_episodes),
        ]
        # explicit loop: `map` is meant for building values, and where it
        # is lazy the messages would never be logged
        for message in message_list:
            logger.info(message)

    def _generate_attributes(self):
        '''
        Derive the diff, normalized price and timestamp arrays per block.
        '''
        self._diff_blocks = []
        self._price_blocks = []
        self._timestamp_blocks = []

        for data_block in self._data_blocks:
            block = data_block[['price_close', 'price_low', 'price_high', 'volume']]
            closing_prices = block['price_close']

            diff_block = closing_prices.shift(-1)[:-1].subtract(closing_prices[:-1])

            # currently normalizing the prices by previous prices of the same category
            normalized_block = block.shift(-1)[:-1].truediv(block[:-1])

            # `.values` replaces the deprecated `as_matrix()`
            self._diff_blocks.append(diff_block.values)
            self._price_blocks.append(normalized_block.values)
            self._timestamp_blocks.append(data_block['DateTime_UTC'].values[1:])

        self._data_blocks = None  # free memory

    def _load_data(self):
        self._preprocess()
        self._generate_attributes()

    @staticmethod
    def get_transactions():
        '''
        Download the raw transactions and write the sampled dataset.

        The archive is fetched from `URL`, resampled to one-minute OHLC
        prices plus traded volume and saved to `Generator.dataset_path`.
        '''
        if not Generator.dataset_path:
            Generator.set_dataset_path()

        message = 'Getting latest transactions from {}.'.format(URL) + \
                    '\nThis might take a few minutes depending upon your internet speed.'
        logger.info(message)

        path = os.path.join(Generator.temp_dir, 'coinbaseUSD.csv.gz')
        response = urllib2.urlopen(URL)
        try:
            # the payload is a gzip archive, so it must be written in binary
            # mode; text mode corrupts it on platforms that translate newlines
            with open(path, 'wb') as buffer:
                buffer.write(response.read())
        finally:
            response.close()
        message = 'Latest transactions saved to {}'.format(path)
        logger.info(message)

        # Read the transactions into pandas dataframe
        try:
            with gzip.open(path, 'r') as f:
                d = pd.read_table(f, sep=',', header=None, index_col=0, names=['price', 'volume'])
        finally:
            # do not leave the downloaded archive behind if parsing fails
            os.remove(path)

        # NOTE(review): `fromtimestamp` yields local time although the index
        # is named `DateTime_UTC` - confirm against how the data is used.
        d.index = d.index.map(lambda ts: datetime.datetime.fromtimestamp(int(ts)))
        d.index.names = ['DateTime_UTC']
        p = pd.DataFrame(d['price'].resample('1Min').ohlc())
        p.columns = ['price_open', 'price_high', 'price_low', 'price_close']
        v = pd.DataFrame(d['volume'].resample('1Min').sum())
        v.columns = ['volume']
        p['volume'] = v['volume']
        unix_timestamps = p.index.map(lambda ts: int(time.mktime(ts.timetuple())))
        p.insert(0, 'Timestamp', unix_timestamps)

        p.to_csv(Generator.dataset_path, sep=',')
        message = 'Dataset sampled and saved to {}'.format(Generator.dataset_path)
        logger.info(message)

    @staticmethod
    def update_gen():
        '''Delete the cached dataset, if any, and download a fresh one.'''
        if not Generator.dataset_path:
            Generator.set_dataset_path()

        if os.path.isfile(Generator.dataset_path):
            os.remove(Generator.dataset_path)
        Generator.get_transactions()

    @staticmethod
    def load_gen():
        '''Download the dataset when no cached copy exists.'''
        if not Generator.dataset_path:
            Generator.set_dataset_path()

        # TODO: Need to do sanity check of the sampled dataset
        if not os.path.isfile(Generator.dataset_path):
            message = 'Sampled Dataset not found at {}.'.format(Generator.dataset_path) + \
                        '\nSetting up the environment for first use.'
            logger.info(message)
            Generator.get_transactions()

    @staticmethod
    def set_dataset_path():
        '''Point `dataset_path` at `btc.csv` inside the temporary directory.'''
        if not Generator.temp_dir:
            Generator.set_temp_dir()

        Generator.dataset_path = os.path.join(Generator.temp_dir, 'btc.csv')

    @staticmethod
    def set_temp_dir():
        '''Record the system temporary directory in `temp_dir`.'''
        Generator.temp_dir = tempfile.gettempdir()
9 | 10 | **Contents of this document** 11 | 12 | - [Installation](#installation) 13 | - [Usage](#usage) 14 | - [Basics](#basics) 15 | - [Observation Space](#obs) 16 | - [Action Space](#action) 17 | - [Parameters](#params) 18 | - [Simulator](#simulator) 19 | - [Important Information](#inf) 20 | - Environments 21 | - [Realized PnL Environment](https://github.com/samre12/gym-cryptotrading/wiki/Realized-PnL-Trading-Environment) 22 | - [Unrealized PnL Environment](https://github.com/samre12/gym-cryptotrading/wiki/Unrealized-PnL-Trading-Environment) 23 | - [Weighted Unrealized PnL Environment](https://github.com/samre12/gym-cryptotrading/wiki/Weighted-Unrealized-PnL-Trading-Environment) 24 | - [Examples](#exp) 25 | - [Recent Updates and Breaking Changes](#changes) 26 | 27 | 28 | 29 | ## Installation 30 | 31 | ```bash 32 | git clone https://github.com/samre12/gym-cryptotrading.git 33 | cd gym-cryptotrading 34 | pip install -e . 35 | ``` 36 | 37 | 38 | 39 | ## Usage 40 | 41 | Importing the module into the current session using `import gym_cryptotrading` will register the environments with `gym`, after which they can be used like any other gym environment. 42 | 43 | ### Environments 44 | 45 | - `'RealizedPnLEnv-v0'` 46 | 47 | - `'UnRealizedPnLEnv-v0'` 48 | 49 | - `'WeightedPnLEnv-v0'` 50 | 51 | ```python 52 | import gym 53 | import gym_cryptotrading 54 | env = gym.make('RealizedPnLEnv-v0') 55 | ``` 56 | 57 | - Use `env.reset()` to start a new random episode. 58 | 59 | - returns the history of observations prior to the starting point of the episode, together with the fractional remaining trades, which is `[1.0]` at the start of the episode. See [Parameters](#params) for more information. 60 | 61 | ```python 62 | state = env.reset() # use state to make initial prediction 63 | ``` 64 | 65 | **Note:** Make sure to reset the environment before first use, otherwise `gym.error.ResetNeeded()` will be raised. 66 | 67 | - Use `env.step(action)` to take one step in the environment. 
68 | 69 | - returns `(observation, reward, is_terminal, fractional_remaining_trades)` in respective order 70 | 71 | ```python 72 | observation, reward, is_terminal, remaining_trades = env.step(action) 73 | ``` 74 | 75 | **Note:** Calling `env.step(action)` after the terminal state is reached will raise `gym.error.ResetNeeded()`. 76 | 77 | - With the current implementation, the environment does not support `env.render()`. 78 | 79 | Setting the logging level of `gym` using `gym.logger.set_level(level)` to a value less than or equal to 10 lets you track all the logs (`debug` and `info` levels) generated by the environment.
80 | These include human readable timestamps of Bitcoin prices used to simulate an episode.
81 | For more information on `gym.logger` and setting logging levels, visit [**here**](https://github.com/openai/gym/blob/293eea787a662f501b0e4aab512d3769e830ece2/gym/logger.py#L11). 82 | 83 | **Note:** *Custom loggers can also be provided to environments using* `env.env.set_logger(custom_logger)` 84 | 85 | 86 | 87 | 88 | ## Basics 89 | 90 | 91 | 92 | ### Observation Space 93 | 94 | - Observation at a time step is the relative `(closing, lowest, highest, volume)` of Bitcoin in the corresponding minute interval. 95 | 96 | - Since the price of Bitcoin varies from a few dollars to 15K dollars, the observation for time step i + 1 is normalized by the prices at time instant i. 97 | 98 | Each entry in the observation is the ratio of *increase (value greater than 1.0)* or *decrease (value less than 1.0)* from the price at the previous time instant. 99 | 100 | 101 | 102 | ### Action Space 103 | 104 | At each time step, the agent can either go **LONG** or **SHORT** in a `unit` (for more information, refer to [Parameters](#params)) of Bitcoin or can stay **NEUTRAL**.
105 | Action space thus becomes *discrete* with three possible actions: 106 | 107 | - `NEUTRAL` corresponds to `0` 108 | 109 | - `LONG` corresponds to `1` 110 | 111 | - `SHORT` corresponds to `2` 112 | 113 | **Note:** Use `env.action_space.get_action_name(action)` to look up action names corresponding to their respective values. 114 | 115 | 116 | 117 | ### Parameters 118 | 119 | The basic environment is characterized by these parameters: 120 | 121 | - `history_length` is the lag in the observations that is used for the state representation of the trading agent.
122 | 123 | - every call to `env.reset()` returns a numpy array of shape `(history_length,) + shape(observation)` that corresponds to observations of length `history_length` prior to the starting point of the episode. 124 | 125 | - trading agent can use the returned array to predict the first action 126 | 127 | - defaults to `100`. 128 | 129 | - supplied value must be greater than or equal to `0` 130 | 131 | - `horizon`, alternatively **episode length**, is the number of trades that the agent does in a single episode 132 | 133 | - defaults to `5`. 134 | 135 | - supplied value must be greater than `0` 136 | 137 | - `unit` is the fraction of Bitcoin that can be traded in each time step 138 | 139 | - defaults to `5e-4`. 140 | 141 | - supplied value must be greater than `0` 142 | 143 | ### Usage 144 | 145 | ```python 146 | env = gym.make('RealizedPnLEnv-v0') 147 | env.env.set_params(history_length, horizon, unit) 148 | ``` 149 | 150 | **Note:** parameters can only be set before the first reset of the environment, that is, before the first call to `env.reset()`; otherwise `gym_cryptotrading.errors.EnvironmentAlreadyLoaded` will be raised. 151 | 152 | Some environments contain their own specific parameters due to the nature of their reward function.
153 | These parameters can be passed using `env.env.set_params(history_length, horizon, unit, **kwargs)` as keyword arguments alongside setting *history length*, *horizon* and *unit*. 154 | 155 | 156 | 157 | ### Simulator 158 | 159 | **Dataset** 160 | 161 | Per minute Bitcoin series is obtained by modifying the procedure mentioned in [this](https://github.com/philipperemy/deep-learning-bitcoin) repository. Transactions in the *Coinbase* exchange are sampled to generate the Bitcoin price series. 162 | 163 | - Dataset for per minute prices of Bitcoin is not continuous and complete due to the downtime of the exchanges. 164 | 165 | - Current implementation does not make any assumptions about the missing values. 166 | 167 | - It rather finds continuous blocks with lengths greater than `history_length + horizon + 1` and uses them to simulate episodes. This avoids any discrepancies in results due to random substitution of missing values 168 | 169 | Sample logs generated by the simulator while preprocessing the dataset: 170 | 171 | ``` 172 | INFO: Columns found in the dataset Index([u'DateTime_UTC', u'Timestamp', u'price_open', u'price_high', 173 | u'price_low', u'price_close', u'volume'], 174 | dtype='object') 175 | INFO: Number of blocks of continuous prices found are 58880 176 | INFO: Number of usable blocks obtained from the dataset are 1651 177 | INFO: Number of distinct episodes for the current configuration are 838047 178 | ``` 179 | 180 | 181 | 182 | ## Important Information 183 | 184 | Upon first use, the environment downloads the latest transactions dataset from the exchange, which is then cached in the *temporary directory* of the operating system for future use.
185 | 186 | - A user can also update the latest transactions dataset by the following code: 187 | 188 | ```python 189 | from gym_cryptotrading.generator import Generator 190 | Generator.update_gen() 191 | ``` 192 | 193 | - `update_gen` should be called prior to **first reset** of the environment to reflect the latest transactions in it 194 | 195 | - If you are running the environment behind a proxy, export suitable **http proxy settings** to allow the environment to download transactions from the exchange 196 | 197 | 198 | 199 | ## Examples 200 | Coming soon. 201 | 202 | 203 | 204 | ## Recent Updates and Breaking Changes 205 | 206 | Listing changes from [**`b9af98db728230569a18d54dcfa87f7337930314`**](https://github.com/samre12/gym-cryptotrading/commit/b9af98db728230569a18d54dcfa87f7337930314) commit. Visit [**here**](https://github.com/samre12/gym-cryptotrading/tree/b9af98db728230569a18d54dcfa87f7337930314) to browse the repository with head at this commit. 207 | 208 | - Added support for trading environments with **Realized PnL** and **Weighted Unrealized PnL** reward functions 209 | 210 | - Renamed `cryptotrading.py` to `unrealizedPnL.py` to emphasize the specific reward function of the environment 211 | 212 | - Added support for setting custom logger for an environment using `env.env.set_logger(custom_logger)` 213 | 214 | - Updated environments to output the number of remaining trades on each call to `env.step(action=)` 215 | 216 | ### Breaking Changes 217 | 218 | - Environment with **Unrealized PnL** reward function is now built using `env = gym.make('UnRealizedPnLEnv-v0')` rather than `env = gym.make('CryptoTrading-v0')` 219 | 220 | - Instead of `remaining_trades`, `env.step(action)` now outputs `np.array([fractional_remaining_trades])`. This is to take into account more supplementary information (like **technical indicators**) in the future --------------------------------------------------------------------------------