"""vpin: Volume-Synchronized Probability of Informed Trading.

Repository layout::

    |-- README.md
    |-- april_trades.csv.gz
    `-- volume_time.py

``april_trades.csv.gz`` is published at
https://raw.githubusercontent.com/jheusser/vpin/HEAD/april_trades.csv.gz

This script (``volume_time.py``) loads the trade tape, keeps the USD
trades, and aggregates price change, traded volume and trade side into
1-minute bars.  The bars are then re-sampled in "volume time" further down.

Written against the current pandas API (``Resampler`` objects with explicit
``.mean()`` / ``.sum()``); the original relied on pre-0.18 pandas behaviour.
"""
import itertools

try:
    from itertools import izip_longest  # Python 2 name
except ImportError:  # Python 3 renamed it; keep the old name importable
    from itertools import zip_longest as izip_longest

import numpy as np
import pandas

# NOTE: the repository ships 'april_trades.csv.gz'; this path expects the
# decompressed file to be present in the working directory.
all_trades = pandas.read_csv('./april_trades.csv', parse_dates=[0], index_col=0)
usd_trades = all_trades[all_trades['d.currency'] == 'USD']

volume = usd_trades['d.amount_int']
trades = usd_trades['d.price_int']


def cleanup(x):
    """Convert one raw amount to a number.

    Strings containing ``'e-'`` (scientific notation with a negative
    exponent) are mapped to 0; everything else is passed through ``float``.

    Raises:
        ValueError / TypeError: if ``x`` cannot be converted by ``float``.
    """
    if isinstance(x, str) and 'e-' in x:
        return 0
    return float(x)


# np.float32 (not the bare name float32, which only exists under pylab).
volume = volume.apply(cleanup).astype(np.float32)

#####
# Encode the trade side numerically: 'bid' -> 0, 'ask' -> 1.  Mapping builds
# a new Series instead of assigning into a slice of usd_trades; any other
# label becomes NaN and is ignored by the per-minute mean below.
typestr = usd_trades['d.type_str'].map({'bid': 0, 'ask': 1})

# Per-minute mean price, differenced to get the 1-minute price change.
trades_1min = trades.resample('1min').mean().diff(1).dropna()
volume_1min = volume.resample('1min').sum()

# assign trade sign to 1 minute time bar by averaging buys and sells and
# taking the more common one
typestr_1min = typestr.astype(np.float32).resample('1min').mean().round()

# Column order is pinned explicitly: the loops that consume these frames read
# the tuples positionally (t[1], t[2]) and expect alphabetical order, which
# dict construction no longer guarantees.
df = pandas.DataFrame(
    {'type': typestr_1min, 'volume': volume_1min},
    columns=['type', 'volume'],
)
df_trades = pandas.DataFrame(
    {'volume': volume_1min, 'trades': trades_1min},
    columns=['trades', 'volume'],
)

# volume time!
def _expand_by_volume(index, values, volumes):
    """Repeat each (timestamp, value) pair once per whole unit of volume.

    Bars whose volume is NaN or zero are skipped; fractional volume is
    truncated by ``int``.  Returns a list of ``(timestamp, value)`` tuples.
    """
    rows = []
    for idx, value, vol in zip(index, values, volumes):
        if np.nan_to_num(vol) == 0.0:
            continue
        # expand over standardised volume: one row per 1-unit trade
        rows.extend([(idx, value)] * int(vol))
    return rows


# Columns are read by name so the result does not depend on the column order
# of df / df_trades.

# side for each standard size trade
delta_p_expanded = _expand_by_volume(df.index, df['type'], df['volume'])
expanded = pandas.DataFrame.from_records(delta_p_expanded, index=0)

#####################
# return distribution for volume time sampling
# volume time!
volume_sample_trades_expanded = _expand_by_volume(
    df_trades.index, df_trades['trades'], df_trades['volume'])
trades_expanded = pandas.DataFrame.from_records(volume_sample_trades_expanded, index=0)

################################


def grouper(n, iterable):
    """Yield successive tuples of up to ``n`` items from ``iterable``.

    The last tuple may be shorter.  ``n`` is coerced with ``int`` because
    ``itertools.islice`` rejects floats and the bucket size in this script
    is stored as ``500.0``.
    """
    size = int(n)
    it = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(it, size))
        if not chunk:
            return
        yield chunk


def _order_imbalance(sides, bucket_size):
    """Return |#ask - #bid| / bucket_size for one bucket of 0/1 sides.

    Entries that are neither 0 nor 1 (e.g. NaN) are not counted, so a bucket
    without any classified trade yields 0.0 instead of raising.
    """
    n_bid = sum(1 for side in sides if side == 0)
    n_ask = sum(1 for side in sides if side == 1)
    return abs(n_ask - n_bid) / float(bucket_size)


# volume in BTC which makes up one bucket
n_bucket_size = 500.0

# find single-period VPIN
# (the final bucket may be partial; it is dropped below via OI[:-1])
OI = [_order_imbalance(chunk, n_bucket_size)
      for chunk in grouper(n_bucket_size, expanded[1])]


# find time boundaries for volume buckets
buckets = []
V = n_bucket_size
running_volume = 0
start_idx = None

for idx in expanded.index:
    if start_idx is None:
        start_idx = idx

    if running_volume >= V:
        buckets.append((start_idx, idx))

        start_idx = None
        running_volume = 0
    running_volume += 1

# find mid time of volume buckets
mid_buckets = [start + (end - start) / 2 for start, end in buckets]

# volume bucket duration
diffs = [end - start for start, end in buckets]

# VPIN: rolling mean of the per-bucket order imbalance over 500 buckets.
# pandas.rolling_mean was removed from pandas; Series.rolling is its replacement.
vpin_df = pandas.Series(OI[:-1], index=mid_buckets, dtype=float).rolling(window=500).mean()
# Per-minute mean price, forward-filled onto the VPIN timestamps.
trades_adj = trades.resample('1min').mean().reindex_like(vpin_df, method='ffill')

#######
## Plot VPIN vs Trades
import matplotlib as mpl
import matplotlib.pyplot as plt  # 'plt' was previously only available under pylab

mpl.rc('font', **{'sans-serif':'Verdana','family':'sans-serif','size':8})
mpl.rcParams['xtick.direction'] = 'out'
mpl.rcParams['ytick.direction'] = 'out'
mpl.rcParams['axes.linewidth'] = 0.75


fig, axes = plt.subplots(nrows=2, ncols=1)
plt.subplots_adjust(hspace=0.5)

vpin_df.plot(ax=axes[0])
axes[0].set_title('VPIN')

trades_adj.plot(ax=axes[1])
axes[1].set_title('Trades')

fig.tight_layout()


#####
## Get mid price series from ticker for the same period

# NOTE(review): 'all_ticker.txt' is not part of the repository tree shown for
# this project; it has to be supplied separately.
all_ticker = pandas.read_csv('./all_ticker.txt', parse_dates=[0], index_col=0)

# .ix was removed from pandas; .loc does the same label-based slice.
ticker_df = all_ticker.loc[vpin_df.index[0]:vpin_df.index[-1]]
# calculate mid from the per-minute mean bid and ask
quotes_1min = ticker_df[['d.bid', 'd.ask']].resample('1min').mean()
ticker_df = (quotes_1min['d.bid'] + quotes_1min['d.ask']) / 2

# align with VPIN
ticker_df = ticker_df.reindex_like(vpin_df, method='ffill')

### plot of return distributions of sampling by volume time (more normal).
# 'plt' is never imported elsewhere in this script (it only exists implicitly
# under pylab), so bring it into scope here.
import matplotlib.pyplot as plt

plt.figure()
# volume-time samples price returns
# ('normed' was removed from matplotlib's hist; 'density' is the replacement)
p1 = trades_expanded[1].hist(density=True, bins=45, alpha=0.3)
# trade-time sampled price returns
p2 = ticker_df.diff(1).hist(density=True, bins=45, alpha=0.3)
p2.legend(['Volume Time', 'Chronological'])

plt.draw()


### plot overlay of VPIN and trades

# Series.ffill() replaces the deprecated fillna(method='ffill').
ax = pandas.DataFrame(
    {'VPIN': vpin_df, 'Price': trades_adj.ffill()}
).plot(secondary_y=['VPIN'])
ax.set_title('Price vs VPIN')
ax.right_ax.set_ylabel('Probability of Informed Trading')
plt.draw()