├── backtesting ├── README.MD ├── rank_function.R ├── backtest_function.R ├── run_corr_function.R ├── portfolio_template.R └── .Rhistory ├── rfortraders ├── Chapter_01 │ └── code.R ├── Chapter_03 │ ├── strategy.xlsx │ ├── sample_json_file.json │ └── code.R ├── Chapter_11 │ ├── add_2_file.cpp │ └── code.R ├── README.md ├── LICENSE ├── Errata.txt ├── Chapter_04 │ └── code.R ├── Chapter_05 │ └── code.R ├── Chapter_09 │ └── code.R ├── Chapter_10 │ └── code.R ├── Chapter_08 │ └── code.R ├── Chapter_02 │ └── code.R ├── Chapter_06 │ └── code.R └── Chapter_07 │ └── code.R ├── README.md ├── fb ├── density.png └── bday2016.R ├── extrema ├── output.PNG ├── README.md ├── main.R ├── load.R ├── inflection_points.R └── functions.R ├── bar_charts ├── .DS_Store ├── cqg_example.png ├── Screen Shot 2015-02-27 at 7.27.11 PM.png ├── Screen Shot 2015-02-27 at 7.27.21 PM.png ├── README.md~ ├── README.md ├── GCQ 2014-07-03.csv └── bar_charts.R ├── survivorship ├── dpdf.PNG ├── metadata.txt ├── rBBG_splits.R ├── README.md ├── rBBG_prices.R └── rBBG_survivors.R ├── package_mgmt ├── sn_1.0-0.tar.gz └── install_packages.R ├── simulation ├── README.md └── GBM_20140120.R ├── systematic_investor ├── dtw_20_30.PNG ├── dtw_20_60.PNG ├── dtw_20_90.PNG ├── euclid_20_30.PNG ├── euclid_20_60.PNG ├── euclid_20_90.PNG └── time_series_matching.R ├── my_functions ├── rank_function.R ├── README.md ├── trading_calendar.R ├── percent_rank.R └── run_corr_function.R ├── bloomberg └── rBBG.R ├── varadi └── varadi_chaos.R └── indicators └── indicators.R /backtesting/README.MD: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rfortraders/Chapter_01/code.R: -------------------------------------------------------------------------------- 1 | # This chapter contains no code snippets -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # R 2 | a collection of R projects, mostly relating to finance 3 | -------------------------------------------------------------------------------- /fb/density.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/fb/density.png -------------------------------------------------------------------------------- /extrema/output.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/extrema/output.PNG -------------------------------------------------------------------------------- /bar_charts/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/bar_charts/.DS_Store -------------------------------------------------------------------------------- /survivorship/dpdf.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/survivorship/dpdf.PNG -------------------------------------------------------------------------------- /bar_charts/cqg_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/bar_charts/cqg_example.png -------------------------------------------------------------------------------- /package_mgmt/sn_1.0-0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/package_mgmt/sn_1.0-0.tar.gz -------------------------------------------------------------------------------- /simulation/README.md: -------------------------------------------------------------------------------- 1 | ######GBM 2 | Geometric Brownian Motion, simulates a stock's random walk 3 | -------------------------------------------------------------------------------- /systematic_investor/dtw_20_30.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/dtw_20_30.PNG -------------------------------------------------------------------------------- /systematic_investor/dtw_20_60.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/dtw_20_60.PNG -------------------------------------------------------------------------------- /systematic_investor/dtw_20_90.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/dtw_20_90.PNG -------------------------------------------------------------------------------- /rfortraders/Chapter_03/strategy.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/rfortraders/Chapter_03/strategy.xlsx -------------------------------------------------------------------------------- /systematic_investor/euclid_20_30.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/euclid_20_30.PNG -------------------------------------------------------------------------------- /systematic_investor/euclid_20_60.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/euclid_20_60.PNG -------------------------------------------------------------------------------- /systematic_investor/euclid_20_90.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/euclid_20_90.PNG -------------------------------------------------------------------------------- /bar_charts/Screen Shot 2015-02-27 at 7.27.11 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/bar_charts/Screen Shot 2015-02-27 at 7.27.11 PM.png -------------------------------------------------------------------------------- /bar_charts/Screen Shot 2015-02-27 at 7.27.21 PM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geoquant/R/HEAD/bar_charts/Screen Shot 2015-02-27 at 7.27.21 PM.png -------------------------------------------------------------------------------- /backtesting/rank_function.R: -------------------------------------------------------------------------------- 1 | ## Ranking function 2 | ## N.B.: rank = 1 for max(x) 3 | ranking <- function(x){ 4 | r <- as.xts(t(apply(x, 1, rank, na.last = "keep"))) 5 | return(r) 6 | } 7 | -------------------------------------------------------------------------------- /my_functions/rank_function.R: -------------------------------------------------------------------------------- 1 | ## Ranking function 2 | ## N.B.: rank = 1 for max(x) 3 | ranking <- function(x){ 4 | r <- as.xts(t(apply(x, 1, rank, na.last = "keep"))) 5 | return(r) 6 | } 7 | -------------------------------------------------------------------------------- /extrema/README.md: -------------------------------------------------------------------------------- 1 | #Extrema 2 | Comparable methods for determining global and local extrema: a simple approach vs. the pastecs turnpoint function 3 | 4 | ![alt text] (https://github.com/geoquant/R/blob/master/extrema/output.PNG "Maxima and Minima") 5 | -------------------------------------------------------------------------------- /my_functions/README.md: -------------------------------------------------------------------------------- 1 | #####percent_rank.R 2 | percentrank-like functionality from Excel, in R 3 | 4 | 5 | #####rank_function.R 6 | general ranking function for rows 7 | 8 | 9 | #####run_corr_function.R 10 | running correlation, with window length n 11 | -------------------------------------------------------------------------------- /rfortraders/Chapter_11/add_2_file.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | long add_2_cpp(long max_value) { 6 | long sum = 0; 7 | for(long i = 1; i <= max_value; ++i) { 8 | sum = sum + i; 9 | } 10 | return sum; 11 | } -------------------------------------------------------------------------------- /rfortraders/Chapter_03/sample_json_file.json: -------------------------------------------------------------------------------- 1 | { 2 | "CVX": 3 | { 4 | "Currency": "USD", 5 | "Sector": "Basic Materials", 6 | "Industry": "Major Integrated Oil & Gas" 7 | }, 8 | "GOOG": 9 | { 10 | "Currency": "USD", 11 | "Sector": "Technology", 12 | "Industry": "Internet Information Providers" 13 | } 14 | } -------------------------------------------------------------------------------- /extrema/main.R: -------------------------------------------------------------------------------- 1 | rm(list=ls(all=TRUE)) 2 | 3 | library(xts) 4 | library(pastecs) 5 | library(quantmod) 6 | 7 | WD <- setwd("C:/Users/jlappen/desktop/extrema") 8 | WD 9 | 10 | # load R scripts ---------------------------------------------------------- 11 | source("load.R") 12 | source("functions.R") 13 | source("inflection_points.R") 14 | 15 | -------------------------------------------------------------------------------- /rfortraders/README.md: -------------------------------------------------------------------------------- 1 | # Quantitative Trading with R: Understanding Mathematical and Computational Tools from a Quant's Perspective 2 | 3 | This repository contains errata and R code from the book. 4 | The book can be purchased on Amazon here: http://amzn.to/1DDNIn1 5 | The code is provided under the MIT [license](LICENSE). 6 | 7 | Please submit any corrections to this github repo. 8 | For further questions, email me here: rfortraders@gmail.com 9 | 10 | -------------------------------------------------------------------------------- /bar_charts/README.md~: -------------------------------------------------------------------------------- 1 | #bar_charts 2 | This is an example of reproducing CQG-like stock charts with some DeMark Indicators. Please note that there is absolutely zero DeMark data in the provided price series (GCQ is Gold, not to be confused with the charting platform CQG), and each indicator value that appears in the image below has been hard-coded by me. This is an exercise in plotting with ggplot2, and is not intended to be interpreted as any investing advice. 3 | 4 | ![alt text](https://github.com/geoquant/R/blob/master/bar_charts/Screen%20Shot%202015-02-27%20at%207.27.21%20PM.png "A DeMark Chart") 5 | -------------------------------------------------------------------------------- /bar_charts/README.md: -------------------------------------------------------------------------------- 1 | #bar_charts 2 | This is an example of reproducing CQG-like stock charts with some DeMark Indicators. Please note that there is absolutely zero DeMark data in the provided price series (GCQ is Gold, not to be confused with the charting platform CQG), and each indicator value that appears in the image below has been hard-coded by me. This is an exercise in plotting with ggplot2, and is not intended to be interpreted as any investing advice. 3 | 4 | ![alt text](https://github.com/geoquant/R/blob/master/bar_charts/Screen%20Shot%202015-02-27%20at%207.27.21%20PM.png "A DeMark Chart") 5 | 6 | This is an alternate output that looks like an old-school Bloomberg terminal: 7 | ![alt text](https://github.com/geoquant/R/blob/master/bar_charts/Screen%20Shot%202015-02-27%20at%207.27.11%20PM.png) 8 | -------------------------------------------------------------------------------- /extrema/load.R: -------------------------------------------------------------------------------- 1 | # read files -------------------------------------------------------------- 2 | load.data <- list.files(WD,pattern="*.csv",full.names=T) 3 | 4 | all.data <- lapply(load.data, read.csv) 5 | all.xts <- lapply(all.data, function(x) 6 | xts(x[,2:ncol(x)],as.Date(as.character(x[,1])))) 7 | 8 | 9 | 10 | # OHLC lists -------------------------------------------------------------- 11 | open.prices1 <- lapply(all.xts, function(x) x[,1]) 12 | high.prices1 <- lapply(all.xts, function(x) x[,2]) 13 | low.prices1 <- lapply(all.xts, function(x) x[,3]) 14 | close.prices1 <- lapply(all.xts, function(x) x[,4]) 15 | 16 | 17 | 18 | # OHLC data frames -------------------------------------------------------- 19 | open.prices2 <- do.call(cbind,open.prices1) 20 | high.prices2 <- do.call(cbind,high.prices1) 21 | low.prices2 <- do.call(cbind,low.prices1) 22 | close.prices2 <- do.call(cbind,close.prices1) -------------------------------------------------------------------------------- /survivorship/metadata.txt: -------------------------------------------------------------------------------- 1 | Two Folders to investigate: 2 | Unaligned 3 | Unassigned 4 | 5 | 6 | The first folder contains 62 Tickers that have unequal dimensions of OHLC data 7 | That is to day, there were instances where there were more close observations, than say, opens. Or any other combination. There is no general pattern. 8 | 9 | ------------------------------------------------------------------------------------------------ 10 | 11 | 12 | 3 Tickers yielded NULL data: 13 | W UN Equity 14 | SXCLD UQ Equity 15 | CNXT UQ Equity 16 | 17 | 18 | We believe these should be mapped to: 19 | 1051665D US Equity 20 | SXCL US Equity 21 | 1040983D UQ Equity 22 | 23 | 24 | 25 | ------------------------------------------------------------------------------------------------ 26 | 27 | 3 Tickers Have no OHL data available, just the close: 28 | 292356Z US Equity 29 | 601964Q US Equity 30 | SVFI US Equity -------------------------------------------------------------------------------- /bloomberg/rBBG.R: -------------------------------------------------------------------------------- 1 | rm(list=ls(all=TRUE)) 2 | Sys.setenv(JAVA_HOME="C:\\Program Files\\Java\\jre7") 3 | 4 | library(Rbbg) 5 | library(xts) 6 | 7 | conn <- blpConnect(throw.ticker.errors = FALSE, log.level = "finest") 8 | 9 | symbols <- c("SPX Index", "USGG10YR Index", "USDJPY Curncy", "USDEUR Curncy") 10 | fields <- c("PX_OPEN", "PX_HIGH", "PX_LOW", "PX_LAST") 11 | start_date <- as.Date("1988-01-01") 12 | end_date <- as.Date("2015-05-11") 13 | 14 | bbg2xts <- function(symbols, ...) { 15 | bbg <- bdh(conn, symbols, ...) 16 | bbg$date <- NULL 17 | as.xts(bbg, order.by = as.Date(rownames(bbg))) 18 | } 19 | 20 | BBG_list <- lapply(symbols, bbg2xts, fields, start_date, end_date) 21 | names(BBG_list) <- symbols 22 | 23 | BBGdf <- lapply(BBG_list, "[", "PX_LAST") 24 | 25 | 26 | # export ------------------------------------------------------------------ 27 | #for(i in seq_along(BBG_list)) { 28 | # filename <- paste(names(BBG_list)[i], ".csv") 29 | # write.csv(BBG_list[[i]], filename) 30 | #} 31 | 32 | -------------------------------------------------------------------------------- /survivorship/rBBG_splits.R: -------------------------------------------------------------------------------- 1 | rm(list=ls(all=TRUE)) 2 | 3 | library(Rbbg) 4 | library(xts) 5 | 6 | start_time <- Sys.time() 7 | 8 | # connect to bloomberg ---------------------------------------------------- 9 | conn <- blpConnect(throw.ticker.errors = FALSE,log.level = "finest") 10 | 11 | 12 | securities <- sort_tickers 13 | fields <- c("EQY_DVD_HIST_SPLITS") 14 | start_date <- as.Date("1999-11-01") 15 | end_date <- as.Date("2014-12-05") 16 | 17 | bbg2df <- function(security, ...) { 18 | bbg <- bds(conn, security, ...) 19 | as.data.frame(bbg) 20 | } 21 | 22 | BBGList <- lapply(securities, bbg2df, fields) 23 | names(BBGList) <- securities 24 | 25 | end_time <- Sys.time() 26 | end_time - start_time 27 | 28 | 29 | # write out --------------------------------------------------------------- 30 | setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/split_data/raw_data") 31 | files <- paste(securities,".split.csv",sep="") 32 | lapply(seq_along(BBGList), function(i){ 33 | write.zoo(BBGList[[i]], files[i],sep=",") 34 | }) 35 | 36 | -------------------------------------------------------------------------------- /backtesting/backtest_function.R: -------------------------------------------------------------------------------- 1 | ## backtesting function 2 | backtest_engine <- function(strategy, strategy_name, monthly_rets){ 3 | 4 | # allocation matrix 5 | cash <- ifelse(rowSums(strategy) == 0, 1, 1 - rowSums(strategy)) 6 | alloc_matrix <- cbind(strategy, cash) 7 | 8 | ## trade matrix 9 | prelim_trade <- alloc_matrix 10 | prelim_trade[is.na(prelim_trade)] <- 0 11 | trade_matrix <- prelim_trade - lag(prelim_trade, 1) 12 | 13 | ## friction matrix 14 | only_tickers <- trade_matrix[,1:ncol(strategy)] 15 | fric_matrix <- cbind(abs(only_tickers) * friction_pct, 0) 16 | wheninvested_rets <- as.numeric(monthly_rets) * lag(alloc_matrix, 1) - fric_matrix 17 | 18 | ## Calculate portfolio returns vector 19 | port_rets <- rowSums(wheninvested_rets) 20 | port_rets_xts <- xts(port_rets, order.by = index(strategy)) 21 | port_returns <- port_rets_xts[is.na(port_rets_xts) == FALSE] 22 | colnames(port_returns) <- strategy_name 23 | 24 | list_output <- list(allocation_matrix = alloc_matrix, 25 | returns = wheninvested_rets, 26 | portfolio_returns = port_returns) 27 | list_output 28 | } 29 | -------------------------------------------------------------------------------- /rfortraders/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Folk Creations, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /my_functions/trading_calendar.R: -------------------------------------------------------------------------------- 1 | # 2 | # Get a vector of trading days. These are week days, with holidays removed. The trading days 3 | # are relative to the New York Stock Exchange (NYSE). 4 | # 5 | # startDate and endDate are Date objects. The function returns a date ordered vector of Date 6 | # objects which are the trading days. 7 | # 8 | # This function requires the timeDate package 9 | # 10 | # Credit to Ian Kaplan, I modified syntax styling and renamed the function (tradingCalendar) 11 | # Source: http://www.bearcave.com/finance/random_r_hacks/ 12 | trading_calendar <- function( startDate, endDate) 13 | { 14 | require(timeDate) 15 | timeSeq <- timeSequence(from = as.character(startDate), 16 | to = as.character(endDate), 17 | by="day", format = "%Y-%m-%d", 18 | zone = "NewYork", FinCenter = "America/New_York") 19 | 20 | ix <- as.logical(isWeekday(timeSeq, wday = 1:5)) 21 | tradingDays <- timeSeq[ix] 22 | startYear <- as.POSIXlt(startDate)$year + 1900 23 | endYear <- as.POSIXlt(endDate)$year + 1900 24 | 25 | tradingDays.dt <- as.Date(tradingDays) 26 | hol <- as.Date(holidayNYSE(startYear:endYear)) 27 | ix <- which(tradingDays.dt %in% hol) 28 | tradingDays.dt <- tradingDays.dt[-ix] 29 | return(tradingDays.dt) 30 | } # tradingCalendar 31 | -------------------------------------------------------------------------------- /survivorship/README.md: -------------------------------------------------------------------------------- 1 | The scripts in this directory are utilized, in conjuction with a Bloomberg Terminal, to download the constituents of a stock index for a range of dates. This data collection is a prerequisite for any index-level analysis that aims not to be biased by survivorship. For example, these scripts reveal that between 1999-11-01 and 2014-12-05 that there were a total of 1082 companies that participated in the SP 500. Knowing when these companies' returns attributed to the index would be helpful to accurately understand the performance of the overall index. 2 | 3 | The output of these scripts is a singular matrix of 1s and 0s, that can then be multiplied by an equivallently sized matrix of price data. The number of rows equals the number of days between the dates, and number of columns equals the total number of companies. The sum of each of these rows would be 500, given that a 1 indicates participation in the index and a 0 represents a company out of the index. An interesting feature of the SP 500 data is that there have been two instances when there was only 499 consistuents and those periods begin in: 1999-11 and 2008-09. 4 | 5 | ###### rBBG_survivors.R 6 | Downloads the company name and date of participation in an index. 7 | 8 | ###### rBBG_prices.R 9 | Downloads the actual OHLC price data for each constituent, given the output from rBBG_survivors.R 10 | 11 | ###### rBBG_splits.R 12 | Downloads the date and ratio amount for a stock split. 13 | 14 | These are my DPDF data settings on the terminal. 15 | ![alt text] (https://github.com/geoquant/R/blob/master/survivorship/dpdf.PNG "{DPDF}") 16 | -------------------------------------------------------------------------------- /my_functions/percent_rank.R: -------------------------------------------------------------------------------- 1 | # Function to calculate percentrank with a certain interval "gap" 2 | # Input: "scoreVec" is a vector of scores 3 | # "gap" is any speceficied interval 4 | # output: vector of percentrank 5 | percentrank <- function(scoreVec, gap) 6 | { 7 | scoreVec <- as.vector(scoreVec) 8 | prank <- NULL 9 | 10 | for(i in 1:length(scoreVec)) 11 | { 12 | if(gap < length(scoreVec)) 13 | { 14 | if(i <= (length(scoreVec) - gap)) 15 | { 16 | testScore <- scoreVec[i:(i + gap - 1)] 17 | temp_data <- data.frame(testScore, score.rank = rank(testScore, ties.method = "min")) 18 | score <- unique(temp_data[temp_data$testScore == scoreVec[i], "score.rank"]) 19 | prank[i] <- as.integer((score - 1) / (gap - 1) * 1000) / 1000 20 | rm(temp_data) ; rm(score) 21 | } 22 | if(i > (length(scoreVec) - gap)) 23 | { 24 | testScore <- scoreVec[i:length(scoreVec)] 25 | temp_data <- data.frame(testScore, score.rank = rank(testScore, ties.method = "min")) 26 | score <- unique(temp_data[temp_data$testScore == scoreVec[i], "score.rank"]) 27 | prank[i] <- as.integer((score - 1) / (gap - 1) * 1000) / 1000 28 | rm(temp_data) ; rm(score) 29 | } 30 | } 31 | if(gap == length(scoreVec)) 32 | { 33 | temp_data <- data.frame(scoreVec, score.rank = rank(scoreVec, ties.method = "min")) 34 | score <- unique(temp_data[temp_data$scoreVec == scoreVec[i], "score.rank"]) 35 | prank[i] <- as.integer((score - 1) / (gap - 1) * 1000) / 1000 36 | rm(temp_data) ; rm(score) 37 | } 38 | } 39 | prank 40 | } -------------------------------------------------------------------------------- /varadi/varadi_chaos.R: -------------------------------------------------------------------------------- 1 | rm(list=ls()) 2 | 3 | library(quantmod) 4 | library(PerformanceAnalytics) 5 | 6 | symbol <- "SPY" 7 | data <- new.env() 8 | getSymbols(symbol, from = "2005-01-01", env = data,auto.assign = TRUE) 9 | 10 | data_high <- data[[symbol]][, paste(symbol, ".High", sep = "")] 11 | data_low <- data[[symbol]][, paste(symbol, ".Low", sep = "")] 12 | data_close <- data[[symbol]][, paste(symbol, ".Close", sep = "")] 13 | data_ad <- data[[symbol]][, paste(symbol, ".Adjusted", sep = "")] 14 | data_ret <- diff(data_ad) / data_ad[-length(data_ad)] 15 | 16 | data_high <- data_high * as.numeric(data_ad)/as.numeric(data_close) 17 | data_low <- data_low * as.numeric(data_ad)/as.numeric(data_close) 18 | data_range <- data_high - data_low 19 | data_range1 <- rollapply(data_range, 10, sum, fill = FALSE) 20 | data_range2 <- rollapply(data_high, 10, max, fill = FALSE) - rollapply(data_low, 10, min, fill = FALSE) 21 | 22 | tmp <- data_range1 / data_range2 23 | tmp <- SMA(tmp, 60) 24 | tmp_z <- (tmp - SMA(tmp, 252)) / rollapply(tmp, 252, sd) 25 | tmp_p <- pnorm(tmp_z) 26 | colnames(tmp_p) <- c("Choas.Stability.Metric") 27 | 28 | 29 | 30 | # Returns Chart ----------------------------------------------------------- 31 | chaos_regime <- ifelse(tmp_p > 0.5, 1, 0) 32 | chaos_return <- chaos_regime * data_ret 33 | 34 | stable_regime <- ifelse(tmp_p < 0.5, 1, 0) 35 | stable_return <- stable_regime * data_ret 36 | 37 | all_returns <- cbind(stable_return, chaos_return) 38 | names(all_returns) <- c("Stable", "Chaos") 39 | 40 | par(mfrow=c(1, 1)) 41 | chart.CumReturns(as.data.frame(all_returns), wealth.index = TRUE, 42 | legend.loc = "topleft") 43 | 44 | 45 | -------------------------------------------------------------------------------- /survivorship/rBBG_prices.R: -------------------------------------------------------------------------------- 1 | rm(list=ls(all=TRUE)) 2 | 3 | library(Rbbg) 4 | library(xts) 5 | 6 | start_time <- Sys.time() 7 | 8 | # connect to bloomberg ---------------------------------------------------- 9 | conn <- blpConnect(throw.ticker.errors = FALSE,log.level = "finest") 10 | 11 | 12 | # if you ran the rBBG_survivors code, you can use sort_tickers: 13 | securities <- "SPX Index" 14 | fields <- c("PX_OPEN","PX_HIGH","PX_LOW","PX_LAST") 15 | start_date <- as.Date("1982-04-21") 16 | end_date <- as.Date("2014-12-05") 17 | 18 | bbg2xts <- function(security, ...) { 19 | bbg <- bdh(conn, security, ...) 20 | bbg$date <- NULL 21 | as.xts(bbg, order.by = as.Date(rownames(bbg))) 22 | } 23 | 24 | BBGList <- lapply(securities, bbg2xts, fields, start_date, end_date) 25 | names(BBGList) <- securities 26 | 27 | BBGdf <- do.call("cbind",BBGList) 28 | #colnames(BBGdf) <- securities 29 | #weeklyData <- BBGdf[endpoints(BBGdf, "weeks")] 30 | write.zoo(BBGdf,file="SPX_data.csv", sep=",") 31 | 32 | 33 | end_time <- Sys.time() 34 | end_time - start_time # for the SP500, 1999-2014, this takes ~30 mins 35 | 36 | # determine if the download accurately collected verything 37 | length(securities) 38 | ncol(BBGdf)/4 39 | 40 | 41 | 42 | # output ------------------------------------------------------------------ 43 | # change the working directory and the suffix in the 'files' object, 44 | # according to the setting established on the terminal: 45 | setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/dpdf_OFF") 46 | files <- paste(securities,".ON.csv",sep="") 47 | lapply(seq_along(BBGList), function(i){ 48 | write.zoo(BBGList[[i]], files[i],sep=",") 49 | }) 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /backtesting/run_corr_function.R: -------------------------------------------------------------------------------- 1 | ## Average Correlation function 2 | runningCor <- function(idseq,colIndicator,data,n) 3 | { 4 | x <- data[,colIndicator$Var1[idseq]] 5 | y <- data[,colIndicator$Var2[idseq]] 6 | runcor_est <- runCor(x,y,n) 7 | return(runcor_est) 8 | } 9 | 10 | corIndicator <- function(universe,data,n) 11 | { 12 | uniComb <- 1:length(universe) 13 | uniComb <- expand.grid(uniComb,uniComb) 14 | uniComb <- uniComb[uniComb$Var1!=uniComb$Var2,] 15 | uniComb <- uniComb[order(uniComb$Var1),] 16 | 17 | idseq<-1:nrow(uniComb) 18 | corDat <- alply(idseq,1,runningCor,colIndicator=uniComb,data=data,n=n) 19 | 20 | correlationIndicator <- NULL 21 | for(i in 1:length(universe)) 22 | { 23 | datseq <- (1+(length(universe)-1)*(i-1)):(i*(length(universe)-1)) 24 | tempCor <- eval(parse(text=paste("cbind(", 25 | paste(paste("corDat[[", 26 | paste(datseq), 27 | paste("]]"),sep=""), 28 | collapse=","),paste(")"), 29 | sep=""))) 30 | colnames(tempCor) <- paste("cor",1:ncol(tempCor),sep="") 31 | eval(parse(text=paste("avgCor",paste(i), 32 | paste("<- xts(rowMeans(tempCor), 33 | index(tempCor))"),sep=""))) 34 | correlationIndicator <- cbind(correlationIndicator, 35 | eval(parse(text=paste("avgCor", 36 | paste(i),sep="")))) 37 | rm(tempCor) 38 | } 39 | colnames(correlationIndicator) <- universe 40 | return(correlationIndicator) 41 | } -------------------------------------------------------------------------------- /my_functions/run_corr_function.R: -------------------------------------------------------------------------------- 1 | ## Average Correlation function 2 | runningCor <- function(idseq,colIndicator,data,n) 3 | { 4 | x <- data[,colIndicator$Var1[idseq]] 5 | y <- data[,colIndicator$Var2[idseq]] 6 | runcor_est <- runCor(x,y,n) 7 | return(runcor_est) 8 | } 9 | 10 | corIndicator <- function(universe,data,n) 11 | { 12 | uniComb <- 1:length(universe) 13 | uniComb <- expand.grid(uniComb,uniComb) 14 | uniComb <- uniComb[uniComb$Var1!=uniComb$Var2,] 15 | uniComb <- uniComb[order(uniComb$Var1),] 16 | 17 | idseq<-1:nrow(uniComb) 18 | corDat <- alply(idseq,1,runningCor,colIndicator=uniComb,data=data,n=n) 19 | 20 | correlationIndicator <- NULL 21 | for(i in 1:length(universe)) 22 | { 23 | datseq <- (1+(length(universe)-1)*(i-1)):(i*(length(universe)-1)) 24 | tempCor <- eval(parse(text=paste("cbind(", 25 | paste(paste("corDat[[", 26 | paste(datseq), 27 | paste("]]"),sep=""), 28 | collapse=","),paste(")"), 29 | sep=""))) 30 | colnames(tempCor) <- paste("cor",1:ncol(tempCor),sep="") 31 | eval(parse(text=paste("avgCor",paste(i), 32 | paste("<- xts(rowMeans(tempCor), 33 | index(tempCor))"),sep=""))) 34 | correlationIndicator <- cbind(correlationIndicator, 35 | eval(parse(text=paste("avgCor", 36 | paste(i),sep="")))) 37 | rm(tempCor) 38 | } 39 | colnames(correlationIndicator) <- universe 40 | return(correlationIndicator) 41 | } -------------------------------------------------------------------------------- /extrema/inflection_points.R: -------------------------------------------------------------------------------- 1 | # sample plots ------------------------------------------------------------ 2 | par(mfrow=c(1,2)) 3 | 4 | # method 1 5 | maxima.test(x=1:nrow(close.prices2["2014-06/",11]), 6 | y=coredata(close.prices2["2014-06/",11]), 7 | w=round(nrow(close.prices2)*.001), 8 | span=.1) 9 | 10 | # method 2 11 | sig <- .65 12 | x <- index(close.prices2["2014-06/"]) 13 | y <- as.vector(close.prices2["2014-06/",11]) 14 | plot(y, type="o", cex = 1/4,col="gray") 15 | p.1 <- Peak(turnpoints(y), level = sig) 16 | p.2 <- Valley(turnpoints(y), level = sig ) 17 | points(seq(y)[p.1], y[p.1], col = "#d7191c", cex = 1.2,pch=19) 18 | points(seq(y)[p.2], y[p.2], col = "#1a9641", cex = 1.2,pch=19) 19 | mtext("extrema al level = 1%", col="black") 20 | par(mfrow=c(1,1)) 21 | 22 | 23 | # Iterate through Study Parameters ---------------------------------------- 24 | #all.params <- seq(.001,1,by=.001) 25 | 26 | all.params <- .2 27 | all.turnpoints <- lapply(close.prices1,as.vector) 28 | all.turnpoints <- lapply(all.turnpoints,turnpoints) 29 | 30 | 31 | # Collect Peaks and Pits -------------------------------------------------- 32 | all.blank <- list() 33 | for(i in 1:length(all.params)){ 34 | all.blank[[i]] <- lapply(all.turnpoints,Extrema,level=all.params[i]) 35 | } 36 | 37 | all.map <- do.call(Map,c(c,all.blank)) 38 | all.map <- lapply(all.map,sort) 39 | names(all.map) <- colnames(close.prices2) 40 | #all.stop <- lapply(all.map,unique) 41 | all.stop <- all.map 42 | 43 | # Identify a start date for a range of prices before the turnpoint 44 | all.start <- lapply(all.stop, function(x) x-25) 45 | all.start <- rapply(all.start, f= function(x) ifelse(x <= 0,1,x),how="replace") 46 | 47 | ## Vectorized version of the previous function 48 | all.extrema <- Map(min.max.subset, all.xts, all.start, all.stop) 49 | 50 | 51 | # Inflection Statistics --------------------------------------------------- 52 | number.of.inflections <- lapply(all.extrema,length) 53 | # frequency table 54 | # all.frequency <- lapply(all.map,table) 55 | 56 | 57 | -------------------------------------------------------------------------------- /simulation/GBM_20140120.R: -------------------------------------------------------------------------------- 1 | rm(list = ls(all = TRUE)) 2 | 3 | # Functional Method ------------------------------------------------------- 4 | gbm_function <- function(x, omega, mu, sigma, n) { 5 | # Geometric Brownian Motion: 6 | # x = initial value of the process at t0 7 | # alpha = start time 8 | # omega = final time 9 | # mu = rate of return (drift) 10 | # sigma = volatility 11 | # n = number of intervals in which to split [t0, t1] 12 | dt <- omega/n 13 | t <- seq(0, omega, length = n + 1) 14 | bm <- ts(cumsum(c(0, rnorm(n) * sqrt(dt))), start = 0, deltat = dt) 15 | gbm <- x * exp((mu - sigma ^ 2 / 2) * time(bm) + sigma * as.numeric(bm)) 16 | out <- ts(gbm, start = 0, deltat = deltat(bm)) 17 | } 18 | 19 | model1 <- gbm_function(x = 5, 20 | omega = 1, 21 | mu = 0.09, 22 | sigma = 0.3, 23 | n = 5000) 24 | 25 | 26 | # Iterative Method -------------------------------------------------------- 27 | gbm_f <- function(t0, t1, mu, sigma, n){ 28 | # Geometric Brownian Motion: 29 | # t0 = initial value of the process at t0 30 | # t1 = final time 31 | # mu = expected annual rate of return (drift) 32 | # sigma = expected annual variation 33 | # n = number of intervals in which to split [t0, t1] 34 | dt <- t1/n 35 | out <- NULL 36 | for (i in 1:n) { 37 | out[1] <- t0 38 | out[i + 1] <- out[i] * exp((mu - 0.5 * sigma ^ 2) * dt + 39 | sigma * sqrt(dt) * rnorm(n = 1)) 40 | } 41 | out 42 | } 43 | 44 | model_output <- gbm_f(t0 = 1, 45 | t1 = 5, 46 | mu = 0.0927, 47 | sigma = 0.30, 48 | n = 10000) 49 | 50 | 51 | # Plot each method -------------------------------------------------------- 52 | par(mfrow=c(1, 2)) 53 | plot(x = seq(1, length(model1), by = 1), 54 | y = model1, 55 | type = "l", 56 | lwd = 1.5, 57 | col = "#31a354", 58 | ylab = "$", 59 | xlab = "# of Observations") 60 | 61 | plot(model_output, type = "l") 62 | -------------------------------------------------------------------------------- /indicators/indicators.R: -------------------------------------------------------------------------------- 1 | rm(list=ls(all=TRUE)) 2 | library(xts) 3 | library(quantmod) 4 | 5 | prices <- rev(c(128.46, 130.415, 128.79, 132.17, 133.00, 129.495, 128.45, 6 | 128.715, 127.83, 127.08, 126.46, 124.88, 122.02, 7 | 119.72, 118.93, 119.94, 119.56, 118.65, 118.63, 8 | 117.16)) 9 | 10 | # Running Sum ------------------------------------------------------------- 11 | running_sum <- function (x, n = 10, cumulative = FALSE) 12 | { 13 | x <- try.xts(x, error = as.matrix) 14 | if (n < 1 || n > NROW(x)) 15 | stop("Invalid 'n'") 16 | NAs <- sum(is.na(x)) 17 | if (NAs > 0) { 18 | if (any(is.na(x[-(1:NAs)]))) 19 | stop("Series contains non-leading NAs") 20 | } 21 | beg <- 1 + NAs 22 | len <- NROW(x) - NAs 23 | result <- double(NROW(x)) 24 | if (cumulative) { 25 | result[beg:NROW(x)] <- cumsum(x[beg:NROW(x)]) 26 | } 27 | else { 28 | result[(n + beg - 1)] <- sum(x[beg:(n + beg - 1)]) 29 | result <- .Fortran("runsum", ia = as.double(x[beg:NROW(x)]), 30 | lia = as.integer(len), n = as.integer(n), oa = as.double(result[beg:NROW(x)]), 31 | loa = as.integer(len), PACKAGE = "TTR", DUP = FALSE)$oa 32 | result <- c(rep(NA, NAs), result) 33 | } 34 | is.na(result) <- c(1:(n - 1 + NAs)) 35 | reclass(result, x) 36 | } 37 | 38 | # Running Mean ------------------------------------------------------------ 39 | running_mean <- function (x, n = 10, cumulative = FALSE) 40 | { 41 | if (cumulative) { 42 | result <- runSum(x, n, cumulative)/1:NROW(x) 43 | } 44 | else { 45 | result <- runSum(x, n)/n 46 | } 47 | return(result) 48 | } 49 | 50 | # Smoothing Average ------------------------------------------------------- 51 | newSMA <- function (x, n = 10, ...) 52 | { 53 | ma <- running_mean(x, n) 54 | if (!is.null(dim(ma))) { 55 | colnames(ma) <- paste(colnames(x), "SMA", n, sep = ".") 56 | } 57 | return(ma) 58 | } 59 | 60 | 61 | window <- 6 62 | 63 | sma_data <- cbind(SMA(prices, window)) 64 | sma_data 65 | 66 | # triangular moving average 67 | new_window <- ceiling((window + 1)/2) 68 | cbind(SMA(SMA(prices, new_window), new_window)) 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /rfortraders/Errata.txt: -------------------------------------------------------------------------------- 1 | 2 | * Page 6 3 | In figure 1.2, the price ladder should have an 8 4 | as opposed to a 10 for the best bid price. 5 | 6 | * Page 8 7 | "Financial data is the the lifeblood..." 8 | Two the's in a row. 9 | 10 | * Page 30 11 | "...is the the concept of nonmutability of state[85]." 12 | Two the's in a row. 13 | 14 | * Page 33 15 | The comment for the code at the top of the page 16 | should read: # "Create 100 standard normals ..." and not 17 | # "Greate 100 standard normals ..." 18 | 19 | * Page 37 20 | Towards the end of the first paragraph, 21 | topper() should be toupper() 22 | 23 | * Page 37 24 | Change filter_symbols(c("MOT", "cvx", "123", "Gog2", "XLe")) 25 | to: filter_and_sort_symbols(c("MOT", "cvx", "123", "Gog2", "XLe")) 26 | 27 | * Page 63 28 | "...representation of the the time differences..." 29 | Two the's in a row. 30 | 31 | * Page 87 32 | "The graphs of a few popular continuous distributions are illustrated below:", should read: 33 | "The graphs of a few popular continuous distributions are illustrated in Fig 4.13" 34 | 35 | * Page 88 36 | "Here is what the histogram of the outcomes looks like." 37 | should read: 38 | "Figure 4.14 illustrates the histogram of the outcomes." 39 | 40 | * Page 89 41 | "In the graph below, the mean shifts from 0.5 to 0.6." 42 | should read: 43 | "A shift in the mean from 0.5 to 0.6 can be seen in Figure 4.15." 44 | 45 | * Page 104 46 | "The scatter plot between the SPY and VXX time series looks like this:" 47 | should read: 48 | "The SPY and VXX time series scatter plot can be seen in Figure 5.11." 49 | 50 | * Page 124 51 | The function calculate_buy_sell_signals is defined but not used in subsequent 52 | examples. (TO EDITOR: No change required.) 53 | 54 | * Page 130 and 131 55 | The same code "plot data_out$spread" is used twice. 56 | (TO EDITOR: No change required.) 57 | 58 | * Page 144 59 | The code title: " # 4. Histogram of trade duration", should be changed to 60 | " Histogram of trade duration" 61 | 62 | * Page 221 63 | The code snippet at the bottom of the page has: 64 | "[1] 11". It should be "## [1] 11". 65 | 66 | * Page 234 67 | Insert ## before the number 0.9978 close to the end of the page. 68 | Should read "## 0.9978" instead of "0.9978" 69 | 70 | * Page 235 71 | Same as above. Insert ## before the outputs. 72 | "## [1] 0.597" and "## [1] 0.515" 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /package_mgmt/install_packages.R: -------------------------------------------------------------------------------- 1 | install.packages(c("AER", 2 | "boot", 3 | "car", 4 | "caTools", 5 | "copula", 6 | "corpcor", 7 | "corrplot", 8 | "devtools", 9 | "dtw", 10 | "Ecdat", 11 | "ellipse", 12 | "evir", 13 | "faraway", 14 | "fCopulae", 15 | "fEcofin", 16 | "fGarch", 17 | "FinTS", 18 | "FitAR", 19 | "foreach", 20 | "forecast", 21 | "ggplot2", 22 | "htmltools", 23 | "knitr", 24 | "lattice", 25 | "leaps", 26 | "lmtest", 27 | "markdown", 28 | "MASS", 29 | "mixAK", 30 | "mnormt", 31 | "nor1mix", 32 | "numDeriv", 33 | "pastecs", 34 | "pcaPP", 35 | "PerformanceAnalytics", 36 | "plyr", 37 | "quantmod", 38 | "reshape", 39 | "reshape2", 40 | "rJava", 41 | "robust", 42 | "rugarch", 43 | "tseries", 44 | "urca", 45 | "vars", 46 | "xtable", 47 | "xts", 48 | "yaml", 49 | "zoo")) 50 | 51 | # R connection to Bloomberg 52 | install.packages("Rbbg", repos="http://r.findata.org/", dependencies = TRUE) 53 | 54 | # Install an old version of the sn package for CFRM 542 homework (Winter 2015) 55 | install.packages("http://cran.r-project.org/src/contrib/Archive/sn/sn_1.0-0.tar.gz", 56 | repos = NULL, type = "source") 57 | 58 | 59 | # Install Packages for CFRM 543 60 | install.packages(c("ROI","ROI.plugin.glpk","ROI.plugin.quadprog")) 61 | install.packages("mpo", repos = "http://r-forge.r-project.org") 62 | install.packages("factorAnalytics", repos = "http://r-forge.r-project.org") 63 | install.packages("PerformanceAnalytics", repos = "http://r-forge.r-project.org") 64 | 65 | # Source: 66 | # https://rmkrug.wordpress.com/2014/11/10/update-all-user-installed-r-packages-again/ 67 | #install.packages( 68 | # lib = lib <- .libPaths()[1], 69 | # pkgs = as.data.frame(installed.packages(lib), stringsAsFactors=FALSE)$Package, 70 | # type = 'source' 71 | #) -------------------------------------------------------------------------------- /fb/bday2016.R: -------------------------------------------------------------------------------- 1 | rm(list = ls(all = TRUE)) 2 | 3 | library(xts) 4 | library(dplyr) 5 | library(scales) 6 | library(ggplot2) 7 | library(gridExtra) 8 | 9 | setwd("~/dev/fbook") 10 | 11 | # load data --------------------------------------------------------------- 12 | # 'data.frame': 326 obs. of 2 variables: 13 | # $ year : int 14 | # $ person: chr 15 | data <- read.csv("hbd_2016.csv", header = TRUE, as.is = TRUE, sep = ",") 16 | 17 | 18 | # individual frequency ---------------------------------------------------- 19 | indiv_freq <- count_(unique(data), vars = "person", sort = TRUE) 20 | 21 | percent1 <- quantile(indiv_freq$n, 0.99) # 2 posts 22 | percent5 <- quantile(indiv_freq$n, 0.95) # 3 posts 23 | 24 | who1 <- indiv_freq[indiv_freq$n >= percent1, ] # 4 people 25 | who5 <- indiv_freq[indiv_freq$n >= percent5, ] # 23 people 26 | 27 | # density plot 28 | plot1 <- ggplot(indiv_freq, aes(n)) + 29 | geom_density(fill = "black") + 30 | ggtitle("Kernel Density of FB HBD Posts 2008-2016") + 31 | xlab("Cumulative Sum of Posts Per Person") + 32 | ylab("Density") + 33 | geom_vline(xintercept = c(percent1, percent5), 34 | colour = c("#7fcdbb", "#2c7fb8"), size = 1.05) + 35 | annotate("text", x = c(3, 5) + 0.25 , y = c(1, 1) + 0.05, 36 | label = c("95%", "99%"), 37 | colour = c("#2c7fb8", "#7fcdbb")) 38 | 39 | # bar plot 40 | plot2 <- ggplot(data = who5, aes(x = reorder(person, n), y = n)) + 41 | ggtitle("95th Percentile of HBD Posters 2008-2016") + 42 | ylab("Cumulative Sum of Posts Per Person") + 43 | xlab(NULL) + 44 | geom_bar(stat = 'identity') + 45 | coord_flip() + 46 | annotate("text", x = nrow(who5) - nrow(who1) + 0.35, y = 6 + 0.25, 47 | label = "99%", colour = "#7fcdbb") + 48 | geom_vline(xintercept = nrow(who5) - nrow(who1) + 1, 49 | colour = "#7fcdbb", size = 1.05) 50 | 51 | # plot density and barplot side-by-side 52 | grid.arrange(plot1, plot2, ncol = 2) 53 | 54 | 55 | # streaks ----------------------------------------------------------------- 56 | split_names <- split(data, data$person) 57 | split_dates <- lapply(split_names, `[`, , 1) 58 | split_diff <- lapply(split_dates, function(x) diff(rev(x))) 59 | split_con <- lapply(split_diff, function(x) x[!x > 1]) 60 | split_sum <- lapply(split_con, sum) 61 | 62 | consecutive_sums <- sort(unlist(split_sum)) 63 | 64 | # print to console consecutive count of 99% percentile 65 | cbind(consecutive_sums[who1$person]) 66 | 67 | 68 | # annual frequency -------------------------------------------------------- 69 | annual_count <- 70 | data %>% 71 | group_by(year) %>% 72 | summarise(count = n_distinct(person)) %>% 73 | mutate(count = replace(count, count == 1, 0)) %>% 74 | arrange(year) 75 | 76 | # line plot 77 | ggplot(data = annual_count, aes(x = year, y = count)) + 78 | ggtitle("Total HBD Posts Per Year") + 79 | ylab(NULL) + 80 | xlab(NULL) + 81 | geom_line() 82 | -------------------------------------------------------------------------------- /extrema/functions.R: -------------------------------------------------------------------------------- 1 | # Method 1 ---------------------------------------------------------------- 2 | # http://stats.stackexchange.com/questions/36309/how-do-i-find-peaks-in-a-dataset 3 | 4 | #maxima 5 | argmax <- function(x=1:length(x),y=coredata(x), w=1, ...) { 6 | n <- length(y) 7 | y.smooth <- loess(y ~ x, ...)$fitted 8 | y.max <- rollapply(zoo(y.smooth), 2*w+1, max, align="center") 9 | delta <- y.max - y.smooth[-c(1:w, n+1-1:w)] 10 | i.max <- which(delta <= 0) + w 11 | list(x=x[i.max], i=i.max, y.hat=y.smooth) 12 | } 13 | 14 | 15 | # minema 16 | argmin <- function(x=1:length(x),y=coredata(x), w=1, ...) { 17 | n <- length(y) 18 | y.smooth <- loess(y ~ x, ...)$fitted 19 | y.min <- rollapply(zoo(y.smooth), 2*w+1, min, align="center") 20 | delta <- y.min - y.smooth[-c(1:w, n+1-1:w)] 21 | i.min <- which(delta >= 0) + w 22 | list(x=x[i.min], i=i.min, y.hat=y.smooth) 23 | } 24 | 25 | 26 | maxima.test <- function(x,y,w, span) { 27 | 28 | # plot peaks 29 | peaks <- argmax(x, y, w=w, span=span) 30 | plot(x, y, cex=1.25, type="l", col="Gray", main=paste("w = ", w, ", span = ", span, sep="")) 31 | lines(x, peaks$y.hat, cex=.75) #$ 32 | y.max <- max(y) 33 | sapply(peaks$i, function(i) lines(c(x[i],x[i]), c(y.max, peaks$y.hat[i]), 34 | col="#d7191c", lty=2)) 35 | points(x[peaks$i], peaks$y.hat[peaks$i], col="#d7191c", pch=19, cex=1.25) 36 | 37 | # plot valleys 38 | valley <- argmin(x, y, w=w, span=span) 39 | y.min <- min(y) 40 | sapply(valley$i, function(i) lines(c(x[i],x[i]), c(y.min, valley$y.hat[i]), 41 | col="#1a9641", lty=2)) 42 | points(x[valley$i], valley$y.hat[valley$i], col="#1a9641", pch=19, cex=1.25) 43 | } 44 | 45 | 46 | 47 | 48 | # Method 2 ---------------------------------------------------------------- 49 | # https://stat.ethz.ch/pipermail/r-help/2005-November/083423.html 50 | 51 | Peak <- function(x, level = 0.05) { 52 | if (!inherits(x, "turnpoints")) 53 | stop("x must be a 'turnpoints' object!") 54 | # Extract position and probability 55 | tp.pos <- x$tppos 56 | tp.proba <- x$proba 57 | # We have both peaks and pits. Keep only peaks 58 | keep <- 1:(x$nturns / 2) * 2 59 | if (x$firstispeak) keep <- keep - 1 60 | tp.pos <- tp.pos[keep] 61 | tp.proba <- tp.proba[keep] 62 | # Keep only peaks whose probability is lower than level 63 | return(tp.pos[tp.proba < level]) 64 | } 65 | 66 | Valley <- function(x, level = 0.05) { 67 | if (!inherits(x, "turnpoints")) 68 | stop("x must be a 'turnpoints' object!") 69 | # Extract position and probability 70 | tp.pos <- x$tppos 71 | tp.proba <- x$proba 72 | # We have both peaks and pits. Keep only peaks 73 | keep <- 1:(x$nturns / 2) * 2 74 | if (x$firstispeak==FALSE) keep <- keep - 1 75 | tp.pos <- tp.pos[keep] 76 | tp.proba <- tp.proba[keep] 77 | # Keep only peaks whose probability is lower than level 78 | return(tp.pos[tp.proba < level]) 79 | } 80 | 81 | 82 | 83 | Extrema <- function(x, level = 0.05) { 84 | if (!inherits(x, "turnpoints")) 85 | stop("x must be a 'turnpoints' object!") 86 | # Extract position and probability 87 | tp.pos <- x$tppos 88 | tp.proba <- x$proba 89 | # Keep only peaks whose probability is lower than level 90 | return(tp.pos[tp.proba < level]) 91 | } 92 | 93 | 94 | 95 | 96 | # General Use ------------------------------------------------------------- 97 | 98 | ## function subset an xts object giving a min/max lists 99 | min.max.subset <- function(l,min.list,max.list) 100 | Map(function(x,y)l[seq(x,y)],min.list,max.list) 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /rfortraders/Chapter_04/code.R: -------------------------------------------------------------------------------- 1 | # CHAPTER 4 2 | # Basic Statistics and Probability 3 | 4 | ############################ 5 | # Population versus sample # 6 | ############################ 7 | # Set the seed 8 | set.seed(100) 9 | X <- rnorm(1000000, mean = 2.33, sd = 0.5) 10 | mu <- mean(X) 11 | sd <- sd(X) 12 | hist(X, breaks = 100) 13 | abline(v = mu, lwd = 3, lty = 2) 14 | 15 | set.seed(12) 16 | rnorm(5) 17 | ## [1] -1.4805676 1.5771695 -0.9567445 -0.9200052 -1.9976421 18 | 19 | rnorm(5) 20 | ## [1] -0.2722960 -0.3153487 -0.6282552 -0.1064639 0.4280148 21 | 22 | sample5 <- sample(X, 5, replace = TRUE) 23 | sample10 <- sample(X, 10, replace = TRUE) 24 | sample50 <- sample(X, 50, replace = TRUE) 25 | 26 | sample5 27 | ## [1] 2.497921 2.635927 2.291848 2.127974 2.268268 28 | 29 | sample10 30 | ## [1] 2.064451 2.274464 2.468938 1.800007 2.557669 31 | ## [6] 2.535241 1.331020 1.159151 1.661762 2.285889 32 | 33 | sample50 34 | ## [1] 2.581844 2.138331 3.003670 1.864148 2.049141 35 | ## [6] 2.808971 1.400057 2.527640 3.639216 3.311873 36 | 37 | mean(sample5) 38 | ## [1] 2.364388 39 | 40 | mean(sample10) 41 | ## 2.013859 42 | 43 | mean(sample50) 44 | ## 2.447003 45 | 46 | mean(sample(X, 1000, replace = TRUE)) 47 | ## 2.323124 48 | 49 | mean(sample(X, 10000, replace = TRUE)) 50 | ## [1] 2.334109 51 | 52 | ############################## 53 | # Central Limit Theorem in R # 54 | ############################## 55 | mean_list <- list() 56 | for(i in 1:10000) { 57 | mean_list[[i]] <- mean(sample(X, 10, replace = TRUE)) 58 | } 59 | 60 | hist(unlist(mean_list), breaks = 500, 61 | xlab = "Mean of 10 samples from X", 62 | main = "Convergence of sample distribution", 63 | cex.main = 0.8) 64 | abline(v = mu, lwd = 3, col = "white", lty = 2) 65 | 66 | population <- sample(c(0, 1), 100000, replace = TRUE) 67 | hist(population, main = "Non-normal", cex.main = 0.8) 68 | abline(v = mean(population), lwd = 3, lty = 3) 69 | 70 | mean_list <- list() 71 | for(i in 1:10000) { 72 | mean_list[[i]] <- mean(sample(population, 10, replace = TRUE)) 73 | } 74 | 75 | hist(unlist(mean_list), main = "Distribution of averages", 76 | cex.main = 0.8, 77 | xlab = "Average of 10 samples") 78 | abline(v = 0.5, lwd = 3) 79 | 80 | ############################### 81 | # Unbiasedness and efficiency # 82 | ############################### 83 | # Formula for population variance 84 | population_variance <- function(x) { 85 | mean <- sum(x) / length(x) 86 | return(sum((x - mean) ^ 2) / length(x)) 87 | } 88 | 89 | # Create a population 90 | population <- as.numeric(1:100000) 91 | variance <- population_variance(population) 92 | 93 | variance 94 | ## [1] 833333333 95 | 96 | output <- list() 97 | for(i in 1:1000) { 98 | output[[i]] <- population_variance(sample(population, 99 | 10, replace = TRUE)) 100 | } 101 | 102 | variance_estimates <- unlist(output) 103 | hist(variance_estimates, breaks = 100, cex.main = 0.9) 104 | average_variance <- mean(variance_estimates) 105 | abline(v = average_variance, , lty = 2, lwd = 2) 106 | abline(v = variance, lwd = 2) 107 | 108 | average_variance 109 | ## [1] 738123625 110 | 111 | # Formula for unbiased variance estimator 112 | sample_variance <- function(x) { 113 | mean <- sum(x) / length(x) 114 | return(sum((x - mean) ^ 2) / (length(x) - 1)) 115 | } 116 | 117 | output <- list() 118 | 119 | for( i in 1:1000 ) { 120 | output[[i]] <- sample_variance(sample(population, 121 | 10, replace = TRUE)) 122 | } 123 | 124 | sample_variance_estimates <- unlist(output) 125 | average_sample_variance <- mean(sample_variance_estimates) 126 | 127 | average_sample_variance 128 | ## [1] 836184961 129 | 130 | ############################# 131 | # Probability distributions # 132 | ############################# 133 | plot(c(-1, 1), c(0.5, 0.5), type = "h", lwd = 3, 134 | xlim = c(-2, 2), main = "Probability mass function of coin toss", 135 | ylab = "Probability", 136 | xlab = "Random Variable", 137 | cex.main = 0.9) 138 | 139 | ######################## 140 | # Simulations of coins # 141 | ######################## 142 | outcomes <- sample(c(0, 1), 1000, replace = TRUE) 143 | 144 | set.seed(101) 145 | biased_outcomes <- sample(c(0, 1), 1000, 146 | replace = TRUE, prob = c(0.4, 0.6)) 147 | 148 | prob_estimate <- sum(biased_outcomes) / 149 | length(biased_outcomes) 150 | 151 | prob_estimate 152 | ## [1] 0.603 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /survivorship/rBBG_survivors.R: -------------------------------------------------------------------------------- 1 | rm(list=ls(all=TRUE)) 2 | 3 | library(Rbbg) 4 | library(xts) 5 | library(plyr) 6 | library(reshape2) 7 | 8 | # connect to bloomberg ---------------------------------------------------- 9 | start_time <- Sys.time() 10 | conn <- blpConnect(throw.ticker.errors = FALSE,log.level = "finest") 11 | 12 | securities <- c("SPX Index") 13 | fields <- c("INDX_MWEIGHT_HIST") 14 | date_seq <- as.POSIXlt(seq(as.Date("1999-11-01"),as.Date("2014-12-05"),1)) 15 | date_final <- format(date_seq[! date_seq$wday %in% c(0,6)],"%Y%m%d") 16 | 17 | BBGList <- list() 18 | for(i in 1:length(date_final)){ 19 | BBGList[[i]] <- bds(conn, securities, fields, 20 | override_fields = c("END DT"), 21 | override_values = c(date_final[i])) 22 | } 23 | 24 | # BBGdf <- do.call("cbind",BBGList) 25 | # BBGdf_final <- BBGdf[, seq(1, ncol(BBGdf)-1, 2)] 26 | # colnames(BBGdf_final) <- date_final 27 | 28 | 29 | end_time <- Sys.time() 30 | end_time - start_time # for the SP500, 1999-2014, this takes ~2 hours 31 | 32 | # output ------------------------------------------------------------------ 33 | setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/survivors/raw_data") 34 | files <- paste(date_final,".csv",sep="") 35 | lapply(seq_along(BBGList), function(i){ 36 | write.zoo(BBGList[[i]], files[i],sep=",") 37 | }) 38 | 39 | 40 | # read & reformat list data to data frame --------------------------------- 41 | WD <- setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/survivors/raw_data") 42 | WD 43 | 44 | load_data <- list.files(WD,pattern="*.csv",full.names=T) 45 | read_data <- lapply(load_data, read.csv, stringsAsFactors=FALSE) 46 | 47 | 48 | max_length <- max(sapply(read_data,nrow)) 49 | constituents <- lapply(read_data, function(x) x[, 2]) 50 | equal_length <- lapply(constituents, `[`, 1:max_length) 51 | BBGdf <- do.call("cbind", equal_length) 52 | BBGdf <- data.frame(BBGdf) 53 | colnames(BBGdf) <- date_final 54 | 55 | 56 | # suffix normalization 57 | sub1 <- apply(BBGdf, 2, gsub, pattern = " UN", replacement = " ZZZZZZ" ) 58 | sub2 <- apply(sub1, 2, gsub, pattern = " UA", replacement = " ZZZZZZ" ) 59 | sub3 <- apply(sub2, 2, gsub, pattern = " UW", replacement = " ZZZZZZ" ) 60 | sub4 <- apply(sub3, 2, gsub, pattern = " UQ", replacement = " ZZZZZZ" ) 61 | sub5 <- apply(sub4, 2, gsub, pattern = " UP", replacement = " ZZZZZZ" ) 62 | sub6 <- apply(sub5, 2, gsub, pattern = " US", replacement = " ZZZZZZ" ) 63 | sub7 <- apply(sub6, 2, gsub, pattern = " ZZZZZZ Equity Equity", replacement = " ZZZZZZ") 64 | sub8 <- apply(sub7, 2, gsub, pattern = " ZZZZZZ Equity", replacement = " ZZZZZZ") 65 | sub9 <- apply(sub8, 2, gsub, pattern = " ZZZZZZ", replacement = " US Equity") 66 | 67 | # ticker replacement 68 | sub10 <- apply(sub9, 2, gsub, pattern = "ZTS-W US Equity", replacement = "ZTS US Equity") 69 | final_output <- as.data.frame(sub10) 70 | # write.csv(final_output,file="daily_survivors.csv") 71 | 72 | 73 | 74 | # unique list of tickers to implement in rBBG_2000.R and rBBG_splits.R 75 | sort_tickers <- unique(sort(na.omit(as.vector(as.matrix(final_output))))) 76 | sort_tickers 77 | 78 | 79 | 80 | 81 | # format survivors into an xts object ------------------------------------- 82 | setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/survivors") 83 | ticker_read <- read.csv("daily_survivors.csv", as.is = TRUE, header=FALSE) 84 | ticker_read <- ticker_read[,2:ncol(ticker_read)] 85 | 86 | # associate each ticker with a date 87 | blank_list <- list() 88 | for(i in 1:ncol(ticker_read)){ 89 | blank_list[[i]] <- cbind(ticker_read[1,i], ticker_read[2:nrow(ticker_read),i]) 90 | } 91 | 92 | long_df <- as.data.frame(do.call(rbind, blank_list)) 93 | 94 | # cleaning and convert the data frame from long to wide format 95 | just_dates <- as.character(long_df[, 1]) 96 | just_tickers <- as.character(long_df[, 2]) 97 | clean_tickers <- substr(just_tickers, 1, nchar(just_tickers)-10) 98 | long_df2 <- as.data.frame(cbind(just_dates, clean_tickers, value=1)) 99 | wide_df <- dcast(long_df2, long_df2[,1] ~ long_df2[,2], value.var = "value") 100 | 101 | clean_dates <- as.Date(as.character(wide_df[, 1]), "%Y%m%d") 102 | wide_xts <- xts(wide_df[,2:ncol(wide_df)], order.by = clean_dates) 103 | wide_xts <- xts(apply(wide_xts,2,as.numeric), order.by = clean_dates) 104 | wide_xts[is.na(wide_xts)] <- 0 105 | colnames(wide_xts)[1] <- "NA" 106 | 107 | 108 | # clean holidays 109 | trade_dates <- as.vector(as.matrix(read.table("NYSETradingDays.txt"))) # Jim Kragenbring provided this file 110 | trade_dates <- na.omit(as.Date(trade_dates, "%Y%m%d")) 111 | blank_xts <- xts(1:length(trade_dates), order.by=trade_dates) 112 | final_dates <- index(blank_xts["1999-11-01/2014-12-05"]) 113 | last_xts <- wide_xts[final_dates, 1:ncol(wide_xts)-1] 114 | 115 | 116 | 117 | # write.zoo(last_xts, file="xts_survivors.csv", sep = ",") 118 | 119 | -------------------------------------------------------------------------------- /backtesting/portfolio_template.R: -------------------------------------------------------------------------------- 1 | rm(list = ls(all = TRUE)) 2 | options(scipen=999) 3 | 4 | setwd() 5 | 6 | library(xts) 7 | library(plyr) 8 | library(quantmod) 9 | library(PerformanceAnalytics) 10 | 11 | # load custom functions --------------------------------------------------- 12 | source("backtest_function.R") 13 | source("run_corr_function.R") 14 | source("rank_function.R") 15 | 16 | 17 | # backtest parameters ----------------------------------------------------- 18 | start_date <- as.Date("2007-04-01") 19 | end_date <- as.Date("2015-01-31") 20 | 21 | ## Define frictions (as percentage of trade) 22 | commission_pct <- c(.0003) 23 | slippage_pct <- c(.0005) 24 | friction_pct <- commission_pct + slippage_pct 25 | 26 | ## Portfolio Symbols 27 | data_source <- c("yahoo") 28 | universe <- c("SPY", "MDY", "IWM", # Large-Mid-Small Cap equities 29 | "EFA", "EEM", # Intl.and emerging markets 30 | "AGG","TIP","TLT", "LQD", # Fixed Income 31 | "GSG", # Commodities 32 | "RWR", "RWX", "MBB") # Real Estateads 33 | 34 | 35 | #universe <- c("SPY", "QQQ", "IWM", "EEM", "EFA", 36 | # "TLT", "LQD", "DBC", "GLD", "AGG", 37 | # "RWR","RWX","MBB") 38 | 39 | 40 | ## Cash 41 | cash_symbol <- c("SHV") 42 | portfolio_symbols <- c(universe, cash_symbol) 43 | 44 | 45 | # download data and compute returns --------------------------------------- 46 | getSymbols(universe, src = data_source, from = start_date, to = end_date) 47 | getSymbols(cash_symbol, src = data_source, from = start_date, to = end_date) 48 | 49 | raw_close <- do.call(merge, lapply(portfolio_symbols, function(x) Cl(get(x)))) 50 | adj_close <- do.call(merge, lapply(portfolio_symbols, function(x) Ad(get(x)))) 51 | 52 | ## Convert Daily to Monthly 53 | monthly_raw <- raw_close[endpoints(index(raw_close), on = "months"), ] 54 | monthly_adj <- adj_close[endpoints(index(adj_close), on = "months"), ] 55 | monthly_rets <- diff(monthly_adj)/lag(monthly_adj, k = 1) 56 | 57 | 58 | 59 | 60 | 61 | # Buy and Hold ------------------------------------------------------------ 62 | bh_allocation <- 1/length(universe) 63 | bh_initial <- matrix(1, nrow = nrow(monthly_rets), ncol = ncol(monthly_rets) - 1) 64 | bh_weights <- bh_initial * bh_allocation 65 | bh_weights <- xts(bh_weights, order.by = index(monthly_rets)) 66 | bh_weights <- bh_weights[endpoints(index(bh_weights), on = "months"), ] 67 | colnames(bh_weights) <- universe 68 | 69 | 70 | # output ------------------------------------------------------------------ 71 | strat1_returns <- 72 | strat2_returns <- 73 | bh_returns <- backtest_engine(bh_weights, "buy_hold", monthly_rets) 74 | 75 | port_returns_report <- cbind(strat1_returns$portfolio_returns, 76 | bh_returns$portfolio_returns, 77 | strat2_returns$portfolio_returns) 78 | 79 | aa <- table.AnnualizedReturns(cbind(port_returns_report)) 80 | bb <- table.DownsideRisk(cbind(port_returns_report)) 81 | a <- table.Stats(cbind(port_returns_report)) 82 | b <- table.Variability(cbind(port_returns_report)) 83 | c <- table.Distributions(cbind(port_returns_report)) 84 | d <- table.DrawdownsRatio(cbind(port_returns_report)) 85 | e <- table.DownsideRiskRatio(cbind(port_returns_report)) 86 | f <- table.TrailingPeriods(cbind(port_returns_report)) 87 | rowBinding <- rbind(aa, bb, a, b, c, d, e, f) 88 | 89 | charts.PerformanceSummary(as.data.frame(port_returns_report), wealth.index = TRUE) 90 | 91 | drawdowns_list <- list(strat1 = table.Drawdowns(strat1_returns$portfolio_returns), 92 | strat2 = table.Drawdowns(strat2_returns$portfolio_returns), 93 | bh = table.Drawdowns(bh_returns$portfolio_returns)) 94 | 95 | monthly_ret_list <- list(strat1 = table.CalendarReturns(strat1_returns$portfolio_returns, digits = 2), 96 | strat2 = table.CalendarReturns(strat2_returns$portfolio_returns, digits = 2), 97 | bh = table.CalendarReturns(bh_returns$portfolio_returns, digits = 2)) 98 | 99 | 100 | # final numeric tables printed to the console 101 | rowBinding 102 | drawdowns_list 103 | monthly_ret_list 104 | 105 | 106 | 107 | # ggplot2 histogram 108 | library(ggplot2) 109 | list_of_returns <- port_returns_report 110 | rename_returns <- lapply(list_of_returns, function(x) { 111 | colnames(x) <- c("returns") 112 | return(x) 113 | }) 114 | 115 | return1 <- as.data.frame(rename_returns[[1]]) 116 | return2 <- as.data.frame(rename_returns[[2]]) 117 | return3 <- as.data.frame(rename_returns[[3]]) 118 | 119 | return1$label <- "bh" 120 | return2$label <- "equal_weight" 121 | return3$label <- "risk_parity" 122 | all_returns <- rbind(return1, return2, return3) 123 | 124 | ggplot(all_returns, aes(returns, fill = label)) + geom_density(alpha = 0.3) 125 | 126 | 127 | 128 | # my current allocation decisions 129 | this_month_allocation <- as.data.frame(rbind(last(strat1_returns$allocation_matrix), 130 | last(strat2_returns$allocation_matrix)), 131 | row.names = c("equal_weight", "risk_parity")) 132 | 133 | 134 | colnames(this_month_allocation) <- c(universe, "CASH") 135 | this_month_allocation 136 | 137 | 138 | #last(strat2_returns$allocation_matrix) 139 | t(this_month_allocation[2, ]) * 1000 140 | 141 | -------------------------------------------------------------------------------- /systematic_investor/time_series_matching.R: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Load Systematic Investor Toolbox (SIT) 3 | # https://systematicinvestor.wordpress.com/systematic-investor-toolbox/ 4 | ############################################################################### 5 | rm(list=ls()) 6 | 7 | library(dtw) 8 | library(xts) 9 | library(quantmod) 10 | 11 | setwd("C:/Users/jlappen/Desktop/R/systematic_investor") 12 | con = gzcon(url('http://www.systematicportfolio.com/sit.gz', 'rb')) 13 | source(con) 14 | close(con) 15 | 16 | 17 | # Custom Function --------------------------------------------------------- 18 | bt.matching.find2 <- function( 19 | data, 20 | n.query=90, 21 | n.reference=252*30, 22 | n.match=10, 23 | normalize.fn = normalize.mean.sd, 24 | dist.fn = dist.euclidean, 25 | plot=FALSE, 26 | plot.dist=FALSE, 27 | layout = NULL, 28 | main = NULL 29 | ) 30 | { 31 | data = last(data, n.reference) 32 | reference = coredata(data) 33 | n = len(reference) 34 | query = reference[(n - n.query + 1):n] 35 | reference = reference[1:(n - n.query)] 36 | main = paste(main, join(format(range(index(data)[(n - n.query + 1):n]), '%d%b%Y'), ' - ')) 37 | n.query = len(query) 38 | n.reference = len(reference) 39 | dist.fn.name = '' 40 | if(is.character(dist.fn)) { 41 | dist.fn.name = paste('with',dist.fn) 42 | dist.fn = get(dist.fn) 43 | } 44 | dist = rep(NA, n.reference) 45 | query.normalized = match.fun(normalize.fn)(query) 46 | for( i in n.query : n.reference ) { 47 | window = reference[ (i - n.query + 1) : i] 48 | window.normalized = match.fun(normalize.fn)(window) 49 | dist[i] = match.fun(dist.fn)(rbind(query.normalized, window.normalized)) 50 | if( i %% 100 == 0) cat(i, '\n') 51 | } 52 | min.index = c() 53 | temp = dist 54 | temp[ temp > mean(dist, na.rm=T) ] = NA 55 | for(i in 1:n.match) { 56 | if(any(!is.na(temp))) { 57 | index = which.min(temp) 58 | min.index[i] = index 59 | temp[max(0,index - 2*n.query) : min(n.reference,(index + n.query))] = NA 60 | } 61 | } 62 | n.match = len(min.index) 63 | if(plot) { 64 | dates = index(data)[1:len(dist)] 65 | if(is.null(layout)) { 66 | if(plot.dist) layout(1:2) else layout(1) 67 | } 68 | par(mar=c(2, 4, 2, 2)) 69 | if(plot.dist) { 70 | plot(dates, dist, type='l',col='gray', main=paste('Top Historical Matches for', main, dist.fn.name), ylab='Distance', xlab='') 71 | abline(h = mean(dist, na.rm=T), col='darkgray', lwd=2) 72 | points(dates[min.index], dist[min.index], pch=22, col='red', bg='red') 73 | text(dates[min.index], dist[min.index], 1:n.match, adj=c(1,1), col='black',xpd=TRUE) 74 | } 75 | plota(data, type='l', col='gray', LeftMargin = 1, 76 | main=iif(!plot.dist, paste('Top Historical Matches for', main), NULL) 77 | ) 78 | plota.lines(last(data,90), col='blue') 79 | for(i in 1:n.match) { 80 | plota.lines(data[(min.index[i]-n.query + 1):min.index[i]], col='red') 81 | } 82 | text(index4xts(data)[min.index - n.query/2], reference[min.index - n.query/2], 1:n.match, 83 | adj=c(1,-1), col='black',xpd=TRUE) 84 | plota.legend(paste('Pattern: ', main, ',Match Number'),'blue,red') 85 | } 86 | return(list(min.index=min.index, dist=dist[min.index], query=query, reference=reference, dates = index(data), main = main)) 87 | } 88 | 89 | 90 | #***************************************************************** 91 | # Load historical data 92 | #****************************************************************** 93 | #load.packages('quantmod') 94 | #tickers <- 'SPY' 95 | 96 | #data <- getSymbols(tickers, src = 'yahoo', from = '1950-01-01', 97 | # auto.assign = FALSE) 98 | 99 | data1 <- read.csv("oil_data.csv", as.is = TRUE, header = TRUE, sep = ",") 100 | data1 <- xts(data1[,2:ncol(data1)], as.Date(data1[, 1])) 101 | data1 <- cbind(data1, data1[,"Close"],data1[,"Close"]) 102 | names(data1) <- c("Open", "High","Low", "Close","Volume", "Adjusted") 103 | data2 <- as.quantmod.OHLC(data1) 104 | 105 | 106 | par(mfrow=c(1, 2)) 107 | obj <- bt.matching.find2(Cl(data2), normalize.fn = normalize.mean, 108 | dist.fn = 'dist.euclidean', plot = TRUE) 109 | 110 | obj <- bt.matching.find2(Cl(data2), normalize.fn = normalize.mean, 111 | dist.fn = 'dist.DTW', plot=TRUE) 112 | par(mfrow=c(1, 1)) 113 | 114 | 115 | 116 | #***************************************************************** 117 | # Euclidean distance, one to one mapping 118 | #****************************************************************** 119 | obj <- bt.matching.find2(Cl(data2), normalize.fn = normalize.mean, 120 | dist.fn = 'dist.euclidean', plot = TRUE) 121 | 122 | matches <- bt.matching.overlay(obj, plot.index=1:90, plot=TRUE) 123 | 124 | layout(1:2) 125 | matches <- bt.matching.overlay(obj, plot=TRUE, layout=TRUE) 126 | bt.matching.overlay.table(obj, matches, plot=TRUE, layout=TRUE) 127 | 128 | 129 | #***************************************************************** 130 | # Dynamic time warping distance 131 | #****************************************************************** 132 | # http://en.wikipedia.org/wiki/Dynamic_time_warping 133 | # http://dtw.r-forge.r-project.org/ 134 | #****************************************************************** 135 | load.packages('dtw') 136 | 137 | obj <- bt.matching.find2(Cl(data2), normalize.fn = normalize.mean, 138 | dist.fn = 'dist.DTW', plot=TRUE) 139 | 140 | matches <- bt.matching.overlay(obj, plot.index=1:90, plot = TRUE) 141 | 142 | layout(1:2) 143 | matches <- bt.matching.overlay(obj, plot = TRUE, layout = TRUE) 144 | bt.matching.overlay.table(obj, matches, plot = TRUE, layout = TRUE) 145 | -------------------------------------------------------------------------------- /bar_charts/GCQ 2014-07-03.csv: -------------------------------------------------------------------------------- 1 | Date,open,high,low,close 2 | 2014-07-03,1316.8,1323.2,1309.4,1320.6 3 | 2014-07-02,1326.7,1333.2,1322.1,1330.9 4 | 2014-07-01,1326.5,1332.6,1325.2,1326.6 5 | 2014-06-30,1312.5,1322.4,1312.4,1322 6 | 2014-06-27,1317.4,1321.9,1317.3,1320 7 | 2014-06-26,1311.6,1319.3,1311.4,1317 8 | 2014-06-25,1314.5,1325.6,1314.3,1322.6 9 | 2014-06-24,1321,1323.9,1317.2,1321.3 10 | 2014-06-23,1314.6,1319.2,1313.3,1318.4 11 | 2014-06-20,1313.8,1320.5,1310.5,1316.6 12 | 2014-06-19,1287.3,1317.4,1286.4,1314.1 13 | 2014-06-18,1273.9,1274.4,1269,1272.7 14 | 2014-06-17,1264.6,1272.8,1258,1272 15 | 2014-06-16,1282.4,1283,1272.5,1275.3 16 | 2014-06-13,1272.6,1275.7,1272.2,1274.1 17 | 2014-06-12,1263.8,1274.6,1261.8,1274 18 | 2014-06-11,1264.3,1265.5,1258.6,1261.2 19 | 2014-06-10,1255.5,1263.8,1255.1,1260.1 20 | 2014-06-09,1257.1,1257.3,1252.3,1253.9 21 | 2014-06-06,1254.3,1258.2,1245.7,1252.5 22 | 2014-06-05,1244.5,1257.9,1243,1253.3 23 | 2014-06-04,1248.2,1249.4,1242.8,1244.3 24 | 2014-06-03,1247.2,1247.4,1240.2,1244.5 25 | 2014-06-02,1246.9,1251,1243.5,1244 26 | 2014-05-30,1250.5,1256.7,1242.2,1246 27 | 2014-05-29,1253.6,1261.5,1252.1,1257.1 28 | 2014-05-28,1266.2,1267.5,1256.1,1259.7 29 | 2014-05-27,1279,1279.1,1264.5,1265.7 30 | 2014-05-23,1289.9,1294.8,1286.7,1291.9 31 | 2014-05-22,1298.1,1304.1,1293.7,1295.2 32 | 2014-05-21,1290.1,1293.3,1286.5,1288.3 33 | 2014-05-20,1291.3,1297.2,1286.2,1294.8 34 | 2014-05-19,1300.3,1305.8,1293.5,1294 35 | 2014-05-16,1295.6,1295.9,1288,1293.6 36 | 2014-05-15,1303.4,1303.4,1291.4,1293.9 37 | 2014-05-14,1306.8,1309.4,1302.8,1306.2 38 | 2014-05-13,1291.5,1299,1290.5,1295.1 39 | 2014-05-12,1297.1,1303.7,1295.2,1296 40 | 2014-05-09,1293.5,1293.5,1286,1287.9 41 | 2014-05-08,1291.5,1292,1285.7,1288.1 42 | 2014-05-07,1302.5,1305.7,1287,1289.3 43 | 2014-05-06,1306.9,1309.6,1304.8,1308.9 44 | 2014-05-05,1314.6,1315.7,1309,1309.6 45 | 2014-05-02,1286.2,1304.8,1273,1303.1 46 | 2014-05-01,1280.4,1289,1278,1283.6 47 | 2014-04-30,1288.1,1296.8,1285.1,1296 48 | 2014-04-29,1290.5,1301.8,1289.8,1296.5 49 | 2014-04-28,1300.6,1303.7,1292.6,1299.1 50 | 2014-04-25,1302.2,1305,1298.6,1300.9 51 | 2014-04-24,1270,1298.5,1268.5,1290.7 52 | 2014-04-23,1287,1287.4,1281.3,1284.6 53 | 2014-04-22,1290.6,1293,1276.8,1281.1 54 | 2014-04-21,1283.8,1293,1283.1,1288.6 55 | 2014-04-17,1301.6,1302.4,1293.5,1294 56 | 2014-04-16,1301.2,1306.7,1298.1,1303.7 57 | 2014-04-15,1301.5,1304,1287.3,1300.5 58 | 2014-04-14,1323.9,1331,1322.7,1327.7 59 | 2014-04-11,1321.9,1322.3,1316.3,1319.2 60 | 2014-04-10,1323.6,1323.6,1316.8,1320.7 61 | 2014-04-09,1304.7,1306.8,1301.4,1306.1 62 | 2014-04-08,1313.5,1313.5,1308,1309.4 63 | 2014-04-07,1298.6,1301.9,1296,1298.6 64 | 2014-04-04,1293.2,1307.2,1290,1303.8 65 | 2014-04-03,1283.9,1289.5,1283.2,1284.9 66 | 2014-04-02,1289.2,1295,1289.2,1291.1 67 | 2014-04-01,1286.3,1287.1,1279.8,1280.2 68 | 2014-03-31,1293.8,1295.4,1283.6,1284.1 69 | 2014-03-28,1288.5,1297.5,1287.1,1294.6 70 | 2014-03-27,1293.8,1299.8,1293.8,1295.1 71 | 2014-03-26,1313.3,1313.9,1302,1303.7 72 | 2014-03-25,1309,1316.3,1307.2,1311.7 73 | 2014-03-24,1322.8,1323.6,1309.5,1311.4 74 | 2014-03-21,1339.6,1340.1,1334.5,1336.2 75 | 2014-03-20,1322.1,1331.7,1322.1,1331 76 | 2014-03-19,1348,1348,1338.9,1341.8 77 | 2014-03-18,1355.3,1363,1352,1359.4 78 | 2014-03-17,1380,1383,1370.7,1373.4 79 | 2014-03-14,1375.6,1388.1,1374.6,1379.5 80 | 2014-03-13,1366.2,1372.9,1366.1,1372.9 81 | 2014-03-12,1366,1371.4,1364.9,1371 82 | 2014-03-11,1351.8,1351.8,1339.3,1347.4 83 | 2014-03-10,1339.3,1344.6,1337.2,1342.2 84 | 2014-03-07,1344,1344,1329,1338.8 85 | 2014-03-06,1337.3,1352.4,1337.3,1352.4 86 | 2014-03-05,1338.4,1341.7,1335.4,1340.9 87 | 2014-03-04,1334.6,1338.7,1334.6,1338.5 88 | 2014-03-03,1348,1354.9,1347.5,1350.9 89 | 2014-02-28,1333.3,1333.3,1322.2,1322.2 90 | 2014-02-27,1331.8,1335,1331.6,1332.3 91 | 2014-02-26,1331.4,1336,1323.5,1328.5 92 | 2014-02-25,1338,1344,1334.4,1343.2 93 | 2014-02-24,1332.6,1338.5,1330,1338.5 94 | 2014-02-21,1323,1324.3,1321.5,1324.2 95 | 2014-02-20,1316.2,1317.4,1313.8,1317.4 96 | 2014-02-19,1319.8,1322,1319.8,1320.9 97 | 2014-02-18,1321.7,1325.2,1321.2,1325 98 | 2014-02-14,1317,1321,1316.3,1319.3 99 | 2014-02-13,1292.9,1300.9,1292.9,1300.7 100 | 2014-02-12,1290.8,1295.7,1290.5,1295.5 101 | 2014-02-11,1283.7,1295,1279,1290.7 102 | 2014-02-10,1278.3,1278.3,1274.5,1275.6 103 | 2014-02-07,1261,1271.8,1261,1263.9 104 | 2014-02-06,1262,1264.2,1254.7,1258.1 105 | 2014-02-05,1268,1273,1256.8,1257.8 106 | 2014-02-04,1251.7,1252,1249.5,1252 107 | 2014-02-03,1247.9,1266,1247.9,1260.9 108 | 2014-01-31,1253.5,1253.5,1240.8,1240.8 109 | 2014-01-30,1242.8,1247.4,1241,1243.6 110 | 2014-01-29,1268.2,1269.3,1262.6,1263.3 111 | 2014-01-28,1257.7,1258.2,1250.6,1251.7 112 | 2014-01-27,1261.9,1264.7,1259.9,1264.7 113 | 2014-01-24,1269,1269,1264.1,1265.7 114 | 2014-01-23,1256.6,1265.7,1256.6,1263.7 115 | 2014-01-22,1239.9,1239.9,1239.9,1239.9 116 | 2014-01-21,1240.3,1243.2,1238,1243.2 117 | 2014-01-17,1249.7,1253.7,1249,1253.5 118 | 2014-01-16,1243.3,1243.3,1241.7,1241.8 119 | 2014-01-15,1237.7,1241.2,1236.8,1240.2 120 | 2014-01-14,1253.7,1253.7,1244,1247.5 121 | 2014-01-13,1251.7,1253.2,1251.7,1253.1 122 | 2014-01-10,1247.5,1249.2,1246.6,1248.8 123 | 2014-01-09,1227.1,1231.5,1227.1,1231.1 124 | 2014-01-08,1223.5,1227.9,1221.9,1227.3 125 | 2014-01-07,1237,1237,1227.9,1231.4 126 | 2014-01-06,1244.7,1248.6,1222,1239.9 127 | 2014-01-03,1229.6,1241,1229.6,1240.6 128 | 2014-01-02,1220,1227.1,1220,1227.1 129 | 2013-12-31,1195,1215.5,1187.2,1204.2 130 | 2013-12-30,1205.8,1205.8,1205.8,1205.8 131 | 2013-12-27,1213,1217.6,1213,1216 132 | 2013-12-26,1216.6,1216.7,1214.4,1214.4 133 | 2013-12-24,1202.9,1205.6,1202.9,1205.4 134 | 2013-12-23,1204.1,1204.1,1199.1,1199.1 135 | 2013-12-20,1195.3,1207.4,1195.3,1205.8 136 | 2013-12-19,1204,1205.4,1195.8,1195.8 137 | 2013-12-18,1237.2,1237.2,1237.2,1237.2 138 | 2013-12-17,1235.4,1235.4,1230.2,1232.3 139 | 2013-12-16,1250.4,1250.4,1246.6,1246.6 140 | 2013-12-13,1233.7,1236.7,1233.2,1236.7 141 | 2013-12-12,1236.6,1237.2,1227,1227 142 | 2013-12-11,1259.3,1259.3,1259.3,1259.3 143 | 2013-12-10,1263.1,1263.1,1263.1,1263.1 144 | 2013-12-09,1237,1237,1234.6,1236.2 145 | 2013-12-06,1217,1236.7,1217,1231 146 | 2013-12-05,1230.7,1235.7,1221,1234.1 147 | 2013-12-04,1220.7,1249.5,1220.2,1249.5 148 | 2013-12-03,1221.3,1226.2,1221.3,1223.1 149 | 2013-12-02,1231.1,1231.1,1224.2,1224.2 150 | 2013-11-29,1253.4,1256.7,1252.8,1252.8 151 | 2013-11-27,1255,1255,1240.3,1240.3 152 | 2013-11-26,1245.7,1249.7,1243.8,1243.8 153 | 2013-11-25,1243.3,1245.4,1241,1243.8 154 | 2013-11-22,1246.8,1246.8,1246.8,1246.8 155 | 2013-11-21,1243.3,1248,1243.3,1246.4 156 | -------------------------------------------------------------------------------- /rfortraders/Chapter_11/code.R: -------------------------------------------------------------------------------- 1 | # CHAPTER 11 2 | # Speed, Testing, and Reporting 3 | 4 | ################################## 5 | # Runtime execution improvements # 6 | ################################## 7 | sum_with_loop_in_r <- function(max_value) { 8 | sum <- 0 9 | for(i in 1:max_value) { 10 | sum <- sum + i 11 | } 12 | return(sum) 13 | } 14 | 15 | sum_with_vectorization_in_r = function(max_value) { 16 | numbers <- as.double(1:max_value) 17 | return(sum(numbers)) 18 | } 19 | 20 | ####################### 21 | # Benchmarking R code # 22 | ####################### 23 | library(microbenchmark) 24 | microbenchmark(loop = sum_with_loop_in_r(1e5), 25 | vectorized = sum_with_vectorization_in_r(1e5)) 26 | 27 | ## Unit: microseconds 28 | ## expr min lq median 29 | ## loop 57615.323 59424.740 60992.7720 30 | ## vectorized 260.602 273.673 286.5495 31 | 32 | ## uq max neval 33 | ## 89608.441 96694.469 100 34 | ## 294.236 414.349 100 35 | 36 | compiled_sum_with_loop_in_r <- cmpfun(sum_with_loop_in_r) 37 | microbenchmark(loop = sum_with_loop_in_r(1e5), 38 | compiled = compiled_sum_with_loop_in_r(1e5), 39 | vectorized = sum_with_vectorization_in_r(1e5)) 40 | 41 | ## Unit: microseconds 42 | ## expr min lq median 43 | ## loop 56746.652 58343.8945 60602.445 44 | ## compiled 4688.146 4758.6770 4892.246 45 | ## vectorized 249.457 273.8635 284.050 46 | 47 | ## uq max neval 48 | ## 86599.9875 96736.750 100 49 | ## 5498.9710 46484.009 100 50 | ## 292.3135 473.927 100 51 | 52 | ##################### 53 | # The Rcpp solution # 54 | ##################### 55 | long add_cpp(long max_value) { 56 | long sum = 0; 57 | for(long i = 1; i <= max_value; ++i) { 58 | sum = sum + i; 59 | } 60 | return sum; 61 | } 62 | 63 | lapply 64 | function (X, FUN, ...) 65 | { 66 | FUN <- match.fun(FUN) 67 | if (!is.vector(X) || is.object(X)) 68 | X <- as.list(X) 69 | .Internal(lapply(X, FUN)) 70 | } 71 | 72 | library(Rcpp) 73 | 74 | # Create a C++ function 75 | cppFunction(' 76 | long add_cpp(long max_value) { 77 | long sum = 0; 78 | for(long i = 1; i <= max_value; ++i) { 79 | sum = sum + i; 80 | } 81 | return sum; 82 | }' 83 | ) 84 | 85 | add_cpp 86 | ## function (max_value) 87 | ## .Primitive(".Call")(, max_value) 88 | 89 | add_cpp(1e5) 90 | ## [1] 5000050000 91 | 92 | microbenchmark(loop = sum_with_loop_in_r(1e5), 93 | compiled = compiled_sum_with_loop_in_r(1e5), 94 | vectorized = sum_with_vectorization_in_r(1e5), 95 | compiled_cpp = add_cpp(1e5)) 96 | 97 | ## Unit: microseconds 98 | ## expr min lq median 99 | ## loop 73049.461 76640.5945 79635.8810 100 | ## compiled 7359.040 7487.9655 7795.9125 101 | ## vectorized 804.773 932.9285 1031.9695 102 | ## compiled_cpp 79.573 88.2615 98.9385 103 | 104 | ## uq max neval 105 | ## 80676.6600 94618.174 100 106 | ## 12101.8610 135353.743 100 107 | ## 1373.8565 2148.409 100 108 | ## 105.2440 135.781 100 109 | 110 | sourceCpp('Chapter_11/add_2_file.cpp') 111 | 112 | add_2_cpp(100) 113 | ## [1] 5050 114 | 115 | ################################### 116 | # Calling R from C++ with RInside # 117 | ################################### 118 | #include 119 | int main(int argc, char *argv[]) { 120 | 121 | // create an embedded R instance 122 | RInside R(argc, argv); 123 | 124 | // assign a char* (string) to "txt" 125 | R["txt"] = "Hello, world!\n"; 126 | 127 | // eval the init string, ignoring any returns 128 | R.parseEvalQ("cat(txt)"); 129 | 130 | exit(0); 131 | } 132 | 133 | #################################### 134 | # Writing unit tests with testthat # 135 | #################################### 136 | # Define function 137 | convert_to_returns <- function(prices) { 138 | return(9) 139 | } 140 | 141 | require(testthat) 142 | 143 | # Group related functionality together with context() 144 | context("Price to log-return conversion") 145 | 146 | # Define the expectations using expect_that() 147 | test_that("convert_to_returns produces the correct values", { 148 | 149 | # For these inputs 150 | input_prices <- c(100, 101, 102, 103, 99) 151 | 152 | # Expect these outputs 153 | expected_returns <- c(0.009950331, 154 | 0.009852296, 0.009756175, -0.039609138) 155 | 156 | # Verify the expectation of equality 157 | expect_equal(expected_returns, 158 | convert_to_returns(input_prices)) 159 | }) 160 | 161 | ## Error: Test failed: 'convert_to_returns produces 162 | ## the correct values' 163 | ## Not expected: expected_returns not equal to 164 | ## convert_to_returns(input_prices) 165 | ## Numeric: lengths (1, 4) differ. 166 | 167 | # Define function 168 | convert_to_returns <- function(prices) { 169 | return(diff(log(prices))) 170 | } 171 | 172 | # Verify the error message 173 | input_prices <- c(100) 174 | msg <- "Not enough price entries." 175 | 176 | expect_message(msg, convert_to_returns(input_prices)) 177 | ## Error: expected_message no messages shown 178 | 179 | # Function with corner case check 180 | convert_to_returns <- function(prices) { 181 | if(length(prices) < 2) { 182 | message("Not enough price entries.") 183 | } 184 | return(diff(log(prices))) 185 | } 186 | 187 | test_file("example_test_file.r") 188 | 189 | ################################# 190 | # Using knitr for documentation # 191 | ################################# 192 | \documentclass{article} 193 | \usepackage[T1]{fontenc} 194 | 195 | \begin{document} 196 | 197 | This is an example LaTeX document with some embedded 198 | R code woven in for convenience. 199 | 200 | <>= 201 | 202 | x = 1:10 203 | y = x^2 204 | plot(x, y, main = "This is a graph") 205 | @ 206 | 207 | Inline expressions can be written by using the 208 | \verb|\Sexpr{}| convention, e.g. $\pi=\Sexpr{pi}$ 209 | and \Sexpr{2.3492e7} and \Sexpr{x / 2.0}. 210 | 211 | \subsection*{A different subsection} 212 | We can insert graphs without displaying the code. 213 | This can be done using the \texttt{echo = FALSE} 214 | command within the code chunk argument list. 215 | 216 | <>= 217 | x = 1:10 218 | y=x^3 219 | plot(x, y, main = "This is a second graph") 220 | @ 221 | 222 | Any R code can be run within the code chunks 223 | provided by knitr. This next example loads up 224 | \texttt{ggplot2}, and the code creates a nice looking 225 | density histogram. 226 | 227 | <>= 228 | require(ggplot2) 229 | my_data = data.frame(returns = c(0.03, 0.04, 0.05, 230 | 0.032, 0.01, 0.23, 0.4, 0.05, 0.066, 0.5), 231 | stock = c("SPY", "CVX", "CVX", "SPY", 232 | "XOM", "XOM", "CVX", "SPY", "SPY", "XOM")) 233 | 234 | ggplot(my_data, aes(x = returns, fill = stock)) + 235 | geom_density(alpha = 0.2) 236 | @ 237 | \end{document} 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | -------------------------------------------------------------------------------- /rfortraders/Chapter_05/code.R: -------------------------------------------------------------------------------- 1 | # CHAPTER 5 2 | # Intermediate Statistics and Probability 3 | 4 | ############################# 5 | # Stock price distributions # 6 | ############################# 7 | # Extract prices and compute statistics 8 | prices <- SPY$SPY.Adjusted 9 | mean_prices <- round(mean(prices), 2) 10 | sd_prices <- round(sd(prices), 2) 11 | 12 | # Plot the histogram along with a legend 13 | hist(prices, breaks = 100, prob=T, cex.main = 0.9) 14 | abline(v = mean_prices, lwd = 2) 15 | legend("topright", cex = 0.8, border = NULL, bty = "n", 16 | paste("mean=", mean_prices, "; sd=", sd_prices)) 17 | 18 | plot_4_ranges <- function(data, start_date, end_date, title) { 19 | 20 | # Set the plot window to be 2 rows and 2 columns 21 | par(mfrow = c(2, 2)) 22 | for(i in 1:4) { 23 | # Create a string with the appropriate date range 24 | range <- paste(start_date[i], "::", end_date[i], sep = "") 25 | 26 | # Create the price vector and necessary statistics 27 | time_series <- data[range] 28 | 29 | mean_data <- round(mean(time_series, na.rm = TRUE), 3) 30 | sd_data <- round(sd(time_series, na.rm = TRUE), 3) 31 | 32 | # Plot the histogram along with a legend 33 | hist_title <- paste(title, range) 34 | hist(time_series, breaks = 100, prob=TRUE, 35 | xlab = "", main = hist_title, cex.main = 0.8) 36 | legend("topright", cex = 0.7, bty = 'n', 37 | paste("mean=", mean_data, "; sd=", sd_data)) 38 | } 39 | 40 | # Reset the plot window 41 | par(mfrow = c(1, 1)) 42 | } 43 | 44 | # Define start and end dates of interest 45 | begin_dates <- c("2007-01-01", "2008-06-06", 46 | "2009-10-10", "2011-03-03") 47 | end_dates <- c("2008-06-05", "2009-09-09", 48 | "2010-12-30", "2013-01-06") 49 | 50 | # Create plots 51 | plot_4_ranges(prices, begin_dates, 52 | end_dates, "SPY prices for:") 53 | 54 | ################ 55 | # Stationarity # 56 | ################ 57 | # Compute log returns 58 | returns <- diff(log(prices)) 59 | 60 | # Use the same function as before to plot returns rather than prices 61 | plot_4_ranges(returns, begin_dates, end_dates, "SPY log prices for:") 62 | 63 | ###################################### 64 | # Determining stationarity with urca # 65 | ###################################### 66 | # Get SPY data and let's confirm that it is non-stationary 67 | require(quantmod) 68 | getSymbols("SPY") 69 | spy <- SPY$SPY.Adjusted 70 | 71 | # Use the default settings 72 | require(urca) 73 | test <- ur.kpss(as.numeric(spy)) 74 | 75 | # The output is an S4 object 76 | class(test) 77 | ## [1] "ur.kpss" 78 | ## attr(,"package") 79 | ## [1] "urca" 80 | 81 | # Extract the test statistic 82 | test@teststat 83 | ## [1] 11.63543 84 | 85 | # Look at the critical values 86 | test@cval 87 | ## 10pct 5pct 2.5pct 1pct 88 | ## critical values 0.347 0.463 0.574 0.739 89 | 90 | spy_returns <- diff(log(spy)) 91 | 92 | # Test on the returns 93 | test_returns <- ur.kpss(as.numeric(spy_returns)) 94 | test_returns@teststat 95 | ## [1] 0.336143 96 | 97 | test_returns@cval 98 | ## 10pct 5pct 2.5pct 1pct 99 | ## critical values 0.347 0.463 0.574 0.739 100 | 101 | test_post_2013 <- ur.kpss(as.numeric(spy_returns['2013::'])) 102 | test_post_2013@teststat 103 | ## [1] 0.06936672 104 | 105 | ############################ 106 | # Assumptions of normality # 107 | ############################ 108 | # Plot histogram and density 109 | mu <- mean(returns, na.rm = TRUE) 110 | sigma <- sd(returns, na.rm = TRUE) 111 | x <- seq(-5 * sigma, 5 * sigma, length = nrow(returns)) 112 | 113 | hist(returns, breaks = 100, 114 | main = "Histogram of returns for SPY", 115 | cex.main = 0.8, prob=TRUE) 116 | lines(x, dnorm(x, mu, sigma), col = "red", lwd = 2) 117 | 118 | # Set plotting window 119 | par(mfrow = c(1, 2)) 120 | 121 | # SPY data 122 | qqnorm(as.numeric(returns), 123 | main = "SPY empirical returns qqplot()", 124 | cex.main = 0.8) 125 | qqline(as.numeric(returns), lwd = 2) 126 | grid() 127 | 128 | # Normal random data 129 | normal_data <- rnorm(nrow(returns), mean = mu, sd = sigma) 130 | 131 | qqnorm(normal_data, main = "Normal returns", cex.main = 0.8) 132 | qqline(normal_data, lwd = 2) 133 | grid() 134 | 135 | answer <- shapiro.test(as.numeric(returns)) 136 | 137 | answer[[2]] 138 | ## [1] 5.118396e-34 139 | 140 | set.seed(129) 141 | normal_numbers <- rnorm(5000, 0, 1) 142 | ans <- shapiro.test(normal_numbers) 143 | 144 | ans[[2]] 145 | ## [1] 0.9963835 146 | 147 | # Corrupt a single data point 148 | normal_numbers[50] <- 1000 149 | ans <- shapiro.test(normal_numbers) 150 | 151 | ans[[2]] 152 | ## [1] 1.775666e-95 153 | 154 | ############### 155 | # Correlation # 156 | ############### 157 | sv <- as.xts(returns_matrix[, c(1, 6)]) 158 | 159 | head(sv) 160 | ## SPY.Close VXX.Close 161 | ## 2009-02-02 -0.003022794 -0.003160468 162 | ## 2009-02-03 0.013949240 -0.047941603 163 | ## 2009-02-04 -0.004908132 0.003716543 164 | ## 2009-02-05 0.014770965 -0.006134680 165 | 166 | cor(sv) 167 | ## SPY.Close VXX.Close 168 | ## SPY.Close 1.0000000 -0.4603908 169 | ## VXX.Close -0.4603908 1.0000000 170 | 171 | ################## 172 | # Filtering data # 173 | ################## 174 | # Find the outliers 175 | outliers <- which(sv[, 2] > 1.0) 176 | 177 | # If any outliers exist, remove them 178 | if(length(outliers) > 0) { 179 | sv <- sv[-outliers, ] 180 | } 181 | 182 | cor(sv) 183 | ## SPY.Close VXX.Close 184 | ## SPY.Close 1.0000000 -0.8066466 185 | ## VXX.Close -0.8066466 1.0000000 186 | 187 | ############## 188 | # R formulas # 189 | ############## 190 | # Create a formula 191 | my_formula <- as.formula("y ~ x") 192 | 193 | # What is the output? 194 | my_formula 195 | ## y ~ x 196 | 197 | # What is the class of my_formula? 198 | class(my_formula) 199 | ## [1] "formula" 200 | 201 | # Create a linear regression object 202 | reg <- lm(VXX.Close ~ SPY.Close, data = sv) 203 | 204 | # Here is the output 205 | summary(reg) 206 | 207 | ## Call: 208 | ## lm(formula = VXX.Close ~ SPY.Close, data = sv) 209 | 210 | ## Residuals: 211 | ## Min 1Q Median 3Q Max 212 | ## -0.085607 -0.012830 -0.000865 0.012188 0.116349 213 | 214 | ## Coefficients: 215 | ## Estimate Std. Error t value Pr(>|t|) 216 | ## (Intercept) -0.0024365 0.0006641 -3.669 0.000254 *** 217 | ## SPY.Close -2.5848492 0.0552193 -46.811 < 2e-16 *** 218 | ## --- 219 | ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 220 | 221 | ## Residual standard error: 0.02287 on 1187 degrees of freedom 222 | ## Multiple R-squared: 0.6486,Adjusted R-squared: 0.6483 223 | ## F-statistic: 2191 on 1 and 1187 DF, p-value: < 2.2e-16 224 | 225 | b <- reg$coefficients[1] 226 | a <- reg$coefficients[2] 227 | 228 | par(mfrow = c(2, 2)) 229 | plot(reg$residuals, 230 | main = "Residuals through time", 231 | xlab = "Days", ylab = "Residuals") 232 | hist(reg$residuals, breaks = 100, 233 | main = "Distribution of residuals", 234 | xlab = "Residuals") 235 | qqnorm(reg$residuals) 236 | qqline(reg$residuals) 237 | acf(reg$residuals, main = "Autocorrelation") 238 | 239 | 240 | vxx_lag_1 <- lag(VXX$VXX.Close, k = 1) 241 | 242 | head(vxx_lag_1) 243 | ## VXX.Close 244 | ## 2009-01-30 NA 245 | ## 2009-02-02 104.58 246 | ## 2009-02-03 104.25 247 | ## 2009-02-04 99.37 248 | ## 2009-02-05 99.74 249 | ## 2009-02-06 99.13 250 | 251 | head(VXX$VXX.Close) 252 | ## VXX.Close 253 | ## 2009-01-30 104.58 254 | ## 2009-02-02 104.25 255 | ## 2009-02-03 99.37 256 | ## 2009-02-04 99.74 257 | ## 2009-02-05 99.13 258 | ## 2009-02-06 97.70 259 | 260 | # Merge returns with lagged returns 261 | sv <- merge(sv, lag(sv)) 262 | 263 | # Scatter plot of lagged SPY vs. VXX 264 | plot(as.numeric(sv[, 3]), as.numeric(sv[, 2]), 265 | main = "Scatter plot SPY lagged vs. VXX.", 266 | xlab = "SPY lagged", 267 | ylab = "VXX" 268 | cex.main = 0.8, 269 | cex.axis = 0.8, 270 | cex.lab = 0.8) 271 | grid() 272 | 273 | reg2 <- lm(VXX.Close ~ SPY.Close.1, data = sv) 274 | 275 | summary(reg2) 276 | ## Coefficients: 277 | ## Estimate Std. Error t value Pr(>|t|) 278 | ## (Intercept) -0.004140 0.001121 -3.694 0.000231 *** 279 | ## SPY.Close.1 0.104119 0.093154 1.118 0.263918 280 | 281 | ## Residual standard error: 0.03857 on 1186 degrees of freedom 282 | ## (1 observation deleted due to missingness) 283 | ## Multiple R-squared: 0.001052,Adjusted R-squared: 0.00021 284 | ## F-statistic: 1.249 on 1 and 1186 DF, p-value: 0.2639 285 | 286 | ccf(as.numeric(sv[, 1]), as.numeric(sv[, 2]), 287 | main = "Cross correlation between SPY and VXX", 288 | ylab = "Cross correlation", xlab = "Lag", cex.main = 0.8, 289 | cex.lab = 0.8, cex.axis = 0.8) 290 | 291 | ################################### 292 | # The linear in linear regression # 293 | ################################### 294 | x <- seq(1:100) 295 | y <- x ^ 2 296 | 297 | # Generate the plot 298 | plot(x, y) 299 | 300 | # Fit the regression 301 | reg_parabola <- lm(y ~ x) 302 | 303 | # Superimpose the best fit line on the plot 304 | abline(reg_parabola, lwd = 2) 305 | 306 | # Look at the results 307 | summary(reg_parabola) 308 | ## Coefficients: 309 | ## Estimate Std. Error t value Pr(>|t|) 310 | ## (Intercept) -1717.000 151.683 -11.32 <2e-16 *** 311 | ## x 101.000 2.608 38.73 <2e-16 *** 312 | ## --- 313 | ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 314 | 315 | ## Residual standard error: 752.7 on 98 degrees of freedom 316 | ## Multiple R-squared: 0.9387,Adjusted R-squared: 0.9381 317 | ## F-statistic: 1500 on 1 and 98 DF, p-value: < 2.2e-16 318 | 319 | plot(x, sqrt(y)) 320 | reg_transformed <- lm(sqrt(y) ~ x) 321 | abline(reg_transformed) 322 | 323 | summary(reg_transformed) 324 | ## Coefficients: 325 | ## Estimate Std. Error t value Pr(>|t|) 326 | ## (Intercept) -5.684e-14 5.598e-15 -1.015e+01 <2e-16 *** 327 | ## x 1.000e+00 9.624e-17 1.039e+16 <2e-16 *** 328 | ## --- 329 | ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 330 | 331 | ## Residual standard error: 2.778e-14 on 98 degrees of freedom 332 | ## Multiple R-squared: 1,Adjusted R-squared: 1 333 | ## F-statistic: 1.08e+32 on 1 and 98 DF, p-value: < 2.2e-16 334 | 335 | ############## 336 | # Volatility # 337 | ############## 338 | # Generate 1000 IID numbers from a normal distribution. 339 | z <- rnorm(1000, 0, 1) 340 | 341 | # Autocorrelation of returns and squared returns 342 | par(mfrow = c(2, 1)) 343 | acf(z, main = "returns", cex.main = 0.8, 344 | cex.lab = 0.8, cex.axis = 0.8) 345 | 346 | grid() 347 | acf(z ^ 2, main = "returns squared", 348 | cex.lab = 0.8, cex.axis = 0.8) 349 | grid() 350 | 351 | par(mfrow = c(1, 1)) 352 | acf(sv[, 1] ^ 2, main = "Actual returns squared", 353 | cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8) 354 | grid() 355 | 356 | par(mfrow = c(1, 2)) 357 | acf(sv[, 1]^3) 358 | acf(abs(sv[, 1]) 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | -------------------------------------------------------------------------------- /rfortraders/Chapter_09/code.R: -------------------------------------------------------------------------------- 1 | # CHAPTER 9 2 | # Options 3 | 4 | ######################## 5 | # Valuation of options # 6 | ######################## 7 | install.packages("RQuantLib") 8 | library(RQuantLib) 9 | 10 | lsf.str("package:RQuantLib") 11 | ## adjust : function (calendar = "TARGET", dates = Sys.Date(), 12 | ## bdc = 0) 13 | ## advance : function (calendar = "TARGET", dates = Sys.Date(), 14 | ## n, timeUnit, period, bdc = 0, emr = 0) 15 | ## AmericanOption : function (type, underlying, strike, dividendYield, 16 | ## riskFreeRate, maturity, volatility, timeSteps = 150, 17 | ## gridPoints = 149, engine = "BaroneAdesiWhaley") 18 | ## AmericanOption.default : function (type, underlying, strike, 19 | ## dividendYield, riskFreeRate, maturity, volatility, 20 | ## timeSteps = 150, gridPoints = 149, engine = "BaroneAdesiWhaley") 21 | ## AmericanOptionImpliedVolatility : function (type, value, 22 | ## underlying, strike, dividendYield, riskFreeRate, 23 | ## maturity, volatility, timeSteps = 150, gridPoints = 151) 24 | ## AmericanOptionImpliedVolatility.default : function (type, value, 25 | ## underlying, strike, dividendYield, riskFreeRate, maturity, 26 | ## volatility, timeSteps = 150, gridPoints = 151) 27 | ## ... 28 | 29 | call_value <- EuropeanOption(type = "call", underlying = 100, 30 | strike = 100, dividendYield = 0, riskFreeRate = 0.03, 31 | maturity = 1.0, volatility = 0.30) 32 | 33 | ## Concise summary of valuation for EuropeanOption 34 | ## value delta gamma vega theta 35 | ## 13.2833 0.5987 0.0129 38.6668 -7.1976 36 | ## rho divRho 37 | ## 46.5873 -59.8706 38 | 39 | class(call_value) 40 | ## [1] "EuropeanOption" "Option" 41 | 42 | type <- "call" 43 | underlying <- 20:180 44 | strike <- 100 45 | dividendYield <- 0 46 | riskFreeRate <- 0.03 47 | maturity <- 1.0 48 | volatility <- 0.10 49 | 50 | # Function to create plots of option values and Greeks. 51 | option_values <- function(type, underlying, strike, 52 | dividendYield, riskFreeRate, maturity, volatility) { 53 | 54 | # Output list with option values and Greeks 55 | out <- list() 56 | for(i in seq_along(underlying)) { 57 | out[[i]] <- EuropeanOption(type = type, underlying = i, 58 | strike = strike, dividendYield = dividendYield, 59 | riskFreeRate = riskFreeRate, maturity = maturity, 60 | volatility = volatility) 61 | } 62 | 63 | # Set up the plot window 64 | par(mfrow = c(3, 2)) 65 | names <- c("Value", "Delta", "Gamma", 66 | "Vega", "Theta", "Rho") 67 | 68 | for(i in 1:6) { 69 | plot(unlist(lapply(out, "[", i)) , type = "l", 70 | main = paste(names[i], "vs. Underlying"), 71 | xlab = "Underlying", ylab = names[i]) 72 | grid() 73 | abline(v = strike, col = "red") 74 | } 75 | return(out) 76 | } 77 | 78 | option_values(type, underlying, strike, dividendYield, 79 | riskFreeRate, maturity, volatility) 80 | ## ... 81 | ## [[96]] 82 | ## Concise summary of valuation for EuropeanOption 83 | ## value delta gamma vega theta 84 | ## 3.3493 0.4768 0.0415 38.2336 -3.1843 85 | ## rho divRho 86 | ## 42.4222 -45.7715 87 | 88 | ## [[97]] 89 | ## Concise summary of valuation for EuropeanOption 90 | ## value delta gamma vega theta 91 | ## 3.8468 0.5181 0.0411 38.6575 -3.3252 92 | ## rho divRho 93 | ## 46.4098 -50.2566 94 | 95 | ## [[98]] 96 | ## Concise summary of valuation for EuropeanOption 97 | ## value delta gamma vega theta 98 | ## 4.3853 0.5588 0.0403 38.6707 -3.4449 99 | ## rho divRho 100 | ## 50.3788 -54.7642 101 | 102 | option_values(type, underlying, strike, dividendYield, 103 | riskFreeRate, maturity = 0.1, volatility) 104 | 105 | ################################ 106 | # Exploring options trade data # 107 | ################################ 108 | # Create a vector of filenames 109 | folder <- "path/Options/SPY_20130415_T/" 110 | available_files <- list.files(folder) 111 | 112 | # Explore the first few lines of the file 113 | temp <- read.csv(file = paste0(folder, available_files[1]), 114 | header = FALSE, stringsAsFactors = FALSE) 115 | 116 | column_names <- c("date", "time", "trade_indicator", 117 | "sequence_number", "option_exchange_code", 118 | "option_condition_code", "sale_price", "sale_size", 119 | "underlying_last_trade_price", 120 | "underling_last_trade_size", 121 | "stock_exchange_code", "stock_condition_code", 122 | "underlying_bid_price", "underlying_bid_size", 123 | "underlying_ask_price", "underlying_ask_size") 124 | names(temp) <- column_names 125 | 126 | output <- list() 127 | for(i in 1:length(available_files)) { 128 | file_name <- available_files[i] 129 | 130 | type <- substr(file_name, 5, 5) 131 | date <- substr(file_name, 7, 14) 132 | date <- as.Date(date, format = "%Y%m%d") 133 | strike <- substr(file_name, 16, 26) 134 | strike <- strsplit(strike, "_XX")[[1]][1] 135 | 136 | temp <- read.csv(file = paste0(folder, file_name), 137 | header = FALSE, stringsAsFactors = FALSE) 138 | names(temp) <- column_names 139 | 140 | number_of_trades <- nrow(temp) 141 | avg_trade_price <- round(mean(temp$sale_price, 142 | na.rm = TRUE), 3) 143 | 144 | if(number_of_trades <= 1) { 145 | sd_trade_price <- 0 146 | } else { 147 | sd_trade_price <- round(sd(temp$sale_price, 148 | na.rm = TRUE), 3) 149 | } 150 | 151 | total_volume <- sum(temp$sale_size, na.rm = TRUE) 152 | avg_underlying_price <- round(mean( 153 | temp$underlying_bid_price, na.rm = TRUE), 2) 154 | underlying_range <- max(temp$underlying_ask_price) - 155 | min(temp$underlying_bid_price) 156 | 157 | output[[i]] <- data.frame(symbol = 'SPY', date = date, 158 | type = type, strike = strike, 159 | trades = number_of_trades, 160 | volume = total_volume, 161 | avg_price = avg_trade_price, 162 | sd_price = sd_trade_price, 163 | avg_stock_price = avg_underlying_price, 164 | stock_range = underlying_range, 165 | stringsAsFactors = FALSE) 166 | } 167 | 168 | # Convert the list into a table 169 | results <- do.call(rbind, output) 170 | 171 | head(results) 172 | ## symbol date type strike trades volume 173 | ## 1 SPY 2013-04-20 C 120.00 12 33000 174 | ## 2 SPY 2013-04-20 C 124.00 1 15 175 | ## 3 SPY 2013-04-20 C 130.00 1 2 176 | ## 4 SPY 2013-04-20 C 133.00 1 1 177 | ## 5 SPY 2013-04-20 C 140.00 1 95 178 | ## 6 SPY 2013-04-20 C 142.00 1 6 179 | 180 | ## avg_price sd_price avg_stock_price stock_range 181 | ## 35.973 0.261 155.74 0.68 182 | ## 31.380 0.000 155.44 0.01 183 | ## 26.210 0.000 156.24 0.02 184 | ## 24.600 0.000 157.58 0.01 185 | ## 16.465 0.751 156.44 1.51 186 | ## 13.920 0.000 155.87 0.01 187 | 188 | unique_maturities <- unique(results$date) 189 | 190 | today <- as.Date("2013-04-15") 191 | days_to_expiration <- as.Date(unique_maturities[1]) - today 192 | 193 | # Extract only the relevant maturity range 194 | single_maturity_table <- results[results$date == 195 | unique_maturities[1], ] 196 | 197 | # Look at the calls and puts separately 198 | calls <- single_maturity_table[ 199 | single_maturity_table$type == "C", ] 200 | puts <- single_maturity_table[ 201 | single_maturity_table$type == "P", ] 202 | 203 | par(mfrow = c(2, 1)) 204 | plot(calls$strike, calls$volume, 205 | xlab = "Strike", ylab = "Volume", 206 | main = "Call volume", cex.main = 0.9) 207 | abline(v = mean(calls$avg_stock_price), lty = 2) 208 | grid() 209 | 210 | plot(puts$strike, puts$volume, 211 | xlab = "Strike", ylab = "Volume", 212 | main = "Put volume", cex.main = 0.9) 213 | abline(v = mean(puts$avg_stock_price), lty = 2) 214 | grid() 215 | 216 | ###################### 217 | # Implied volatility # 218 | ###################### 219 | # Create a vector of filenames 220 | folder <- "Chapter_09/SPY_20130410_QT/" 221 | 222 | available_files <- list.files(folder) 223 | 224 | # Explore the first few lines of the file 225 | temp <- read.csv(file = paste0(folder, available_files[1]), 226 | header = FALSE, stringsAsFactors = FALSE) 227 | head(temp) 228 | # Output omitted for clarity 229 | 230 | column_names <- c("date", "time", "trade_indicator", 231 | "sequence_number", "option_exchange_code", 232 | "option_condition_code", "bid_price", "bid_size", 233 | "ask_price", "ask_size", "stock_exchange_code", 234 | "stock_condition_code", "underlying_bid_price", 235 | "underlying_bid_size", "underlying_ask_price", 236 | "underlying_ask_size") 237 | 238 | # Find files for July 20, 2013 expiry 239 | files_to_use <- available_files[grep("20130720", 240 | available_files)] 241 | 242 | length(files_to_use) 243 | ## [1] 142 244 | 245 | strikes <- sapply(strsplit(files_to_use, "_"), "[", 4) 246 | type <- sapply(strsplit(files_to_use, "_"), "[", 2) 247 | 248 | # Extract relevant columns of data 249 | quote_list <- list() 250 | 251 | for(i in 1:length(files_to_use)) { 252 | temp <- read.csv(file = paste0(folder, files_to_use[i]), 253 | header = FALSE, stringsAsFactors = FALSE) 254 | names(temp) <- column_names 255 | 256 | # Extract quotes from CBOE only 257 | filter <- temp$trade_indicator == "Q" & 258 | temp$option_exchange_code == "C" 259 | 260 | data <- temp[filter, ] 261 | 262 | # Create xts object 263 | require(xts) 264 | time_index <- as.POSIXct(paste(data$date, data$time), 265 | format = "%m/%d/%Y %H:%M:%OS") 266 | data_filtered <- data[, c("bid_price", "ask_price", 267 | "underlying_bid_price", "underlying_ask_price")] 268 | data_filtered$type <- type[i] 269 | data_filtered$strike <- strikes[i] 270 | xts_prices <- xts(data_filtered, time_index) 271 | 272 | quote_list[[i]] <- xts_prices 273 | } 274 | 275 | data <- quote_list[[49]] 276 | spread <- as.numeric(data$ask_price) - 277 | as.numeric(data$bid_price) 278 | plot(xts(spread, index(data)), 279 | main = "SPY | Expiry = July 20, 2013 | K = 158", 280 | cex.main = 0.8, ylab = "Quote bid-ask spread") 281 | 282 | time_of_interest <- "2013-04-10 10:30:00:: 283 | 2013-04-10 10:30:10" 284 | 285 | strike_list <- list() 286 | for(i in 1:length(quote_list)) { 287 | data <- quote_list[[i]][time_of_interest] 288 | if(nrow(data) > 0) { 289 | mid_quote <- (as.numeric(data$bid_price) + 290 | as.numeric(data$ask_price)) / 2 291 | mid_underlying <- (as.numeric(data$underlying_bid_price) + 292 | as.numeric(data$underlying_ask_price)) / 2 293 | strike_list[[i]] <- c(as.character(index(data[1])), 294 | data$type[1], data$strike[1], names(quote_list[i]), 295 | mid_quote[1], mid_underlying[1]) 296 | } 297 | } 298 | 299 | # Aggregate the columns 300 | df <- as.data.frame(do.call(rbind, strike_list), 301 | stringsAsFactors = FALSE) 302 | names(df) <- c("time", "type", "strike", 303 | "mid_quote", "mid_underlying") 304 | 305 | head(df) 306 | # Output omitted for clarity 307 | 308 | plot(as.numeric(df$strike), as.numeric(df$mid_quote), 309 | main = "Option Price vs. Strike for Calls and Puts", 310 | ylab = "Premium", 311 | xlab = "Strike", 312 | cex.main = 0.8) 313 | grid() 314 | 315 | # Filter the otm options 316 | otm_calls <- df$type == "C" & df$mid_underlying <= df$strike 317 | otm_puts <- df$type == "P" & df$mid_underlying > df$strike 318 | otm <- df[otm_calls | otm_puts, ] 319 | 320 | # Order by strike 321 | otm <- otm[order(otm[, "strike"]), ] 322 | plot(otm$strike, otm$mid_quote, 323 | main = "OTM prices", 324 | xlab = "Strike", 325 | ylab = "Premium", 326 | cex.main = 0.8) 327 | grid() 328 | 329 | # Compute the implied vols for otm options 330 | otm$iv <- NA 331 | for(i in 1:nrow(otm)) { 332 | type <- ifelse(otm$type[i] == "C", "call", "put") 333 | value <- as.numeric(otm$mid_quote[i]) 334 | underlying <- as.numeric(otm$mid_underlying[i]) 335 | strike <- as.numeric(otm$strike[i]) 336 | dividendYield <- 0.03 337 | riskFreeRate <- 0.02 338 | maturity <- 101/252 339 | volatility <- 0.15 340 | otm$iv[i] <- AmericanOptionImpliedVolatility(type, 341 | value, underlying, strike,dividendYield, 342 | riskFreeRate, maturity, volatility)$impliedVol 343 | } 344 | # Generate plot 345 | plot(otm$strike, otm$iv, 346 | main = "Implied Volatility skew for SPY on April 10, 2013 10:30 am", 347 | xlab = "Strike", 348 | ylab = "Implied Volatility", 349 | cex.main = 0.8) 350 | grid() 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | -------------------------------------------------------------------------------- /rfortraders/Chapter_10/code.R: -------------------------------------------------------------------------------- 1 | # CHAPTER 10 2 | # Optimization 3 | 4 | ########################### 5 | # The motivating parabola # 6 | ########################### 7 | # Create the function 8 | f <- function(x) { 9 | return((1 + x) ^ 2) 10 | } 11 | 12 | # Create the derivative 13 | fp <- function(x) { 14 | return(2 * (1 + x)) 15 | } 16 | 17 | # Plot the function and its derivative 18 | x <- seq(-5, 5, 0.1) 19 | plot(x, f(x), type = 'l', lwd = 2, 20 | main = "f(x) and f'(x)", 21 | cex.main = 0.8, 22 | cex.lab = 0.8, 23 | cex.axis = 0.8) 24 | grid() 25 | lines(x, fp(x), lty = 3, lwd = 2) 26 | abline(h = 0) 27 | abline(v = 0) 28 | 29 | ################### 30 | # Newton's method # 31 | ################### 32 | f <- function(x) { 33 | return(x ^ 2 - 4 * x + 1) 34 | } 35 | 36 | uniroot(f, c(-8, -1)) 37 | ## $root 38 | ## [1] -4.236068 39 | 40 | ## $f.root 41 | ## [1] -2.568755e-07 42 | 43 | ## $iter 44 | ## [1] 8 45 | 46 | ## $estim.prec 47 | ## [1] 6.103516e-05 48 | 49 | uniroot(f, c(-1, 2)) 50 | ## $root 51 | ## [1] 0.236044 52 | 53 | ## $f.root 54 | ## [1] -0.0001070205 55 | 56 | ## $iter 57 | ## [1] 6 58 | 59 | ## $estim.prec 60 | ## [1] 6.103516e-05 61 | 62 | # Newton's method with a first order approximation 63 | newton <- function(f, tol = 1E-12, x0 = 1, N = 20) { 64 | # N = total number of iterations 65 | # x0 = initial guess 66 | # tol = abs(xn+1 - xn) 67 | # f = function to be evaluated for a root 68 | 69 | h <- 0.001 70 | i <- 1; x1 <- x0 71 | p <- numeric(N) 72 | while (i <= N) { 73 | df_dx <- (f(x0 + h) - f(x0)) / h 74 | x1 <- (x0 - (f(x0) / df_dx)) 75 | p[i] <- x1 76 | i <- i + 1 77 | if (abs(x1 - x0) < tol) { 78 | break 79 | } 80 | x0 <- x1 81 | } 82 | return(p[1:(i-1)]) 83 | } 84 | 85 | newton(f, x0 = -10) 86 | ## [1] -6.312270 -4.735693 -4.281609 -4.236513 87 | ## [5] -4.236068 -4.236068 -4.236068 -4.236068 88 | 89 | newton(f, x0 = 10) 90 | ## [1] 4.2085746 1.5071738 0.4665599 0.2468819 91 | ## [5] 0.2360964 0.2360680 0.2360680 0.2360680 92 | 93 | options(digits = 14) 94 | newton(f, x0 = 10) 95 | ## [1] 4.20857464272283 1.50717378796250 0.46655991958819 96 | ## [5] 0.24688186711921 0.23609640037269 0.23606798403444 97 | ## [8] 0.23606797750125 0.23606797749979 0.23606797749979 98 | 99 | newton(f, x0 = 0.25) 100 | ## [1] 0.23611419684515 0.23606798830978 0.23606797750221 101 | ## [4] 0.23606797749979 0.23606797749979 102 | 103 | # Create an expression 104 | e <- expression(sin(x)) 105 | 106 | # Compute the derivative 107 | D(e, "x") 108 | ## cos(x) 109 | 110 | f_expr <- expression(x ^ 2 + 4 * x - 1) 111 | 112 | eval(f_expr, list(x = 2)) 113 | ## [1] 11 114 | 115 | newton_alternate <- function(f, tol = 1E-12, x0 = 1, N = 20) { 116 | # N = total number of iterations 117 | # x0 = initial guess 118 | # tol = abs(xn+1 - xn) 119 | # f = expression to be evaluated for a root 120 | 121 | # Compute the symbolic derivative 122 | df_dx = D(f, "x") 123 | 124 | i <- 1; x1 <- x0 125 | p <- numeric(N) 126 | while (i <= N) { 127 | x1 <- (x0 - eval(f, list(x = x0)) / 128 | eval(df_dx, list(x = x0))) 129 | p[i] <- x1 130 | i <- i + 1 131 | if (abs(x1 - x0) < tol) { 132 | break 133 | } 134 | x0 <- x1 135 | } 136 | return(p[1:(i-1)]) 137 | } 138 | 139 | newton_alternate(f_expr, x0 = 10) 140 | ## [1] 4.20833333333333 1.50685123042506 0.46631585084907 141 | ## [4] 0.24681560399775 0.23609368309733 0.23606797764754 142 | ## [7] 0.23606797749979 0.23606797749979 143 | 144 | newton_alternate(f_expr, x0 = -10) 145 | ## [1] -6.3125000000000 -4.7359601449275 -4.2817360731259 146 | ## [4] -4.2365249924418 -4.2360680241934 -4.2360679774998 147 | ## [7] -4.2360679774998 148 | 149 | ############################ 150 | # The brute-force approach # 151 | ############################ 152 | # Create a set of random points x 153 | set.seed(123) 154 | x <- rnorm(100, 0, 1) 155 | 156 | # Make y a function of x 157 | y <- 3.2 + 2.9 * x + rnorm(100, 0, 0.1) 158 | 159 | plot(x, y) 160 | 161 | objective_function <- function(y, x, a, b) { 162 | value <- sum((y - (a * x + b)) ^ 2) 163 | return(value) 164 | } 165 | 166 | # Create a range of a and b values and loop through all of them 167 | a <- seq(-10, 10, 0.25) 168 | b <- seq(-10, 10, 0.25) 169 | 170 | output <- list() 171 | z <- 1 172 | for(i in 1:length(a)) { 173 | for(j in 1:length(b)) { 174 | output[[z]] <- c(objective_function(y, x, a[i], b[j]), 175 | a[i], b[j]) 176 | z <- z + 1 177 | } 178 | } 179 | 180 | # Create a matrix out of the list and find the minimum value 181 | mat <- do.call(rbind, output) 182 | colnames(mat) <- c("obj", "a", "b") 183 | 184 | smallest <- which(mat[, "obj"] == min(mat[, "obj"])) 185 | 186 | mat[smallest, ] 187 | ## obj a b 188 | ## 2.16076 3.00000 3.25000 189 | 190 | a = seq(-5, 5, 0.1) 191 | b = seq(-5, 5, 0.1) 192 | 193 | ## obj a b 194 | ## 0.9077592 2.9000 3.2000 195 | 196 | ########################### 197 | # R optimization routines # 198 | ########################### 199 | args(optim) 200 | function (par, fn, gr = NULL, ..., 201 | method = c("Nelder-Mead", 202 | "BFGS", "CG", "L-BFGS-B", "SANN", "Brent"), 203 | lower = -Inf, 204 | upper = Inf, 205 | control = list(), 206 | hessian = FALSE) 207 | 208 | ############################ 209 | # A curve-fitting exercise # 210 | ############################ 211 | # Create fictitious yields 212 | rates = c(0.025, 0.03, 0.034, 0.039, 0.04, 213 | 0.045, 0.05, 0.06, 0.07, 0.071, 214 | 0.07, 0.069, 0.07, 0.071, 0.072, 215 | 0.074, 0.076, 0.082, 0.088, 0.09) 216 | maturities = 1:20 217 | 218 | plot(maturities, rates, xlab = "years", 219 | main = "Yields", 220 | cex.main = 0.8, 221 | cex.lab = 0.8, 222 | cex.axis = 0.8) 223 | grid() 224 | 225 | poly_5 <- function(x, p) { 226 | f <- p[1] + p[2] * x + p[3] * x^2 + 227 | p[4] * x^3 + p[5] * x^4 + p[6] * x^5 228 | return(f) 229 | } 230 | 231 | obj_5 <- function(x, y, p) { 232 | error <- (y - poly_5(x, p)) ^ 2 233 | return(sum(error)) 234 | } 235 | 236 | # Fit the parameters. Assume 0 for all initial values 237 | out_5 = optim(obj_5, par = c(0, 0, 0, 0, 0, 0), 238 | x = maturities, y = rates) 239 | 240 | out 241 | ## $par 242 | ## [1] 2.4301235956099e-02 1.3138951147963e-03 243 | ## [3] 5.5229326602931e-04 7.5685740385076e-07 244 | ## [5] -4.2119475163787e-06 1.5330958809806e-07 245 | 246 | ## $value 247 | ## [1] 0.00017311660458207 248 | 249 | ## $counts 250 | ## function gradient 251 | ## 501 NA 252 | 253 | ## $convergence 254 | ## [1] 1 255 | 256 | ## $message 257 | ## NULL 258 | 259 | lines(poly_5(maturities, out_5$par), lwd = 1.5, lty = 2) 260 | 261 | poly_7 <- function(x, p) { 262 | f <- p[1] + p[2] * x + p[3] * x^2 + 263 | p[4] * x^3 + p[5] * x^4 + 264 | p[6] * x^5 + p[6] * x^6 + 265 | p[7] * x^7 266 | return(f) 267 | } 268 | 269 | obj_7 <- function(x, y, p) { 270 | error <- (y - poly_7(x, p)) ^ 2 271 | return(sum(error)) 272 | } 273 | 274 | # Fit the parameters. Assume 0 for all initial values 275 | out_7 <- optim(obj_7, par = c(0, 0, 0, 0, 0, 0, 0, 0), 276 | x = maturities, y = rates) 277 | 278 | lines(poly_7(maturities, out_7$par), lwd = 1.5, lty = 3) 279 | 280 | # Specify two polynomials to be used for fitting purposes 281 | poly_5 <- function(x, a) { 282 | f <- a[1] + a[2] * x + a[3] * x ^ 2 + 283 | a[4] * x ^ 3 + a[5] * x ^ 4 + 284 | a[6] * x ^ 5 285 | return(f) 286 | } 287 | 288 | poly_3 <- function(x, offset, intercept, b) { 289 | f <- intercept + b[1] * (x - offset) + 290 | b[2] * (x - offset) ^ 2 + 291 | b[3] * (x - offset) ^ 3 292 | return(f) 293 | } 294 | 295 | obj_3_5 <- function(x, y, offset, p) { 296 | 297 | # All points are at infinity initially 298 | fit <- rep(Inf, length(x)) 299 | ind_5 <- x <= offset 300 | ind_3 <- x > offset 301 | 302 | fit[ind_5] <- poly_5(x[ind_5], p[1:6]) 303 | fit[ind_3] <- poly_3(x[ind_3], offset, 304 | poly_5(offset, p[1:6]), p[7:9]) 305 | 306 | error <- (y - fit) ^ 2 307 | return(sum(error)) 308 | } 309 | 310 | # Fit the parameters. Assume 0 for all initial values 311 | offset <- 9 312 | out_3_5 <- optim(obj_3_5, par = rep(0, 9), 313 | x = maturities, y = rates, offset = offset) 314 | 315 | plot(maturities, rates, xlab = "years", 316 | main = "Yields", 317 | cex.main = 0.8, 318 | cex.lab = 0.8, 319 | cex.axis = 0.8) 320 | grid() 321 | lines(poly_5(maturities[maturities <= offset], 322 | out_3_5$par[1:6]), lwd = 2) 323 | lines(c(rep(NA, offset), 324 | poly_3(maturities[maturities > offset], offset, 325 | poly_5(offset, out_3_5$par[1:6]), 326 | out_3_5$par[7:9])), lwd = 2) 327 | abline(v = offset) 328 | 329 | # Fit loess to the data 330 | obj <- loess(rates ~ maturities, span = 0.5) 331 | 332 | # Plot the data and the fit 333 | plot(maturities, rates, main = "Rates", cex.main = 0.8) 334 | lines(predict(obj), lty = 2) 335 | 336 | ########################## 337 | # Portfolio optimization # 338 | ########################## 339 | install.packages("DEoptim") 340 | 341 | require(DEoptim) 342 | 343 | # Drawdown function 344 | compute_drawdown <- function(x, returns_default = TRUE, 345 | geometric = TRUE) { 346 | # x = Vector of raw pnl or returns 347 | # If returns_default = FALSE, the geometric 348 | # argument is ignored and the pnl is used. 349 | # Output = the maximum drawdown 350 | if(returns_default) { 351 | # Cumulative return calculation 352 | if(geometric) { 353 | cumulative_return <- cumprod(1 + x) 354 | } else { 355 | cumulative_return <- 1 + cumsum(x) 356 | } 357 | max_cumulative_return <- cummax(c(1, cumulative_return))[-1] 358 | drawdown <- -(cumulative_return / max_cumulative_return - 1) 359 | } else { 360 | # PnL vector is used 361 | cumulative_pnl <- c(0, cumsum(x)) 362 | drawdown <- cummax(cumulative_pnl) - cumulative_pnl 363 | drawdown <- drawdown[-1] 364 | } 365 | # Drawdown vector for either pnl or returns 366 | return(drawdown) 367 | } 368 | 369 | obj_max_drawdown <- function(w, r_matrix, small_weight) { 370 | # w is the weight of every stock 371 | # r_matrix is the returns matrix of all stocks 372 | 373 | # Portfolio return 374 | portfolio_return <- r_matrix %*% w 375 | 376 | # Max drawdown 377 | drawdown_penalty <- max(compute_drawdown(portfolio_return)) 378 | 379 | # Create penalty component for sum of weights 380 | weight_penalty <- 100 * (1 - sum(w)) ^ 2 381 | 382 | # Create a penalty component for negative weights 383 | negative_penalty <- -sum(w[w < 0]) 384 | 385 | # Create penalty component for small weights 386 | small_weight_penalty <- 100 * sum(w[w < small_weight]) 387 | 388 | # Objective function to minimize 389 | obj <- drawdown_penalty + weight_penalty + 390 | negative_penalty + small_weight_penalty 391 | return(obj) 392 | } 393 | 394 | # Calculate a returns matrix for multiple stocks 395 | symbol_names <- c("AXP", "BA", "CAT", "CVX", 396 | "DD", "DIS", "GE", "HD", "IBM", 397 | "INTC", "KO", "MMM", "MRK", 398 | "PG", "T", "UTX", "VZ") 399 | 400 | # Load these prices into memory 401 | price_matrix <- NULL 402 | for(name in symbol_names) { 403 | # Extract the adjusted close price vector 404 | price_matrix <- cbind(price_matrix, get(name)[, 6]) 405 | } 406 | 407 | colnames(price_matrix) <- symbol_names 408 | 409 | # Compute returns 410 | returns_matrix <- apply(price_matrix, 2, function(x) diff(log(x))) 411 | 412 | # Specify a small weight below which the allocation should be 0% 413 | small_weight_value <- 0.02 414 | 415 | # Specify lower and upper bounds for the weights 416 | lower <- rep(0, ncol(returns_matrix)) 417 | upper <- rep(1, ncol(returns_matrix)) 418 | 419 | optim_result <- DEoptim(obj_max_drawdown, lower, upper, 420 | control = list(NP = 400, itermax = 300, F = 0.25, CR = 0.75), 421 | returns_matrix, small_weight_value) 422 | 423 | weights <- optim_result$optim$bestmem 424 | 425 | sum(weights) 426 | ## 0.9978 427 | 428 | weights <- weights / sum(weights) 429 | 430 | # Equally weighted portfolio 431 | equal_weights <- rep(1 / 17, 17) 432 | equal_portfolio <- returns_matrix %*% equal_weights 433 | equal_portfolio_cumprod <- cumprod(1 + equal_portfolio) 434 | 435 | # Optimal max drawdown portfolio 436 | optimized_portfolio <- returns_matrix %*% weights 437 | drawdown_portfolio_cumprod <- cumprod(1 + optimized_portfolio) 438 | 439 | main_title <- "Equal vs. Optimized Weights" 440 | plot(drawdown_portfolio_cumprod, type = 'l', xaxt = 'n', 441 | main = main_title, xlab = "", ylab = "cumprod(1 + r)") 442 | lines(equal_portfolio_cumprod, lty = 3) 443 | grid(col = 'black') 444 | 445 | # Set x-axis labels 446 | label_location <- seq(1, length(drawdown_portfolio_cumprod), 447 | by = 90) 448 | labels <- rownames(returns_matrix)[label_location] 449 | axis(side = 1, at = label_location, labels = labels, 450 | las = 2, cex.axis= 0.8) 451 | 452 | # Equal weighted 453 | max(compute_drawdown(equal_portfolio)) 454 | ## [1] 0.597 455 | 456 | # Optimized for the smallest max drawdown 457 | max(compute_drawdown(optimized_portfolio)) 458 | ## [1] 0.515 459 | -------------------------------------------------------------------------------- /rfortraders/Chapter_08/code.R: -------------------------------------------------------------------------------- 1 | # CHAPTER 8 2 | # High-Frequency Data 3 | 4 | ######################### 5 | # High-frequency quotes # 6 | ######################### 7 | 8 | # The following data is not available for public use. 9 | # Those interested in using tick data can contact Tick Data, Inc. 10 | spy_file <- "Chapter_08/SPY_2013_04_15_X_Q.asc" 11 | spy_quotes <- read.csv(file = spy_file, header = FALSE, 12 | stringsAsFactors = FALSE) 13 | 14 | head(spy_quotes, 3) 15 | ## V1 V2 V3 V4 V5 V6 V7 16 | ## 1 04/15/2013 04:00:00.065 T 156.60 0.00 1 0 R 17 | ## 2 04/15/2013 04:00:00.626 P 0.00 0.00 0 0 R 18 | ## 3 04/15/2013 04:00:00.633 P 158.25 158.90 1 47 R 19 | 20 | ## V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 21 | ## NA 1 T T 1 2 A C NA NA NA NA 22 | ## NA 2 P P 0 2 A C NA NA NA NA 23 | ## NA 3 P P 1 2 A C NA NA NA NA 24 | 25 | names(spy_quotes) <- c("date", "time", "exchange", "bid_price", 26 | "ask_price", "bid_size", "ask_size", "quote_condition", 27 | "mode", "market_maker_id", "sequence_number", "bid_exchange", 28 | "ask_exchange", "national_bbo_indicator", 29 | "nasdaq_bbo_indicator", "quote_cancel_correction", 30 | "quote_source", "short_sale_restriction_indicator", 31 | "limit_up_down_bbo_indicator_cqs", 32 | "limit_up_down_bbo_indicator_utp", 33 | "finra_adf_mpid_indicator") 34 | 35 | spy_quotes_arca <- spy_quotes[spy_quotes$exchange %in% c("P"), 36 | c("date", "time", "bid_price", 37 | "ask_price", "bid_size", "ask_size")] 38 | 39 | require(xts) 40 | 41 | # Setting to allow us to view the millisecond precision 42 | options(digits.secs = 3) 43 | 44 | time_index <- as.POSIXct(paste(spy_quotes_arca$date, 45 | spy_quotes_arca$time), format = "%m/%d/%Y %H:%M:%OS") 46 | spy <- xts(spy_quotes_arca[, -c(1, 2)], time_index) 47 | rm(time_index) 48 | 49 | plot(spy$bid_price, type = 'l', 50 | main = "SPY bid price", 51 | cex.main = 0.8, 52 | cex.lab = 0.8, 53 | cex.axis = 0.8) 54 | 55 | spy_filtered <- spy[spy$bid_price > 0, ] 56 | 57 | rows_removed <- nrow(spy) - nrow(spy_filtered) 58 | 59 | rows_removed 60 | ## [1] 2 61 | 62 | plot(spy_filtered$bid_price, type = 'l', 63 | main = "SPY filtered bid price", 64 | cex.main = 0.8, 65 | cex.lab = 0.8, 66 | cex.axis = 0.8) 67 | 68 | summary(as.numeric(spy_filtered$ask_price)) 69 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 70 | ## 154.3 156.1 156.7 156.8 157.7 158.9 71 | 72 | summary(as.numeric(spy_filtered$bid_size)) 73 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 74 | ## 1.00 22.00 52.00 94.93 100.00 1565.00 75 | 76 | summary(as.numeric(spy_filtered$ask_size)) 77 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 78 | ## 1.0 24.0 59.0 110.8 118.0 1412.0 79 | 80 | # Boolean vector where bid price >= the ask price 81 | crossed_prices <- spy_filtered$bid_price >= 82 | spy_filtered$ask_price 83 | 84 | any(crossed_prices) 85 | ## [1] FALSE 86 | 87 | ############################# 88 | # Inter-quote arrival times # 89 | ############################# 90 | # Extract the time index. 91 | quote_times <- index(spy_filtered) 92 | 93 | # Compute the consecutive time differences 94 | time_differences <- as.numeric(diff(quote_times)) 95 | 96 | summary(time_differences) 97 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 98 | ## 0.0000 0.0000 0.0010 0.0645 0.0100 1010.0000 99 | 100 | # Identify times that seem abnormal 101 | long_times <- which(time_differences > 1000) 102 | long_times 103 | ## [1] 884312 104 | 105 | # Show the data around the abnormally long time 106 | spy_abnormal <- spy_filtered[(long_times - 2):(long_times + 2), ] 107 | ## bid_price ask_price bid_size ask_size 108 | ## 2013-04-15 16:13:00.987 155.00 155.02 289 270 109 | ## 2013-04-15 16:13:01.256 155.00 155.02 290 270 110 | ## 2013-04-15 16:13:01.282 155.00 155.02 295 270 111 | ## 2013-04-15 16:29:50.869 154.76 154.79 3 3 112 | ## 2013-04-15 16:29:50.887 154.76 154.77 3 10 113 | 114 | ################################# 115 | # Identifying liquidity regimes # 116 | ################################# 117 | # Calculate the bid-ask spread. 118 | bid_ask_spread <- spy_filtered$ask_price - 119 | spy_filtered$bid_price 120 | 121 | # Filter out abnormal value 122 | outliers <- which(bid_ask_spread > 0.25) 123 | if(length(outliers) > 0) { 124 | bid_ask_spread <- bid_ask_spread[-outliers, ] 125 | } 126 | 127 | # Plot the spread. 128 | plot(bid_ask_spread, type = "l", 129 | main = "Bid ask spread", 130 | cex.main = 0.8, 131 | cex.lab = 0.8, 132 | cex.axis = 0.8) 133 | 134 | # Create three time partitions for the SPY data 135 | early_morning <- "2013-04-15 04:00:00::2013-04-15 08:29:00" 136 | regular_trading <- "2013-04-15 08:30:00::2013-04-15 16:15:00" 137 | after_hours <- "2013-04-15 16:15:01::2013-04-15 20:00:00" 138 | 139 | # Create a histogram of the bid-ask spread for each period 140 | par(mfrow = c(3, 1)) 141 | 142 | # Morning 143 | data <- bid_ask_spread[early_morning] 144 | hist(data, main = early_morning, breaks = 1000, 145 | xlim = c(0, 0.1)) 146 | abline(v = mean(data), lwd = 2, lty = 2) 147 | 148 | # Afternoon 149 | data <- bid_ask_spread[regular_trading] 150 | hist(data, main = regular_trading, breaks = 1000, 151 | xlim = c(0, 0.1)) 152 | abline(v = mean(data), lwd = 2, lty = 2) 153 | 154 | # Evening 155 | data <- bid_ask_spread[after_hours] 156 | hist(data, main = after_hours, breaks = 1000, 157 | xlim = c(0, 0.1)) 158 | abline(v = mean(data), lwd = 2, lty = 2) 159 | 160 | spy_day <- spy_filtered[regular_trading] 161 | 162 | ################### 163 | # The micro-price # 164 | ################### 165 | spy_micro_price <- (spy_day$bid_price * spy_day$ask_size + 166 | spy_day$ask_price * spy_day$bid_size) / 167 | (spy_day$bid_size + spy_day$ask_size) 168 | 169 | par(mfrow = c(1, 1)) 170 | range <- 10000:10100 171 | title <- "Micro-price between bid-ask prices" 172 | 173 | plot(spy_day$ask_price[range], 174 | ylim = c(min(spy_day$bid_price[range]), 175 | max(spy_day$ask_price[range])), 176 | main = title, 177 | cex.main = 0.8, 178 | cex.lab = 0.8, 179 | cex.axis = 0.8) 180 | lines(spy_day$bid_price[range]) 181 | lines(spy_micro_price[range], lty = 2) 182 | 183 | ###################################### 184 | # Distributions and autocorrelations # 185 | ###################################### 186 | spy_returns <- diff(log(spy_micro_price)) 187 | 188 | par(mfrow = c(2, 1)) 189 | plot(spy_returns, 190 | main = "Time series plot of micro-price returns", 191 | cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8) 192 | hist(spy_returns, breaks = 1000, 193 | main = "Micro-price distribution", 194 | cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8) 195 | 196 | par(mfrow = c(1, 1)) 197 | mean_spy <- mean(as.numeric(spy_returns), na.rm = TRUE) 198 | sd_spy <- sd(as.numeric(spy_returns), na.rm = TRUE) 199 | 200 | hist(spy_returns, breaks = 10000, prob = TRUE, 201 | xlim = c(-0.00003, 0.00003), 202 | main = "Micro-price distribution vs. Normal", 203 | cex.main = 0.8, 204 | cex.lab = 0.8, 205 | cex.axis = 0.8) 206 | 207 | curve(dnorm(x, mean_spy, sd_spy), add = TRUE, 208 | yaxt = "n", lwd = 3, lty = 3) 209 | 210 | spy_acf <- acf(as.numeric(spy_returns), 211 | na.action = na.pass, 212 | main = "Autocorrelation", 213 | cex.main = 0.8, 214 | cex.lab = 0.8, 215 | cex.axis = 0.8) 216 | 217 | # Get the SPY traded prices from our tick data file 218 | spy_trades_file <- "path/SPY_2013_04_15_X_T.asc" 219 | spy_trades <- read.csv(file = spy_trades_file, 220 | header = FALSE, stringsAsFactors = FALSE) 221 | 222 | names(spy_trades) <- c("date", "time", "price", 223 | "volume", "exchange", "sales_condition", 224 | "correction_indicator", "sequence_number", 225 | "trade_stop_indicator", "source_of_trade", "trf", 226 | "exclude_record_flag", "filtered_price") 227 | 228 | # Extract only the ARCA trades 229 | spy_trades_arca <- spy_trades[spy_trades$exchange %in% c("P"), 230 | c("date", "time", "price", "volume", "correction_indicator", 231 | "filtered_price")] 232 | 233 | # Check if any filtered prices exist 234 | any(!is.na(spy_trades_arca$filtered_price)) 235 | ## [1] FALSE 236 | 237 | # Check if there are any special correction indicators present 238 | unique(spy_trades_arca$correction_indicator) 239 | ## [1] 0 240 | 241 | # Drop the last two columns from the data frame 242 | spy_trades_arca <- spy_trades_arca[, 1:4] 243 | 244 | # Convert to an xts object for subsequent analysis 245 | time_index <- as.POSIXct(paste(spy_trades_arca$date, 246 | spy_trades_arca$time), format = "%m/%d/%Y %H:%M:%OS") 247 | 248 | spy_t <- xts(spy_trades_arca[, -c(1, 2)], time_index) 249 | rm(time_index) 250 | 251 | # First 6 entries 252 | head(spy_t) 253 | ## price volume 254 | ## 2013-04-15 04:00:00.697 158.25 100 255 | ## 2013-04-15 04:00:00.697 158.24 200 256 | ## 2013-04-15 04:00:00.697 158.15 150 257 | ## 2013-04-15 04:01:42.190 158.06 200 258 | ## 2013-04-15 04:07:16.545 157.94 100 259 | ## 2013-04-15 04:12:45.265 157.92 10000 260 | 261 | # Subset to regular trading hour range 262 | regular_trading <- "2013-04-15 08:30:00::2013-04-15 16:15:00" 263 | spy_t_day <- spy_t[regular_trading] 264 | 265 | # Look at the number of trade entries 266 | dim(spy_t_day) 267 | ## [1] 93197 2 268 | 269 | # Look at the amount of memory taken up 270 | object.size(spy_t_day) 271 | ## [1] 2239080 bytes 272 | 273 | # Compute returns 274 | spy_t_day_returns <- diff(log(spy_t_day$price))[-1] 275 | 276 | # Plot the distribution and the autocorrelation plot 277 | par(mfrow = c(2, 1)) 278 | plot(spy_t_day_returns, main = "SPY returns on trades", 279 | cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8) 280 | acf(as.numeric(spy_t_day_returns), main = "SPY trades acf", 281 | cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8) 282 | 283 | # Distribution of trade returns 284 | par(mfrow = c(1, 1)) 285 | hist(spy_t_day_returns, breaks = 1000, prob = TRUE, 286 | xlim = c(-0.0001, 0.0001), 287 | main = "Distribution of SPY trade returns", 288 | cex.main = 0.8, 289 | cex.lab = 0.8, 290 | cex.axis = 0.8) 291 | 292 | curve(dnorm(x, mean(spy_t_day_returns), 293 | sd(spy_t_day_returns)), 294 | add = TRUE, 295 | yaxt = "n", 296 | lwd = 3, 297 | lty = 3) 298 | 299 | # Use the rle() function to find price sequences 300 | prices_rle <- rle(as.numeric(spy_t_day$price)) 301 | 302 | # Here are the row indexes we want to keep 303 | end_indexes <- cumsum(prices_rle$lengths) 304 | 305 | # Here are the start indexes we want to sum the volumes from 306 | start_indexes <- end_indexes - prices_rle$lengths + 1 307 | 308 | # Create a vector of total volumes for each price 309 | volume_list <- list() 310 | volume_vector <- as.numeric(spy_t_day$volume) 311 | for (i in 1:length(end_indexes)) { 312 | volume_list[[i]] <- sum(volume_vector[start_indexes[i]: 313 | end_indexes[i]], na.rm = TRUE) 314 | } 315 | 316 | # Create a reduced data set with distinct trade prices 317 | spy_t_day_reduced <- spy_t_day[end_indexes, ] 318 | spy_t_day_reduced$volume <- unlist(volume_list) 319 | 320 | head(spy_t_day_reduced, 10) 321 | ## price volume 322 | ## 2013-04-15 08:30:01.964 158.17 510 323 | ## 2013-04-15 08:30:02.783 158.15 1000 324 | ## 2013-04-15 08:30:04.930 158.14 340 325 | ## 2013-04-15 08:30:11.964 158.12 100 326 | ## 2013-04-15 08:30:23.763 158.11 1100 327 | ## 2013-04-15 08:30:29.739 158.10 1720 328 | ## 2013-04-15 08:30:31.963 158.09 200 329 | ## 2013-04-15 08:30:45.164 158.08 4995 330 | ## 2013-04-15 08:30:46.888 158.07 100 331 | ## 2013-04-15 08:30:46.970 158.06 3330 332 | 333 | head(spy_t_day, 10) 334 | ## price volume 335 | ## 2013-04-15 08:30:01.964 158.17 100 336 | ## 2013-04-15 08:30:01.964 158.17 410 337 | ## 2013-04-15 08:30:02.783 158.15 1000 338 | ## 2013-04-15 08:30:04.930 158.14 340 339 | ## 2013-04-15 08:30:11.964 158.12 100 340 | ## 2013-04-15 08:30:23.763 158.11 1000 341 | ## 2013-04-15 08:30:23.763 158.11 100 342 | ## 2013-04-15 08:30:28.153 158.10 400 343 | ## 2013-04-15 08:30:28.529 158.10 320 344 | ## 2013-04-15 08:30:29.739 158.10 180 345 | 346 | # Identify the most traded prices throughout the day 347 | hist(as.numeric(spy_t_day_reduced$price), breaks = 200, 348 | main = "Histogram of traded prices for SPY on 2013-04-15", 349 | cex.main = 0.8, 350 | cex.lab = 0.8, 351 | cex.axis = 0.8) 352 | 353 | acf(diff(log(as.numeric(spy_t_day_reduced$price))), 354 | main = "Autocorrelation of trades", 355 | cex.main = 0.8, 356 | cex.lab = 0.8, 357 | cex.axis = 0.8) 358 | 359 | # Random return cloud with lag 1 360 | n <- rnorm(50, 0, .20) 361 | n_lag1 <- c(NA, n[-length(n)]) 362 | plot(n_lag1, n) 363 | 364 | # Create arrows between the points 365 | s <- seq(length(n)-1) 366 | arrows(n_lag1[s], n[s], n_lag1[s+1], n[s+1]) 367 | 368 | # SPY return cloud with lag 1 369 | spy_t_returns <- diff(log(as.numeric( 370 | spy_t_day_reduced$price[100:150]))) 371 | spy_t_returns_lag1 <- c(NA, spy_t_returns[ 372 | -length(spy_t_returns)]) 373 | plot(spy_t_returns_lag1, spy_t_returns) 374 | 375 | s <- seq(length(spy_t_returns)-1) 376 | arrows(spy_t_returns_lag1[s], spy_t_returns[s], 377 | spy_t_returns_lag1[s+1], spy_t_returns[s+1]) 378 | 379 | ############################# 380 | # The highfrequency package # 381 | ############################# 382 | library("devtools") 383 | install_github("highfrequency", "jonathancornelissen") 384 | 385 | 386 | 387 | 388 | 389 | -------------------------------------------------------------------------------- /rfortraders/Chapter_02/code.R: -------------------------------------------------------------------------------- 1 | ########################## 2 | # Getting started with R # 3 | ########################## 4 | 1 + 1 5 | sqrt(2) 6 | 20 + (26.8 * 23.4) / 2 + exp(1.34) * cos(1) 7 | sin(1) 8 | 5^4 9 | sqrt(-1 + 0i) 10 | 11 | integrand <- function(x) 1 / ((x + 1) * sqrt(x)) 12 | integrate(integrand, lower = 0, upper = Inf) 13 | 14 | x <- 3 15 | x <- x + 1 16 | z <- x ^ 2 17 | z <- "hello quants" 18 | y <- "a" 19 | Z <- sqrt(2) 20 | new.X <- 2.3 21 | 22 | 5+4 23 | ## [1] 9 24 | 25 | ################## 26 | # The c() object # 27 | ################## 28 | first_vector <- c(1, 2, 3, 4, 5, 6) 29 | second_vector <- c("a", "b", "hello") 30 | third_vector <- c("a", 2, 23) 31 | 32 | first_vector 33 | ## [1] 1 2 3 4 5 6 34 | 35 | third_vector 36 | ## [1] "a" "2" "23" 37 | 38 | new_vector <- c(first_vector, 7, 8, 9) 39 | new_vector 40 | ## [1] 1 2 3 4 5 6 7 8 9 41 | 42 | # Extract the 4th element 43 | example_1 <- new_vector[4] 44 | 45 | # Extract the 5th and the 8th elements 46 | example_2 <- new_vector[c(5, 8)] 47 | 48 | example_2 49 | ## [1] 5 8 50 | 51 | x <-c(1,5,10,15,20) 52 | ## [1] 1 5 10 15 20 53 | 54 | x2 <- 2 * x 55 | ## [1] 2 10 20 30 40 56 | 57 | x3 <- x ^ 2 58 | ## [1] 1 25 100 225 400 59 | 60 | x4 <- x / x2 61 | ## [1] 0.5 0.5 0.5 0.5 0.5 62 | 63 | x5 <- round(x * (x / x2) ^ 3.5 + sqrt(x4), 3) 64 | ## [1] 0.795 1.149 1.591 2.033 2.475 65 | 66 | x6 <- round(c(c(x2[2:4], x3[1:2]), x5[4]), 2) 67 | ## [1] 10.00 20.00 30.00 1.00 25.00 2.03 68 | 69 | ####################### 70 | # The matrix() object # 71 | ####################### 72 | my_matrix <- matrix(c(1, 2, 3, 4, 5, 6), 73 | nrow = 2, ncol = 3) 74 | 75 | my_matrix 76 | ## [,1] [,2] [,3] 77 | ## [1,] 1 3 5 78 | ## [2,] 2 4 6 79 | 80 | my_matrix <- matrix(c(1, 2, 3, 4, 5, 6), 81 | nrow = 2, ncol = 3, byrow = TRUE) 82 | 83 | my_matrix 84 | ## [,1] [,2] [,3] 85 | ## [1,] 1 2 3 86 | ## [2,] 4 5 6 87 | 88 | dimnames(my_matrix) <- list(c("one", "hello"), 89 | c("column1", "column2", "c3")) 90 | 91 | my_matrix 92 | ## column1 column2 c3 93 | ## one 1 2 3 94 | ## hello 4 5 6 95 | 96 | attributes(my_matrix) 97 | ## $dim 98 | ## [1] 2 3 99 | 100 | ## $dimnames 101 | ## $dimnames[[1]] 102 | ## [1] "one" "hello" 103 | 104 | ## $dimnames[[2]] 105 | ## [1] "column1" "column2" "c3" 106 | 107 | ans <- my_matrix[1, 3] 108 | ans 109 | ## [1] 3 110 | 111 | new_matrix_1 <- my_matrix * my_matrix 112 | 113 | new_matrix_1 114 | ## [,1] [,2] [,3] 115 | ## [1,] 1 4 9 116 | ## [2,] 16 25 36 117 | 118 | new_matrix_2 <- sqrt(my_matrix) 119 | 120 | new_matrix_2 121 | ## [,1] [,2] [,3] 122 | ## [1,] 1 1.414214 1.732051 123 | ## [2,] 2 2.236068 2.449490 124 | 125 | mat1 <- matrix(rnorm(1000), nrow = 100) 126 | round(mat1[1:5, 2:6], 3) 127 | ## [,1] [,2] [,3] [,4] [,5] 128 | ## [1,] -1.544 1.281 1.397 0.407 -0.459 129 | ## [2,] 0.483 0.046 -1.817 -0.289 0.597 130 | ## [3,] 0.405 1.045 -0.726 -0.163 0.258 131 | ## [4,] 0.141 -0.294 -1.225 -0.217 -0.771 132 | ## [5,] -0.537 0.226 0.126 -1.584 -1.237 133 | 134 | mat2 <- mat1[1:25, ] ^ 2 135 | head(round(mat2, 0), 9)[,1:7] 136 | ## [,1] [,2] [,3] [,4] [,5] [,6] [,7] 137 | ## [1,] 1 2 2 2 0 0 7 138 | ## [2,] 0 0 0 3 0 0 0 139 | ## [3,] 0 0 1 1 0 0 1 140 | ## [4,] 0 0 0 2 0 1 4 141 | ## [5,] 1 0 0 0 3 2 1 142 | ## [6,] 2 1 3 1 1 1 1 143 | ## [7,] 0 0 0 0 0 1 0 144 | ## [8,] 1 2 0 0 1 2 0 145 | ## [9,] 0 0 3 0 2 2 0 146 | 147 | ########################### 148 | # The data.frame() object # 149 | ########################### 150 | df <- data.frame(price = c(89.2, 23.2, 21.2), 151 | symbol = c("MOT", "AAPL", "IBM"), 152 | action = c("Buy", "Sell", "Buy")) 153 | 154 | df 155 | ## price symbol action 156 | ## 1 89.2 MOT Buy 157 | ## 2 23.2 AAPL Sell 158 | ## 3 21.2 IBM Buy 159 | 160 | df3 <-data.frame(price = c(89.2, 23.2, 21.2), 161 | symbol = c("MOT", "AAPL", "IBM"), 162 | action = c("Buy", "Sell", "Buy"), 163 | stringsAsFactors = FALSE) 164 | 165 | class(df3$symbol) 166 | ## [1] "character" 167 | 168 | price <- df[1, 1] 169 | 170 | price 171 | ## [1] 89.2 172 | 173 | df2 <- data.frame(col1 = c(1, 2, 3), 174 | col2 = c(1, 2, 3, 4)) 175 | 176 | ## Error in data.frame(col1 = c(1,2,3), 177 | ## col2 = c(1,2,3,4)) : arguments imply 178 | ## differing number of rows: 3, 4 179 | 180 | symbols <- df$symbol 181 | 182 | symbols 183 | ## [1] MOT AAPL IBM 184 | ## Levels: AAPL IBM MOT 185 | 186 | class(symbols) 187 | ## [1] "factor" 188 | 189 | symbols <- df3$symbol 190 | 191 | symbols 192 | ## [1] "MOT" "AAPL" "IBM" 193 | 194 | ##################### 195 | # The list() object # 196 | ##################### 197 | my_list <- list(a = c(1, 2, 3, 4, 5), 198 | b = matrix(1:10, nrow = 2, ncol = 5), 199 | c = data.frame(price = c(89.3, 98.2, 21.2), 200 | stock = c("MOT", "IBM", "CSCO"))) 201 | 202 | my_list 203 | ## $a 204 | ## [1] 1 2 3 4 5 205 | 206 | ## $b 207 | ## [,1] [,2] [,3] [,4] [,5] 208 | ## [1,] 1 3 5 7 9 209 | ## [2,] 2 4 6 8 10 210 | 211 | ## $c 212 | ## price stock 213 | ## 1 89.3 MOT 214 | ## 2 98.2 IBM 215 | ## 3 21.2 CSCO 216 | 217 | first_element <- my_list[[1]] 218 | 219 | first_element 220 | ## [1] 1 2 3 4 5 221 | 222 | class(first_element) 223 | ## [1] "numeric" 224 | 225 | second_element <- my_list[["b"]] 226 | second_element 227 | ## [,1] [,2] [,3] [,4] [,5] 228 | ## [1,] 1 3 5 7 9 229 | ## [2,] 2 4 6 8 10 230 | 231 | class(second_element) 232 | ## [1] "matrix" 233 | 234 | part_of_list <- my_list[c(1, 3)] 235 | 236 | part_of_list 237 | ## $a 238 | ## [1] 1 2 3 4 5 239 | 240 | ## $c 241 | ## price stock 242 | ## 1 89.3 MOT 243 | ## 2 98.2 IBM 244 | ## 3 21.2 CSCO 245 | 246 | class(part_of_list) 247 | ## [1] "list" 248 | 249 | size_of_list <- length(my_list) 250 | 251 | size_of_list 252 | ## [1] 3 253 | 254 | ######################## 255 | # The new.env() object # 256 | ######################## 257 | env <- new.env() 258 | env[["first"]] <- 5 259 | env[["second"]] <- 6 260 | env$third <- 7 261 | 262 | env 263 | ## 264 | 265 | ls(env) 266 | ## [1] "first" "second" "third" 267 | 268 | get("first", envir = env) 269 | ## 5 270 | 271 | rm("second", envir = env) 272 | ls(env) 273 | ## [1] "first" "third" 274 | 275 | env_2 <- env 276 | env_2$third <- 42 277 | 278 | get("third", envir = env) 279 | ## [1] 42 280 | 281 | ############################# 282 | # Using the plot() function # 283 | ############################# 284 | # Create a vector of numbers x and plot them 285 | x <- c(1, 2, 3.2, 4, 3, 2.1, 9, 19) 286 | plot(x) 287 | 288 | # Convert the graph into a line plot 289 | plot(x, type = "l") 290 | 291 | # Set up the canvas 292 | plot(rnorm(1000), main = "Some returns", cex.main = 0.9, 293 | xlab = "Time", ylab = "Returns") 294 | 295 | # Superimpose a basic grid 296 | grid() 297 | 298 | # Create a few vertical and horizontal lines 299 | abline(v = 400, lwd = 2, lty = 1) 300 | abline(h = 2, lwd = 3, lty = 3) 301 | 302 | # Create a 2-row, 2-column format 303 | par(mfrow = c(2, 2)) 304 | 305 | # First plot (points). 306 | plot(rnorm(100), main = "Graph 1") 307 | 308 | # Second plot (lines). 309 | plot(rnorm(100), main = "Graph 2", type = "l") 310 | 311 | # Third plot (steps) with a vertical line 312 | plot(rnorm(100), main = "Graph 3", type = "s") 313 | abline(v = 50, lwd = 4) 314 | 315 | # Fourth plot 316 | plot(rnorm(100), type = "h", main = "Graph 4") 317 | 318 | # Reset the plot window 319 | par(mfrow = c(1, 1)) 320 | 321 | plot(rnorm(100), main = "A line plot", 322 | cex.main = 0.8, 323 | xlab = "x-axis", 324 | ylab = "y-axis", 325 | type = "l") 326 | 327 | # Extra text 328 | mtext("Some text at the top", side = 3) 329 | 330 | # At x = 40 and y = -1 coordinates 331 | legend(40, -1, "A legend") 332 | 333 | formals(plot.default) 334 | ## $x 335 | 336 | ## $y 337 | ## NULL 338 | 339 | ## $type 340 | ## [1] "p" 341 | ## ... 342 | 343 | ########################## 344 | # Functional programming # 345 | ########################## 346 | ans <- sum(1:100) 347 | 348 | ans 349 | ## [1] 5050 350 | 351 | answer <- 0 352 | for(i in 1:100) { 353 | answer <- answer + i 354 | } 355 | answer 356 | ## [1] 5050 357 | 358 | crack_eggs <- function(number_of_eggs) { 359 | # Code that determines whether eggs have been cracked. 360 | # If they have, set have_all_eggs_been_cracked <- TRUE, 361 | # otherwise, set to FALSE 362 | 363 | return(have_all_eggs_been_cracked) 364 | } 365 | 366 | # Create 100 standard normals 367 | x <- rnorm(100, mean = 0, sd = 1) 368 | 369 | # Find the length of the vector x. 370 | length(x) 371 | 372 | # Compute the mean of x 373 | mean(x) 374 | 375 | # Compute the standard deviation of x 376 | sd(x) 377 | 378 | # Compute the median value of the vector x 379 | median(x) 380 | 381 | # Compute the range (min, max) of a variable 382 | range(x) 383 | 384 | # Find the sum of all the numbers in x 385 | sum(x) 386 | 387 | # Do a cumulative sum of the values in x 388 | cumsum(x) 389 | 390 | # Display the first 3 elements of x 391 | head(x, 3) 392 | 393 | # Display summary statistics on x 394 | summary(x) 395 | 396 | # Sort x from largest to smallest. 397 | sort(x, decreasing = TRUE) 398 | 399 | # Compute the successive difference in x 400 | diff(x) 401 | 402 | # Create an integer sequence from 1 to 10 403 | 1:10 404 | 405 | # A sequence from 1 to 10 in steps of 0.1 406 | seq(1, 10, 0.1) 407 | 408 | # Print the string hello to the screen 409 | print("hello") 410 | 411 | ######################### 412 | # Branching and looping # 413 | ######################### 414 | 415 | # Define a boolean variable 416 | my_boolean <- 1 == 2 417 | 418 | if (my_boolean) { 419 | print("not correct") 420 | } else { 421 | print("XYZ") 422 | } 423 | 424 | for(i in 1:5) { 425 | cat(i, "\n") 426 | } 427 | ## 1 428 | ## 2 429 | ## 3 430 | ## 4 431 | ## 5 432 | 433 | some_list <- list() 434 | for(z in c("hello", "goodbye")) { 435 | some_list[[z]] <- z 436 | } 437 | 438 | some_list 439 | ## $hello 440 | ## [1] "hello" 441 | 442 | ## $goodbye 443 | ## [1] "goodbye" 444 | 445 | ############################# 446 | # A recommended style guide # 447 | ############################# 448 | #sum numbers 449 | x<-0;for(i in 1:10){x=x+1};x 450 | 451 | # Sum numbers 452 | x <- 0 453 | for(i in 1:10) { 454 | x <- x + 1 455 | } 456 | x 457 | 458 | ################################## 459 | # A pairwise correlation example # 460 | ################################## 461 | filter_and_sort_symbols <- function(symbols) { 462 | # Name: filter_symbols 463 | # Purpose: Convert to upper case if not 464 | # and remove any non valid symbols 465 | # Input: symbols = vector of stock tickers 466 | # Output: filtered_symbols = filtered symbols 467 | 468 | # Convert symbols to uppercase 469 | symbols <- toupper(symbols) 470 | 471 | # Validate the symbol names 472 | valid <- regexpr("^[A-Z]{2,4}$", symbols) 473 | 474 | # Return only the valid ones 475 | return(sort(symbols[valid == 1])) 476 | } 477 | 478 | filter_and_sort_symbols(c("MOT", "cvx", "123", "Gog2", "XLe")) 479 | ## "MOT" "CVX" "XLE" 480 | 481 | extract_prices <- function(filtered_symbols, file_path) { 482 | # Name: extract_prices 483 | # Purpose: Read price data from specified file 484 | # Inputs: filtered_symbols = vector of symbols, 485 | # file_path = location of price data 486 | # Output: prices = data.frame of prices per symbol 487 | 488 | # Read in the .csv price file 489 | all_prices <- read.csv(file = file_path, header = TRUE, 490 | stringsAsFactors = FALSE) 491 | 492 | # Make the dates row names 493 | rownames(all_prices) <- all_prices$Date 494 | 495 | # Remove the original Date column 496 | all_prices$Date <- NULL 497 | 498 | # Extract only the relevant data columns 499 | valid_columns <- colnames(all_prices) %in% filtered_symbols 500 | 501 | return(all_prices[, valid_columns]) 502 | } 503 | 504 | A <- c(1, 2, 5, 6, 9) 505 | B <- c(0, 3, 6, 9, 10) 506 | 507 | A %in% B 508 | ## [1] FALSE FALSE FALSE TRUE TRUE 509 | 510 | filter_prices <- function(prices) { 511 | # Name: filter_prices 512 | # Purpose: Identify the rows with missing values 513 | # Inputs: prices = data.frame of prices 514 | # Output: missing_rows = vector of indexes where 515 | # data is missing in any of the columns 516 | 517 | # Returns a boolean vector of good or bad rows 518 | valid_rows <- complete.cases(prices) 519 | 520 | # Identify the index of the missing rows 521 | missing_rows <- which(valid_rows == FALSE) 522 | 523 | return(missing_rows) 524 | } 525 | 526 | compute_pairwise_correlations <- function(prices) { 527 | # Name: compute_pairwise_correlations 528 | # Purpose: Calculates pairwise correlations of returns 529 | # and plots the pairwise relationships 530 | # Inputs: prices = data.frame of prices 531 | # Output: correlation_matrix = A correlation matrix 532 | 533 | # Convert prices to returns 534 | returns <- apply(prices, 2, function(x) diff(log(x))) 535 | 536 | # Plot all the pairwise relationships 537 | pairs(returns, main = "Pairwise return scatter plot") 538 | 539 | # Compute the pairwise correlations 540 | correlation_matrix <- cor(returns, use = "complete.obs") 541 | 542 | return(correlation_matrix) 543 | } 544 | 545 | # Stock tickers entered by user 546 | symbols <- c("IBM", "XOM", "2SG", "TEva", 547 | "G0og", "CVX", "AAPL", "BA") 548 | 549 | # Location of our database of prices 550 | file_path <- "Chapter_02/prices.csv" 551 | 552 | # Filter and sort the symbols 553 | filtered_symbols <- filter_and_sort_symbols(symbols) 554 | filtered_symbols 555 | ## [1] "AAPL" "BA" "CVX" "IBM" "TEVA" "XOM" 556 | 557 | # Extract prices 558 | prices <- extract_prices(filtered_symbols, file_path) 559 | 560 | # Filter prices 561 | missing_rows <- filter_prices(prices) 562 | missing_rows 563 | ## integer(0) 564 | 565 | # Compute correlations 566 | correlation_matrix <- compute_pairwise_correlations(prices) 567 | correlation_matrix 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | -------------------------------------------------------------------------------- /rfortraders/Chapter_06/code.R: -------------------------------------------------------------------------------- 1 | # CHAPTER 6 2 | # Spreads, Betas and Risk 3 | 4 | ############################# 5 | # Defining the stock spread # 6 | ############################# 7 | pepsi <- getSymbols('PEP', from = '2013-01-01', 8 | to = '2014-01-01', adjust = T, auto.assign = FALSE) 9 | 10 | coke <- getSymbols('COKE', from = '2013-01-01', 11 | to = '2014-01-01', adjust = T, auto.assign = FALSE) 12 | 13 | Sys.setenv(TZ = "UTC") 14 | 15 | prices <- cbind(pepsi[, 6], coke[, 6]) 16 | price_changes <- apply(prices, 2, diff) 17 | plot(price_changes[, 1], price_changes[, 2], 18 | xlab = "Coke price changes", 19 | ylab = "Pepsi price changes", 20 | main = "Pepsi vs. Coke", 21 | cex.main = 0.8, 22 | cex.lab = 0.8, 23 | cex.axis = 0.8) 24 | grid() 25 | 26 | ans <- lm(price_changes[, 1] ~ price_changes[, 2]) 27 | beta <- ans$coefficients[2] 28 | 29 | ans2 <- lm(price_changes[, 2] ~ price_changes[, 1]) 30 | beta2 <- ans2$coefficients[2] 31 | 32 | beta 33 | ## [1] 0.2614627 34 | 35 | beta2 36 | ## [1] 0.2539855 37 | 38 | ##################################################### 39 | # Ordinary Least Squares versus Total Least Squares # 40 | ##################################################### 41 | # Get the data 42 | SPY <- getSymbols('SPY', from = '2011-01-01', 43 | to = '2012-12-31', adjust = T, auto.assign = FALSE) 44 | 45 | AAPL <- getSymbols('AAPL', from = '2011-01-01', 46 | to = '2012-12-31', adjust = T, auto.assign = FALSE) 47 | 48 | # Compute price differences 49 | x <- diff(as.numeric(SPY[, 4])) 50 | y <- diff(as.numeric(AAPL[, 4])) 51 | 52 | plot(x, y, main = "Scatter plot of returns. SPY vs. AAPL", 53 | cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8) 54 | abline(lm(y ~ x)) 55 | abline(lm(x ~ y), lty = 2) 56 | grid() 57 | 58 | # Total least squares regression 59 | r <- prcomp( ~ x + y ) 60 | slope <- r$rotation[2, 1] / r$rotation[1, 1] 61 | intercept <- r$center[2] - slope * r$center[1] 62 | 63 | # Show the first principle component on the plot 64 | abline(a = intercept, b = slope, lty = 3) 65 | 66 | ########################### 67 | # Constructing the spread # 68 | ########################### 69 | # Function to calculate the spread 70 | calculate_spread <- function(x, y, beta) { 71 | return(y - beta * x) 72 | } 73 | 74 | # Function to calculate the beta and level 75 | # given start and end dates 76 | calculate_beta_and_level <- function(x, y, 77 | start_date, end_date) { 78 | require(xts) 79 | 80 | time_range <- paste(start_date, "::", 81 | end_date, sep = "") 82 | x <- x[time_range] 83 | y <- y[time_range] 84 | 85 | dx <- diff(x[time_range]) 86 | dy <- diff(y[time_range]) 87 | r <- prcomp( ~ dx + dy) 88 | 89 | beta <- r$rotation[2, 1] / r$rotation[1, 1] 90 | spread <- calculate_spread(x, y, beta) 91 | names(spread) <- "spread" 92 | level <- mean(spread, na.rm = TRUE) 93 | 94 | outL <- list() 95 | outL$spread <- spread 96 | outL$beta <- beta 97 | outL$level <- level 98 | 99 | return(outL) 100 | } 101 | 102 | # Function to calculate buy and sell signals 103 | # with upper and lower threshold 104 | calculate_buy_sell_signals <- function(spread, beta, 105 | level, lower_threshold, upper_threshold) { 106 | 107 | buy_signals <- ifelse(spread <= level - 108 | lower_threshold, 1, 0) 109 | sell_signals <- ifelse(spread >= level + 110 | upper_threshold, 1, 0) 111 | 112 | # bind these vectors into a matrix 113 | output <- cbind(spread, buy_signals, 114 | sell_signals) 115 | colnames(output) <- c("spread", "buy_signals", 116 | "sell_signals") 117 | 118 | return(output) 119 | } 120 | 121 | # Implementation 122 | # Pick an in-sample date range 123 | start_date <- "2009-01-01" 124 | end_date <- "2011-12-31" 125 | x <- SPY[, 6] 126 | y <- AAPL[, 6] 127 | 128 | results <- calculate_beta_and_level(x, y, 129 | start_date, end_date) 130 | 131 | results$beta 132 | ## [1] 4.923278 133 | 134 | results$level 135 | ## [1] -239.0602 136 | 137 | plot(results$spread, ylab = "Spread Value", 138 | main = "AAPL - beta * SPY", 139 | cex.main = 0.8, 140 | cex.lab = 0.8, 141 | cex.axis = 0.8) 142 | 143 | # Out of sample start and end dates 144 | start_date_out_sample <- "2012-01-01" 145 | end_date_out_sample <- "2012-10-22" 146 | range <- paste(start_date_out_sample, "::", 147 | end_date_out_sample, sep = "") 148 | 149 | # Out of sample analysis 150 | spread_out_of_sample <- calculate_spread(x[range], 151 | y[range], results$beta) 152 | 153 | plot(spread_out_of_sample, main = "AAPL - beta * SPY", 154 | cex.main = 0.8, 155 | cex.lab = 0.8, 156 | cex.axis = 0.8) 157 | abline(h = results$level, lwd = 2) 158 | 159 | 160 | #################################### 161 | # Signal generation and validation # 162 | #################################### 163 | # Rolling window of trading days 164 | window_length <- 10 165 | 166 | # Time range 167 | start_date <- "2011-01-01" 168 | end_date <- "2011-12-31" 169 | range <- paste(start_date, "::", 170 | end_date, sep = "") 171 | 172 | # Our stock pair 173 | x <- SPY[range, 6] 174 | y <- AAPL[range, 6] 175 | 176 | dF <- cbind(x, y) 177 | names(dF) <- c("x", "y") 178 | 179 | # Function that we will use to calculate betas 180 | run_regression <- function(dF) { 181 | return(coef(lm(y ~ x - 1, data = as.data.frame(dF)))) 182 | } 183 | 184 | rolling_beta <- function(z, width) { 185 | rollapply(z, width = width, FUN = run_regression, 186 | by.column = FALSE, align = "right") 187 | } 188 | 189 | betas <- rolling_beta(diff(dF), 10) 190 | 191 | data <- merge(betas, dF) 192 | data$spread <- data$y - lag(betas, 1) * data$x 193 | 194 | returns <- diff(dF) / dF 195 | return_beta <- rolling_beta(returns, 10) 196 | data$spreadR <- diff(data$y) / data$y - 197 | return_beta * diff(data$x) / data$x 198 | 199 | tail(data) 200 | ## betas x y spread spreadR 201 | ## 2011-12-22 2.770586 119.60 383.07 138.70795 -0.002322110 202 | ## 2011-12-23 3.094533 120.67 387.66 53.33343 0.003311904 203 | ## 2011-12-27 3.450416 120.76 390.74 17.04417 0.007083611 204 | ## 2011-12-28 3.364819 119.18 387.00 -24.22055 0.004194527 205 | ## 2011-12-29 3.004804 120.41 389.38 -15.77781 -0.003361064 206 | 207 | threshold <- sd(data$spread, na.rm = TRUE) 208 | 209 | threshold 210 | ## [1] 143.7734 211 | 212 | plot(data$spread, main = "AAPL vs. SPY In-Sample", 213 | cex.main = 0.8, 214 | cex.lab = 0.8, 215 | cex.axis = 0.8) 216 | abline(h = threshold, lty = 2) 217 | abline(h = -threshold, lty = 2) 218 | 219 | 220 | # Construct the out of sample spread 221 | # Keep the same 10 day rolling window 222 | window_length <- 10 223 | 224 | # Time range 225 | start_date <- "2012-01-01" 226 | end_date <- "2013-12-31" 227 | range <- paste(start_date, "::", 228 | end_date, sep = "") 229 | 230 | # Our stock pair 231 | x <- SPY[range, 6] 232 | y <- AAPL[range, 6] 233 | 234 | # Bind these together into a matrix 235 | dF <- cbind(x, y) 236 | names(dF) <- c("x", "y") 237 | 238 | # Calculate the out of sample rolling beta 239 | beta_out_of_sample <- rolling_beta(diff(dF), 10) 240 | 241 | # Buy and sell threshold 242 | data_out <- merge(beta_out_of_sample, dF) 243 | data_out$spread <- data_out$y - 244 | lag(beta_out_of_sample, 1) * data_out$x 245 | 246 | # Plot the spread with in-sample bands 247 | plot(data_out$spread, main = "AAPL vs. SPY out of sample", 248 | cex.main = 0.8, 249 | cex.lab = 0.8, 250 | cex.axis = 0.8) 251 | abline(h = threshold, lwd = 2) 252 | abline(h = -threshold, lwd = 2) 253 | 254 | # Generate sell and buy signals 255 | buys <- ifelse(data_out$spread > threshold, 1, 0) 256 | sells <- ifelse(data_out$spread < -threshold, -1, 0) 257 | data_out$signal <- buys + sells 258 | 259 | plot(data_out$spread, main = "AAPL vs. SPY out of sample", 260 | cex.main = 0.8, 261 | cex.lab = 0.8, 262 | cex.axis = 0.8) 263 | abline(h = threshold, lty = 2) 264 | abline(h = -threshold, lty = 2) 265 | 266 | point_type <- rep(NA, nrow(data_out)) 267 | buy_index <- which(data_out$signal == 1) 268 | sell_index <- which(data_out$signal == -1) 269 | 270 | point_type[buy_index] <- 21 271 | point_type[sell_index] <- 24 272 | points(data_out$spread, pch = point_type) 273 | 274 | num_of_buy_signals <- sum(buys, na.rm = TRUE) 275 | num_of_sell_signals <- sum(abs(sells), na.rm = TRUE) 276 | 277 | num_of_buy_signals 278 | ## [1] 303 279 | num_of_sell_signals 280 | ## [1] 189 281 | 282 | ###################### 283 | # Trading the spread # 284 | ###################### 285 | ## beta_out_of_sample x y spread signal 286 | ## 2011-01-13 NA 128.37 345.68 NA NA 287 | ## 2011-01-14 1.7511157 129.30 348.48 NA NA 288 | ## 2011-01-18 1.1630714 129.52 340.65 113.84550 1 289 | ## 2011-01-19 1.2803161 128.25 338.84 189.67609 1 290 | ## 2011-01-20 1.2286891 128.08 332.68 168.69711 1 291 | ## 2011-01-21 0.8045108 128.37 326.72 168.99319 1 292 | ## 2011-01-24 2.4936855 129.10 337.45 233.58766 1 293 | ## 2011-01-25 2.7762163 129.17 341.40 19.29065 0 294 | ## 2011-01-26 3.0802946 129.67 343.85 -16.14196 0 295 | 296 | prev_x_qty <- 0 297 | position <- 0 298 | trade_size <- 100 299 | signal <- as.numeric(data_out$signal) 300 | signal[is.na(signal)] <- 0 301 | beta <- as.numeric(data_out$beta_out_of_sample) 302 | 303 | qty_x <- rep(0, length(signal)) 304 | qty_y <- rep(0, length(signal)) 305 | 306 | for(i in 1:length(signal)) { 307 | if(signal[i] == 1 && position == 0) { 308 | # buy the spread 309 | prev_x_qty <- round(beta[i] * trade_size) 310 | qty_x[i] <- -prev_x_qty 311 | qty_y[i] <- trade_size 312 | position <- 1 313 | } 314 | 315 | if(signal[i] == -1 && position == 0) { 316 | # sell the spread initially 317 | prev_x_qty <- round(beta[i] * trade_size) 318 | qty_x[i] <- prev_x_qty 319 | qty_y[i] <- -trade_size 320 | position <- -1 321 | } 322 | 323 | if(signal[i] == 1 && position == -1) { 324 | # we are short the spread and need to buy 325 | qty_x[i] <- -(round(beta[i] * trade_size) + 326 | prev_x_qty) 327 | prev_x_qty <- round(beta[i] * trade_size) 328 | qty_y[i] <- 2 * trade_size 329 | position <- 1 330 | } 331 | 332 | if(signal[i] == -1 && position == 1) { 333 | # we are long the spread and need to sell 334 | qty_x[i] <- round(beta[i] * trade_size) + prev_x_qty 335 | prev_x_qty <- round(beta[i] * trade_size) 336 | qty_y[i] <- -2 * trade_size 337 | position <- -1 338 | } 339 | } 340 | 341 | qty_x[length(qty_x)] <- -sum(qty_x) 342 | qty_y[length(qty_y)] <- -sum(qty_y) 343 | 344 | data_out$qty_x <- qty_x 345 | data_out$qty_y <- qty_y 346 | 347 | data_out[1:3, ] 348 | ## beta_out_of_sample x y spread signal qty_x qty_y 349 | ## 2012-01-17 2.1511279 123.48 408.20 NA NA 0 0 350 | ## 2012-01-18 2.5890817 124.85 412.44 143.87168 1 -259 100 351 | ## 2012-01-19 2.0711505 125.51 411.13 86.17435 0 0 0 352 | 353 | tail(data_out, 3) 354 | ## beta_out_of_sample x y spread signal qty_x qty_y 355 | ## 2012-12-27 6.5051194 138.15 499.45 -404.90307 -1 0 0 356 | ## 2012-12-28 5.6770827 136.66 494.14 -394.84962 -1 0 0 357 | ## 2012-12-31 6.3934172 138.98 516.04 -272.96095 -1 -668 100 358 | 359 | # function for computing the equity curve 360 | compute_equity_curve <- function(qty, price) { 361 | 362 | cash_buy <- ifelse(sign(qty) == 1, 363 | qty * price, 0) 364 | cash_sell <- ifelse(sign(qty) == -1, 365 | -qty * price, 0) 366 | position <- cumsum(qty) 367 | cumulative_buy <- cumsum(cash_buy) 368 | cumulative_sell <- cumsum(cash_sell) 369 | 370 | equity <- cumulative_sell - cumulative_buy + 371 | position * price 372 | return(equity) 373 | } 374 | 375 | # Add the equity curve columns to the data_out table 376 | data_out$equity_curve_x <- compute_equity_curve( 377 | data_out$qty_x, data_out$x) 378 | data_out$equity_curve_y <- compute_equity_curve( 379 | data_out$qty_y, data_out$y) 380 | 381 | plot(data_out$equity_curve_x + 382 | data_out$equity_curve_y, type = 'l', 383 | main = "AAPL / SPY spread", ylab = "P&L", 384 | cex.main = 0.8, 385 | cex.axis = 0.8, 386 | cex.lab = 0.8) 387 | 388 | ############################ 389 | # More on the equity curve # 390 | ############################ 391 | # Calculates the Sharpe ratio 392 | sharpe_ratio <- function(x, rf) { 393 | sharpe <- (mean(x, na.rm = TRUE) - rf) / 394 | sd(x, na.rm = TRUE) 395 | return(sharpe) 396 | } 397 | 398 | # Calculates the maximum drawdown profile 399 | drawdown <- function(x) { 400 | cummax(x) - x 401 | } 402 | 403 | par(mfrow = c(2, 1)) 404 | equity_curve <- data_out$equity_curve_x + data_out$equity_curve_y 405 | 406 | plot(equity_curve, main = "Equity Curve", 407 | cex.main = 0.8, 408 | cex.lab = 0.8, 409 | cex.axis = 0.8) 410 | 411 | plot(drawdown(equity_curve), main = "Drawdown of equity curve", 412 | cex.main = 0.8, 413 | cex.lab = 0.8, 414 | cex.axis = 0.8) 415 | 416 | equity <- as.numeric(equity_curve[, 1]) 417 | equity_curve_returns <- diff(equity) / equity[-length(equity)] 418 | 419 | # Remove any infinities and NaN 420 | invalid_values <- is.infinite(equity_curve_returns) 421 | | is.nan(equity_curve_returns) 422 | 423 | sharpe_ratio(equity_curve_returns[!invalid_values], 0.03) 424 | [1] 0.0658528 425 | 426 | omega_ratio <- function(r, T) { 427 | omega <- mean(pmax(r - T, 0)) / mean(pmax(T - r, 0)) 428 | return(omega) 429 | } 430 | 431 | ####################### 432 | # Strategy attributes # 433 | ####################### 434 | # Find out where the trades occur 435 | trade_dates <- data_out$qty_x[data_out$qty_x != 0] 436 | 437 | # The trade_dates object is an xts object whose index 438 | # contains the necessary time information 439 | duration <- as.numeric(diff(index(trade_dates))) 440 | 441 | # Summary statistics 442 | summary(duration) 443 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 444 | ## 1.00 13.50 21.00 31.84 44.00 128.00 445 | 446 | # Histogram of trade duration 447 | hist(duration, breaks = 20, 448 | main = "Histogram of trade durations", 449 | cex.main = 0.8, 450 | cex.lab = 0.8, 451 | cex.axis = 0.8) 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | -------------------------------------------------------------------------------- /rfortraders/Chapter_03/code.R: -------------------------------------------------------------------------------- 1 | # CHAPTER 3 2 | # Working with Data 3 | 4 | ####################### 5 | # Getting data into R # 6 | ####################### 7 | # In Windows 8 | aapl <- read.table("clipboard") 9 | 10 | # On Mac/Linux 11 | aapl <- read.table(pipe("pbpaste")) 12 | 13 | head(aapl) 14 | ## V1 15 | ## 1 104.08 16 | ## 2 110.26 17 | ## 3 96.80 18 | ## 4 88.74 19 | ## 5 89.79 20 | ## 6 89.16 21 | 22 | class(aapl) 23 | ## [1] "data.frame" 24 | 25 | aapl <- aapl[rev(rownames(aapl)), , drop = FALSE] 26 | 27 | prices <- aapl$V1 28 | plot(prices, main = "AAPL plot", type = 'l') 29 | 30 | # Load the .csv file 31 | aapl_2 <- read.csv(file = "Chapter_03/aapl.csv", header = TRUE, stringsAsFactors = FALSE) 32 | 33 | # Reverse the entries 34 | aapl_2 <- aapl_2[rev(rownames(aapl_2)), ] 35 | 36 | aapl_close <- aapl_2[, "Close"] 37 | 38 | summary(aapl_close) 39 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 40 | ## 12.94 24.69 38.13 46.80 53.61 199.80 41 | 42 | ############################ 43 | # Installing packages in R # 44 | ############################ 45 | install.packages(pkgs, lib, repos = getOption("repos"), 46 | contriburl = contrib.url(repos, type), 47 | method, available = NULL, destdir = NULL, 48 | dependencies = NA, type = getOption("pkgType"), 49 | configure.args = getOption("configure.args"), 50 | configure.vars = getOption("configure.vars"), 51 | clean = FALSE, Ncpus = getOption("Ncpus", 1L), 52 | verbose = getOption("verbose"), 53 | libs_only = FALSE, INSTALL_opts, quiet = FALSE, 54 | keep_outputs = FALSE, ...) 55 | 56 | ################################# 57 | # Storing and transmitting data # 58 | ################################# 59 | { 60 | "CVX": 61 | { 62 | "Currency": "USD", 63 | "Sector": "Basic Materials", 64 | "Industry": "Major Integrated Oil & Gas" 65 | }, 66 | "GOOG": 67 | { 68 | "Currency": "USD", 69 | "Sector": "Technology", 70 | "Industry": "Internet Information Providers" 71 | } 72 | } 73 | 74 | # Install and load the package 75 | install.packages("RJSONIO") 76 | library(RJSONIO) 77 | 78 | # Read the file 79 | out <- fromJSON(content = "Chapter_03/sample_json_file.json" ) 80 | 81 | # Look at the structure of the resulting object 82 | str(out) 83 | ## List of 2 84 | ## $ CVX : Named chr [1:3] "USD" "Basic Materials... 85 | ## ..- attr(*, "names")= chr [1:3] "Currency"... 86 | ## $ GOOG: Named chr [1:3] "USD" "Technology"... 87 | ## ..- attr(*, "names")= chr [1:3] "Currency"... 88 | 89 | write.csv(aapl_2, file = "Chapter_03/aapl_2.csv") 90 | 91 | save(aapl_2, file = "Chapter_03/aapl_2.rdata") 92 | 93 | aapl_old <- aapl_2 94 | rm(aapl_2) 95 | load(file = "Chapter_03/aapl_2.rdata") 96 | 97 | identical(aapl_old, aapl_2) 98 | ## [1] TRUE 99 | 100 | ###################################### 101 | # Extracting data from a spreadsheet # 102 | ###################################### 103 | library(XLConnect) 104 | # Create a workbook object 105 | # Needs Java 6 runtime as a default 106 | 107 | book <- loadWorkbook("Chapter_03/strategy.xlsx") 108 | 109 | # Convert it into a data frame 110 | signals <- readWorksheet(book, sheet = "signals", header = TRUE) 111 | 112 | signals 113 | ## time signal1 signal2 114 | ## 1 08:30:00 0.43 -0.20 115 | ## 2 08:31:00 0.54 0.33 116 | ## 3 08:32:00 0.32 -0.21 117 | 118 | strength <- readWorksheet(book, sheet = "strength", header = TRUE) 119 | 120 | strength 121 | ## intensity score 122 | ## 1 2 7.5 123 | ## 2 3 8.4 124 | ## 3 6 5.4 125 | 126 | # Setup a new spreadsheet 127 | book <- loadWorkbook("demo_sheet.xlsx", create = TRUE) 128 | 129 | # Create a sheet called stock1 130 | createSheet(book, name = "stock1") 131 | 132 | # Creating a sheet called stock2 133 | createSheet(book, name = "stock2") 134 | 135 | # Load data into workbook 136 | df <- data.frame(a = c(1, 2, 3), b = c(4, 5, 6)) 137 | writeWorksheet (book, data=df, sheet="stock1", header = TRUE) 138 | 139 | # Save the workbook 140 | saveWorkbook(book, file = "Chapter_03/demo_sheet.xlsx") 141 | 142 | ######################## 143 | # Accessing a database # 144 | ######################## 145 | # Load the RODBC package 146 | require(RODBC) 147 | 148 | # Establish a connection to MySQL 149 | con <- odbcConnect("rfortraders") 150 | 151 | # Choose the database name and table name 152 | database_name <- "OptionsData" 153 | table_name <- "ATMVolatilities" 154 | symbol <- "SPY" 155 | 156 | sql_command <- paste0("SELECT Symbol, Date, Maturity, 157 | Delta, CallPut, ImpliedVolatility 158 | FROM ", database_name, ".", table_name, 159 | " WHERE Maturity = 91 160 | AND Symbol IN ('", symbol, "');") 161 | 162 | iv <- sqlQuery(con, sql_command) 163 | 164 | # disconnect from database 165 | odbcClose(con) 166 | 167 | head(iv) 168 | ## Symbol Date Maturity Delta CallPut ImpliedVolatility 169 | ## SPY 6/9/2014 91 55 C 0.115925 170 | ## SPY 6/9/2014 91 60 C 0.119577 171 | ## SPY 6/9/2014 91 65 C 0.123468 172 | ## SPY 6/9/2014 91 70 C 0.127629 173 | ## SPY 6/9/2014 91 75 C 0.132094 174 | ## SPY 6/9/2014 91 80 C 0.136776 175 | 176 | 177 | # Load the necessary package 178 | require(RMySQL) 179 | 180 | # Establish a connection 181 | con <- dbConnect(MySQL(), 182 | user="your_login", 183 | password="your_password", 184 | dbname="OptionsData", 185 | host="location_of_database") 186 | 187 | # List the tables and fields 188 | dbListTables(con) 189 | 190 | # Define the command and extract a data frame 191 | sql_command <- paste0("SELECT Symbol, Date, Maturity, 192 | Delta, CallPut, ImpliedVolatility FROM ", 193 | database_name, ".", table_name, 194 | " WHERE Maturity = 91 195 | AND Symbol IN ('", symbol, "');") 196 | 197 | result <- dbGetQuery(con, sql_command) 198 | 199 | # Close the connection 200 | dbDisconnect(con) 201 | 202 | results <- dbSendQuery(con, sql_command) 203 | partial_results <- fetch(results, n = 100) 204 | 205 | ##################### 206 | # The dplyr package # 207 | ##################### 208 | # Get the CRAN version 209 | install.packages("dplyr") 210 | require(dplyr) 211 | 212 | # Or, first load devtools 213 | install.packages("devtools") 214 | require(devtools) 215 | 216 | # Get the github version 217 | devtools::install_github("hadley/dplyr") 218 | require(dplyr) 219 | 220 | # Load the flight database that comes with dplyr 221 | library(hflights) 222 | 223 | # Look at number of rows and columns 224 | dim(hflights) 225 | ## [1] 227496 21 226 | 227 | # First, coerce the data into a data.table 228 | flights_dt <- tbl_dt(hflights) 229 | 230 | # What type of object is this? 231 | class(flights_dt) 232 | ## [1] "tbl_dt" "tbl" "data.table" "data.frame" 233 | 234 | # Create a grouping by carrier 235 | carrier_group <- group_by(flights_dt, UniqueCarrier) 236 | 237 | # Now compute the summary statistics 238 | summarise(carrier_group, avg_delay = mean(ArrDelay, na.rm = TRUE)) 239 | 240 | # load the library xts 241 | library(xts) 242 | 243 | # Load a small dataset that comes along with xts. 244 | # We could have used our original .csv file as well. 245 | data(sample_matrix) 246 | 247 | # Look at the data 248 | head(sample_matrix) 249 | ## [1] "matrix" 250 | 251 | # What is the type of this object? 252 | class(sample_matrix) 253 | ## [1] "matrix" 254 | 255 | # Use the str() command to get more details about this object. 256 | str(sample_matrix) 257 | ## num [1:180, 1:4] 50 50.2 50.4 50.4 50.2 ... 258 | ## - attr(*, "dimnames")=List of 2 259 | ## ..$ : chr [1:180] "2007-01-02" "2007-01-03" 260 | ## "2007-01-04" "2007-01-05" ... 261 | ## ..$ : chr [1:4] "Open" "High" "Low" "Close" 262 | 263 | xts_matrix <- as.xts(sample_matrix, descr = 'my new xts object') 264 | 265 | str(xts_matrix} 266 | ## An 'xts' object on 2007-01-02/2007-06-30 containing: ## Data: num [1:180, 1:4] 50 50.2 50.4 50.4 50.2 ... 267 | ## - attr(*, "dimnames")=List of 2 268 | ## ..$ : NULL 269 | ## ..$ : chr [1:4] "Open" "High" "Low" "Close" 270 | ## Indexed by objects of class: [POSIXct,POSIXt] TZ: ## xts Attributes: 271 | ## List of 3 272 | ## $ tclass: chr [1:2] "POSIXct" "POSIXt" 273 | ## $tzone:chr"" 274 | ## $ descr : chr "my new xts object" 275 | 276 | # Simple plot 277 | plot(xts_matrix[,1], main = "Our first xts plot", 278 | cex.main = 0.8) 279 | 280 | # Or we can try something fancier. 281 | plot(xts_matrix, main = "Candle plot on xts object", 282 | cex.main = 0.8, type = "candles") 283 | 284 | plot(xts_matrix["2007-01-01::2007-02-12"], 285 | main = "An xts candle plot with subsetting", 286 | cex.main = 0.8, type = "candles") 287 | 288 | range <- "2007-03-15::2007-06-15" 289 | plot(xts_matrix(range)) 290 | 291 | start_date <- "2007-05-05" 292 | end_date <- "2007-12-31" 293 | 294 | plot(xts_matrix[paste(start_date, "::", 295 | end_date, sep = "")]) 296 | 297 | # Defaults to space separator 298 | paste("Hello", "World", "in R") 299 | ## [1] "Hello World in R" 300 | 301 | paste("Hello", "Again", sep = "**") 302 | ## [1] "Hello**Again" 303 | 304 | paste(c(1,2,3,4,5), collapse = "oooo") 305 | ## [1] "1oooo2oooo3oooo4oooo5" 306 | 307 | # Create a vector of 10 fictitious stock prices along with 308 | # a time index in microsecond resolution. 309 | price_vector <- c(101.02, 101.03, 101.03, 101.04, 101.05, 310 | 101.03, 101.02, 101.01, 101.00, 100.99) 311 | 312 | dates <- c("03/12/2013 08:00:00.532123", 313 | "03/12/2013 08:00:01.982333", 314 | "03/12/2013 08:00:01.650321", 315 | "03/12/2013 08:00:02.402321", 316 | "03/12/2013 08:00:02.540432", 317 | "03/12/2013 08:00:03.004554", 318 | "03/12/2013 08:00:03.900213", 319 | "03/12/2013 08:00:04.050323", 320 | "03/12/2013 08:00:04.430345", 321 | "03/12/2013 08:00:05.700123") 322 | 323 | # Allow the R console to display the microsecond field 324 | options(digits.secs = 6) 325 | 326 | # Create the time index with the correct format 327 | time_index <- strptime(dates, format = "%d/%m/%Y %H:%M:%OS") 328 | 329 | # Pass the time index into the its object 330 | xts_price_vector <- xts(price_vector, time_index) 331 | 332 | # Plot the price of the fictitious stock 333 | plot(xts_price_vector, main = "Fictitious price series", 334 | cex.main = 0.8) 335 | 336 | # Add a horizontal line where the mean value is 337 | abline(h = mean(xts_price_vector), lwd = 2) 338 | 339 | # Add a vertical blue line at a specified time stamp 340 | my_time <- as.POSIXct("03/12/2013 08:00:03.004554", 341 | format = "%d/%m/%Y %H:%M:%OS") 342 | 343 | abline(v = my_time, lwd = 2, lty = 2) 344 | 345 | es_price <- c(1700.00, 1700.25, 1700.50, 1700.00, 1700.75, 346 | 1701.25, 1701.25, 1701.25, 1700.75, 1700.50) 347 | 348 | es_time <- c("09/12/2013 08:00:00.532123", 349 | "09/12/2013 08:00:01.982333", 350 | "09/12/2013 08:00:05.650321", 351 | "09/12/2013 08:10:02.402321", 352 | "09/12/2013 08:12:02.540432", 353 | "09/12/2013 08:12:03.004554", 354 | "09/12/2013 08:14:03.900213", 355 | "09/12/2013 08:15:07.090323", 356 | "09/12/2013 08:16:04.430345", 357 | "09/12/2013 08:18:05.700123") 358 | 359 | # create an xts time series object 360 | xts_es <- xts(es_price, as.POSIXct(es_time, 361 | format = "%d/%m/%Y %H:%M:%OS")) 362 | 363 | names(xts_es) <- c("price") 364 | 365 | time_diff <- difftime(index(xts_es)[2], index(xts_es)[1], 366 | units = "secs") 367 | 368 | time_diff 369 | ## Time difference of 1.45021 secs 370 | 371 | diffs <- c() 372 | for(i in 2:length(index(xts_es))) { 373 | diffs[i] <- difftime(index(xts_es)[i], index(xts_es)[i - 1], 374 | units = "secs") 375 | } 376 | 377 | diffs <- index(xts_es)[-1] - index(xts_es)[-length(index(xts_es))] 378 | 379 | diffs 380 | ## Time differences in secs 381 | ## [1] 1.4502099 3.6679881 596.7520001 382 | ## [4] 120.1381109 0.4641221 120.8956590 383 | ## [7] 63.1901100 57.3400221 121.2697780 384 | ## attr(,"tzone") 385 | 386 | class(diffs) 387 | ## [1] "difftime" 388 | 389 | es_times <- index(xts_es) 390 | diffs <- es_times[-1] - es_times[-length(es_times)] 391 | 392 | diffs 393 | ## Time differences in secs 394 | ## [1] 1.4502099 3.6679881 596.7520001 395 | ## [4] 120.1381109 0.4641221 120.8956590 396 | ## [7] 63.1901100 57.3400221 121.2697780 397 | ## attr(,"tzone") 398 | 399 | par(mfrow = c(2, 1)) 400 | diffs <- as.numeric(diffs) 401 | plot(diffs, main = "Time difference in seconds for ES trades", 402 | xlab = "", ylab = "Time differences", 403 | cex.lab = 0.8, 404 | cex.main = 0.8) 405 | grid() 406 | 407 | hist(diffs, main = "Time difference in seconds for ES trades", 408 | xlab = "Time difference (secs)", ylab = "Observations", 409 | breaks = 20, 410 | cex.lab = 0.8, 411 | cex.main = 0.8) 412 | grid() 413 | 414 | ############################## 415 | # Using the quantmod package # 416 | ############################## 417 | # Load the quantmod packages after installing it locally. 418 | library(quantmod) 419 | 420 | AAPL <- getSymbols("AAPL", auto.assign=FALSE) 421 | head(AAPL) 422 | 423 | ########################## 424 | # Charting with quantmod # 425 | ########################## 426 | # Adding some technical indicators on top of the original plot 427 | chartSeries(AAPL, subset='2010::2010-04', 428 | theme = chartTheme('white'), 429 | TA = "addVo(); addBBands()") 430 | 431 | reChart(subset='2009-01-01::2009-03-03') 432 | 433 | chartSeries(AAPL, subset='2011::2012', 434 | theme = chartTheme('white'), 435 | TA = "addBBands(); addDEMA()") 436 | 437 | addVo() 438 | addDPO() 439 | 440 | # Initial chart plot with no indicators 441 | chartSeries(AAPL, theme = chartTheme('white'), TA = NULL) 442 | 443 | # Custom function creation 444 | my_indicator <- function(x) { 445 | return(x + 90) 446 | } 447 | 448 | add_my_indicator <- newTA(FUN = my_indicator, preFUN=Cl, 449 | legend.name = "My Fancy Indicator", on = 1) 450 | 451 | add_my_indicator() 452 | 453 | ######################### 454 | # Graphing wiht ggplot2 # 455 | ######################### 456 | # Create a matrix with price and volume 457 | df <- AAPL[, c("AAPL.Adjusted", "AAPL.Volume")] 458 | names(df) <- c("price", "volume") 459 | 460 | # Create 461 | df$return <- diff(log(df[, 1])) 462 | df <- df[-1, ] 463 | 464 | df$cuts <- cut(abs(df$return), 465 | breaks = c(0, 0.02, 0.04, 0.25), 466 | include.lowest = TRUE) 467 | 468 | # Create another column for the mean 469 | df$means <- NA 470 | for(i in 1:3) { 471 | group <- which(df$cuts == i) 472 | if(length(group) > 0) { 473 | df$means[group] <- mean(df$volume[group]) 474 | } 475 | } 476 | 477 | # Load ggplot2 478 | library(ggplot2) 479 | 480 | ggplot(df) + 481 | geom_histogram(aes(x=volume)) + 482 | facet_grid(cuts ~ .) + 483 | geom_vline(aes(xintercept=means), linetype="dashed", size=1) 484 | 485 | 486 | -------------------------------------------------------------------------------- /bar_charts/bar_charts.R: -------------------------------------------------------------------------------- 1 | rm(list=ls(all=TRUE)) 2 | setwd("~/Desktop/ggplot2/bar_charts") 3 | 4 | library(quantmod) 5 | library(ggplot2) 6 | library(gtable) 7 | library(scales) 8 | library(grid) 9 | 10 | readData <- read.csv('GCQ 2014-07-03.csv',stringsAsFactors=FALSE,header=TRUE,sep=',') 11 | revData <- apply(readData,2,rev) 12 | dfData <- data.frame(readData) 13 | xtsData <- xts(dfData[,2:5],as.Date(dfData[,1])) 14 | 15 | 16 | #2013-12-26 17 | subsetDF<-xtsData["2013-12-18/"] 18 | plotDF<- data.frame(subsetDF[,1:4]) 19 | plotDF$xaxis <- as.Date(index(subsetDF)) 20 | 21 | 22 | # Y-Axis 23 | plotMIN <- min(plotDF[,1:4], na.rm=TRUE) 24 | plotMAX <- max(plotDF[,1:4], na.rm=TRUE) 25 | 26 | minDigits <- nchar(round(plotMIN))-1 27 | maxDigits <- nchar(round(plotMAX))-1 28 | 29 | evenMIN <- as.numeric(paste(substring(as.character(plotMIN*100), 1, minDigits),0,sep="")) 30 | evenMAX <- as.numeric(paste(substring(as.character(plotMAX*100), 1, maxDigits),0,sep="")) 31 | 32 | roundUpNice <- function(x, nice=c(1,2,4,5,6,8,10)) { 33 | if(length(x) != 1) stop("'x' must be of length 1") 34 | 10^floor(log10(x)) * nice[[which(x <= 10^floor(log10(x)) * nice)[[1]]]] 35 | } 36 | 37 | niceLimit <- roundUpNice(evenMAX)/100 38 | ySequence <- seq(evenMIN-niceLimit,evenMAX+niceLimit,by=niceLimit) 39 | #ySequence <- c(1700,1750,1800,1850,1900,1950) 40 | 41 | # X-Axis 42 | date1 <- first(index(subsetDF)) 43 | date2 <- last(index(subsetDF)) 44 | dateLength <- length(seq(from=date1, to=date2, by='month'))*2 # -1 for complete months 45 | 46 | xSequence <- seq(2,dateLength-1,2) 47 | 48 | 49 | ############################################################################################### 50 | # CQG 51 | ############################################################################################### 52 | # Draw OHLC Bars 53 | p <- ggplot(plotDF) 54 | p <- p+geom_segment(aes(x=xaxis,xend=xaxis,y=plotDF[,3],yend=plotDF[,2]),size=.80) 55 | p <- p+geom_segment(aes(x=xaxis-0.4,xend=xaxis,y=plotDF[,1],yend=plotDF[,1]),size=.90) 56 | p <- p+geom_segment(aes(x=xaxis,xend=xaxis+0.4,y=plotDF[,4],yend=plotDF[,4]),size=.90) 57 | # -------------------------------------------------------------------------------# 58 | # Setup 1 59 | setup1 <- which(rownames(plotDF) == "2014-02-18") 60 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1,])), y = plotDF[setup1,"high"]+5, label = "9", colour='blue',size=6) 61 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-1,])), y = plotDF[setup1-1,"high"]+3, label = "8", colour='blue',size=4) 62 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-2,])), y = plotDF[setup1-2,"high"]+3, label = "7", colour='blue',size=4) 63 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-3,])), y = plotDF[setup1-3,"high"]+3, label = "6", colour='blue',size=4) 64 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-4,])), y = plotDF[setup1-4,"high"]+3, label = "5", colour='blue',size=4) 65 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-5,])), y = plotDF[setup1-5,"high"]+3, label = "4", colour='blue',size=4) 66 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-6,])), y = plotDF[setup1-6,"high"]+3, label = "3", colour='blue',size=4) 67 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-7,])), y = plotDF[setup1-7,"high"]+3, label = "2", colour='blue',size=4) 68 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-8,])), y = plotDF[setup1-8,"high"]+3, label = "1", colour='blue',size=4) 69 | 70 | # Setup 2 71 | setup2<- which(rownames(plotDF) == "2014-03-28") 72 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2,])), y = plotDF[setup2,"low"]-5, label = "9", colour='blue',size=6) 73 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-1,])), y = plotDF[setup2-1,"low"]-3, label = "8", colour='blue',size=4) 74 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-2,])), y = plotDF[setup2-2,"low"]-3, label = "7", colour='blue',size=4) 75 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-3,])), y = plotDF[setup2-3,"low"]-3, label = "6", colour='blue',size=4) 76 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-4,])), y = plotDF[setup2-4,"low"]-3, label = "5", colour='blue',size=4) 77 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-5,])), y = plotDF[setup2-5,"low"]-3, label = "4", colour='blue',size=4) 78 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-6,])), y = plotDF[setup2-6,"low"]-3, label = "3", colour='blue',size=4) 79 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-7,])), y = plotDF[setup2-7,"low"]-3, label = "2", colour='blue',size=4) 80 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-8,])), y = plotDF[setup2-8,"low"]-3, label = "1", colour='blue',size=4) 81 | 82 | # Setup 3 83 | setup3 <- which(rownames(plotDF) == "2014-06-17") 84 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3,])), y = plotDF[setup3,"high"]+5, label = "9", colour='blue',size=6) 85 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-1,])), y = plotDF[setup3-1,"high"]+3, label = "8", colour='blue',size=4) 86 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-2,])), y = plotDF[setup3-2,"high"]+3, label = "7", colour='blue',size=4) 87 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-3,])), y = plotDF[setup3-3,"high"]+3, label = "6", colour='blue',size=4) 88 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-4,])), y = plotDF[setup3-4,"high"]+3, label = "5", colour='blue',size=4) 89 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-5,])), y = plotDF[setup3-5,"high"]+3, label = "4", colour='blue',size=4) 90 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-6,])), y = plotDF[setup3-6,"high"]+3, label = "3", colour='blue',size=4) 91 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-7,])), y = plotDF[setup3-7,"high"]+3, label = "2", colour='blue',size=4) 92 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-8,])), y = plotDF[setup3-8,"high"]+3, label = "1", colour='blue',size=4) 93 | 94 | # Setup 4 95 | setup4 <- which(rownames(plotDF) == "2014-07-01") 96 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4,])), y = plotDF[setup4,"high"]+5, label = "9", colour='blue',size=6) 97 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-1,])), y = plotDF[setup4-1,"high"]+3, label = "8", colour='blue',size=4) 98 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-2,])), y = plotDF[setup4-2,"high"]+3, label = "7", colour='blue',size=4) 99 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-3,])), y = plotDF[setup4-3,"high"]+3, label = "6", colour='blue',size=4) 100 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-4,])), y = plotDF[setup4-4,"high"]+3, label = "5", colour='blue',size=4) 101 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-5,])), y = plotDF[setup4-5,"high"]+3, label = "4", colour='blue',size=4) 102 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-6,])), y = plotDF[setup4-6,"high"]+3, label = "3", colour='blue',size=4) 103 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-7,])), y = plotDF[setup4-7,"high"]+3, label = "2", colour='blue',size=4) 104 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-8,])), y = plotDF[setup4-8,"high"]+3, label = "1", colour='blue',size=4) 105 | 106 | # 13 in March and May 107 | countdown1 <- which(rownames(plotDF) == "2014-05-30") 108 | countdown2 <- which(rownames(plotDF) == "2014-03-14") 109 | countdown3 <- which(rownames(plotDF) == "2013-12-20") 110 | p <- p+annotate("text", x = as.Date(rownames(plotDF[countdown1,])), y = plotDF[countdown1,"low"]-(niceLimit/2), label = "13", colour='magenta',size=18) 111 | p <- p+annotate("text", x = as.Date(rownames(plotDF[countdown2,])), y = plotDF[countdown2,"high"]+(niceLimit/2), label = "13", colour='magenta',size=18) 112 | p <- p+annotate("text", x = as.Date(rownames(plotDF[countdown3,])), y = plotDF[countdown3,"low"]-(niceLimit/2), label = "13", colour='magenta',size=18) 113 | 114 | # Countdown 1 115 | p <- p+annotate("text", x = as.Date("2014-03-13"), y = plotDF["2014-03-13","high"]+3, label = "12", colour='red',size=4) 116 | p <- p+annotate("text", x = as.Date("2014-03-12"), y = plotDF["2014-03-12","high"]+3, label = "11", colour='red',size=4) 117 | p <- p+annotate("text", x = as.Date("2014-03-06"), y = plotDF["2014-03-06","high"]+3, label = "10", colour='red',size=4) 118 | p <- p+annotate("text", x = as.Date("2014-03-03"), y = plotDF["2014-03-03","high"]+3, label = "9", colour='red',size=4) 119 | p <- p+annotate("text", x = as.Date("2014-02-25"), y = plotDF["2014-02-25","high"]+3, label = "8", colour='red',size=4) 120 | p <- p+annotate("text", x = as.Date("2014-02-24"), y = plotDF["2014-02-24","high"]+3, label = "7", colour='red',size=4) 121 | p <- p+annotate("text", x = as.Date("2014-02-18"), y = plotDF["2014-02-18","high"]+10, label = "6", colour='red',size=4) 122 | p <- p+annotate("text", x = as.Date("2014-02-14"), y = plotDF["2014-02-14","high"]+8, label = "5", colour='red',size=4) 123 | p <- p+annotate("text", x = as.Date("2014-02-13"), y = plotDF["2014-02-13","high"]+8, label = "4", colour='red',size=4) 124 | p <- p+annotate("text", x = as.Date("2014-02-12"), y = plotDF["2014-02-12","high"]+8, label = "3", colour='red',size=4) 125 | p <- p+annotate("text", x = as.Date("2014-02-11"), y = plotDF["2014-02-11","high"]+8, label = "2", colour='red',size=4) 126 | p <- p+annotate("text", x = as.Date("2014-02-10"), y = plotDF["2014-02-10","high"]+8, label = "1", colour='red',size=4) 127 | 128 | # Countdown 2 129 | p <- p+annotate("text", x = as.Date("2014-05-29"), y = plotDF["2014-05-29","low"]-2, label = "12", colour='red',size=4) 130 | p <- p+annotate("text", x = as.Date("2014-05-28"), y = plotDF["2014-05-28","low"]-2, label = "11", colour='red',size=4) 131 | p <- p+annotate("text", x = as.Date("2014-05-27"), y = plotDF["2014-05-27","low"]-2, label = "10", colour='red',size=4) 132 | p <- p+annotate("text", x = as.Date("2014-04-01"), y = plotDF["2014-04-01","low"]-2, label = "9", colour='red',size=4) 133 | p <- p+annotate("text", x = as.Date("2014-03-31"), y = plotDF["2014-03-31","low"]-2, label = "8", colour='red',size=4) 134 | p <- p+annotate("text", x = as.Date("2014-03-28"), y = plotDF["2014-03-28","low"]-10, label = "7", colour='red',size=4) 135 | p <- p+annotate("text", x = as.Date("2014-03-27"), y = plotDF["2014-03-27","low"]-8, label = "6", colour='red',size=4) 136 | p <- p+annotate("text", x = as.Date("2014-03-26"), y = plotDF["2014-03-26","low"]-8, label = "5", colour='red',size=4) 137 | p <- p+annotate("text", x = as.Date("2014-03-24"), y = plotDF["2014-03-24","low"]-8, label = "4", colour='red',size=4) 138 | p <- p+annotate("text", x = as.Date("2014-03-20"), y = plotDF["2014-03-20","low"]-8, label = "3", colour='red',size=4) 139 | p <- p+annotate("text", x = as.Date("2014-03-19"), y = plotDF["2014-03-19","low"]-8, label = "2", colour='red',size=4) 140 | p <- p+annotate("text", x = as.Date("2014-03-18"), y = plotDF["2014-03-18","low"]-8, label = "1", colour='red',size=4) 141 | 142 | # Countdown 3 143 | p <- p+annotate("text", x = as.Date("2014-07-02"), y = plotDF["2014-07-02","high"]+3, label = "12", colour='red',size=4) 144 | p <- p+annotate("text", x = as.Date("2014-07-01"), y = plotDF["2014-07-01","high"]+10, label = "11", colour='red',size=4) 145 | p <- p+annotate("text", x = as.Date("2014-06-25"), y = plotDF["2014-06-25","high"]+8, label = "10", colour='red',size=4) 146 | p <- p+annotate("text", x = as.Date("2014-06-24"), y = plotDF["2014-06-24","high"]+7, label = "9", colour='red',size=4) 147 | p <- p+annotate("text", x = as.Date("2014-06-20"), y = plotDF["2014-06-20","high"]+8, label = "8", colour='red',size=4) 148 | p <- p+annotate("text", x = as.Date("2014-06-19"), y = plotDF["2014-06-19","high"]+8, label = "7", colour='red',size=4) 149 | p <- p+annotate("text", x = as.Date("2014-06-16"), y = plotDF["2014-06-16","high"]+8, label = "6", colour='red',size=4) 150 | p <- p+annotate("text", x = as.Date("2014-06-13"), y = plotDF["2014-06-13","high"]+8, label = "5", colour='red',size=4) 151 | p <- p+annotate("text", x = as.Date("2014-06-12"), y = plotDF["2014-06-12","high"]+8, label = "4", colour='red',size=4) 152 | p <- p+annotate("text", x = as.Date("2014-06-11"), y = plotDF["2014-06-11","high"]+8, label = "3", colour='red',size=4) 153 | p <- p+annotate("text", x = as.Date("2014-06-10"), y = plotDF["2014-06-10","high"]+8, label = "2", colour='red',size=4) 154 | p <- p+annotate("text", x = as.Date("2014-06-05"), y = plotDF["2014-06-05","high"]+8, label = "1", colour='red',size=4) 155 | 156 | # Text: "GCQ combo version 1" 157 | p <- p+annotate("text", x = as.Date("2014-05-13"), y = 1390, label = "GCQ", colour='magenta',size=10) 158 | p <- p+annotate("text", x = as.Date("2014-05-13"), y = 1375, label = "combo version 1", colour='magenta',size=10) 159 | 160 | # Format the graph axis and panels 161 | p <- p+scale_y_continuous(scale_y_log10(),breaks=c(ySequence),limits=c(evenMIN,evenMAX+niceLimit)) 162 | p <- p+theme(axis.title=element_blank(), 163 | panel.background=element_rect(fill='#FFFFFF'), 164 | panel.grid.minor = element_line(colour = "#000000",size=0.1, linetype = "19"), 165 | panel.grid.major = element_line(colour = "#000000",size=0.3, linetype = "15"), 166 | axis.text.x=element_text(colour="black"), 167 | axis.text.y=element_text(colour="black")) 168 | p 169 | 170 | g <- ggplot_gtable(ggplot_build(p)) 171 | 172 | # axis tweaks 173 | ia <- which(g$layout$name == "axis-l") 174 | ax <- g$grobs[[ia]]$children[[2]] 175 | ax$widths <- rev(ax$widths) 176 | ax$grobs <- rev(ax$grobs) 177 | ax$grobs[[1]]$x <- ax$grobs[[1]]$x - unit(1, "npc") + unit(0.15, "cm") 178 | pp <- c(subset(g$layout, name == "panel", select = t:r)) 179 | g <- gtable_add_cols(g, g$widths[g$layout[ia, ]$l], length(g$widths) - 1) 180 | g <- gtable_add_grob(g, ax, pp$t, length(g$widths) - 1, pp$b) 181 | g$grobs[[ia]]$children[[2]] <- NULL 182 | ################################################################################ 183 | ia <- which(g$layout$name == "ylab") 184 | ylab <- g$grobs[[ia]] 185 | g <- gtable_add_cols(g, g$widths[g$layout[ia, ]$l], length(g$widths) - 1) 186 | g <- gtable_add_grob(g, ylab, pp$t, length(g$widths) - 1, pp$b) 187 | g$grobs[[ia]]$label = '' 188 | grid.draw(g) 189 | 190 | 191 | 192 | 193 | # THANK YOU --------------------------------------------------------------- 194 | 195 | # Method to Move Axis to the Right Side 196 | # http://stackoverflow.com/questions/15334494/how-to-change-positions-of-x-and-y-axis-in-ggplot2 197 | 198 | # x-axis breaks 199 | # http://r.789695.n4.nabble.com/ggplot2-scaling-and-tick-mark-of-x-axis-td857540.html 200 | 201 | # Arrow Annotation 202 | # http://stackoverflow.com/questions/17032393/how-to-draw-arrow-in-ggplot2-with-annotation 203 | 204 | # Round to the nearest factor 205 | # http://stackoverflow.com/questions/6461209/how-to-round-up-to-the-nearest-10-or-100-or-x/6463946#6463946 206 | 207 | # Hide Missing Dates from the X-axis 208 | # http://stackoverflow.com/questions/5169366/r-ggplot2-how-to-hide-missing-dates-from-x-axis 209 | 210 | 211 | 212 | 213 | -------------------------------------------------------------------------------- /rfortraders/Chapter_07/code.R: -------------------------------------------------------------------------------- 1 | # CHAPTER 7 2 | # Backtesting with Quantstrat 3 | 4 | ################# 5 | # Initial setup # 6 | ################# 7 | 8 | # Suppresses warnings 9 | options("getSymbols.warning4.0" = FALSE) 10 | 11 | # Do some house cleaning 12 | rm(list = ls(.blotter), envir = .blotter) 13 | 14 | # Set the currency and the timezone 15 | currency('USD') 16 | Sys.setenv(TZ = "UTC") 17 | 18 | # Define symbols of interest 19 | symbols <- c("XLB", #SPDR Materials sector 20 | "XLE", #SPDR Energy sector 21 | "XLF", #SPDR Financial sector 22 | "XLP", #SPDR Consumer staples sector 23 | "XLI", #SPDR Industrial sector 24 | "XLU", #SPDR Utilities sector 25 | "XLV", #SPDR Healthcare sector 26 | "XLK", #SPDR Tech sector 27 | "XLY", #SPDR Consumer discretionary sector 28 | "RWR", #SPDR Dow Jones REIT ETF 29 | "EWJ", #iShares Japan 30 | "EWG", #iShares Germany 31 | "EWU", #iShares UK 32 | "EWC", #iShares Canada 33 | "EWY", #iShares South Korea 34 | "EWA", #iShares Australia 35 | "EWH", #iShares Hong Kong 36 | "EWS", #iShares Singapore 37 | "IYZ", #iShares U.S. Telecom 38 | "EZU", #iShares MSCI EMU ETF 39 | "IYR", #iShares U.S. Real Estate 40 | "EWT", #iShares Taiwan 41 | "EWZ", #iShares Brazil 42 | "EFA", #iShares EAFE 43 | "IGE", #iShares North American Natural Resources 44 | "EPP", #iShares Pacific Ex Japan 45 | "LQD", #iShares Investment Grade Corporate Bonds 46 | "SHY", #iShares 1-3 year TBonds 47 | "IEF", #iShares 3-7 year TBonds 48 | "TLT" #iShares 20+ year Bonds 49 | ) 50 | 51 | # SPDR ETFs first, iShares ETFs afterwards 52 | if(!"XLB" %in% ls()) { 53 | # If data is not present, get it from yahoo 54 | suppressMessages(getSymbols(symbols, from = from, 55 | to = to, src = "yahoo", adjust = TRUE)) 56 | } 57 | 58 | # Define the instrument type 59 | stock(symbols, currency = "USD", multiplier = 1) 60 | 61 | ############################################### 62 | # The first strategy: A simple trend follower # 63 | ############################################### 64 | "lagATR" <- function(HLC, n = 14, maType, lag = 1, ...) { 65 | ATR <- ATR(HLC, n = n, maType = maType, ...) 66 | ATR <- lag(ATR, lag) 67 | out <- ATR$atr 68 | colnames(out) <- "atr" 69 | return(out) 70 | } 71 | 72 | "osDollarATR" <- function(orderside, tradeSize, pctATR, 73 | maxPctATR = pctATR, data, timestamp, 74 | symbol, prefer = "Open", portfolio, integerQty = TRUE, 75 | atrMod = "", rebal = FALSE, ...) { 76 | if(tradeSize > 0 & orderside == "short"){ 77 | tradeSize <- tradeSize * -1 78 | } 79 | 80 | pos <- getPosQty(portfolio, symbol, timestamp) 81 | atrString <- paste0("atr", atrMod) 82 | atrCol <- grep(atrString, colnames(mktdata)) 83 | 84 | if(length(atrCol) == 0) { 85 | stop(paste("Term", atrString, 86 | "not found in mktdata column names.")) 87 | } 88 | 89 | atrTimeStamp <- mktdata[timestamp, atrCol] 90 | if(is.na(atrTimeStamp) | atrTimeStamp == 0) { 91 | stop(paste("ATR corresponding to", atrString, 92 | "is invalid at this point in time. Add a logical 93 | operator to account for this.")) 94 | } 95 | 96 | dollarATR <- pos * atrTimeStamp 97 | 98 | desiredDollarATR <- pctATR * tradeSize 99 | remainingRiskCapacity <- tradeSize * 100 | maxPctATR - dollarATR 101 | 102 | if(orderside == "long"){ 103 | qty <- min(tradeSize * pctATR / atrTimeStamp, 104 | remainingRiskCapacity / atrTimeStamp) 105 | } else { 106 | qty <- max(tradeSize * pctATR / atrTimeStamp, 107 | remainingRiskCapacity / atrTimeStamp) 108 | } 109 | 110 | if(integerQty) { 111 | qty <- trunc(qty) 112 | } 113 | if(!rebal) { 114 | if(orderside == "long" & qty < 0) { 115 | qty <- 0 116 | } 117 | if(orderside == "short" & qty > 0) { 118 | qty <- 0 } 119 | } 120 | if(rebal) { 121 | if(pos == 0) { 122 | qty <- 0 123 | } 124 | } 125 | return(qty) 126 | } 127 | 128 | require(quantstrat) 129 | require(PerformanceAnalytics) 130 | 131 | initDate = "1990-01-01" 132 | from = "2003-01-01" 133 | to = "2013-12-31" 134 | options(width = 70) 135 | 136 | # To rerun the strategy, rerun everything below this line 137 | # demoData.R contains all of the data-related boilerplate. 138 | source("demoData.R") 139 | 140 | # Trade sizing and initial equity settings 141 | tradeSize <- 10000 142 | initEq <- tradeSize * length(symbols) 143 | 144 | strategy.st <- "Clenow_Simple" 145 | portfolio.st <- "Clenow_Simple" 146 | account.st <- "Clenow_Simple" 147 | rm.strat(portfolio.st) 148 | rm.strat(strategy.st) 149 | 150 | initPortf(portfolio.st, symbols = symbols, 151 | initDate = initDate, currency = 'USD') 152 | 153 | initAcct(account.st, portfolios = portfolio.st, 154 | initDate = initDate, currency = 'USD', initEq = initEq) 155 | 156 | initOrders(portfolio.st, initDate = initDate) 157 | 158 | strategy(strategy.st, store=TRUE) 159 | 160 | ################################## 161 | # Backtesting the first strategy # 162 | ################################## 163 | nLag = 252 164 | pctATR = 0.02 165 | period = 10 166 | 167 | namedLag <- function(x, k = 1, na.pad = TRUE, ...) { 168 | out <- lag(x, k = k, na.pad = na.pad, ...) 169 | out[is.na(out)] <- x[is.na(out)] 170 | colnames(out) <- "namedLag" 171 | return(out) 172 | } 173 | 174 | add.indicator(strategy.st, name = "namedLag", 175 | arguments = list(x = quote(Cl(mktdata)), k = nLag), 176 | label = "ind") 177 | 178 | add.indicator(strategy.st, name = "lagATR", 179 | arguments = list(HLC = quote(HLC(mktdata)), n = period), 180 | label = "atrX") 181 | 182 | test <- applyIndicators(strategy.st, mktdata = OHLC(XLB)) 183 | head(round(test, 2), 253) 184 | 185 | # Signals 186 | add.signal(strategy.st, name = "sigCrossover", 187 | arguments = list(columns = c("Close", "namedLag.ind"), 188 | relationship = "gt"), 189 | label = "coverOrBuy") 190 | 191 | add.signal(strategy.st, name = "sigCrossover", 192 | arguments = list(columns = c("Close", "namedLag.ind"), 193 | relationship = "lt"), 194 | label = "sellOrShort") 195 | 196 | # Long rules 197 | add.rule(strategy.st, name = "ruleSignal", 198 | arguments = list(sigcol = "coverOrBuy", 199 | sigval = TRUE, ordertype = "market", 200 | orderside = "long", replace = FALSE, 201 | prefer = "Open", osFUN = osDollarATR, 202 | tradeSize = tradeSize, pctATR = pctATR, 203 | atrMod = "X"), type = "enter", path.dep = TRUE) 204 | 205 | add.rule(strategy.st, name = "ruleSignal", 206 | arguments = list(sigcol = "sellOrShort", 207 | sigval = TRUE, orderqty = "all", 208 | ordertype = "market", orderside = "long", 209 | replace = FALSE, prefer = "Open"), 210 | type = "exit", path.dep = TRUE) 211 | 212 | # Short rules 213 | add.rule(strategy.st, name = "ruleSignal", 214 | arguments = list(sigcol = "sellOrShort", 215 | sigval = TRUE, ordertype = "market", 216 | orderside = "short", replace = FALSE, 217 | prefer = "Open", osFUN = osDollarATR, 218 | tradeSize = -tradeSize, pctATR = pctATR, 219 | atrMod = "X"), type = "enter", path.dep = TRUE) 220 | 221 | add.rule(strategy.st, name = "ruleSignal", 222 | arguments = list(sigcol = "coverOrBuy", 223 | sigval = TRUE, orderqty = "all", 224 | ordertype = "market", orderside = "short", 225 | replace = FALSE, prefer = "Open"), 226 | type = "exit", path.dep = TRUE) 227 | 228 | # Get begin time 229 | t1 <- Sys.time() 230 | out <- applyStrategy(strategy = strategy.st, 231 | portfolios = portfolio.st) 232 | 233 | # Record end time 234 | t2 <- Sys.time() 235 | print(t2 - t1) 236 | 237 | applyStrategy() 238 | ## [1] "2007-10-22 00:00:00 XLY -655 @ 32.3578893111826" 239 | ## [1] "2007-10-22 00:00:00 XLY -393 @ 32.3578893111826" 240 | ## [1] "2007-10-23 00:00:00 XLY 393 @ 33.1349846702336" 241 | ## [1] "2007-10-23 00:00:00 XLY 358 @ 33.1349846702336" 242 | ## [1] "2007-10-25 00:00:00 XLY -358 @ 32.8639048938205" 243 | ## [1] "2007-10-25 00:00:00 XLY -333 @ 32.8639048938205" 244 | ## [1] "2009-09-30 00:00:00 XLY 333 @ 25.9947501843176" 245 | ## [1] "2009-09-30 00:00:00 XLY 449 @ 25.9947501843176" 246 | ## [1] "2009-10-02 00:00:00 XLY -449 @ 24.8800203565938" 247 | 248 | ############################## 249 | # Evaluating the performance # 250 | ############################## 251 | updatePortf(portfolio.st) 252 | dateRange <- time(getPortfolio(portfolio.st)$summary)[-1] 253 | updateAcct(portfolio.st, dateRange) 254 | updateEndEq(account.st) 255 | 256 | tStats <- tradeStats(Portfolios = portfolio.st, use = "trades", 257 | inclZeroDays = FALSE) 258 | tStats[, 4:ncol(tStats)] <- round(tStats[, 4:ncol(tStats)], 2) 259 | 260 | print(data.frame(t(tStats[,-c(1,2)]))) 261 | aggPF <- sum(tStats$Gross.Profits) / -sum(tStats$Gross.Losses) 262 | aggCorrect <- mean(tStats$Percent.Positive) 263 | numTrades <- sum(tStats$Num.Trades) 264 | meanAvgWLR <- mean(tStats$Avg.WinLoss.Ratio[ 265 | tStats$Avg.WinLoss.Ratio < Inf], na.rm = TRUE) 266 | 267 | tStats 268 | 269 | aggPF <- sum(tStats$Gross.Profits) / -sum(tStats$Gross.Losses) 270 | ## [1] 3.663545 271 | 272 | aggCorrect <- mean(tStats$Percent.Positive) 273 | ## [1] 36.00233 274 | 275 | numTrades <- sum(tStats$Num.Trades) 276 | ## [1] 1134 277 | 278 | meanAvgWLR <- mean(tStats$Avg.WinLoss.Ratio[ 279 | tStats$Avg.WinLoss.Ratio < Inf], na.rm = TRUE) 280 | ## [1] 9.871333 281 | 282 | instRets <- PortfReturns(account.st) 283 | portfRets <- xts(rowMeans(instRets) * ncol(instRets), 284 | order.by = index(instRets)) 285 | portfRets <- portfRets[!is.na(portfRets)] 286 | cumPortfRets <- cumprod(1 + portfRets) 287 | firstNonZeroDay <- as.character(index(portfRets)[ 288 | min(which(portfRets != 0))]) 289 | 290 | # Obtain symbol 291 | getSymbols("SPY", from = firstNonZeroDay, to = to) 292 | SPYrets <- diff(log(Cl(SPY)))[-1] 293 | cumSPYrets <- cumprod(1 + SPYrets) 294 | comparison <- cbind(cumPortfRets, cumSPYrets) 295 | colnames(comparison) <- c("strategy", "SPY") 296 | chart.TimeSeries(comparison, legend.loc = "topleft", 297 | colors=c("green", "red")) 298 | 299 | # Calculate risk metrics 300 | SharpeRatio.annualized(portfRets) 301 | Return.annualized(portfRets) 302 | maxDrawdown(portfRets) 303 | 304 | SharpeRatio.annualized(portfRets) 305 | ## [1] 0.6424366 306 | 307 | Return.annualized(portfRets) 308 | ## [1] 0.1392711 309 | 310 | maxDrawdown(portfRets) 311 | ## [1] 0.278221 312 | 313 | chart.Posn(portfolio.st, "XLB") 314 | tmp <- namedLag(Cl(XLB), k = nLag) 315 | add_TA(tmp$namedLag, col = "blue", on = 1) 316 | 317 | ############################################### 318 | # The second strategy: Cumulative Connors RSI # 319 | ############################################### 320 | # Compute Connor's RSI, depends on RSI TTR function 321 | connorsRSI <- function(price, nRSI = 3, nStreak = 2, 322 | nPercentLookBack = 100 ) { 323 | priceRSI <- RSI(price, nRSI) 324 | streakRSI <- RSI(computeStreak(price), nStreak) 325 | percents <- round(runPercentRank(x = diff(log(price)), 326 | n = 100, cumulative = FALSE, exact.multiplier = 1) * 100) 327 | ret <- (priceRSI + streakRSI + percents) / 3 328 | colnames(ret) <- "connorsRSI" 329 | return(ret) 330 | } 331 | 332 | # Computes a running streak of positives and 333 | # negatives of price changes 334 | computeStreak <- function(priceSeries) { 335 | signs <- sign(diff(priceSeries)) 336 | posDiffs <- negDiffs <- rep(0,length(signs)) 337 | posDiffs[signs == 1] <- 1 338 | negDiffs[signs == -1] <- -1 339 | 340 | # Create vector of cumulative sums and cumulative 341 | # sums not incremented during streaks. 342 | # Zero out any leading NAs after na.locf 343 | posCum <- cumsum(posDiffs) 344 | posNAcum <- posCum 345 | posNAcum[posDiffs == 1] <- NA 346 | posNAcum <- na.locf(posNAcum, na.rm = FALSE) 347 | posNAcum[is.na(posNAcum)] <- 0 348 | posStreak <- posCum - posNAcum 349 | 350 | # Repeat for negative cumulative sums 351 | negCum <- cumsum(negDiffs) 352 | negNAcum <- negCum 353 | negNAcum[negDiffs == -1] <- NA 354 | negNAcum <- na.locf(negNAcum, na.rm = FALSE) 355 | negNAcum[is.na(negNAcum)] <- 0 356 | negStreak <- negCum - negNAcum 357 | 358 | streak <- posStreak + negStreak 359 | streak <- xts(streak, order.by = index(priceSeries)) 360 | return (streak) 361 | } 362 | 363 | sigAND <- function(label, data=mktdata, 364 | columns, cross = FALSE) { 365 | ret_sig = NULL 366 | colNums <- rep(0, length(columns)) 367 | for(i in 1:length(columns)) { 368 | colNums[i] <- match.names(columns[i], colnames(data)) 369 | } 370 | ret_sig <- data[, colNums[1]] 371 | for(i in 2:length(colNums)) { 372 | ret_sig <- ret_sig & data[, colNums[i]] 373 | } 374 | ret_sig <- ret_sig * 1 375 | if (isTRUE(cross)) 376 | ret_sig <- diff(ret_sig) == 1 377 | colnames(ret_sig) <- label 378 | return(ret_sig) 379 | } 380 | 381 | cumCRSI <- function(price, nCum = 2, ...) { 382 | CRSI <- connorsRSI(price, ...) 383 | out <- runSum(CRSI, nCum) 384 | colnames(out) <- "cumCRSI" 385 | out 386 | } 387 | 388 | rm(list = ls(.blotter), envir = .blotter) 389 | initDate = '1990-01-01' 390 | from = "2003-01-01" 391 | to = "2013-12-31" 392 | initEq = 10000 393 | 394 | currency('USD') 395 | Sys.setenv(TZ="UTC") 396 | source("demoData.R") 397 | 398 | strategy.st <- "CRSIcumStrat" 399 | portfolio.st <- "CRSIcumStrat" 400 | account.st <- "CRSIcumStrat" 401 | 402 | rm.strat(portfolio.st) 403 | rm.strat(strategy.st) 404 | 405 | initPortf(portfolio.st, symbols = symbols, 406 | initDate = initDate, currency = 'USD') 407 | 408 | initAcct(account.st, portfolios = portfolio.st, 409 | initDate = initDate, currency = 'USD', 410 | initEq = initEq) 411 | 412 | initOrders(portfolio.st, initDate = initDate) 413 | strategy(strategy.st, store = TRUE) 414 | 415 | # Parameters 416 | cumThresh <- 40 417 | exitThresh <- 75 418 | nCum <- 2 419 | nRSI <- 3 420 | nStreak <- 2 421 | nPercentLookBack <- 100 422 | nSMA <- 200 423 | pctATR <- .02 424 | period <- 10 425 | 426 | # Indicators 427 | add.indicator(strategy.st, name = "cumCRSI", 428 | arguments = list(price = quote(Cl(mktdata)), nCum = nCum, 429 | nRSI = nRSI, nStreak = nStreak, 430 | nPercentLookBack = nPercentLookBack), 431 | label = "CRSIcum") 432 | 433 | add.indicator(strategy.st, name = "connorsRSI", 434 | arguments = list(price = quote(Cl(mktdata)), nRSI = nRSI, 435 | nStreak = nStreak, 436 | nPercentLookBack = nPercentLookBack), 437 | label = "CRSI") 438 | 439 | add.indicator(strategy.st, name = "SMA", 440 | arguments = list(x = quote(Cl(mktdata)), n = nSMA), 441 | label = "sma") 442 | 443 | add.indicator(strategy.st, name = "lagATR", 444 | arguments = list(HLC = quote(HLC(mktdata)), n = period), 445 | label = "atrX") 446 | 447 | # Signals 448 | add.signal(strategy.st, name = "sigThreshold", 449 | arguments = list(column = "cumCRSI.CRSIcum", 450 | threshold = cumThresh, relationship = "lt", cross = FALSE), 451 | label="cumCRSI.lt.thresh") 452 | 453 | add.signal(strategy.st, name = "sigComparison", 454 | arguments = list(columns = c("Close", "SMA.sma"), 455 | relationship = "gt"), label = "Cl.gt.SMA") 456 | 457 | add.signal(strategy.st, name = "sigAND", 458 | arguments = list(columns = c("cumCRSI.lt.thresh", 459 | "Cl.gt.SMA"), cross = TRUE), label = "longEntry") 460 | 461 | add.signal(strategy.st, name = "sigThreshold", 462 | arguments = list(column = "connorsRSI.CRSI", 463 | threshold = exitThresh, relationship = "gt", 464 | cross = TRUE), label = "longExit") 465 | 466 | # Rules 467 | add.rule(strategy.st, name = "ruleSignal", 468 | arguments = list(sigcol = "longEntry", sigval = TRUE, 469 | ordertype = "market", orderside ="long", replace = FALSE, 470 | prefer = "Open", osFUN = osDollarATR, tradeSize = tradeSize, 471 | pctATR = pctATR, atrMod = "X"), type = "enter", path.dep = TRUE) 472 | 473 | add.rule(strategy.st, name = "ruleSignal", 474 | arguments = list(sigcol = "longExit", sigval = TRUE, 475 | orderqty = "all", ordertype = "market", orderside = "long", 476 | replace = FALSE, prefer = "Open"), type = "exit", path.dep = TRUE) 477 | 478 | # Apply Strategy 479 | t1 <- Sys.time() 480 | out <- applyStrategy(strategy = strategy.st, 481 | portfolios = portfolio.st) 482 | t2 <- Sys.time() 483 | print(t2 - t1) 484 | 485 | # Set up analytics 486 | updatePortf(portfolio.st) 487 | dateRange <- time(getPortfolio(portfolio.st)$summary)[-1] 488 | updateAcct(portfolio.st,dateRange) 489 | updateEndEq(account.st) 490 | 491 | ########################################## 492 | # Evaluating the mean-reverting strategy # 493 | ########################################## 494 | aggPF <- sum(tStats$Gross.Profits)/-sum(tStats$Gross.Losses) 495 | ## [1] 1.699368 496 | 497 | aggCorrect <- mean(tStats$Percent.Positive) 498 | ## [1] 71.608 499 | 500 | numTrades <- sum(tStats$Num.Trades) 501 | ## [1] 1500 502 | 503 | meanAvgWLR <- mean(tStats$Avg.WinLoss.Ratio[ 504 | tStats$Avg.WinLoss.Ratio < Inf], na.rm = TRUE) 505 | ## [1] 0.725 506 | 507 | dStats <- dailyStats(Portfolios = portfolio.st, use = "Equity") 508 | rownames(dStats) <- gsub(".DailyEndEq", "", rownames(dStats)) 509 | print(data.frame(t(dStats))) 510 | 511 | durationStatistics <- function(Portfolio, Symbols, 512 | includeOpenTrade = FALSE, ...) { 513 | 514 | tmp <- list() 515 | length(tmp) <- length(Symbols) 516 | for(Symbol in Symbols) { 517 | pts <- perTradeStats(Portfolio = Portfolio, 518 | Symbol = Symbol, includeOpenTrade = includeOpenTrade) 519 | pts$diff <- pts$End - pts$Start 520 | 521 | durationSummary <- summary(as.numeric(pts$diff)) 522 | winDurationSummary <- summary(as.numeric( 523 | pts$diff[pts$Net.Trading.PL > 0])) 524 | lossDurationSummary <- summary(as.numeric( 525 | pts$diff[pts$Net.Trading.PL <= 0])) 526 | names(durationSummary) <- 527 | c("Min", "Q1", "Med", "Mean", "Q3", "Max") 528 | names(winDurationSummary) <- 529 | c("Min", "Q1", "Med", "Mean", "Q3", "Max") 530 | names(lossDurationSummary) <- 531 | c("Min", "Q1", "Med", "Mean", "Q3", "Max") 532 | names(winDurationSummary) <- 533 | paste0("W", names(winDurationSummary)) 534 | names(lossDurationSummary) <- 535 | paste0("L", names(lossDurationSummary)) 536 | dataRow <- data.frame(cbind(t(round(durationSummary)), 537 | t(round(winDurationSummary)), 538 | t(round(lossDurationSummary)))) 539 | tmp[[Symbol]] <- dataRow 540 | } 541 | out <- do.call(rbind, tmp) 542 | return(out) 543 | } 544 | 545 | durStats <- durationStatistics(Portfolio=portfolio.st, 546 | Symbols=sort(symbols)) 547 | print(t(durStats)) 548 | 549 | # Market exposure 550 | tmp <- list() 551 | length(tmp) <- length(symbols) 552 | for(i in 1:nrow(dStats)) { 553 | totalDays <- nrow(get(rownames(dStats)[i])) 554 | mktExposure <- dStats$Total.Days[i] / totalDays 555 | tmp[[i]] <- c(rownames(dStats)[i], round(mktExposure, 3)) 556 | } 557 | mktExposure <- data.frame(do.call(rbind, tmp)) 558 | colnames(mktExposure) <- c("Symbol", "MktExposure") 559 | 560 | print(mktExposure) 561 | 562 | print(mean(as.numeric(as.character(mktExposure$MktExposure)))) 563 | ## [1] 0.1257 564 | 565 | SharpeRatio.annualized(portfRets) 566 | ## [1] 0.6973019 567 | 568 | Return.annualized(portfRets) 569 | ## [1] 0.03370045 570 | 571 | maxDrawdown(portfRets) 572 | ## [1] 0.09120687 573 | 574 | chart.Posn(portfolio.st, "XLB") 575 | TA_CRSI <- connorsRSI(Cl(XLB), nRSI = nRSI, 576 | nStreak = nStreak, nPercentLookBack = nPercentLookBack) 577 | add_TA(TA_CRSI, col = "red") 578 | 579 | TA_cumCRSI <- cumCRSI(price = Cl(XLB), 580 | nCum = nCum, nRSI = nRSI, nStreak = nStreak, 581 | nPercentLookBack = nPercentLookBack) 582 | add_TA(TA_cumCRSI, col = "blue") 583 | 584 | TA_lagATR <- lagATR(HLC = HLC(XLB), n = period) 585 | add_TA(TA_lagATR, col = "purple") 586 | 587 | TA_SMA <- SMA(Cl(XLB), n = nSMA) 588 | add_TA(TA_SMA, col = "blue", lwd = 2, on = 1) 589 | 590 | 591 | -------------------------------------------------------------------------------- /backtesting/.Rhistory: -------------------------------------------------------------------------------- 1 | return(correlationIndicator) 2 | } 3 | # current portfolio ------------------------------------------------------- 4 | year2006 <- read.csv("2006.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE) 5 | year2007 <- read.csv("2007.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE) 6 | year2008 <- read.csv("2008.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE) 7 | year2009 <- read.csv("2009.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE) 8 | year2010 <- read.csv("2010.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE) 9 | year2011 <- read.csv("2011.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE) 10 | year2012 <- read.csv("2012.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE) 11 | year2013 <- read.csv("2013.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE) 12 | year2014 <- read.csv("2014.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE) 13 | total_portfolio <- rbind(year2006[1:2, ], 14 | year2007[1:16, ], 15 | year2008[1:14, ], 16 | year2009[1:24, ], 17 | year2010[1:34, ], 18 | year2011[1:26, ], 19 | year2012[1:26, ], 20 | year2013[1:26, ], 21 | year2014[1:22, ]) 22 | mom_tickers <- sort(unique(total_portfolio[, 3])) 23 | all_dates <- seq(as.Date("2012-12-01"), as.Date("2014-12-01"), by = 1) 24 | initial_portfolio <- data.frame(AAPL = rep(0, length(all_dates)), 25 | AGG = rep(0, length(all_dates)), 26 | ATVI = rep(0, length(all_dates)), 27 | DLB = rep(0, length(all_dates)), 28 | FORD = rep(0, length(all_dates)), 29 | GOOGL = rep(0, length(all_dates)), 30 | HAS = rep(0, length(all_dates)), 31 | LF = rep(0, length(all_dates)), 32 | LUV = rep(0, length(all_dates)), 33 | SPY = rep(0, length(all_dates)), 34 | TIBX = rep(0, length(all_dates))) 35 | # fill positions 36 | initial_portfolio["2012-12-10","AAPL"] <- 1 37 | initial_portfolio[c("2006-12-12","2007-01-09","2007-02-13", 38 | "2007-03-13","2007-04-10","2007-05-08", 39 | "2007-06-12","2007-07-10","2008-07-08", 40 | "2008-08-12","2008-09-09","2008-10-14", 41 | "2008-11-11","2008-12-09","2009-01-13", 42 | "2009-02-10","2009-03-10","2009-04-14", 43 | "2009-05-12","2009-06-09","2009-07-14", 44 | "2009-08-11","2009-09-08","2009-10-13", 45 | "2009-11-10","2009-12-08","2010-01-12", 46 | "2010-02-09","2010-03-09","2010-04-13", 47 | "2010-05-11","2010-06-08","2010-07-13", 48 | "2010-08-10","2010-09-14","2010-10-12", 49 | "2010-11-09","2010-12-14","2011-01-11", 50 | "2011-02-08","2011-03-08","2011-04-12", 51 | "2011-05-10","2011-06-14","2011-07-12", 52 | "2011-08-09","2011-09-13","2011-10-11", 53 | "2011-11-08","2011-12-07","2011-12-13", 54 | "2012-01-10","2012-02-14","2012-03-13", 55 | "2012-04-10","2012-05-08","2012-06-12", 56 | "2012-08-14","2012-09-11","2012-10-09", 57 | "2012-11-13","2012-12-11","2013-01-08", 58 | "2013-02-12","2013-03-12","2013-04-09", 59 | "2013-05-14","2013-06-11","2013-07-09", 60 | "2013-08-13","2013-09-10","2013-10-08", 61 | "2013-11-12","2013-12-10","2014-01-14", 62 | "2014-02-11","2014-03-11","2014-04-08", 63 | "2014-05-13","2014-06-10","2014-07-08", 64 | "2014-08-12","2014-09-09","2014-10-14", 65 | "2014-11-11"), "AGG"] <- 1 66 | initial_portfolio[c("2010-04-30","2010-05-20"),"ATVI"] <- 1 67 | initial_portfolio[c("2010-04-30","2010-05-05", 68 | "2010-05-21"), "DLB"] <- 1 69 | initial_portfolio[c("2010-08-24"), "GOOGL"] <- 1 70 | initial_portfolio[c("2010-04-30"), "HAS"] <- 1 71 | "2007-11-08", 72 | "2007-12-10","2008-02-04","2012-07-10","2012-12-10","2013-12-06", 73 | initial_xts <- xts(initial_portfolio, order.by = all_dates) 74 | # backtesting assumptions ------------------------------------------------- 75 | ## Define frictions (as percentage of trade) 76 | commission_pct <- c(.0003) 77 | slippage_pct <- c(.0005) 78 | friction_pct <- commission_pct + slippage_pct 79 | ## Data dates 80 | start_date <- as.Date("2007-04-01") 81 | end_date <- as.Date("2014-12-01") 82 | # download data ----------------------------------------------------------- 83 | #universe <- c("SPY", "MDY", "IWM", # Large-Mid-Small Cap equities 84 | # "EFA", "EEM", # Intl.and emerging markets 85 | # "AGG", "TIP", "TLT", "LQD", # Bonds 86 | # "GSG", "DBE", "GLD", # Commodities, Energy, Gold 87 | # "RWR", "RWX", "VNQ", "MBB") # Real Estate 88 | universe <- c("SPY", "MDY", "IWM", 89 | "EFA", "EEM", "AGG", 90 | "TIP", "GSG", "RWR", 91 | "RWX", "TLT", "LQD", 92 | "MBB") 93 | # "SHV" 94 | cash_ticker <- c("VFISX") 95 | all_symbols <- c(universe,cash_ticker) 96 | # first download 97 | getSymbols(all_symbols, from=start_date, to=end_date) 98 | for(i in all_symbols) 99 | assign(i, get(i)) 100 | close_prices <- do.call(merge, lapply(all_symbols, function(x) Cl(get(x)))) 101 | adj_prices <- do.call(merge, lapply(all_symbols, function(x) Ad(get(x)))) 102 | returns_matrix1 <- na.omit(diff(adj_prices)/lag(adj_prices, k = 1)) 103 | returns_monthly1 <- returns_matrix1[endpoints(index(returns_matrix1), 104 | on = "months"), ] 105 | # second download 106 | getSymbols(mom_tickers, from=start_date, to=end_date) 107 | for(i in mom_tickers) 108 | assign(i, get(i)) 109 | close_prices2 <- do.call(merge, lapply(mom_tickers, function(x) Cl(get(x)))) 110 | adj_prices2 <- do.call(merge, lapply(mom_tickers, function(x) Ad(get(x)))) 111 | returns_matrix2 <- na.omit(diff(adj_prices2)/lag(adj_prices2, k = 1)) 112 | returns_monthly2 <- returns_matrix2[endpoints(index(returns_matrix2), 113 | on = "months"), ] 114 | # model dates 115 | model_dates <- index(adj_prices)[85:nrow(adj_prices)] 116 | # indicators -------------------------------------------------------------- 117 | roc1DP <- ROC(close_prices[,1:ncol(close_prices)-1], n=1, 118 | type="discrete")*100 119 | ## Highest Momentum (ROC), 120 | ## Lowest Volatility (SD), 121 | ## Lowest Avg. Correlation 122 | ## 4-month look back 123 | momentumIndicator <- xts(apply(roc1DP,2,SMA,n=84), index(close_prices)) 124 | volatilityIndicator <- rollapply(roc1DP[,1:ncol(close_prices)-1], 84, sd, fill=NA) 125 | correlationIndicator <- corIndicator(universe = universe, data = roc1DP,n=84) 126 | momentumRank <- ranking(momentumIndicator) 127 | volatilityRank <- ranking(-volatilityIndicator) 128 | correlationRank <- ranking(-correlationIndicator) 129 | combineRankWeights <- (.50*momentumRank) + (.25*volatilityRank) + (.25*correlationRank) 130 | compositeRank <- ranking(combineRankWeights) 131 | colnames(compositeRank) <- universe 132 | compositeRank[compositeRank < 9] <- 0 133 | compositeRank[compositeRank >= 9] <- 1 134 | # Filter: - ON - OFF Filters 135 | momentumFilterBUY <- xts(ifelse(as.matrix(momentumIndicator) < 0, 0, 1), index(adj_prices)) 136 | # MVC portfolio ----------------------------------------------------------- 137 | strategy_nameBH <- c("Jonnie:MVC") 138 | strategy <- as.matrix(compositeRank)*as.matrix(momentumFilterBUY) *.20 139 | prelimallocmatrix <- xts(strategy, order.by = index(adj_prices)) 140 | prelimallocmatrix <- prelimallocmatrix[endpoints(index(prelimallocmatrix), on = "months"), ] 141 | colnames(prelimallocmatrix) <- universe 142 | ## allocation matrix + cash 143 | cashBH <- ifelse(rowSums(prelimallocmatrix) == 0, 1, 1 - rowSums(prelimallocmatrix)) 144 | allocmatrixBH <- cbind(prelimallocmatrix, cashBH) 145 | colnames(allocmatrixBH) <- all_symbols 146 | ## trade matrices 147 | prelimtradematrixBH <- allocmatrixBH 148 | prelimtradematrixBH[is.na(prelimtradematrixBH)] <- 0 149 | tradematrixBH <- prelimtradematrixBH - lag(prelimtradematrixBH, 1) 150 | ## friction matrices 151 | constituenttradematrixBH <- tradematrixBH[,1:length(universe)] 152 | frictionmatrixBH <- cbind(abs(constituenttradematrixBH) * friction_pct, 0) 153 | ## Create matrix of (monthly) returns for the portfolios 154 | wheninvested_retsBH <- as.numeric(returns_monthly1) * lag(allocmatrixBH, 1) - frictionmatrixBH 155 | ## Calculate portfolio returns vector 156 | port_retsBH3 <- rowSums(wheninvested_retsBH) 157 | ## Create final portfolio returns by coercing port_rets to xts object and remove NAs 158 | port_rets_xtsBH3 <- xts(port_retsBH3, order.by = index(prelimallocmatrix)) 159 | port_returnsBH3 <- port_rets_xtsBH3[is.na(port_rets_xtsBH3) == FALSE] 160 | colnames(port_returnsBH) <- strategy_nameBH 161 | ## Create equity curve and additional portfolio returns 162 | port_equity_curveBH <- cumprod(1 + port_returnsBH) 163 | port_returns_monthlyBH <- monthlyReturn(port_equity_curveBH) 164 | # Buy and Hold (my portfolio) --------------------------------------------- 165 | strategy_nameBH <- c("Jonnie") 166 | bh_allocation <- 1/length(universe) 167 | bh_initial <- matrix(1, nrow = nrow(returns_matrix1), ncol = ncol(returns_matrix1)-1) 168 | bh_weights <- bh_initial*bh_allocation 169 | prelimallocmatrixBH <- xts(bh_weights, order.by = index(returns_matrix1)) 170 | prelimallocmatrixBH <- prelimallocmatrixBH[endpoints(index(prelimallocmatrixBH), on = "months"), ] 171 | colnames(prelimallocmatrixBH) <- universe 172 | ## allocation matrix + cash 173 | cashBH <- ifelse(rowSums(prelimallocmatrixBH) == 0, 1, 1 - rowSums(prelimallocmatrixBH)) 174 | allocmatrixBH <- cbind(prelimallocmatrixBH, cashBH) 175 | colnames(allocmatrixBH) <- all_symbols 176 | ## trade matrices 177 | prelimtradematrixBH <- allocmatrixBH 178 | prelimtradematrixBH[is.na(prelimtradematrixBH)] <- 0 179 | tradematrixBH <- prelimtradematrixBH - lag(prelimtradematrixBH, 1) 180 | ## friction matrices 181 | constituenttradematrixBH <- tradematrixBH[,1:length(universe)] 182 | frictionmatrixBH <- cbind(abs(constituenttradematrixBH) * friction_pct, 0) 183 | ## Create matrix of (monthly) returns for the portfolios 184 | wheninvested_retsBH <- as.numeric(returns_monthly1) * lag(allocmatrixBH, 1) - frictionmatrixBH 185 | ## Calculate portfolio returns vector 186 | port_retsBH <- rowSums(wheninvested_retsBH) 187 | ## Create final portfolio returns by coercing port_rets to xts object and remove NAs 188 | port_rets_xtsBH <- xts(port_retsBH, order.by = index(prelimallocmatrixBH)) 189 | port_returnsBH <- port_rets_xtsBH[is.na(port_rets_xtsBH) == FALSE] 190 | colnames(port_returnsBH) <- strategy_nameBH 191 | ## Create equity curve and additional portfolio returns 192 | port_equity_curveBH <- cumprod(1 + port_returnsBH) 193 | port_returns_monthlyBH <- monthlyReturn(port_equity_curveBH) 194 | # buy and hold (mom's portfolio) ------------------------------------------ 195 | strategy_nameBH <- c("Mom") 196 | bh_allocation <- 1/length(mom_tickers) 197 | bh_initial <- matrix(1, nrow = nrow(returns_matrix2), ncol = ncol(returns_matrix2)) 198 | bh_weights <- bh_initial*bh_allocation 199 | prelimallocmatrixBH <- xts(bh_weights, order.by = index(returns_matrix2)) 200 | prelimallocmatrixBH <- prelimallocmatrixBH[endpoints(index(prelimallocmatrixBH), on = "months"), ] 201 | colnames(prelimallocmatrixBH) <- mom_tickers 202 | ## allocation matrix + cash 203 | allocmatrixBH <- prelimallocmatrixBH 204 | ## trade matrices 205 | prelimtradematrixBH <- allocmatrixBH 206 | prelimtradematrixBH[is.na(prelimtradematrixBH)] <- 0 207 | tradematrixBH <- prelimtradematrixBH - lag(prelimtradematrixBH, 1) 208 | ## friction matrices 209 | constituenttradematrixBH <- tradematrixBH[,1:length(mom_tickers)] 210 | frictionmatrixBH <- cbind(abs(constituenttradematrixBH) * friction_pct) 211 | ## Create matrix of (monthly) returns for the portfolios 212 | wheninvested_retsBH <- as.numeric(returns_monthly2) * lag(allocmatrixBH, 1) - frictionmatrixBH 213 | ## Calculate portfolio returns vector 214 | port_retsBH <- rowSums(wheninvested_retsBH) 215 | ## Create final portfolio returns by coercing port_rets to xts object and remove NAs 216 | port_rets_xtsBH2 <- xts(port_retsBH, order.by = index(prelimallocmatrixBH)) 217 | port_returnsBH2 <- port_rets_xtsBH2[is.na(port_rets_xtsBH2) == FALSE] 218 | colnames(port_returnsBH) <- strategy_nameBH 219 | ## Create equity curve and additional portfolio returns 220 | port_equity_curveBH2 <- cumprod(1 + port_returnsBH) 221 | port_returns_monthlyBH2 <- monthlyReturn(port_equity_curveBH) 222 | chart.CumReturns(cbind(port_returnsBH,port_returnsBH2,port_returnsBH3), wealth.index = TRUE) 223 | rm(list = ls(all = TRUE)) 224 | options(scipen=999) 225 | setwd("~/Dropbox/setup/quant_trading/mom_portfolio/mvc_20150109") 226 | #setwd("C:/Users/jlappen/Dropbox/setup/mom_portfolio/mvc_20150109") 227 | library(xts) 228 | library(plyr) 229 | library(quantmod) 230 | library(PerformanceAnalytics) 231 | # load custom functions --------------------------------------------------- 232 | source("backtest_function.R") 233 | source("run_corr_function.R") 234 | source("rank_function.R") 235 | # backtest parameters ----------------------------------------------------- 236 | start_date <- as.Date("2007-04-01") 237 | end_date <- as.Date("2014-12-31") 238 | ## Define frictions (as percentage of trade) 239 | commission_pct <- c(.0003) 240 | slippage_pct <- c(.0005) 241 | friction_pct <- commission_pct + slippage_pct 242 | ## Portfolio Symbols 243 | data_source <- c("yahoo") 244 | universe <- c("SPY", "MDY", "IWM", # Large-Mid-Small Cap equities 245 | "EFA", "EEM", # Intl.and emerging markets 246 | "AGG","TIP","TLT", "LQD", # Bonds 247 | "GSG", # Commodities 248 | "RWR", "RWX", "MBB") # Real Estateads 249 | ## Cash 250 | cash_symbol <- c("SHV") 251 | portfolio_symbols <- c(universe, cash_symbol) 252 | # download data and compute returns --------------------------------------- 253 | getSymbols(universe, src = data_source, from = start_date, to = end_date) 254 | getSymbols(cash_symbol, src = data_source, from = start_date, to = end_date) 255 | raw_close <- do.call(merge, lapply(portfolio_symbols, function(x) Cl(get(x)))) 256 | adj_close <- do.call(merge, lapply(portfolio_symbols, function(x) Ad(get(x)))) 257 | ## Convert Daily to Monthly 258 | monthly_raw <- raw_close[endpoints(index(raw_close), on = "months"), ] 259 | monthly_adj <- adj_close[endpoints(index(adj_close), on = "months"), ] 260 | monthly_rets <- diff(monthly_adj)/lag(monthly_adj, k = 1) 261 | # market timing ----------------------------------------------------------- 262 | roc1DP <- ROC(raw_close[,1:length(universe)], n=1, type="discrete")*100 263 | ## Highest Momentum (ROC), 264 | ## Lowest Volatility (SD), 265 | ## Lowest Avg. Correlation 266 | ## 4-month look back 267 | momentum <- xts(apply(roc1DP, 2, SMA, n = 84), index(adj_close)) 268 | volatility <- rollapply(roc1DP[, 1:length(universe)], 84, sd, fill = NA) 269 | correlation <- corIndicator(universe = universe, data = roc1DP, n = 84) 270 | moment_rank <- ranking(momentum) 271 | vol_rank <- ranking(-volatility) 272 | corr_rank <- ranking(-correlation) 273 | all_ranks <- (.50 * moment_rank) + (.25 * vol_rank) + (.25 * corr_rank) 274 | composite_rank <- ranking(all_ranks) 275 | colnames(composite_rank) <- universe 276 | composite_rank[composite_rank < 9] <- 0 277 | composite_rank[composite_rank >= 9] <- 1 278 | ## Filters 279 | moment_filter <- xts(ifelse(as.matrix(momentum) < 0, 0, 1), index(adj_close)) 280 | # system 1: mvc + momentum filter ----------------------------------------- 281 | strategy1 <- (as.matrix(composite_rank) * as.matrix(moment_filter)) #* .20 282 | strategy1 <- strategy1 * (1 / rowSums(strategy1)) 283 | strategy1 <- xts(strategy1, index(adj_close)) 284 | strategy1 <- strategy1[endpoints(index(strategy1), on = "months"), ] 285 | # strategy 2: risk parity ------------------------------------------------- 286 | rp_buy <- as.matrix(composite_rank) * as.matrix(moment_filter) 287 | kt_buy <- 1/rowSums(rp_buy * volatility) 288 | kt_buy[kt_buy == Inf] <- 0 289 | rp_weights <- kt_buy * volatility 290 | strategy2 <- xts(rp_buy * rp_weights, index(adj_close)) 291 | strategy2 <- strategy2[endpoints(index(strategy2), on = "months"), ] 292 | # Buy and Hold ------------------------------------------------------------ 293 | bh_allocation <- 1/length(universe) 294 | bh_initial <- matrix(1, nrow = nrow(monthly_rets), ncol = ncol(monthly_rets) - 1) 295 | bh_weights <- bh_initial * bh_allocation 296 | bh_weights <- xts(bh_weights, order.by = index(monthly_rets)) 297 | bh_weights <- bh_weights[endpoints(index(bh_weights), on = "months"), ] 298 | colnames(bh_weights) <- universe 299 | # output ------------------------------------------------------------------ 300 | strat1_returns <- backtest_engine(strategy1, "MVC_filter", monthly_rets) 301 | strat2_returns <- backtest_engine(strategy2, "risk_parity", monthly_rets) 302 | bh_returns <- backtest_engine(bh_weights, "buy_hold", monthly_rets) 303 | port_returns_report <- cbind(strat1_returns$portfolio_returns, 304 | bh_returns$portfolio_returns, 305 | strat2_returns$portfolio_returns) 306 | aa <- table.AnnualizedReturns(cbind(port_returns_report)) 307 | bb <- table.DownsideRisk(cbind(port_returns_report)) 308 | a <- table.Stats(cbind(port_returns_report)) 309 | b <- table.Variability(cbind(port_returns_report)) 310 | c <- table.Distributions(cbind(port_returns_report)) 311 | d <- table.DrawdownsRatio(cbind(port_returns_report)) 312 | e <- table.DownsideRiskRatio(cbind(port_returns_report)) 313 | f <- table.TrailingPeriods(cbind(port_returns_report)) 314 | rowBinding <- rbind(aa, bb, a, b, c, d, e, f) 315 | charts.PerformanceSummary(port_returns_report, wealth.index = TRUE) 316 | drawdowns_list <- list(strat1 = table.Drawdowns(strat1_returns$portfolio_returns), 317 | strat2 = table.Drawdowns(strat2_returns$portfolio_returns), 318 | bh = table.Drawdowns(bh_returns$portfolio_returns)) 319 | monthly_ret_list <- list(strat1 = table.CalendarReturns(strat1_returns$portfolio_returns, digits = 2), 320 | strat2 = table.CalendarReturns(strat2_returns$portfolio_returns, digits = 2), 321 | bh = table.CalendarReturns(bh_returns$portfolio_returns, digits = 2)) 322 | # final numeric tables printed to the console 323 | rowBinding 324 | drawdowns_list 325 | monthly_ret_list 326 | universe <- c("SPY", "QQQ", "IWM", "EEM", "EFA", 327 | "TLT", "LQD", "DBC", "GLD") 328 | ## Cash 329 | cash_symbol <- c("SHV") 330 | portfolio_symbols <- c(universe, cash_symbol) 331 | # download data and compute returns --------------------------------------- 332 | getSymbols(universe, src = data_source, from = start_date, to = end_date) 333 | getSymbols(cash_symbol, src = data_source, from = start_date, to = end_date) 334 | raw_close <- do.call(merge, lapply(portfolio_symbols, function(x) Cl(get(x)))) 335 | adj_close <- do.call(merge, lapply(portfolio_symbols, function(x) Ad(get(x)))) 336 | ## Convert Daily to Monthly 337 | monthly_raw <- raw_close[endpoints(index(raw_close), on = "months"), ] 338 | monthly_adj <- adj_close[endpoints(index(adj_close), on = "months"), ] 339 | monthly_rets <- diff(monthly_adj)/lag(monthly_adj, k = 1) 340 | # market timing ----------------------------------------------------------- 341 | roc1DP <- ROC(raw_close[,1:length(universe)], n=1, type="discrete")*100 342 | ## Highest Momentum (ROC), 343 | ## Lowest Volatility (SD), 344 | ## Lowest Avg. Correlation 345 | ## 4-month look back 346 | momentum <- xts(apply(roc1DP, 2, SMA, n = 84), index(adj_close)) 347 | volatility <- rollapply(roc1DP[, 1:length(universe)], 84, sd, fill = NA) 348 | correlation <- corIndicator(universe = universe, data = roc1DP, n = 84) 349 | moment_rank <- ranking(momentum) 350 | vol_rank <- ranking(-volatility) 351 | corr_rank <- ranking(-correlation) 352 | all_ranks <- (.50 * moment_rank) + (.25 * vol_rank) + (.25 * corr_rank) 353 | composite_rank <- ranking(all_ranks) 354 | colnames(composite_rank) <- universe 355 | composite_rank[composite_rank < 9] <- 0 356 | composite_rank[composite_rank >= 9] <- 1 357 | ## Filters 358 | moment_filter <- xts(ifelse(as.matrix(momentum) < 0, 0, 1), index(adj_close)) 359 | # system 1: mvc + momentum filter ----------------------------------------- 360 | strategy1 <- (as.matrix(composite_rank) * as.matrix(moment_filter)) #* .20 361 | strategy1 <- strategy1 * (1 / rowSums(strategy1)) 362 | strategy1 <- xts(strategy1, index(adj_close)) 363 | strategy1 <- strategy1[endpoints(index(strategy1), on = "months"), ] 364 | # strategy 2: risk parity ------------------------------------------------- 365 | rp_buy <- as.matrix(composite_rank) * as.matrix(moment_filter) 366 | kt_buy <- 1/rowSums(rp_buy * volatility) 367 | kt_buy[kt_buy == Inf] <- 0 368 | rp_weights <- kt_buy * volatility 369 | strategy2 <- xts(rp_buy * rp_weights, index(adj_close)) 370 | strategy2 <- strategy2[endpoints(index(strategy2), on = "months"), ] 371 | # Buy and Hold ------------------------------------------------------------ 372 | bh_allocation <- 1/length(universe) 373 | bh_initial <- matrix(1, nrow = nrow(monthly_rets), ncol = ncol(monthly_rets) - 1) 374 | bh_weights <- bh_initial * bh_allocation 375 | bh_weights <- xts(bh_weights, order.by = index(monthly_rets)) 376 | bh_weights <- bh_weights[endpoints(index(bh_weights), on = "months"), ] 377 | colnames(bh_weights) <- universe 378 | # output ------------------------------------------------------------------ 379 | strat1_returns <- backtest_engine(strategy1, "MVC_filter", monthly_rets) 380 | strat2_returns <- backtest_engine(strategy2, "risk_parity", monthly_rets) 381 | bh_returns <- backtest_engine(bh_weights, "buy_hold", monthly_rets) 382 | port_returns_report <- cbind(strat1_returns$portfolio_returns, 383 | bh_returns$portfolio_returns, 384 | strat2_returns$portfolio_returns) 385 | aa <- table.AnnualizedReturns(cbind(port_returns_report)) 386 | bb <- table.DownsideRisk(cbind(port_returns_report)) 387 | a <- table.Stats(cbind(port_returns_report)) 388 | b <- table.Variability(cbind(port_returns_report)) 389 | c <- table.Distributions(cbind(port_returns_report)) 390 | d <- table.DrawdownsRatio(cbind(port_returns_report)) 391 | e <- table.DownsideRiskRatio(cbind(port_returns_report)) 392 | f <- table.TrailingPeriods(cbind(port_returns_report)) 393 | rowBinding <- rbind(aa, bb, a, b, c, d, e, f) 394 | charts.PerformanceSummary(port_returns_report, wealth.index = TRUE) 395 | drawdowns_list <- list(strat1 = table.Drawdowns(strat1_returns$portfolio_returns), 396 | strat2 = table.Drawdowns(strat2_returns$portfolio_returns), 397 | bh = table.Drawdowns(bh_returns$portfolio_returns)) 398 | monthly_ret_list <- list(strat1 = table.CalendarReturns(strat1_returns$portfolio_returns, digits = 2), 399 | strat2 = table.CalendarReturns(strat2_returns$portfolio_returns, digits = 2), 400 | bh = table.CalendarReturns(bh_returns$portfolio_returns, digits = 2)) 401 | # final numeric tables printed to the console 402 | rowBinding 403 | drawdowns_list 404 | monthly_ret_list 405 | head(adj_close) 406 | universe <- c("SPY", "QQQ", "IWM", "EEM", "EFA", 407 | "TLT", "LQD", "DBC", "GLD", "AGG", 408 | "RWR","RWX","MBB") 409 | ## Cash 410 | cash_symbol <- c("SHV") 411 | portfolio_symbols <- c(universe, cash_symbol) 412 | # download data and compute returns --------------------------------------- 413 | getSymbols(universe, src = data_source, from = start_date, to = end_date) 414 | getSymbols(cash_symbol, src = data_source, from = start_date, to = end_date) 415 | raw_close <- do.call(merge, lapply(portfolio_symbols, function(x) Cl(get(x)))) 416 | adj_close <- do.call(merge, lapply(portfolio_symbols, function(x) Ad(get(x)))) 417 | ## Convert Daily to Monthly 418 | monthly_raw <- raw_close[endpoints(index(raw_close), on = "months"), ] 419 | monthly_adj <- adj_close[endpoints(index(adj_close), on = "months"), ] 420 | monthly_rets <- diff(monthly_adj)/lag(monthly_adj, k = 1) 421 | # market timing ----------------------------------------------------------- 422 | roc1DP <- ROC(raw_close[,1:length(universe)], n=1, type="discrete")*100 423 | ## Highest Momentum (ROC), 424 | ## Lowest Volatility (SD), 425 | ## Lowest Avg. Correlation 426 | ## 4-month look back 427 | momentum <- xts(apply(roc1DP, 2, SMA, n = 84), index(adj_close)) 428 | volatility <- rollapply(roc1DP[, 1:length(universe)], 84, sd, fill = NA) 429 | correlation <- corIndicator(universe = universe, data = roc1DP, n = 84) 430 | moment_rank <- ranking(momentum) 431 | vol_rank <- ranking(-volatility) 432 | corr_rank <- ranking(-correlation) 433 | all_ranks <- (.50 * moment_rank) + (.25 * vol_rank) + (.25 * corr_rank) 434 | composite_rank <- ranking(all_ranks) 435 | colnames(composite_rank) <- universe 436 | composite_rank[composite_rank < 9] <- 0 437 | composite_rank[composite_rank >= 9] <- 1 438 | ## Filters 439 | moment_filter <- xts(ifelse(as.matrix(momentum) < 0, 0, 1), index(adj_close)) 440 | # system 1: mvc + momentum filter ----------------------------------------- 441 | strategy1 <- (as.matrix(composite_rank) * as.matrix(moment_filter)) #* .20 442 | strategy1 <- strategy1 * (1 / rowSums(strategy1)) 443 | strategy1 <- xts(strategy1, index(adj_close)) 444 | strategy1 <- strategy1[endpoints(index(strategy1), on = "months"), ] 445 | # strategy 2: risk parity ------------------------------------------------- 446 | rp_buy <- as.matrix(composite_rank) * as.matrix(moment_filter) 447 | kt_buy <- 1/rowSums(rp_buy * volatility) 448 | kt_buy[kt_buy == Inf] <- 0 449 | rp_weights <- kt_buy * volatility 450 | strategy2 <- xts(rp_buy * rp_weights, index(adj_close)) 451 | strategy2 <- strategy2[endpoints(index(strategy2), on = "months"), ] 452 | # Buy and Hold ------------------------------------------------------------ 453 | bh_allocation <- 1/length(universe) 454 | bh_initial <- matrix(1, nrow = nrow(monthly_rets), ncol = ncol(monthly_rets) - 1) 455 | bh_weights <- bh_initial * bh_allocation 456 | bh_weights <- xts(bh_weights, order.by = index(monthly_rets)) 457 | bh_weights <- bh_weights[endpoints(index(bh_weights), on = "months"), ] 458 | colnames(bh_weights) <- universe 459 | # output ------------------------------------------------------------------ 460 | strat1_returns <- backtest_engine(strategy1, "MVC_filter", monthly_rets) 461 | strat2_returns <- backtest_engine(strategy2, "risk_parity", monthly_rets) 462 | bh_returns <- backtest_engine(bh_weights, "buy_hold", monthly_rets) 463 | port_returns_report <- cbind(strat1_returns$portfolio_returns, 464 | bh_returns$portfolio_returns, 465 | strat2_returns$portfolio_returns) 466 | aa <- table.AnnualizedReturns(cbind(port_returns_report)) 467 | bb <- table.DownsideRisk(cbind(port_returns_report)) 468 | a <- table.Stats(cbind(port_returns_report)) 469 | b <- table.Variability(cbind(port_returns_report)) 470 | c <- table.Distributions(cbind(port_returns_report)) 471 | d <- table.DrawdownsRatio(cbind(port_returns_report)) 472 | e <- table.DownsideRiskRatio(cbind(port_returns_report)) 473 | f <- table.TrailingPeriods(cbind(port_returns_report)) 474 | rowBinding <- rbind(aa, bb, a, b, c, d, e, f) 475 | charts.PerformanceSummary(port_returns_report, wealth.index = TRUE) 476 | drawdowns_list <- list(strat1 = table.Drawdowns(strat1_returns$portfolio_returns), 477 | strat2 = table.Drawdowns(strat2_returns$portfolio_returns), 478 | bh = table.Drawdowns(bh_returns$portfolio_returns)) 479 | monthly_ret_list <- list(strat1 = table.CalendarReturns(strat1_returns$portfolio_returns, digits = 2), 480 | strat2 = table.CalendarReturns(strat2_returns$portfolio_returns, digits = 2), 481 | bh = table.CalendarReturns(bh_returns$portfolio_returns, digits = 2)) 482 | # final numeric tables printed to the console 483 | rowBinding 484 | drawdowns_list 485 | monthly_ret_list 486 | rm(list = ls(all = TRUE)) 487 | gbm_function <- function(x, omega, mu, sigma, n) { 488 | # Geometric Brownian Motion: 489 | # x = initial value of the process at t0 490 | # alpha = start time 491 | # omega = final time 492 | # mu = rate of return (drift) 493 | # sigma = volatility 494 | # n = number of intervals in which to split [t0, t1] 495 | dt <- omega/n 496 | t <- seq(0, omega, length = n + 1) 497 | bm <- ts(cumsum(c(0, rnorm(n) * sqrt(dt))), start = 0, deltat = dt) 498 | gbm <- x * exp((mu - sigma ^ 2 / 2) * time(bm) + sigma * as.numeric(bm)) 499 | out <- ts(gbm, start = 0, deltat = deltat(bm)) 500 | } 501 | model1 <- gbm_function(x = 5, 502 | omega = 1, 503 | mu = 0.09, 504 | sigma = 0.3, 505 | n = 5000) 506 | plot(x = seq(1, length(model1), by = 1), 507 | y = model1, 508 | type = "l", 509 | lwd = 1.5, 510 | col = "#31a354", 511 | ylab = "$", 512 | xlab = "# of Observations") 513 | --------------------------------------------------------------------------------