├── backtesting
    ├── README.MD
    ├── rank_function.R
    ├── backtest_function.R
    ├── run_corr_function.R
    ├── portfolio_template.R
    └── .Rhistory
├── rfortraders
    ├── Chapter_01
    │   └── code.R
    ├── Chapter_03
    │   ├── strategy.xlsx
    │   ├── sample_json_file.json
    │   └── code.R
    ├── Chapter_11
    │   ├── add_2_file.cpp
    │   └── code.R
    ├── README.md
    ├── LICENSE
    ├── Errata.txt
    ├── Chapter_04
    │   └── code.R
    ├── Chapter_05
    │   └── code.R
    ├── Chapter_09
    │   └── code.R
    ├── Chapter_10
    │   └── code.R
    ├── Chapter_08
    │   └── code.R
    ├── Chapter_02
    │   └── code.R
    ├── Chapter_06
    │   └── code.R
    └── Chapter_07
    │   └── code.R
├── README.md
├── fb
    ├── density.png
    └── bday2016.R
├── extrema
    ├── output.PNG
    ├── README.md
    ├── main.R
    ├── load.R
    ├── inflection_points.R
    └── functions.R
├── bar_charts
    ├── .DS_Store
    ├── cqg_example.png
    ├── Screen Shot 2015-02-27 at 7.27.11 PM.png
    ├── Screen Shot 2015-02-27 at 7.27.21 PM.png
    ├── README.md~
    ├── README.md
    ├── GCQ 2014-07-03.csv
    └── bar_charts.R
├── survivorship
    ├── dpdf.PNG
    ├── metadata.txt
    ├── rBBG_splits.R
    ├── README.md
    ├── rBBG_prices.R
    └── rBBG_survivors.R
├── package_mgmt
    ├── sn_1.0-0.tar.gz
    └── install_packages.R
├── simulation
    ├── README.md
    └── GBM_20140120.R
├── systematic_investor
    ├── dtw_20_30.PNG
    ├── dtw_20_60.PNG
    ├── dtw_20_90.PNG
    ├── euclid_20_30.PNG
    ├── euclid_20_60.PNG
    ├── euclid_20_90.PNG
    └── time_series_matching.R
├── my_functions
    ├── rank_function.R
    ├── README.md
    ├── trading_calendar.R
    ├── percent_rank.R
    └── run_corr_function.R
├── bloomberg
    └── rBBG.R
├── varadi
    └── varadi_chaos.R
└── indicators
    └── indicators.R


/backtesting/README.MD:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_01/code.R:
--------------------------------------------------------------------------------
1 | # This chapter contains no code snippets


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # R
2 | a collection of R projects, mostly relating to finance
3 | 


--------------------------------------------------------------------------------
/fb/density.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/fb/density.png


--------------------------------------------------------------------------------
/extrema/output.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/extrema/output.PNG


--------------------------------------------------------------------------------
/bar_charts/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/bar_charts/.DS_Store


--------------------------------------------------------------------------------
/survivorship/dpdf.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/survivorship/dpdf.PNG


--------------------------------------------------------------------------------
/bar_charts/cqg_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/bar_charts/cqg_example.png


--------------------------------------------------------------------------------
/package_mgmt/sn_1.0-0.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/package_mgmt/sn_1.0-0.tar.gz


--------------------------------------------------------------------------------
/simulation/README.md:
--------------------------------------------------------------------------------
1 | ######GBM
2 | Geometric Brownian Motion, simulates a stock's random walk
3 | 


--------------------------------------------------------------------------------
/systematic_investor/dtw_20_30.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/dtw_20_30.PNG


--------------------------------------------------------------------------------
/systematic_investor/dtw_20_60.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/dtw_20_60.PNG


--------------------------------------------------------------------------------
/systematic_investor/dtw_20_90.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/dtw_20_90.PNG


--------------------------------------------------------------------------------
/rfortraders/Chapter_03/strategy.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/rfortraders/Chapter_03/strategy.xlsx


--------------------------------------------------------------------------------
/systematic_investor/euclid_20_30.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/euclid_20_30.PNG


--------------------------------------------------------------------------------
/systematic_investor/euclid_20_60.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/euclid_20_60.PNG


--------------------------------------------------------------------------------
/systematic_investor/euclid_20_90.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/systematic_investor/euclid_20_90.PNG


--------------------------------------------------------------------------------
/bar_charts/Screen Shot 2015-02-27 at 7.27.11 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/bar_charts/Screen Shot 2015-02-27 at 7.27.11 PM.png


--------------------------------------------------------------------------------
/bar_charts/Screen Shot 2015-02-27 at 7.27.21 PM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geoquant/R/HEAD/bar_charts/Screen Shot 2015-02-27 at 7.27.21 PM.png


--------------------------------------------------------------------------------
/backtesting/rank_function.R:
--------------------------------------------------------------------------------
1 | ## Ranking function
2 | ## N.B.: rank = 1 for max(x)
3 | ranking <- function(x){
4 |   r <- as.xts(t(apply(x, 1, rank, na.last = "keep")))
5 |   return(r)
6 | }
7 | 


--------------------------------------------------------------------------------
/my_functions/rank_function.R:
--------------------------------------------------------------------------------
1 | ## Ranking function
2 | ## N.B.: rank = 1 for max(x)
3 | ranking <- function(x){
4 |   r <- as.xts(t(apply(x, 1, rank, na.last = "keep")))
5 |   return(r)
6 | }
7 | 


--------------------------------------------------------------------------------
/extrema/README.md:
--------------------------------------------------------------------------------
1 | #Extrema
2 | Comparable methods for determining global and local extrema: a simple approach vs. the pastecs turnpoint function
3 | 
4 | ![alt text] (https://github.com/geoquant/R/blob/master/extrema/output.PNG "Maxima and Minima")
5 | 


--------------------------------------------------------------------------------
/my_functions/README.md:
--------------------------------------------------------------------------------
 1 | #####percent_rank.R
 2 | percentrank-like functionality from Excel, in R
 3 | 
 4 | 
 5 | #####rank_function.R
 6 | general ranking function for rows
 7 | 
 8 | 
 9 | #####run_corr_function.R
10 | running correlation, with window length n
11 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_11/add_2_file.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | // [[Rcpp::export]]
 5 | long add_2_cpp(long max_value) {
 6 |   long sum = 0;
 7 |   for(long i = 1; i <= max_value; ++i) {
 8 |     sum = sum + i;
 9 |   }
10 |   return sum;
11 | }


--------------------------------------------------------------------------------
/rfortraders/Chapter_03/sample_json_file.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "CVX":
 3 |   {
 4 |     "Currency": "USD",
 5 |     "Sector": "Basic Materials",
 6 |     "Industry": "Major Integrated Oil & Gas"
 7 |   },
 8 |   "GOOG":
 9 |   {
10 |     "Currency": "USD",
11 |     "Sector": "Technology",
12 |     "Industry": "Internet Information Providers"
13 |   } 
14 | }


--------------------------------------------------------------------------------
/extrema/main.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls(all=TRUE))
 2 | 
 3 | library(xts)
 4 | library(pastecs)
 5 | library(quantmod)
 6 | 
 7 | WD <- setwd("C:/Users/jlappen/desktop/extrema")
 8 | WD
 9 | 
10 | # load R scripts ----------------------------------------------------------
11 | source("load.R")
12 | source("functions.R")
13 | source("inflection_points.R")
14 | 
15 | 


--------------------------------------------------------------------------------
/rfortraders/README.md:
--------------------------------------------------------------------------------
 1 | # Quantitative Trading with R: Understanding Mathematical and Computational Tools from a Quant's Perspective
 2 | 
 3 | This repository contains errata and R code from the book.
 4 | The book can be purchased on Amazon here: http://amzn.to/1DDNIn1
 5 | The code is provided under the MIT [license](LICENSE).
 6 | 
 7 | Please submit any corrections to this github repo. 
 8 | For further questions, email me here: rfortraders@gmail.com
 9 | 
10 | 


--------------------------------------------------------------------------------
/bar_charts/README.md~:
--------------------------------------------------------------------------------
1 | #bar_charts
2 | This is an example of reproducing CQG-like stock charts with some DeMark Indicators. Please note that there is absolutely zero DeMark data in the provided price series (GCQ is Gold, not to be confused with the charting platform CQG), and each indicator value that appears in the image below has been hard-coded by me. This is an exercise in plotting with ggplot2, and is not intended to be interpreted as any investing advice.
3 | 
4 | ![alt text](https://github.com/geoquant/R/blob/master/bar_charts/Screen%20Shot%202015-02-27%20at%207.27.21%20PM.png "A DeMark Chart")
5 | 


--------------------------------------------------------------------------------
/bar_charts/README.md:
--------------------------------------------------------------------------------
1 | #bar_charts
2 | This is an example of reproducing CQG-like stock charts with some DeMark Indicators. Please note that there is absolutely zero DeMark data in the provided price series (GCQ is Gold, not to be confused with the charting platform CQG), and each indicator value that appears in the image below has been hard-coded by me. This is an exercise in plotting with ggplot2, and is not intended to be interpreted as any investing advice.
3 | 
4 | ![alt text](https://github.com/geoquant/R/blob/master/bar_charts/Screen%20Shot%202015-02-27%20at%207.27.21%20PM.png "A DeMark Chart")
5 | 
6 | This is an alternate output that looks like an old-school Bloomberg terminal:
7 | ![alt text](https://github.com/geoquant/R/blob/master/bar_charts/Screen%20Shot%202015-02-27%20at%207.27.11%20PM.png)
8 | 


--------------------------------------------------------------------------------
/extrema/load.R:
--------------------------------------------------------------------------------
 1 | # read files --------------------------------------------------------------
 2 | load.data <- list.files(WD,pattern="*.csv",full.names=T)
 3 | 
 4 | all.data <- lapply(load.data, read.csv)
 5 | all.xts <- lapply(all.data, function(x) 
 6 |   xts(x[,2:ncol(x)],as.Date(as.character(x[,1]))))
 7 | 
 8 | 
 9 | 
10 | # OHLC lists --------------------------------------------------------------
11 | open.prices1 <- lapply(all.xts, function(x) x[,1])
12 | high.prices1 <- lapply(all.xts, function(x) x[,2])
13 | low.prices1 <- lapply(all.xts, function(x) x[,3])
14 | close.prices1 <- lapply(all.xts, function(x) x[,4])
15 | 
16 | 
17 | 
18 | # OHLC data frames --------------------------------------------------------
19 | open.prices2 <- do.call(cbind,open.prices1)
20 | high.prices2 <- do.call(cbind,high.prices1)
21 | low.prices2 <- do.call(cbind,low.prices1)
22 | close.prices2 <- do.call(cbind,close.prices1)


--------------------------------------------------------------------------------
/survivorship/metadata.txt:
--------------------------------------------------------------------------------
 1 | Two Folders to investigate:
 2 | Unaligned
 3 | Unassigned
 4 | 
 5 | 
 6 | The first folder contains 62 Tickers that have unequal dimensions of OHLC data
 7 | That is to day, there were instances where there were more close observations, than say, opens. Or any other combination. There is no general pattern.
 8 | 
 9 | ------------------------------------------------------------------------------------------------
10 | 
11 | 
12 | 3 Tickers yielded NULL data:
13 | W UN Equity
14 | SXCLD UQ Equity
15 | CNXT UQ Equity
16 | 
17 | 
18 | We believe these should be mapped to:
19 | 1051665D US Equity
20 | SXCL US Equity
21 | 1040983D UQ Equity
22 | 
23 | 
24 | 
25 | ------------------------------------------------------------------------------------------------
26 | 
27 | 3 Tickers Have no OHL data available, just the close:
28 | 292356Z US Equity
29 | 601964Q US Equity
30 | SVFI US Equity


--------------------------------------------------------------------------------
/bloomberg/rBBG.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls(all=TRUE))
 2 | Sys.setenv(JAVA_HOME="C:\\Program Files\\Java\\jre7")
 3 | 
 4 | library(Rbbg)
 5 | library(xts)
 6 | 
 7 | conn <- blpConnect(throw.ticker.errors = FALSE, log.level = "finest")    							
 8 | 
 9 | symbols    <- c("SPX Index", "USGG10YR Index", "USDJPY Curncy", "USDEUR Curncy")
10 | fields     <- c("PX_OPEN", "PX_HIGH", "PX_LOW", "PX_LAST") 
11 | start_date <- as.Date("1988-01-01") 
12 | end_date   <- as.Date("2015-05-11") 
13 | 
14 | bbg2xts <- function(symbols, ...) { 
15 |   bbg <- bdh(conn, symbols, ...) 
16 |   bbg$date <- NULL 
17 |   as.xts(bbg, order.by = as.Date(rownames(bbg))) 
18 | } 
19 | 
20 | BBG_list <- lapply(symbols, bbg2xts, fields, start_date, end_date) 
21 | names(BBG_list) <- symbols 
22 | 
23 | BBGdf <- lapply(BBG_list, "[", "PX_LAST")
24 | 
25 | 
26 | # export ------------------------------------------------------------------
27 | #for(i in seq_along(BBG_list)) {
28 | #  filename <- paste(names(BBG_list)[i], ".csv")
29 | #  write.csv(BBG_list[[i]], filename)
30 | #}
31 | 
32 | 


--------------------------------------------------------------------------------
/survivorship/rBBG_splits.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls(all=TRUE))
 2 | 
 3 | library(Rbbg)
 4 | library(xts)
 5 | 
 6 | start_time <- Sys.time()
 7 | 
 8 | # connect to bloomberg ----------------------------------------------------
 9 | conn <- blpConnect(throw.ticker.errors = FALSE,log.level = "finest")      						
10 | 
11 | 
12 | securities <- sort_tickers
13 | fields     <- c("EQY_DVD_HIST_SPLITS") 
14 | start_date <- as.Date("1999-11-01") 
15 | end_date   <- as.Date("2014-12-05") 
16 | 
17 | bbg2df <- function(security, ...) { 
18 |   bbg <- bds(conn, security, ...) 
19 |   as.data.frame(bbg) 
20 | } 
21 | 
22 | BBGList <- lapply(securities, bbg2df, fields) 
23 | names(BBGList) <- securities 
24 | 
25 | end_time <- Sys.time()
26 | end_time - start_time
27 | 
28 | 
29 | # write out ---------------------------------------------------------------
30 | setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/split_data/raw_data")
31 | files <- paste(securities,".split.csv",sep="")
32 | lapply(seq_along(BBGList), function(i){
33 |   write.zoo(BBGList[[i]], files[i],sep=",")
34 | })
35 | 
36 | 


--------------------------------------------------------------------------------
/backtesting/backtest_function.R:
--------------------------------------------------------------------------------
 1 | ## backtesting function
 2 | backtest_engine <- function(strategy, strategy_name, monthly_rets){
 3 |   
 4 |   # allocation matrix
 5 |   cash <- ifelse(rowSums(strategy) == 0, 1, 1 - rowSums(strategy))
 6 |   alloc_matrix <- cbind(strategy, cash)
 7 |   
 8 |   ## trade matrix
 9 |   prelim_trade <- alloc_matrix
10 |   prelim_trade[is.na(prelim_trade)] <- 0
11 |   trade_matrix <- prelim_trade - lag(prelim_trade, 1)
12 |   
13 |   ## friction matrix
14 |   only_tickers <- trade_matrix[,1:ncol(strategy)]
15 |   fric_matrix  <- cbind(abs(only_tickers) * friction_pct, 0)
16 |   wheninvested_rets <- as.numeric(monthly_rets) * lag(alloc_matrix, 1) - fric_matrix
17 |   
18 |   ## Calculate portfolio returns vector
19 |   port_rets     <- rowSums(wheninvested_rets)
20 |   port_rets_xts <- xts(port_rets, order.by = index(strategy))
21 |   port_returns  <- port_rets_xts[is.na(port_rets_xts) == FALSE]
22 |   colnames(port_returns) <- strategy_name
23 |   
24 |   list_output <- list(allocation_matrix = alloc_matrix,
25 |                       returns = wheninvested_rets,
26 |                       portfolio_returns = port_returns)
27 |   list_output
28 | }
29 | 


--------------------------------------------------------------------------------
/rfortraders/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Folk Creations, Inc.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/my_functions/trading_calendar.R:
--------------------------------------------------------------------------------
 1 | #
 2 | # Get a vector of trading days. These are week days, with holidays removed. The trading days
 3 | # are relative to the New York Stock Exchange (NYSE).
 4 | #
 5 | # startDate and endDate are Date objects. The function returns a date ordered vector of Date
 6 | # objects which are the trading days.
 7 | #
 8 | # This function requires the timeDate package
 9 | #
10 | # Credit to Ian Kaplan, I modified syntax styling and renamed the function (tradingCalendar)
11 | # Source: http://www.bearcave.com/finance/random_r_hacks/
12 | trading_calendar <- function( startDate, endDate)
13 | {
14 |   require(timeDate)
15 |   timeSeq <- timeSequence(from = as.character(startDate),
16 |                          to = as.character(endDate), 
17 |                          by="day", format = "%Y-%m-%d",
18 |                          zone = "NewYork", FinCenter = "America/New_York")
19 |   
20 |   ix          <- as.logical(isWeekday(timeSeq, wday = 1:5))
21 |   tradingDays <- timeSeq[ix]
22 |   startYear   <- as.POSIXlt(startDate)$year + 1900
23 |   endYear     <- as.POSIXlt(endDate)$year + 1900
24 |   
25 |   tradingDays.dt <- as.Date(tradingDays)
26 |   hol <- as.Date(holidayNYSE(startYear:endYear))
27 |   ix  <- which(tradingDays.dt %in% hol)
28 |   tradingDays.dt <- tradingDays.dt[-ix]
29 |   return(tradingDays.dt)
30 | } # tradingCalendar
31 | 


--------------------------------------------------------------------------------
/survivorship/README.md:
--------------------------------------------------------------------------------
 1 | The scripts in this directory are utilized, in conjuction with a Bloomberg Terminal, to download the constituents of a stock index for a range of dates. This data collection is a prerequisite for any index-level analysis that aims not to be biased by survivorship. For example, these scripts reveal that between 1999-11-01 and 2014-12-05 that there were a total of 1082 companies that participated in the SP 500. Knowing when these companies' returns attributed to the index would be helpful to accurately understand the performance of the overall index. 
 2 | 
 3 | The output of these scripts is a singular matrix of 1s and 0s, that can then be multiplied by an equivallently sized matrix of price data. The number of rows equals the number of days between the dates, and number of columns equals the total number of companies. The sum of each of these rows would be 500, given that a 1 indicates participation in the index and a 0 represents a company out of the index. An interesting feature of the SP 500 data is that there have been two instances when there was only 499 consistuents and those periods begin in: 1999-11 and 2008-09.
 4 | 
 5 | ###### rBBG_survivors.R
 6 | Downloads the company name and date of participation in an index.
 7 | 
 8 | ###### rBBG_prices.R
 9 | Downloads the actual OHLC price data for each constituent, given the output from rBBG_survivors.R
10 | 
11 | ###### rBBG_splits.R
12 | Downloads the date and ratio amount for a stock split.
13 | 
14 | These are my DPDF data settings on the terminal.
15 | ![alt text] (https://github.com/geoquant/R/blob/master/survivorship/dpdf.PNG "{DPDF}")
16 | 


--------------------------------------------------------------------------------
/my_functions/percent_rank.R:
--------------------------------------------------------------------------------
 1 | # Function to calculate percentrank with a certain interval "gap"
 2 | # Input: "scoreVec" is a vector of scores
 3 | #        "gap" is any speceficied interval 
 4 | # output: vector of percentrank
 5 | percentrank <- function(scoreVec, gap)
 6 | {
 7 |   scoreVec <- as.vector(scoreVec)
 8 |   prank <- NULL
 9 |   
10 |   for(i in 1:length(scoreVec))
11 |   {
12 |     if(gap < length(scoreVec))
13 |     {
14 |       if(i <= (length(scoreVec) - gap))
15 |       {
16 |         testScore <- scoreVec[i:(i + gap - 1)]
17 |         temp_data <- data.frame(testScore, score.rank = rank(testScore, ties.method = "min"))
18 |         score <- unique(temp_data[temp_data$testScore == scoreVec[i], "score.rank"])
19 |         prank[i] <- as.integer((score - 1) / (gap - 1) * 1000) / 1000
20 |         rm(temp_data) ; rm(score)
21 |       }
22 |       if(i > (length(scoreVec) - gap))
23 |       {
24 |         testScore <- scoreVec[i:length(scoreVec)]
25 |         temp_data <- data.frame(testScore, score.rank = rank(testScore, ties.method = "min"))
26 |         score <- unique(temp_data[temp_data$testScore == scoreVec[i], "score.rank"])
27 |         prank[i] <- as.integer((score - 1) / (gap - 1) * 1000) / 1000
28 |         rm(temp_data) ; rm(score)
29 |       }
30 |     }
31 |     if(gap == length(scoreVec))
32 |     {
33 |       temp_data <- data.frame(scoreVec, score.rank = rank(scoreVec, ties.method = "min"))
34 |       score <- unique(temp_data[temp_data$scoreVec == scoreVec[i], "score.rank"])
35 |       prank[i] <- as.integer((score - 1) / (gap - 1) * 1000) / 1000
36 |       rm(temp_data) ; rm(score)
37 |     }
38 |   }
39 |   prank
40 | }


--------------------------------------------------------------------------------
/varadi/varadi_chaos.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls())
 2 | 
 3 | library(quantmod)
 4 | library(PerformanceAnalytics)
 5 | 
 6 | symbol <- "SPY"
 7 | data <- new.env()
 8 | getSymbols(symbol, from = "2005-01-01", env = data,auto.assign = TRUE)
 9 | 
10 | data_high  <- data[[symbol]][, paste(symbol, ".High", sep = "")]
11 | data_low   <- data[[symbol]][, paste(symbol, ".Low", sep = "")]
12 | data_close <- data[[symbol]][, paste(symbol, ".Close", sep = "")]
13 | data_ad  <- data[[symbol]][, paste(symbol, ".Adjusted", sep = "")]
14 | data_ret <- diff(data_ad) / data_ad[-length(data_ad)]
15 | 
16 | data_high   <- data_high * as.numeric(data_ad)/as.numeric(data_close)
17 | data_low    <- data_low * as.numeric(data_ad)/as.numeric(data_close)
18 | data_range  <- data_high - data_low
19 | data_range1 <- rollapply(data_range, 10, sum, fill = FALSE)
20 | data_range2 <- rollapply(data_high, 10, max, fill = FALSE) - rollapply(data_low, 10, min, fill = FALSE)
21 | 
22 | tmp   <- data_range1 / data_range2
23 | tmp   <- SMA(tmp, 60)
24 | tmp_z <- (tmp - SMA(tmp, 252)) / rollapply(tmp, 252, sd)
25 | tmp_p <- pnorm(tmp_z)
26 | colnames(tmp_p) <- c("Choas.Stability.Metric")
27 | 
28 | 
29 | 
30 | # Returns Chart -----------------------------------------------------------
31 | chaos_regime <- ifelse(tmp_p > 0.5, 1, 0)
32 | chaos_return <- chaos_regime * data_ret
33 | 
34 | stable_regime <- ifelse(tmp_p < 0.5, 1, 0)
35 | stable_return <- stable_regime * data_ret
36 | 
37 | all_returns <- cbind(stable_return, chaos_return)
38 | names(all_returns) <- c("Stable", "Chaos")
39 |   
40 | par(mfrow=c(1, 1))
41 | chart.CumReturns(as.data.frame(all_returns), wealth.index = TRUE,
42 |                  legend.loc = "topleft")
43 | 
44 |   
45 | 


--------------------------------------------------------------------------------
/survivorship/rBBG_prices.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls(all=TRUE))
 2 | 
 3 | library(Rbbg)
 4 | library(xts)
 5 | 
 6 | start_time <- Sys.time()
 7 | 
 8 | # connect to bloomberg ----------------------------------------------------
 9 | conn <- blpConnect(throw.ticker.errors = FALSE,log.level = "finest")    							
10 | 
11 | 
12 | # if you ran the rBBG_survivors code, you can use sort_tickers:
13 | securities <- "SPX Index"
14 | fields     <- c("PX_OPEN","PX_HIGH","PX_LOW","PX_LAST") 
15 | start_date <- as.Date("1982-04-21") 
16 | end_date   <- as.Date("2014-12-05") 
17 | 
18 | bbg2xts <- function(security, ...) { 
19 |   bbg <- bdh(conn, security, ...) 
20 |   bbg$date <- NULL 
21 |   as.xts(bbg, order.by = as.Date(rownames(bbg))) 
22 | } 
23 | 
24 | BBGList <- lapply(securities, bbg2xts, fields, start_date, end_date) 
25 | names(BBGList) <- securities 
26 | 
27 | BBGdf <- do.call("cbind",BBGList)
28 | #colnames(BBGdf) <- securities
29 | #weeklyData <- BBGdf[endpoints(BBGdf, "weeks")]
30 | write.zoo(BBGdf,file="SPX_data.csv", sep=",")
31 | 
32 | 
33 | end_time <- Sys.time()
34 | end_time - start_time  # for the SP500, 1999-2014, this takes ~30 mins
35 | 
36 | # determine if the download accurately collected verything
37 | length(securities) 
38 | ncol(BBGdf)/4
39 | 
40 | 
41 | 
42 | # output ------------------------------------------------------------------
43 | # change the working directory and the suffix in the 'files' object, 
44 | # according to the setting established on the terminal:
45 | setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/dpdf_OFF")
46 | files <- paste(securities,".ON.csv",sep="")
47 | lapply(seq_along(BBGList), function(i){
48 |   write.zoo(BBGList[[i]], files[i],sep=",")
49 | })
50 | 
51 | 
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/backtesting/run_corr_function.R:
--------------------------------------------------------------------------------
 1 | ## Average Correlation function
 2 | runningCor <- function(idseq,colIndicator,data,n)   
 3 | {
 4 |   x <- data[,colIndicator$Var1[idseq]]
 5 |   y <- data[,colIndicator$Var2[idseq]]
 6 |   runcor_est <- runCor(x,y,n)
 7 |   return(runcor_est)
 8 | }
 9 | 
10 | corIndicator <- function(universe,data,n)
11 | {
12 |   uniComb <- 1:length(universe)
13 |   uniComb <- expand.grid(uniComb,uniComb)
14 |   uniComb <- uniComb[uniComb$Var1!=uniComb$Var2,]
15 |   uniComb <- uniComb[order(uniComb$Var1),]
16 |   
17 |   idseq<-1:nrow(uniComb)
18 |   corDat <- alply(idseq,1,runningCor,colIndicator=uniComb,data=data,n=n)
19 |   
20 |   correlationIndicator <- NULL
21 |   for(i in 1:length(universe))
22 |   {
23 |     datseq <- (1+(length(universe)-1)*(i-1)):(i*(length(universe)-1))
24 |     tempCor <- eval(parse(text=paste("cbind(",
25 |                                      paste(paste("corDat[[",
26 |                                                  paste(datseq),
27 |                                                  paste("]]"),sep=""),
28 |                                            collapse=","),paste(")"),
29 |                                      sep=""))) 
30 |     colnames(tempCor) <- paste("cor",1:ncol(tempCor),sep="")
31 |     eval(parse(text=paste("avgCor",paste(i),
32 |                           paste("<- xts(rowMeans(tempCor),
33 |                                 index(tempCor))"),sep="")))
34 |     correlationIndicator <- cbind(correlationIndicator,
35 |                                   eval(parse(text=paste("avgCor",
36 |                                                         paste(i),sep=""))))
37 |     rm(tempCor) 
38 |   }
39 |   colnames(correlationIndicator) <- universe
40 |   return(correlationIndicator)
41 | }


--------------------------------------------------------------------------------
/my_functions/run_corr_function.R:
--------------------------------------------------------------------------------
 1 | ## Average Correlation function
 2 | runningCor <- function(idseq,colIndicator,data,n)   
 3 | {
 4 |   x <- data[,colIndicator$Var1[idseq]]
 5 |   y <- data[,colIndicator$Var2[idseq]]
 6 |   runcor_est <- runCor(x,y,n)
 7 |   return(runcor_est)
 8 | }
 9 | 
10 | corIndicator <- function(universe,data,n)
11 | {
12 |   uniComb <- 1:length(universe)
13 |   uniComb <- expand.grid(uniComb,uniComb)
14 |   uniComb <- uniComb[uniComb$Var1!=uniComb$Var2,]
15 |   uniComb <- uniComb[order(uniComb$Var1),]
16 |   
17 |   idseq<-1:nrow(uniComb)
18 |   corDat <- alply(idseq,1,runningCor,colIndicator=uniComb,data=data,n=n)
19 |   
20 |   correlationIndicator <- NULL
21 |   for(i in 1:length(universe))
22 |   {
23 |     datseq <- (1+(length(universe)-1)*(i-1)):(i*(length(universe)-1))
24 |     tempCor <- eval(parse(text=paste("cbind(",
25 |                                      paste(paste("corDat[[",
26 |                                                  paste(datseq),
27 |                                                  paste("]]"),sep=""),
28 |                                            collapse=","),paste(")"),
29 |                                      sep=""))) 
30 |     colnames(tempCor) <- paste("cor",1:ncol(tempCor),sep="")
31 |     eval(parse(text=paste("avgCor",paste(i),
32 |                           paste("<- xts(rowMeans(tempCor),
33 |                                 index(tempCor))"),sep="")))
34 |     correlationIndicator <- cbind(correlationIndicator,
35 |                                   eval(parse(text=paste("avgCor",
36 |                                                         paste(i),sep=""))))
37 |     rm(tempCor) 
38 |   }
39 |   colnames(correlationIndicator) <- universe
40 |   return(correlationIndicator)
41 | }


--------------------------------------------------------------------------------
/extrema/inflection_points.R:
--------------------------------------------------------------------------------
 1 | # sample plots ------------------------------------------------------------
 2 | par(mfrow=c(1,2))
 3 | 
 4 | # method 1
 5 | maxima.test(x=1:nrow(close.prices2["2014-06/",11]),
 6 |             y=coredata(close.prices2["2014-06/",11]),
 7 |             w=round(nrow(close.prices2)*.001),
 8 |             span=.1)
 9 | 
10 | # method 2
11 | sig <- .65
12 | x <- index(close.prices2["2014-06/"])
13 | y <- as.vector(close.prices2["2014-06/",11])
14 | plot(y, type="o", cex = 1/4,col="gray")
15 | p.1 <- Peak(turnpoints(y), level = sig)
16 | p.2 <- Valley(turnpoints(y), level = sig )
17 | points(seq(y)[p.1], y[p.1], col = "#d7191c", cex = 1.2,pch=19)
18 | points(seq(y)[p.2], y[p.2], col = "#1a9641", cex = 1.2,pch=19)
19 | mtext("extrema al level = 1%", col="black")
20 | par(mfrow=c(1,1))
21 | 
22 | 
23 | # Iterate through Study Parameters ----------------------------------------
24 | #all.params <- seq(.001,1,by=.001)
25 | 
26 | all.params <- .2
27 | all.turnpoints <- lapply(close.prices1,as.vector)
28 | all.turnpoints <- lapply(all.turnpoints,turnpoints)
29 | 
30 | 
31 | # Collect Peaks and Pits --------------------------------------------------
32 | all.blank <- list()
33 | for(i in 1:length(all.params)){
34 |   all.blank[[i]] <- lapply(all.turnpoints,Extrema,level=all.params[i])
35 | }
36 | 
37 | all.map <- do.call(Map,c(c,all.blank))
38 | all.map <- lapply(all.map,sort)
39 | names(all.map) <- colnames(close.prices2)
40 | #all.stop <- lapply(all.map,unique)
41 | all.stop <- all.map
42 | 
43 | # Identify a start date for a range of prices before the turnpoint
44 | all.start <- lapply(all.stop, function(x) x-25)
45 | all.start <- rapply(all.start, f= function(x) ifelse(x <= 0,1,x),how="replace")
46 | 
47 | ## Vectorized version of the previous function 
48 | all.extrema <- Map(min.max.subset, all.xts, all.start, all.stop)
49 | 
50 | 
51 | # Inflection Statistics ---------------------------------------------------
52 | number.of.inflections <- lapply(all.extrema,length)
53 | # frequency table
54 | # all.frequency <- lapply(all.map,table)
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/simulation/GBM_20140120.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls(all = TRUE))
 2 | 
 3 | # Functional Method -------------------------------------------------------
 4 | gbm_function <- function(x, omega, mu, sigma, n) {
 5 |   # Geometric Brownian Motion:
 6 |   # x     = initial value of the process at t0
 7 |   # alpha = start time
 8 |   # omega = final time
 9 |   # mu    = rate of return (drift)
10 |   # sigma = volatility
11 |   # n     = number of intervals in which to split [t0, t1]
12 |   dt  <- omega/n
13 |   t   <- seq(0, omega, length = n + 1)
14 |   bm  <- ts(cumsum(c(0, rnorm(n) * sqrt(dt))), start = 0, deltat = dt)
15 |   gbm <- x * exp((mu - sigma ^ 2 / 2) * time(bm) + sigma * as.numeric(bm))
16 |   out <- ts(gbm, start = 0, deltat = deltat(bm))
17 | }
18 | 
19 | model1 <- gbm_function(x     = 5, 
20 |                        omega = 1,
21 |                        mu    = 0.09, 
22 |                        sigma = 0.3, 
23 |                        n     = 5000)
24 | 
25 | 
26 | # Iterative Method --------------------------------------------------------
27 | gbm_f <- function(t0, t1, mu, sigma, n){
28 |   # Geometric Brownian Motion:
29 |   # t0    = initial value of the process at t0
30 |   # t1    = final time
31 |   # mu    = expected annual rate of return (drift)
32 |   # sigma = expected annual variation
33 |   # n     = number of intervals in which to split [t0, t1]
34 |   dt  <- t1/n
35 |   out <- NULL
36 |   for (i in 1:n) {
37 |     out[1] <- t0
38 |     out[i + 1] <- out[i] * exp((mu - 0.5 * sigma ^ 2) * dt + 
39 |                                  sigma * sqrt(dt) * rnorm(n = 1))
40 |   }
41 |   out
42 | }
43 | 
44 | model_output <- gbm_f(t0 = 1, 
45 |                       t1 = 5, 
46 |                       mu = 0.0927, 
47 |                       sigma = 0.30, 
48 |                       n = 10000)
49 | 
50 | 
51 | # Plot each method --------------------------------------------------------
52 | par(mfrow=c(1, 2))
53 | plot(x    = seq(1, length(model1), by = 1),
54 |      y    = model1, 
55 |      type = "l",
56 |      lwd  = 1.5,
57 |      col  = "#31a354",
58 |      ylab = "$",
59 |      xlab = "# of Observations")
60 | 
61 | plot(model_output, type = "l")
62 | 


--------------------------------------------------------------------------------
/indicators/indicators.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls(all=TRUE))
 2 | library(xts)
 3 | library(quantmod)
 4 | 
 5 | prices <- rev(c(128.46, 130.415, 128.79, 132.17, 133.00, 129.495, 128.45,
 6 |             128.715, 127.83, 127.08, 126.46, 124.88, 122.02, 
 7 |             119.72, 118.93, 119.94, 119.56, 118.65, 118.63, 
 8 |             117.16))
 9 | 
10 | # Running Sum -------------------------------------------------------------
11 | running_sum <- function (x, n = 10, cumulative = FALSE) 
12 | {
13 |   x <- try.xts(x, error = as.matrix)
14 |   if (n < 1 || n > NROW(x)) 
15 |     stop("Invalid 'n'")
16 |   NAs <- sum(is.na(x))
17 |   if (NAs > 0) {
18 |     if (any(is.na(x[-(1:NAs)]))) 
19 |       stop("Series contains non-leading NAs")
20 |   }
21 |   beg <- 1 + NAs
22 |   len <- NROW(x) - NAs
23 |   result <- double(NROW(x))
24 |   if (cumulative) {
25 |     result[beg:NROW(x)] <- cumsum(x[beg:NROW(x)])
26 |   }
27 |   else {
28 |     result[(n + beg - 1)] <- sum(x[beg:(n + beg - 1)])
29 |     result <- .Fortran("runsum", ia = as.double(x[beg:NROW(x)]), 
30 |                        lia = as.integer(len), n = as.integer(n), oa = as.double(result[beg:NROW(x)]), 
31 |                        loa = as.integer(len), PACKAGE = "TTR", DUP = FALSE)$oa
32 |     result <- c(rep(NA, NAs), result)
33 |   }
34 |   is.na(result) <- c(1:(n - 1 + NAs))
35 |   reclass(result, x)
36 | }
37 | 
38 | # Running Mean ------------------------------------------------------------
39 | running_mean <- function (x, n = 10, cumulative = FALSE) 
40 | {
41 |   if (cumulative) {
42 |     result <- runSum(x, n, cumulative)/1:NROW(x)
43 |   }
44 |   else {
45 |     result <- runSum(x, n)/n
46 |   }
47 |   return(result)
48 | }
49 | 
50 | # Smoothing Average -------------------------------------------------------
51 | newSMA <- function (x, n = 10, ...) 
52 | {
53 |   ma <- running_mean(x, n)
54 |   if (!is.null(dim(ma))) {
55 |     colnames(ma) <- paste(colnames(x), "SMA", n, sep = ".")
56 |   }
57 |   return(ma)
58 | }
59 | 
60 | 
61 | window <- 6
62 | 
63 | sma_data <- cbind(SMA(prices, window))
64 | sma_data
65 | 
66 | # triangular moving average
67 | new_window <- ceiling((window + 1)/2)
68 | cbind(SMA(SMA(prices, new_window), new_window))
69 | 
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/rfortraders/Errata.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | * Page 6
 3 | In figure 1.2, the price ladder should have an 8 
 4 | as opposed to a 10 for the best bid price.
 5 | 
 6 | * Page 8
 7 | "Financial data is the the lifeblood..."
 8 | Two the's in a row.
 9 | 
10 | * Page 30
11 | "...is the the concept of nonmutability of state[85]."
12 | Two the's in a row.
13 | 
14 | * Page 33
15 | The comment for the code at the top of the page 
16 | should read: # "Create 100 standard normals ..." and not
17 | # "Greate 100 standard normals ..."
18 | 
19 | * Page 37
20 | Towards the end of the first paragraph, 
21 | topper() should be toupper()
22 | 
23 | * Page 37
24 | Change filter_symbols(c("MOT", "cvx", "123", "Gog2", "XLe"))
25 | to: filter_and_sort_symbols(c("MOT", "cvx", "123", "Gog2", "XLe"))
26 | 
27 | * Page 63
28 | "...representation of the the time differences..."
29 | Two the's in a row.
30 | 
31 | * Page 87
32 | "The graphs of a few popular continuous distributions are illustrated below:", should read:
33 | "The graphs of a few popular continuous distributions are illustrated in Fig 4.13"
34 | 
35 | * Page 88
36 | "Here is what the histogram of the outcomes looks like."
37 | should read:
38 | "Figure 4.14 illustrates the histogram of the outcomes."
39 | 
40 | * Page 89
41 | "In the graph below, the mean shifts from 0.5 to 0.6."
42 | should read:
43 | "A shift in the mean from 0.5 to 0.6 can be seen in Figure 4.15."
44 | 
45 | * Page 104
46 | "The scatter plot between the SPY and VXX time series looks like this:"
47 | should read:
48 | "The SPY and VXX time series scatter plot can be seen in Figure 5.11."
49 | 
50 | * Page 124
51 | The function calculate_buy_sell_signals is defined but not used in subsequent
52 | examples. (TO EDITOR: No change required.)
53 | 
54 | * Page 130 and 131
55 | The same code "plot data_out$spread" is used twice.  
56 | (TO EDITOR: No change required.)
57 | 
58 | * Page 144
59 | The code title: " # 4. Histogram of trade duration", should be changed to 
60 | " Histogram of trade duration"
61 | 
62 | * Page 221
63 | The code snippet at the bottom of the page has:
64 | "[1] 11".  It should be "## [1] 11". 
65 | 
66 | * Page 234
67 | Insert ## before the number 0.9978 close to the end of the page.
68 | Should read "## 0.9978" instead of "0.9978"
69 | 
70 | * Page 235
71 | Same as above.  Insert ## before the outputs. 
72 | "## [1] 0.597" and "## [1] 0.515"
73 | 
74 | 
75 | 
76 | 
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 


--------------------------------------------------------------------------------
/package_mgmt/install_packages.R:
--------------------------------------------------------------------------------
 1 | install.packages(c("AER",
 2 |                    "boot",
 3 |                    "car",
 4 |                    "caTools",
 5 |                    "copula",
 6 |                    "corpcor",
 7 |                    "corrplot",
 8 |                    "devtools",
 9 |                    "dtw",
10 |                    "Ecdat",
11 |                    "ellipse",
12 |                    "evir",
13 |                    "faraway",
14 |                    "fCopulae",
15 |                    "fEcofin",
16 |                    "fGarch",
17 |                    "FinTS",
18 |                    "FitAR",
19 |                    "foreach",
20 |                    "forecast",
21 |                    "ggplot2",
22 |                    "htmltools",
23 |                    "knitr",
24 |                    "lattice",
25 |                    "leaps",
26 |                    "lmtest",
27 |                    "markdown",
28 |                    "MASS",
29 |                    "mixAK",
30 |                    "mnormt",
31 |                    "nor1mix",
32 |                    "numDeriv",
33 |                    "pastecs",
34 |                    "pcaPP",
35 |                    "PerformanceAnalytics",
36 |                    "plyr",
37 |                    "quantmod",
38 |                    "reshape",
39 |                    "reshape2",
40 |                    "rJava",
41 |                    "robust",
42 |                    "rugarch",
43 |                    "tseries",
44 |                    "urca",
45 |                    "vars",
46 |                    "xtable",
47 |                    "xts",
48 |                    "yaml",
49 |                    "zoo"))
50 | 
51 | # R connection to Bloomberg
52 | install.packages("Rbbg", repos="http://r.findata.org/", dependencies = TRUE)
53 | 
54 | # Install an old version of the sn package for CFRM 542 homework (Winter 2015)
55 | install.packages("http://cran.r-project.org/src/contrib/Archive/sn/sn_1.0-0.tar.gz", 
56 |                  repos = NULL, type = "source")
57 | 
58 | 
59 | # Install Packages for CFRM 543
60 | install.packages(c("ROI","ROI.plugin.glpk","ROI.plugin.quadprog"))
61 | install.packages("mpo", repos = "http://r-forge.r-project.org")
62 | install.packages("factorAnalytics", repos = "http://r-forge.r-project.org")
63 | install.packages("PerformanceAnalytics", repos = "http://r-forge.r-project.org")
64 | 
65 | # Source:
66 | # https://rmkrug.wordpress.com/2014/11/10/update-all-user-installed-r-packages-again/
67 | #install.packages( 
68 | #  lib  = lib <- .libPaths()[1],
69 | #  pkgs = as.data.frame(installed.packages(lib), stringsAsFactors=FALSE)$Package,
70 | #  type = 'source'
71 | #)


--------------------------------------------------------------------------------
/fb/bday2016.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls(all = TRUE))
 2 | 
 3 | library(xts)
 4 | library(dplyr)
 5 | library(scales)
 6 | library(ggplot2)
 7 | library(gridExtra)
 8 | 
 9 | setwd("~/dev/fbook")
10 | 
11 | # load data ---------------------------------------------------------------
12 | # 'data.frame':	326 obs. of  2 variables:
13 | # $ year  : int 
14 | # $ person: chr  
15 | data <- read.csv("hbd_2016.csv", header = TRUE, as.is = TRUE, sep = ",")
16 | 
17 | 
18 | # individual frequency ----------------------------------------------------
19 | indiv_freq  <- count_(unique(data), vars = "person", sort = TRUE)
20 | 
21 | percent1 <- quantile(indiv_freq$n, 0.99) # 2 posts
22 | percent5 <- quantile(indiv_freq$n, 0.95) # 3 posts
23 | 
24 | who1 <- indiv_freq[indiv_freq$n >= percent1, ] #  4 people
25 | who5 <- indiv_freq[indiv_freq$n >= percent5, ] # 23 people
26 | 
27 | # density plot
28 | plot1 <- ggplot(indiv_freq, aes(n)) + 
29 |   geom_density(fill = "black") +
30 |   ggtitle("Kernel Density of FB HBD Posts 2008-2016") +
31 |   xlab("Cumulative Sum of Posts Per Person") +
32 |   ylab("Density")  +
33 |   geom_vline(xintercept = c(percent1, percent5), 
34 |              colour = c("#7fcdbb", "#2c7fb8"), size = 1.05) +
35 |   annotate("text", x = c(3, 5) + 0.25 , y = c(1, 1) + 0.05, 
36 |            label = c("95%", "99%"), 
37 |            colour = c("#2c7fb8", "#7fcdbb"))
38 | 
39 | # bar plot
40 | plot2 <- ggplot(data = who5, aes(x = reorder(person, n), y = n)) +
41 |   ggtitle("95th Percentile of HBD Posters 2008-2016") +
42 |   ylab("Cumulative Sum of Posts Per Person") +
43 |   xlab(NULL) +
44 |   geom_bar(stat = 'identity') + 
45 |   coord_flip() +
46 |   annotate("text", x = nrow(who5) - nrow(who1) + 0.35, y = 6 + 0.25, 
47 |            label = "99%", colour = "#7fcdbb") +
48 |   geom_vline(xintercept = nrow(who5) - nrow(who1) + 1, 
49 |              colour = "#7fcdbb", size = 1.05)
50 | 
51 | # plot density and barplot side-by-side
52 | grid.arrange(plot1, plot2, ncol = 2)
53 | 
54 | 
55 | # streaks -----------------------------------------------------------------
56 | split_names <- split(data, data$person)
57 | split_dates <- lapply(split_names, `[`, , 1)
58 | split_diff  <- lapply(split_dates, function(x) diff(rev(x)))
59 | split_con   <- lapply(split_diff, function(x) x[!x > 1])
60 | split_sum   <- lapply(split_con, sum)
61 | 
62 | consecutive_sums <- sort(unlist(split_sum))
63 | 
64 | # print to console consecutive count of 99% percentile
65 | cbind(consecutive_sums[who1$person])
66 | 
67 | 
68 | # annual frequency --------------------------------------------------------
69 | annual_count <- 
70 |   data %>%
71 |   group_by(year) %>%
72 |   summarise(count = n_distinct(person)) %>%
73 |   mutate(count = replace(count, count == 1, 0)) %>%
74 |   arrange(year) 
75 | 
76 | # line plot
77 | ggplot(data = annual_count, aes(x = year, y = count)) +
78 |   ggtitle("Total HBD Posts Per Year") +
79 |   ylab(NULL) +
80 |   xlab(NULL) +
81 |   geom_line()
82 | 


--------------------------------------------------------------------------------
/extrema/functions.R:
--------------------------------------------------------------------------------
  1 | # Method 1 ----------------------------------------------------------------
  2 | # http://stats.stackexchange.com/questions/36309/how-do-i-find-peaks-in-a-dataset
  3 | 
  4 | #maxima
  5 | argmax <- function(x=1:length(x),y=coredata(x), w=1, ...) {
  6 |   n <- length(y)
  7 |   y.smooth <- loess(y ~ x, ...)$fitted
  8 |   y.max <- rollapply(zoo(y.smooth), 2*w+1, max, align="center")
  9 |   delta <- y.max - y.smooth[-c(1:w, n+1-1:w)]
 10 |   i.max <- which(delta <= 0) + w
 11 |   list(x=x[i.max], i=i.max, y.hat=y.smooth)
 12 | }  
 13 | 
 14 | 
 15 | # minema
 16 | argmin <- function(x=1:length(x),y=coredata(x), w=1, ...) {
 17 |   n <- length(y)
 18 |   y.smooth <- loess(y ~ x, ...)$fitted
 19 |   y.min <- rollapply(zoo(y.smooth), 2*w+1, min, align="center")
 20 |   delta <- y.min - y.smooth[-c(1:w, n+1-1:w)]
 21 |   i.min <- which(delta >= 0) + w
 22 |   list(x=x[i.min], i=i.min, y.hat=y.smooth)
 23 | }
 24 | 
 25 | 
 26 | maxima.test <- function(x,y,w, span) {
 27 |   
 28 |   # plot peaks
 29 |   peaks <- argmax(x, y, w=w, span=span)
 30 |   plot(x, y, cex=1.25, type="l", col="Gray", main=paste("w = ", w, ", span = ", span, sep=""))
 31 |   lines(x, peaks$y.hat, cex=.75) #$
 32 |   y.max <- max(y)
 33 |   sapply(peaks$i, function(i) lines(c(x[i],x[i]), c(y.max, peaks$y.hat[i]),
 34 |                                     col="#d7191c", lty=2))
 35 |   points(x[peaks$i], peaks$y.hat[peaks$i], col="#d7191c", pch=19, cex=1.25)
 36 |   
 37 |   # plot valleys
 38 |   valley <- argmin(x, y, w=w, span=span)
 39 |   y.min <- min(y)
 40 |   sapply(valley$i, function(i) lines(c(x[i],x[i]), c(y.min, valley$y.hat[i]),
 41 |                                      col="#1a9641", lty=2))
 42 |   points(x[valley$i], valley$y.hat[valley$i], col="#1a9641", pch=19, cex=1.25)
 43 | }
 44 | 
 45 | 
 46 | 
 47 | 
 48 | # Method 2 ----------------------------------------------------------------
 49 | # https://stat.ethz.ch/pipermail/r-help/2005-November/083423.html
 50 | 
 51 | Peak <- function(x, level = 0.05) {
 52 |   if (!inherits(x, "turnpoints"))
 53 |     stop("x must be a 'turnpoints' object!")
 54 |   # Extract position and probability
 55 |   tp.pos <- x$tppos
 56 |   tp.proba <- x$proba
 57 |   # We have both peaks and pits. Keep only peaks
 58 |   keep <- 1:(x$nturns / 2) * 2
 59 |   if (x$firstispeak) keep <- keep - 1
 60 |   tp.pos <- tp.pos[keep]
 61 |   tp.proba <- tp.proba[keep]
 62 |   # Keep only peaks whose probability is lower than level
 63 |   return(tp.pos[tp.proba < level])
 64 | }
 65 | 
 66 | Valley <- function(x, level = 0.05) {
 67 |   if (!inherits(x, "turnpoints"))
 68 |     stop("x must be a 'turnpoints' object!")
 69 |   # Extract position and probability
 70 |   tp.pos <- x$tppos
 71 |   tp.proba <- x$proba
 72 |   # We have both peaks and pits. Keep only peaks
 73 |   keep <- 1:(x$nturns / 2) * 2
 74 |   if (x$firstispeak==FALSE) keep <- keep - 1
 75 |   tp.pos <- tp.pos[keep]
 76 |   tp.proba <- tp.proba[keep]
 77 |   # Keep only peaks whose probability is lower than level
 78 |   return(tp.pos[tp.proba < level])
 79 | }
 80 | 
 81 | 
 82 | 
 83 | Extrema <- function(x, level = 0.05) {
 84 |   if (!inherits(x, "turnpoints"))
 85 |     stop("x must be a 'turnpoints' object!")
 86 |   # Extract position and probability
 87 |   tp.pos <- x$tppos
 88 |   tp.proba <- x$proba
 89 |   # Keep only peaks whose probability is lower than level
 90 |   return(tp.pos[tp.proba < level])
 91 | }
 92 | 
 93 | 
 94 | 
 95 | 
 96 | # General Use -------------------------------------------------------------
 97 | 
 98 | ## function subset an xts object giving a min/max lists
 99 | min.max.subset <- function(l,min.list,max.list) 
100 |   Map(function(x,y)l[seq(x,y)],min.list,max.list)
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_04/code.R:
--------------------------------------------------------------------------------
  1 | # CHAPTER 4
  2 | # Basic Statistics and Probability
  3 | 
  4 | ############################
  5 | # Population versus sample #
  6 | ############################
  7 | # Set the seed
  8 | set.seed(100)
  9 | X <- rnorm(1000000, mean = 2.33, sd = 0.5)
 10 | mu <- mean(X)
 11 | sd <- sd(X)
 12 | hist(X, breaks = 100)
 13 | abline(v = mu, lwd = 3, lty = 2)
 14 | 
 15 | set.seed(12)
 16 | rnorm(5)
 17 | ## [1] -1.4805676  1.5771695 -0.9567445 -0.9200052 -1.9976421
 18 | 
 19 | rnorm(5)
 20 | ## [1] -0.2722960 -0.3153487 -0.6282552 -0.1064639  0.4280148
 21 | 
 22 | sample5  <- sample(X, 5, replace = TRUE)
 23 | sample10 <- sample(X, 10, replace = TRUE)
 24 | sample50 <- sample(X, 50, replace = TRUE)
 25 | 
 26 | sample5
 27 | ## [1] 2.497921 2.635927 2.291848 2.127974 2.268268
 28 | 
 29 | sample10
 30 | ## [1] 2.064451 2.274464 2.468938 1.800007 2.557669
 31 | ## [6] 2.535241 1.331020 1.159151 1.661762 2.285889
 32 | 
 33 | sample50
 34 | ## [1] 2.581844 2.138331 3.003670 1.864148 2.049141
 35 | ## [6] 2.808971 1.400057 2.527640 3.639216 3.311873
 36 | 
 37 | mean(sample5)
 38 | ## [1] 2.364388
 39 | 
 40 | mean(sample10)
 41 | ## 2.013859
 42 | 
 43 | mean(sample50)
 44 | ## 2.447003
 45 | 
 46 | mean(sample(X, 1000, replace = TRUE))
 47 | ## 2.323124
 48 | 
 49 | mean(sample(X, 10000, replace = TRUE))
 50 | ## [1] 2.334109
 51 | 
 52 | ##############################
 53 | # Central Limit Theorem in R #
 54 | ##############################
 55 | mean_list <- list()
 56 | for(i in 1:10000) {
 57 |   mean_list[[i]] <- mean(sample(X, 10, replace = TRUE))
 58 | }
 59 | 
 60 | hist(unlist(mean_list), breaks = 500,
 61 |   xlab = "Mean of 10 samples from X",
 62 |   main = "Convergence of sample distribution",
 63 |   cex.main = 0.8)
 64 | abline(v = mu, lwd = 3, col = "white", lty = 2)
 65 | 
 66 | population <- sample(c(0, 1), 100000, replace = TRUE)
 67 | hist(population, main = "Non-normal", cex.main = 0.8)
 68 | abline(v = mean(population), lwd = 3, lty = 3)
 69 | 
 70 | mean_list <- list()
 71 | for(i in 1:10000) {
 72 |   mean_list[[i]] <- mean(sample(population, 10, replace = TRUE))
 73 | }
 74 | 
 75 | hist(unlist(mean_list), main = "Distribution of averages",
 76 |   cex.main = 0.8,
 77 |   xlab = "Average of 10 samples")
 78 | abline(v = 0.5, lwd = 3)
 79 | 
 80 | ###############################
 81 | # Unbiasedness and efficiency #
 82 | ###############################
 83 | # Formula for population variance
 84 | population_variance <- function(x) {
 85 |   mean <- sum(x) / length(x)
 86 |   return(sum((x - mean) ^ 2) / length(x))
 87 | }
 88 | 
 89 | # Create a population
 90 | population <- as.numeric(1:100000)
 91 | variance <- population_variance(population)
 92 | 
 93 | variance
 94 | ## [1] 833333333
 95 | 
 96 | output <- list()
 97 | for(i in 1:1000) {
 98 |   output[[i]] <- population_variance(sample(population,
 99 |   10, replace = TRUE))
100 | }
101 | 
102 | variance_estimates <- unlist(output)
103 | hist(variance_estimates, breaks = 100, cex.main = 0.9)
104 | average_variance <- mean(variance_estimates)
105 | abline(v = average_variance, , lty = 2, lwd = 2)
106 | abline(v = variance, lwd = 2)
107 | 
108 | average_variance
109 | ## [1] 738123625
110 | 
111 | # Formula for unbiased variance estimator
112 | sample_variance <- function(x) {
113 |   mean <- sum(x) / length(x)
114 |   return(sum((x - mean) ^ 2) / (length(x) - 1))
115 | }
116 | 
117 | output <- list()
118 | 
119 | for( i in 1:1000 ) {
120 |   output[[i]] <- sample_variance(sample(population,
121 |   10, replace = TRUE))
122 | }
123 | 
124 | sample_variance_estimates <- unlist(output)
125 | average_sample_variance <- mean(sample_variance_estimates)
126 | 
127 | average_sample_variance
128 | ## [1] 836184961
129 | 
130 | #############################
131 | # Probability distributions #
132 | #############################
133 | plot(c(-1, 1), c(0.5, 0.5), type = "h", lwd = 3,
134 |   xlim = c(-2, 2), main = "Probability mass function of coin toss",
135 |   ylab = "Probability",
136 |   xlab = "Random Variable",
137 |   cex.main = 0.9)
138 | 
139 | ########################
140 | # Simulations of coins #
141 | ########################
142 | outcomes <- sample(c(0, 1), 1000, replace = TRUE)
143 | 
144 | set.seed(101)
145 | biased_outcomes <- sample(c(0, 1), 1000,
146 |   replace = TRUE, prob = c(0.4, 0.6))
147 | 
148 | prob_estimate <- sum(biased_outcomes) /
149 |   length(biased_outcomes)
150 | 
151 | prob_estimate
152 | ## [1] 0.603
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 


--------------------------------------------------------------------------------
/survivorship/rBBG_survivors.R:
--------------------------------------------------------------------------------
  1 | rm(list=ls(all=TRUE))
  2 | 
  3 | library(Rbbg)
  4 | library(xts)
  5 | library(plyr)
  6 | library(reshape2)
  7 | 
  8 | # connect to bloomberg ----------------------------------------------------
  9 | start_time <- Sys.time()
 10 | conn <- blpConnect(throw.ticker.errors = FALSE,log.level = "finest")      						
 11 | 
 12 | securities  <- c("SPX Index")
 13 | fields      <- c("INDX_MWEIGHT_HIST") 
 14 | date_seq    <- as.POSIXlt(seq(as.Date("1999-11-01"),as.Date("2014-12-05"),1))
 15 | date_final  <- format(date_seq[! date_seq$wday %in% c(0,6)],"%Y%m%d")
 16 | 
 17 | BBGList <- list()
 18 | for(i in 1:length(date_final)){
 19 |   BBGList[[i]] <- bds(conn, securities, fields, 
 20 |                       override_fields = c("END DT"), 
 21 |                       override_values = c(date_final[i])) 
 22 | }
 23 | 
 24 | # BBGdf       <- do.call("cbind",BBGList)
 25 | # BBGdf_final <- BBGdf[, seq(1, ncol(BBGdf)-1, 2)]
 26 | # colnames(BBGdf_final) <- date_final
 27 |   
 28 |   
 29 | end_time <- Sys.time()  
 30 | end_time - start_time  # for the SP500, 1999-2014, this takes ~2 hours
 31 | 
 32 | # output ------------------------------------------------------------------
 33 | setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/survivors/raw_data")
 34 | files <- paste(date_final,".csv",sep="")
 35 | lapply(seq_along(BBGList), function(i){
 36 |   write.zoo(BBGList[[i]], files[i],sep=",")
 37 | })
 38 | 
 39 | 
 40 | # read & reformat list data to data frame ---------------------------------
 41 | WD <- setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/survivors/raw_data")
 42 | WD
 43 | 
 44 | load_data <- list.files(WD,pattern="*.csv",full.names=T)
 45 | read_data <- lapply(load_data, read.csv, stringsAsFactors=FALSE)
 46 | 
 47 | 
 48 | max_length    <- max(sapply(read_data,nrow))
 49 | constituents  <- lapply(read_data, function(x) x[, 2])
 50 | equal_length  <- lapply(constituents, `[`, 1:max_length)
 51 | BBGdf         <- do.call("cbind", equal_length)
 52 | BBGdf         <- data.frame(BBGdf)
 53 | colnames(BBGdf) <- date_final
 54 | 
 55 | 
 56 | # suffix normalization
 57 | sub1 <- apply(BBGdf, 2, gsub, pattern = " UN", replacement = " ZZZZZZ" )
 58 | sub2 <- apply(sub1, 2, gsub, pattern = " UA", replacement = " ZZZZZZ" )
 59 | sub3 <- apply(sub2, 2, gsub, pattern = " UW", replacement = " ZZZZZZ" )
 60 | sub4 <- apply(sub3, 2, gsub, pattern = " UQ", replacement = " ZZZZZZ" )
 61 | sub5 <- apply(sub4, 2, gsub, pattern = " UP", replacement = " ZZZZZZ" )
 62 | sub6 <- apply(sub5, 2, gsub, pattern = " US", replacement = " ZZZZZZ" )
 63 | sub7 <- apply(sub6, 2, gsub, pattern = " ZZZZZZ Equity Equity", replacement = " ZZZZZZ")
 64 | sub8 <- apply(sub7, 2, gsub, pattern = " ZZZZZZ Equity", replacement = " ZZZZZZ")
 65 | sub9 <- apply(sub8, 2, gsub, pattern = " ZZZZZZ", replacement = " US Equity")
 66 | 
 67 | # ticker replacement
 68 | sub10 <- apply(sub9, 2, gsub, pattern = "ZTS-W US Equity", replacement = "ZTS US Equity")
 69 | final_output <- as.data.frame(sub10)
 70 | # write.csv(final_output,file="daily_survivors.csv")
 71 | 
 72 | 
 73 | 
 74 | # unique list of tickers to implement in rBBG_2000.R and rBBG_splits.R
 75 | sort_tickers <- unique(sort(na.omit(as.vector(as.matrix(final_output)))))
 76 | sort_tickers
 77 | 
 78 | 
 79 | 
 80 | 
 81 | # format survivors into an xts object -------------------------------------
 82 | setwd("C:/Users/jlappen/Dropbox/DeMark/database/SP500/survivors")
 83 | ticker_read <- read.csv("daily_survivors.csv", as.is = TRUE, header=FALSE)
 84 | ticker_read <- ticker_read[,2:ncol(ticker_read)]
 85 | 
 86 | # associate each ticker with a date
 87 | blank_list <- list()
 88 | for(i in 1:ncol(ticker_read)){
 89 |   blank_list[[i]] <- cbind(ticker_read[1,i], ticker_read[2:nrow(ticker_read),i])
 90 | }
 91 | 
 92 | long_df <- as.data.frame(do.call(rbind, blank_list))
 93 | 
 94 | # cleaning and convert the data frame from long to wide format
 95 | just_dates    <- as.character(long_df[, 1])
 96 | just_tickers  <- as.character(long_df[, 2])
 97 | clean_tickers <- substr(just_tickers, 1, nchar(just_tickers)-10)
 98 | long_df2      <- as.data.frame(cbind(just_dates, clean_tickers, value=1))
 99 | wide_df       <- dcast(long_df2, long_df2[,1] ~ long_df2[,2], value.var = "value")
100 | 
101 | clean_dates <- as.Date(as.character(wide_df[, 1]), "%Y%m%d")
102 | wide_xts <- xts(wide_df[,2:ncol(wide_df)], order.by = clean_dates)
103 | wide_xts <- xts(apply(wide_xts,2,as.numeric), order.by = clean_dates)
104 | wide_xts[is.na(wide_xts)] <- 0
105 | colnames(wide_xts)[1] <- "NA"
106 | 
107 | 
108 | # clean holidays
109 | trade_dates <- as.vector(as.matrix(read.table("NYSETradingDays.txt"))) # Jim Kragenbring provided this file
110 | trade_dates <- na.omit(as.Date(trade_dates, "%Y%m%d"))
111 | blank_xts   <- xts(1:length(trade_dates), order.by=trade_dates) 
112 | final_dates <- index(blank_xts["1999-11-01/2014-12-05"])
113 | last_xts    <- wide_xts[final_dates, 1:ncol(wide_xts)-1] 
114 |   
115 | 
116 | 
117 | # write.zoo(last_xts, file="xts_survivors.csv", sep = ",") 
118 | 
119 | 


--------------------------------------------------------------------------------
/backtesting/portfolio_template.R:
--------------------------------------------------------------------------------
  1 | rm(list = ls(all = TRUE))
  2 | options(scipen=999)
  3 | 
  4 | setwd()
  5 | 
  6 | library(xts)
  7 | library(plyr)
  8 | library(quantmod)
  9 | library(PerformanceAnalytics)
 10 | 
 11 | # load custom functions ---------------------------------------------------
 12 | source("backtest_function.R")
 13 | source("run_corr_function.R")
 14 | source("rank_function.R")
 15 | 
 16 | 
 17 | # backtest parameters -----------------------------------------------------
 18 | start_date  <- as.Date("2007-04-01")
 19 | end_date    <- as.Date("2015-01-31")
 20 | 
 21 | ## Define frictions (as percentage of trade)
 22 | commission_pct <- c(.0003)
 23 | slippage_pct <- c(.0005)
 24 | friction_pct <- commission_pct + slippage_pct
 25 | 
 26 | ## Portfolio Symbols
 27 | data_source <- c("yahoo")
 28 | universe <- c("SPY", "MDY", "IWM",      # Large-Mid-Small Cap equities
 29 |               "EFA", "EEM",             # Intl.and emerging markets
 30 |               "AGG","TIP","TLT", "LQD", # Fixed Income
 31 |               "GSG",                    # Commodities
 32 |               "RWR", "RWX",  "MBB")     # Real Estateads
 33 | 
 34 | 
 35 | #universe <- c("SPY", "QQQ", "IWM", "EEM", "EFA",
 36 | #              "TLT", "LQD", "DBC", "GLD", "AGG",
 37 | #              "RWR","RWX","MBB")
 38 | 
 39 | 
 40 | ## Cash 
 41 | cash_symbol <- c("SHV")
 42 | portfolio_symbols <- c(universe, cash_symbol)
 43 | 
 44 | 
 45 | # download data and compute returns ---------------------------------------
 46 | getSymbols(universe, src = data_source, from = start_date, to = end_date)
 47 | getSymbols(cash_symbol, src = data_source, from = start_date, to = end_date)
 48 | 
 49 | raw_close <- do.call(merge, lapply(portfolio_symbols, function(x) Cl(get(x))))
 50 | adj_close <- do.call(merge, lapply(portfolio_symbols, function(x) Ad(get(x))))
 51 | 
 52 | ## Convert Daily to Monthly
 53 | monthly_raw  <- raw_close[endpoints(index(raw_close), on = "months"), ]
 54 | monthly_adj  <- adj_close[endpoints(index(adj_close), on = "months"), ]
 55 | monthly_rets <- diff(monthly_adj)/lag(monthly_adj, k = 1)
 56 | 
 57 | 
 58 | 
 59 | 
 60 | 
 61 | # Buy and Hold ------------------------------------------------------------
 62 | bh_allocation <- 1/length(universe)
 63 | bh_initial <- matrix(1, nrow = nrow(monthly_rets), ncol = ncol(monthly_rets) - 1)
 64 | bh_weights <- bh_initial * bh_allocation
 65 | bh_weights <- xts(bh_weights, order.by = index(monthly_rets))
 66 | bh_weights <- bh_weights[endpoints(index(bh_weights), on = "months"), ]
 67 | colnames(bh_weights) <- universe
 68 | 
 69 | 
 70 | # output ------------------------------------------------------------------
 71 | strat1_returns <-
 72 | strat2_returns <- 
 73 | bh_returns     <- backtest_engine(bh_weights, "buy_hold", monthly_rets)
 74 | 
 75 | port_returns_report <- cbind(strat1_returns$portfolio_returns, 
 76 |                              bh_returns$portfolio_returns, 
 77 |                              strat2_returns$portfolio_returns)
 78 | 
 79 | aa  <- table.AnnualizedReturns(cbind(port_returns_report))
 80 | bb  <- table.DownsideRisk(cbind(port_returns_report))
 81 | a   <- table.Stats(cbind(port_returns_report))
 82 | b   <- table.Variability(cbind(port_returns_report))
 83 | c   <- table.Distributions(cbind(port_returns_report))
 84 | d   <- table.DrawdownsRatio(cbind(port_returns_report))
 85 | e   <- table.DownsideRiskRatio(cbind(port_returns_report))
 86 | f   <- table.TrailingPeriods(cbind(port_returns_report))
 87 | rowBinding <- rbind(aa, bb, a, b, c, d, e, f)
 88 | 
 89 | charts.PerformanceSummary(as.data.frame(port_returns_report), wealth.index = TRUE)
 90 | 
 91 | drawdowns_list <- list(strat1 = table.Drawdowns(strat1_returns$portfolio_returns),
 92 |                        strat2 = table.Drawdowns(strat2_returns$portfolio_returns),
 93 |                        bh     = table.Drawdowns(bh_returns$portfolio_returns))
 94 | 
 95 | monthly_ret_list <- list(strat1 = table.CalendarReturns(strat1_returns$portfolio_returns, digits = 2),
 96 |                          strat2 = table.CalendarReturns(strat2_returns$portfolio_returns, digits = 2),
 97 |                          bh     = table.CalendarReturns(bh_returns$portfolio_returns, digits = 2))
 98 | 
 99 | 
100 | # final numeric tables printed to the console
101 | rowBinding
102 | drawdowns_list
103 | monthly_ret_list
104 | 
105 | 
106 | 
107 | # ggplot2 histogram
108 | library(ggplot2)
109 | list_of_returns <- port_returns_report
110 | rename_returns  <- lapply(list_of_returns, function(x) {
111 |   colnames(x) <- c("returns")
112 |   return(x)
113 | })
114 | 
115 | return1 <- as.data.frame(rename_returns[[1]])
116 | return2 <- as.data.frame(rename_returns[[2]])
117 | return3 <- as.data.frame(rename_returns[[3]])
118 | 
119 | return1$label <- "bh" 
120 | return2$label <- "equal_weight" 
121 | return3$label <- "risk_parity" 
122 | all_returns   <- rbind(return1, return2, return3)
123 | 
124 | ggplot(all_returns, aes(returns, fill = label)) + geom_density(alpha = 0.3)
125 | 
126 | 
127 | 
128 | # my current allocation decisions
129 | this_month_allocation <- as.data.frame(rbind(last(strat1_returns$allocation_matrix),
130 |                                              last(strat2_returns$allocation_matrix)), 
131 |                                        row.names = c("equal_weight", "risk_parity"))
132 | 
133 | 
134 | colnames(this_month_allocation) <- c(universe, "CASH")
135 | this_month_allocation
136 | 
137 | 
138 | #last(strat2_returns$allocation_matrix)
139 | t(this_month_allocation[2, ]) * 1000
140 | 
141 | 


--------------------------------------------------------------------------------
/systematic_investor/time_series_matching.R:
--------------------------------------------------------------------------------
  1 | ###############################################################################
  2 | # Load Systematic Investor Toolbox (SIT)
  3 | # https://systematicinvestor.wordpress.com/systematic-investor-toolbox/
  4 | ###############################################################################
  5 | rm(list=ls())
  6 | 
  7 | library(dtw)
  8 | library(xts)
  9 | library(quantmod)
 10 | 
 11 | setwd("C:/Users/jlappen/Desktop/R/systematic_investor")
 12 | con = gzcon(url('http://www.systematicportfolio.com/sit.gz', 'rb'))
 13 | source(con)
 14 | close(con)
 15 | 
 16 | 
 17 | # Custom Function ---------------------------------------------------------
 18 | bt.matching.find2 <- function(
 19 |   data,
 20 |   n.query=90,
 21 |   n.reference=252*30,
 22 |   n.match=10,
 23 |   normalize.fn = normalize.mean.sd,
 24 |   dist.fn = dist.euclidean,
 25 |   plot=FALSE,
 26 |   plot.dist=FALSE,
 27 |   layout = NULL,
 28 |   main = NULL
 29 | )
 30 | {
 31 |   data = last(data, n.reference)
 32 |   reference = coredata(data)
 33 |   n = len(reference)
 34 |   query = reference[(n - n.query + 1):n]
 35 |   reference = reference[1:(n - n.query)]
 36 |   main = paste(main, join(format(range(index(data)[(n - n.query + 1):n]), '%d%b%Y'), ' - '))
 37 |   n.query = len(query)
 38 |   n.reference = len(reference)
 39 |   dist.fn.name = ''
 40 |   if(is.character(dist.fn)) {
 41 |     dist.fn.name = paste('with',dist.fn)
 42 |     dist.fn = get(dist.fn)
 43 |   }
 44 |   dist = rep(NA, n.reference)
 45 |   query.normalized = match.fun(normalize.fn)(query)
 46 |   for( i in n.query : n.reference ) {
 47 |     window = reference[ (i - n.query + 1) : i]
 48 |     window.normalized = match.fun(normalize.fn)(window)
 49 |     dist[i] = match.fun(dist.fn)(rbind(query.normalized, window.normalized))
 50 |     if( i %% 100 == 0) cat(i, '\n')
 51 |   }
 52 |   min.index = c()
 53 |   temp = dist
 54 |   temp[ temp > mean(dist, na.rm=T) ] = NA
 55 |   for(i in 1:n.match) {
 56 |     if(any(!is.na(temp))) {
 57 |       index = which.min(temp)
 58 |       min.index[i] = index
 59 |       temp[max(0,index - 2*n.query) : min(n.reference,(index + n.query))] = NA
 60 |     }
 61 |   }
 62 |   n.match = len(min.index)
 63 |   if(plot) {
 64 |     dates = index(data)[1:len(dist)]
 65 |     if(is.null(layout)) {
 66 |       if(plot.dist) layout(1:2) else layout(1)
 67 |     }
 68 |     par(mar=c(2, 4, 2, 2))
 69 |     if(plot.dist) {
 70 |       plot(dates, dist, type='l',col='gray', main=paste('Top Historical Matches for', main, dist.fn.name), ylab='Distance', xlab='')
 71 |       abline(h = mean(dist, na.rm=T), col='darkgray', lwd=2)
 72 |       points(dates[min.index], dist[min.index], pch=22, col='red', bg='red')
 73 |       text(dates[min.index], dist[min.index], 1:n.match, adj=c(1,1), col='black',xpd=TRUE)
 74 |     }
 75 |     plota(data, type='l', col='gray', LeftMargin = 1,
 76 |           main=iif(!plot.dist, paste('Top Historical Matches for', main), NULL)
 77 |     )
 78 |     plota.lines(last(data,90), col='blue')
 79 |     for(i in 1:n.match) {
 80 |       plota.lines(data[(min.index[i]-n.query + 1):min.index[i]], col='red')
 81 |     }
 82 |     text(index4xts(data)[min.index - n.query/2], reference[min.index - n.query/2], 1:n.match,
 83 |          adj=c(1,-1), col='black',xpd=TRUE)
 84 |     plota.legend(paste('Pattern: ', main, ',Match Number'),'blue,red')
 85 |   }
 86 |   return(list(min.index=min.index, dist=dist[min.index], query=query, reference=reference, dates = index(data), main = main))
 87 | }
 88 | 
 89 | 
 90 | #*****************************************************************
 91 | # Load historical data
 92 | #****************************************************************** 
 93 | #load.packages('quantmod')   
 94 | #tickers <- 'SPY'
 95 | 
 96 | #data <- getSymbols(tickers, src = 'yahoo', from = '1950-01-01', 
 97 | #                   auto.assign = FALSE)
 98 | 
 99 | data1 <- read.csv("oil_data.csv", as.is = TRUE, header = TRUE, sep = ",")
100 | data1 <- xts(data1[,2:ncol(data1)], as.Date(data1[, 1]))
101 | data1 <- cbind(data1, data1[,"Close"],data1[,"Close"])
102 | names(data1) <- c("Open", "High","Low", "Close","Volume", "Adjusted")
103 | data2 <- as.quantmod.OHLC(data1)
104 | 
105 | 
106 | par(mfrow=c(1, 2))
107 | obj <- bt.matching.find2(Cl(data2), normalize.fn = normalize.mean, 
108 |                          dist.fn = 'dist.euclidean', plot = TRUE)
109 | 
110 | obj <- bt.matching.find2(Cl(data2), normalize.fn = normalize.mean, 
111 |                          dist.fn = 'dist.DTW', plot=TRUE)
112 | par(mfrow=c(1, 1))
113 | 
114 | 
115 | 
116 | #*****************************************************************
117 | # Euclidean distance, one to one mapping
118 | #****************************************************************** 
119 | obj <- bt.matching.find2(Cl(data2), normalize.fn = normalize.mean, 
120 |                        dist.fn = 'dist.euclidean', plot = TRUE)
121 | 
122 | matches <- bt.matching.overlay(obj, plot.index=1:90, plot=TRUE)
123 | 
124 | layout(1:2)
125 | matches <- bt.matching.overlay(obj, plot=TRUE, layout=TRUE)
126 | bt.matching.overlay.table(obj, matches, plot=TRUE, layout=TRUE)
127 | 
128 | 
129 | #*****************************************************************
130 | # Dynamic time warping distance 
131 | #****************************************************************** 
132 | # http://en.wikipedia.org/wiki/Dynamic_time_warping
133 | # http://dtw.r-forge.r-project.org/
134 | #****************************************************************** 
135 | load.packages('dtw')
136 | 
137 | obj <- bt.matching.find2(Cl(data2), normalize.fn = normalize.mean, 
138 |                         dist.fn = 'dist.DTW', plot=TRUE)
139 | 
140 | matches <- bt.matching.overlay(obj, plot.index=1:90, plot = TRUE)
141 | 
142 | layout(1:2)
143 | matches <- bt.matching.overlay(obj, plot = TRUE, layout = TRUE)
144 | bt.matching.overlay.table(obj, matches, plot = TRUE, layout = TRUE)
145 | 


--------------------------------------------------------------------------------
/bar_charts/GCQ 2014-07-03.csv:
--------------------------------------------------------------------------------
  1 | Date,open,high,low,close
  2 | 2014-07-03,1316.8,1323.2,1309.4,1320.6
  3 | 2014-07-02,1326.7,1333.2,1322.1,1330.9
  4 | 2014-07-01,1326.5,1332.6,1325.2,1326.6
  5 | 2014-06-30,1312.5,1322.4,1312.4,1322
  6 | 2014-06-27,1317.4,1321.9,1317.3,1320
  7 | 2014-06-26,1311.6,1319.3,1311.4,1317
  8 | 2014-06-25,1314.5,1325.6,1314.3,1322.6
  9 | 2014-06-24,1321,1323.9,1317.2,1321.3
 10 | 2014-06-23,1314.6,1319.2,1313.3,1318.4
 11 | 2014-06-20,1313.8,1320.5,1310.5,1316.6
 12 | 2014-06-19,1287.3,1317.4,1286.4,1314.1
 13 | 2014-06-18,1273.9,1274.4,1269,1272.7
 14 | 2014-06-17,1264.6,1272.8,1258,1272
 15 | 2014-06-16,1282.4,1283,1272.5,1275.3
 16 | 2014-06-13,1272.6,1275.7,1272.2,1274.1
 17 | 2014-06-12,1263.8,1274.6,1261.8,1274
 18 | 2014-06-11,1264.3,1265.5,1258.6,1261.2
 19 | 2014-06-10,1255.5,1263.8,1255.1,1260.1
 20 | 2014-06-09,1257.1,1257.3,1252.3,1253.9
 21 | 2014-06-06,1254.3,1258.2,1245.7,1252.5
 22 | 2014-06-05,1244.5,1257.9,1243,1253.3
 23 | 2014-06-04,1248.2,1249.4,1242.8,1244.3
 24 | 2014-06-03,1247.2,1247.4,1240.2,1244.5
 25 | 2014-06-02,1246.9,1251,1243.5,1244
 26 | 2014-05-30,1250.5,1256.7,1242.2,1246
 27 | 2014-05-29,1253.6,1261.5,1252.1,1257.1
 28 | 2014-05-28,1266.2,1267.5,1256.1,1259.7
 29 | 2014-05-27,1279,1279.1,1264.5,1265.7
 30 | 2014-05-23,1289.9,1294.8,1286.7,1291.9
 31 | 2014-05-22,1298.1,1304.1,1293.7,1295.2
 32 | 2014-05-21,1290.1,1293.3,1286.5,1288.3
 33 | 2014-05-20,1291.3,1297.2,1286.2,1294.8
 34 | 2014-05-19,1300.3,1305.8,1293.5,1294
 35 | 2014-05-16,1295.6,1295.9,1288,1293.6
 36 | 2014-05-15,1303.4,1303.4,1291.4,1293.9
 37 | 2014-05-14,1306.8,1309.4,1302.8,1306.2
 38 | 2014-05-13,1291.5,1299,1290.5,1295.1
 39 | 2014-05-12,1297.1,1303.7,1295.2,1296
 40 | 2014-05-09,1293.5,1293.5,1286,1287.9
 41 | 2014-05-08,1291.5,1292,1285.7,1288.1
 42 | 2014-05-07,1302.5,1305.7,1287,1289.3
 43 | 2014-05-06,1306.9,1309.6,1304.8,1308.9
 44 | 2014-05-05,1314.6,1315.7,1309,1309.6
 45 | 2014-05-02,1286.2,1304.8,1273,1303.1
 46 | 2014-05-01,1280.4,1289,1278,1283.6
 47 | 2014-04-30,1288.1,1296.8,1285.1,1296
 48 | 2014-04-29,1290.5,1301.8,1289.8,1296.5
 49 | 2014-04-28,1300.6,1303.7,1292.6,1299.1
 50 | 2014-04-25,1302.2,1305,1298.6,1300.9
 51 | 2014-04-24,1270,1298.5,1268.5,1290.7
 52 | 2014-04-23,1287,1287.4,1281.3,1284.6
 53 | 2014-04-22,1290.6,1293,1276.8,1281.1
 54 | 2014-04-21,1283.8,1293,1283.1,1288.6
 55 | 2014-04-17,1301.6,1302.4,1293.5,1294
 56 | 2014-04-16,1301.2,1306.7,1298.1,1303.7
 57 | 2014-04-15,1301.5,1304,1287.3,1300.5
 58 | 2014-04-14,1323.9,1331,1322.7,1327.7
 59 | 2014-04-11,1321.9,1322.3,1316.3,1319.2
 60 | 2014-04-10,1323.6,1323.6,1316.8,1320.7
 61 | 2014-04-09,1304.7,1306.8,1301.4,1306.1
 62 | 2014-04-08,1313.5,1313.5,1308,1309.4
 63 | 2014-04-07,1298.6,1301.9,1296,1298.6
 64 | 2014-04-04,1293.2,1307.2,1290,1303.8
 65 | 2014-04-03,1283.9,1289.5,1283.2,1284.9
 66 | 2014-04-02,1289.2,1295,1289.2,1291.1
 67 | 2014-04-01,1286.3,1287.1,1279.8,1280.2
 68 | 2014-03-31,1293.8,1295.4,1283.6,1284.1
 69 | 2014-03-28,1288.5,1297.5,1287.1,1294.6
 70 | 2014-03-27,1293.8,1299.8,1293.8,1295.1
 71 | 2014-03-26,1313.3,1313.9,1302,1303.7
 72 | 2014-03-25,1309,1316.3,1307.2,1311.7
 73 | 2014-03-24,1322.8,1323.6,1309.5,1311.4
 74 | 2014-03-21,1339.6,1340.1,1334.5,1336.2
 75 | 2014-03-20,1322.1,1331.7,1322.1,1331
 76 | 2014-03-19,1348,1348,1338.9,1341.8
 77 | 2014-03-18,1355.3,1363,1352,1359.4
 78 | 2014-03-17,1380,1383,1370.7,1373.4
 79 | 2014-03-14,1375.6,1388.1,1374.6,1379.5
 80 | 2014-03-13,1366.2,1372.9,1366.1,1372.9
 81 | 2014-03-12,1366,1371.4,1364.9,1371
 82 | 2014-03-11,1351.8,1351.8,1339.3,1347.4
 83 | 2014-03-10,1339.3,1344.6,1337.2,1342.2
 84 | 2014-03-07,1344,1344,1329,1338.8
 85 | 2014-03-06,1337.3,1352.4,1337.3,1352.4
 86 | 2014-03-05,1338.4,1341.7,1335.4,1340.9
 87 | 2014-03-04,1334.6,1338.7,1334.6,1338.5
 88 | 2014-03-03,1348,1354.9,1347.5,1350.9
 89 | 2014-02-28,1333.3,1333.3,1322.2,1322.2
 90 | 2014-02-27,1331.8,1335,1331.6,1332.3
 91 | 2014-02-26,1331.4,1336,1323.5,1328.5
 92 | 2014-02-25,1338,1344,1334.4,1343.2
 93 | 2014-02-24,1332.6,1338.5,1330,1338.5
 94 | 2014-02-21,1323,1324.3,1321.5,1324.2
 95 | 2014-02-20,1316.2,1317.4,1313.8,1317.4
 96 | 2014-02-19,1319.8,1322,1319.8,1320.9
 97 | 2014-02-18,1321.7,1325.2,1321.2,1325
 98 | 2014-02-14,1317,1321,1316.3,1319.3
 99 | 2014-02-13,1292.9,1300.9,1292.9,1300.7
100 | 2014-02-12,1290.8,1295.7,1290.5,1295.5
101 | 2014-02-11,1283.7,1295,1279,1290.7
102 | 2014-02-10,1278.3,1278.3,1274.5,1275.6
103 | 2014-02-07,1261,1271.8,1261,1263.9
104 | 2014-02-06,1262,1264.2,1254.7,1258.1
105 | 2014-02-05,1268,1273,1256.8,1257.8
106 | 2014-02-04,1251.7,1252,1249.5,1252
107 | 2014-02-03,1247.9,1266,1247.9,1260.9
108 | 2014-01-31,1253.5,1253.5,1240.8,1240.8
109 | 2014-01-30,1242.8,1247.4,1241,1243.6
110 | 2014-01-29,1268.2,1269.3,1262.6,1263.3
111 | 2014-01-28,1257.7,1258.2,1250.6,1251.7
112 | 2014-01-27,1261.9,1264.7,1259.9,1264.7
113 | 2014-01-24,1269,1269,1264.1,1265.7
114 | 2014-01-23,1256.6,1265.7,1256.6,1263.7
115 | 2014-01-22,1239.9,1239.9,1239.9,1239.9
116 | 2014-01-21,1240.3,1243.2,1238,1243.2
117 | 2014-01-17,1249.7,1253.7,1249,1253.5
118 | 2014-01-16,1243.3,1243.3,1241.7,1241.8
119 | 2014-01-15,1237.7,1241.2,1236.8,1240.2
120 | 2014-01-14,1253.7,1253.7,1244,1247.5
121 | 2014-01-13,1251.7,1253.2,1251.7,1253.1
122 | 2014-01-10,1247.5,1249.2,1246.6,1248.8
123 | 2014-01-09,1227.1,1231.5,1227.1,1231.1
124 | 2014-01-08,1223.5,1227.9,1221.9,1227.3
125 | 2014-01-07,1237,1237,1227.9,1231.4
126 | 2014-01-06,1244.7,1248.6,1222,1239.9
127 | 2014-01-03,1229.6,1241,1229.6,1240.6
128 | 2014-01-02,1220,1227.1,1220,1227.1
129 | 2013-12-31,1195,1215.5,1187.2,1204.2
130 | 2013-12-30,1205.8,1205.8,1205.8,1205.8
131 | 2013-12-27,1213,1217.6,1213,1216
132 | 2013-12-26,1216.6,1216.7,1214.4,1214.4
133 | 2013-12-24,1202.9,1205.6,1202.9,1205.4
134 | 2013-12-23,1204.1,1204.1,1199.1,1199.1
135 | 2013-12-20,1195.3,1207.4,1195.3,1205.8
136 | 2013-12-19,1204,1205.4,1195.8,1195.8
137 | 2013-12-18,1237.2,1237.2,1237.2,1237.2
138 | 2013-12-17,1235.4,1235.4,1230.2,1232.3
139 | 2013-12-16,1250.4,1250.4,1246.6,1246.6
140 | 2013-12-13,1233.7,1236.7,1233.2,1236.7
141 | 2013-12-12,1236.6,1237.2,1227,1227
142 | 2013-12-11,1259.3,1259.3,1259.3,1259.3
143 | 2013-12-10,1263.1,1263.1,1263.1,1263.1
144 | 2013-12-09,1237,1237,1234.6,1236.2
145 | 2013-12-06,1217,1236.7,1217,1231
146 | 2013-12-05,1230.7,1235.7,1221,1234.1
147 | 2013-12-04,1220.7,1249.5,1220.2,1249.5
148 | 2013-12-03,1221.3,1226.2,1221.3,1223.1
149 | 2013-12-02,1231.1,1231.1,1224.2,1224.2
150 | 2013-11-29,1253.4,1256.7,1252.8,1252.8
151 | 2013-11-27,1255,1255,1240.3,1240.3
152 | 2013-11-26,1245.7,1249.7,1243.8,1243.8
153 | 2013-11-25,1243.3,1245.4,1241,1243.8
154 | 2013-11-22,1246.8,1246.8,1246.8,1246.8
155 | 2013-11-21,1243.3,1248,1243.3,1246.4
156 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_11/code.R:
--------------------------------------------------------------------------------
  1 | # CHAPTER 11
  2 | # Speed, Testing, and Reporting
  3 | 
  4 | ##################################
  5 | # Runtime execution improvements #
  6 | ##################################
  7 | sum_with_loop_in_r <- function(max_value) {
  8 |   sum <- 0
  9 |   for(i in 1:max_value) {
 10 |     sum <- sum + i
 11 |   }
 12 |   return(sum)
 13 | }
 14 | 
 15 | sum_with_vectorization_in_r = function(max_value) {
 16 |   numbers <- as.double(1:max_value)
 17 |   return(sum(numbers))
 18 | }
 19 | 
 20 | #######################
 21 | # Benchmarking R code #
 22 | #######################
 23 | library(microbenchmark)
 24 | microbenchmark(loop = sum_with_loop_in_r(1e5),
 25 |   vectorized = sum_with_vectorization_in_r(1e5))
 26 | 
 27 | ## Unit: microseconds
 28 | ## expr        min       lq        median
 29 | ## loop        57615.323 59424.740 60992.7720
 30 | ## vectorized  260.602   273.673   286.5495
 31 | 
 32 | ## uq        max       neval
 33 | ## 89608.441 96694.469 100
 34 | ## 294.236   414.349   100
 35 | 
 36 | compiled_sum_with_loop_in_r <- cmpfun(sum_with_loop_in_r)
 37 |   microbenchmark(loop = sum_with_loop_in_r(1e5),
 38 |   compiled = compiled_sum_with_loop_in_r(1e5),
 39 |   vectorized = sum_with_vectorization_in_r(1e5))
 40 | 
 41 | ## Unit: microseconds
 42 | ## expr        min       lq         median
 43 | ## loop        56746.652 58343.8945 60602.445
 44 | ## compiled    4688.146  4758.6770  4892.246
 45 | ## vectorized  249.457   273.8635   284.050
 46 | 
 47 | ##  uq         max        neval
 48 | ##  86599.9875 96736.750  100
 49 | ##  5498.9710  46484.009  100
 50 | ##  292.3135   473.927    100
 51 | 
 52 | #####################
 53 | # The Rcpp solution #
 54 | #####################
 55 | long add_cpp(long max_value) {
 56 |   long sum = 0;
 57 |   for(long i = 1; i <= max_value; ++i) {
 58 |     sum = sum + i;
 59 |   }
 60 |   return sum;
 61 | }
 62 | 
 63 | lapply
 64 | function (X, FUN, ...)
 65 | {
 66 |   FUN <- match.fun(FUN)
 67 |   if (!is.vector(X) || is.object(X))
 68 |     X <- as.list(X)
 69 |     .Internal(lapply(X, FUN))
 70 | }
 71 | 
 72 | library(Rcpp)
 73 | 
 74 | # Create a C++ function
 75 | cppFunction('
 76 |   long add_cpp(long max_value) {
 77 |     long sum = 0;
 78 |     for(long i = 1; i <= max_value; ++i) {
 79 |      sum = sum + i;
 80 |     }
 81 |     return sum;
 82 |    }'
 83 | )
 84 | 
 85 | add_cpp
 86 | ## function (max_value)
 87 | ## .Primitive(".Call")(<pointer: 0x10f52fbb0>, max_value)
 88 | 
 89 | add_cpp(1e5)
 90 | ## [1] 5000050000
 91 | 
 92 | microbenchmark(loop = sum_with_loop_in_r(1e5),
 93 |   compiled = compiled_sum_with_loop_in_r(1e5),
 94 |   vectorized = sum_with_vectorization_in_r(1e5),
 95 |   compiled_cpp = add_cpp(1e5))
 96 | 
 97 | ## Unit: microseconds
 98 | ## expr         min       lq         median
 99 | ## loop         73049.461 76640.5945 79635.8810
100 | ## compiled     7359.040  7487.9655  7795.9125
101 | ## vectorized   804.773   932.9285   1031.9695
102 | ## compiled_cpp 79.573    88.2615    98.9385
103 | 
104 | ## uq         max        neval
105 | ## 80676.6600 94618.174  100
106 | ## 12101.8610 135353.743 100
107 | ## 1373.8565  2148.409   100
108 | ## 105.2440   135.781    100
109 | 
110 | sourceCpp('Chapter_11/add_2_file.cpp')
111 | 
112 | add_2_cpp(100)
113 | ## [1] 5050
114 | 
115 | ###################################
116 | # Calling R from C++ with RInside #
117 | ###################################
118 | #include <RInside.h>
119 | int main(int argc, char *argv[]) {
120 | 
121 |  // create an embedded R instance
122 |  RInside R(argc, argv);
123 | 
124 |  // assign a char* (string) to "txt"
125 |  R["txt"] = "Hello, world!\n";
126 | 
127 |  // eval the init string, ignoring any returns
128 |  R.parseEvalQ("cat(txt)");
129 | 
130 |  exit(0);
131 | }
132 | 
133 | ####################################
134 | # Writing unit tests with testthat #
135 | ####################################
136 | # Define function
137 | convert_to_returns <- function(prices) {
138 |   return(9)
139 | }
140 | 
141 | require(testthat)
142 | 
143 | # Group related functionality together with context()
144 | context("Price to log-return conversion")
145 | 
146 | # Define the expectations using expect_that()
147 | test_that("convert_to_returns produces the correct values", {
148 | 
149 |   # For these inputs
150 |   input_prices <- c(100, 101, 102, 103, 99)
151 | 
152 |   # Expect these outputs
153 |   expected_returns <- c(0.009950331,
154 |     0.009852296, 0.009756175, -0.039609138)
155 | 
156 |   # Verify the expectation of equality
157 |   expect_equal(expected_returns,
158 |     convert_to_returns(input_prices))
159 | })
160 | 
161 | ## Error: Test failed: 'convert_to_returns produces
162 | ## the correct values'
163 | ## Not expected: expected_returns not equal to
164 | ## convert_to_returns(input_prices)
165 | ## Numeric: lengths (1, 4) differ.
166 | 
167 | # Define function
168 | convert_to_returns <- function(prices) {
169 |   return(diff(log(prices)))
170 | }
171 | 
172 | # Verify the error message
173 | input_prices <- c(100)
174 | msg <- "Not enough price entries."
175 | 
176 | expect_message(msg, convert_to_returns(input_prices))
177 | ## Error: expected_message no messages shown
178 | 
179 | # Function with corner case check
180 | convert_to_returns <- function(prices) {
181 |   if(length(prices) < 2) {
182 |     message("Not enough price entries.")
183 |   }
184 |   return(diff(log(prices)))
185 | }
186 | 
187 | test_file("example_test_file.r")
188 | 
189 | #################################
190 | # Using knitr for documentation #
191 | #################################
192 | \documentclass{article}
193 | \usepackage[T1]{fontenc}
194 | 
195 | \begin{document}
196 | 
197 | This is an example LaTeX document with some embedded
198 | R code woven in for convenience.
199 | 
200 | <<foo, fig.height = 4>>=
201 | 
202 | x = 1:10
203 | y = x^2
204 | plot(x, y, main = "This is a graph")
205 | @
206 | 
207 | Inline expressions can be written by using the
208 | \verb|\Sexpr{}| convention, e.g. $\pi=\Sexpr{pi}$
209 | and \Sexpr{2.3492e7} and \Sexpr{x / 2.0}.
210 | 
211 | \subsection*{A different subsection}
212 | We can insert graphs without displaying the code.
213 | This can be done using the \texttt{echo = FALSE}
214 | command within the code chunk argument list.
215 | 
216 | <<foo2, fig.height = 3, echo = FALSE>>=
217 | x = 1:10
218 | y=x^3
219 | plot(x, y, main = "This is a second graph")
220 | @
221 | 
222 | Any R code can be run within the code chunks
223 | provided by knitr. This next example loads up
224 | \texttt{ggplot2}, and the code creates a nice looking
225 | density histogram.
226 | 
227 | <<foo3, fig.height = 6, tidy = FALSE >>=
228 | require(ggplot2)
229 | my_data = data.frame(returns = c(0.03, 0.04, 0.05,
230 |   0.032, 0.01, 0.23, 0.4, 0.05, 0.066, 0.5),
231 |   stock = c("SPY", "CVX", "CVX", "SPY",
232 |   "XOM", "XOM", "CVX", "SPY", "SPY", "XOM"))
233 | 
234 | ggplot(my_data, aes(x = returns, fill = stock)) +
235 |   geom_density(alpha = 0.2)
236 | @
237 | \end{document}
238 | 
239 | 
240 | 
241 | 
242 | 
243 | 
244 | 
245 | 
246 | 
247 | 
248 | 
249 | 
250 | 
251 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_05/code.R:
--------------------------------------------------------------------------------
  1 | # CHAPTER 5
  2 | # Intermediate Statistics and Probability
  3 | 
  4 | #############################
  5 | # Stock price distributions #
  6 | #############################
  7 | # Extract prices and compute statistics
  8 | prices <- SPY$SPY.Adjusted
  9 | mean_prices <- round(mean(prices), 2)
 10 | sd_prices <- round(sd(prices), 2)
 11 | 
 12 | # Plot the histogram along with a legend
 13 | hist(prices, breaks = 100, prob=T, cex.main = 0.9)
 14 | abline(v = mean_prices, lwd = 2)
 15 | legend("topright", cex = 0.8, border = NULL, bty = "n",
 16 |   paste("mean=", mean_prices, "; sd=", sd_prices))
 17 | 
 18 | plot_4_ranges <- function(data, start_date, end_date, title) {
 19 | 
 20 |   # Set the plot window to be 2 rows and 2 columns
 21 |   par(mfrow = c(2, 2))
 22 |   for(i in 1:4) {
 23 |     # Create a string with the appropriate date range
 24 |     range <- paste(start_date[i], "::", end_date[i], sep = "")
 25 | 
 26 |     # Create the price vector and necessary statistics
 27 |     time_series <- data[range]
 28 | 
 29 |     mean_data <- round(mean(time_series, na.rm = TRUE), 3)
 30 |     sd_data <- round(sd(time_series, na.rm = TRUE), 3)
 31 | 
 32 |     # Plot the histogram along with a legend
 33 |     hist_title <- paste(title, range)
 34 |     hist(time_series, breaks = 100, prob=TRUE,
 35 |      xlab = "", main = hist_title, cex.main = 0.8)
 36 |     legend("topright", cex = 0.7, bty = 'n',
 37 |      paste("mean=", mean_data, "; sd=", sd_data))
 38 |   }
 39 | 
 40 |   # Reset the plot window
 41 |   par(mfrow = c(1, 1))
 42 | }
 43 | 
 44 | # Define start and end dates of interest
 45 | begin_dates <- c("2007-01-01", "2008-06-06",
 46 |   "2009-10-10", "2011-03-03")
 47 | end_dates <- c("2008-06-05", "2009-09-09",
 48 |   "2010-12-30", "2013-01-06")
 49 | 
 50 | # Create plots
 51 | plot_4_ranges(prices, begin_dates,
 52 |   end_dates, "SPY prices for:")
 53 | 
 54 | ################
 55 | # Stationarity #
 56 | ################
 57 | # Compute log returns
 58 | returns <- diff(log(prices))
 59 | 
 60 | # Use the same function as before to plot returns rather than prices
 61 | plot_4_ranges(returns, begin_dates, end_dates, "SPY log prices for:")
 62 | 
 63 | ######################################
 64 | # Determining stationarity with urca #
 65 | ######################################
 66 | # Get SPY data and let's confirm that it is non-stationary
 67 | require(quantmod)
 68 | getSymbols("SPY")
 69 | spy <- SPY$SPY.Adjusted
 70 | 
 71 | # Use the default settings
 72 | require(urca)
 73 | test <- ur.kpss(as.numeric(spy))
 74 | 
 75 | # The output is an S4 object
 76 | class(test)
 77 | ## [1] "ur.kpss"
 78 | ## attr(,"package")
 79 | ## [1] "urca"
 80 | 
 81 | # Extract the test statistic
 82 | test@teststat
 83 | ## [1] 11.63543
 84 | 
 85 | # Look at the critical values
 86 | test@cval
 87 | ##                10pct  5pct 2.5pct  1pct
 88 | ## critical values 0.347 0.463  0.574 0.739
 89 | 
 90 | spy_returns <- diff(log(spy))
 91 | 
 92 | # Test on the returns
 93 | test_returns <- ur.kpss(as.numeric(spy_returns))
 94 | test_returns@teststat
 95 | ## [1] 0.336143
 96 | 
 97 | test_returns@cval
 98 | ##                10pct  5pct 2.5pct  1pct
 99 | ## critical values 0.347 0.463  0.574 0.739
100 | 
101 | test_post_2013 <- ur.kpss(as.numeric(spy_returns['2013::']))
102 | test_post_2013@teststat
103 | ## [1] 0.06936672
104 | 
105 | ############################
106 | # Assumptions of normality #
107 | ############################
108 | # Plot histogram and density
109 | mu <- mean(returns, na.rm = TRUE)
110 | sigma <- sd(returns, na.rm = TRUE)
111 | x <- seq(-5 * sigma, 5 * sigma, length = nrow(returns))
112 | 
113 | hist(returns, breaks = 100,
114 |   main = "Histogram of returns for SPY",
115 |   cex.main = 0.8, prob=TRUE)
116 | lines(x, dnorm(x, mu, sigma), col = "red", lwd = 2)
117 | 
118 |  # Set plotting window
119 | par(mfrow = c(1, 2))
120 | 
121 | # SPY data
122 | qqnorm(as.numeric(returns),
123 |   main = "SPY empirical returns qqplot()",
124 |   cex.main = 0.8)
125 | qqline(as.numeric(returns),  lwd = 2)
126 | grid()
127 | 
128 | # Normal random data
129 | normal_data <- rnorm(nrow(returns), mean = mu, sd = sigma)
130 | 
131 | qqnorm(normal_data, main = "Normal returns", cex.main = 0.8)
132 | qqline(normal_data, lwd = 2)
133 | grid()
134 | 
135 | answer <- shapiro.test(as.numeric(returns))
136 | 
137 | answer[[2]]
138 | ## [1] 5.118396e-34
139 | 
140 | set.seed(129)
141 | normal_numbers <- rnorm(5000, 0, 1)
142 | ans <- shapiro.test(normal_numbers)
143 | 
144 | ans[[2]]
145 | ## [1] 0.9963835
146 | 
147 | # Corrupt a single data point
148 | normal_numbers[50] <- 1000
149 | ans <- shapiro.test(normal_numbers)
150 | 
151 | ans[[2]]
152 | ## [1] 1.775666e-95
153 | 
154 | ###############
155 | # Correlation #
156 | ###############
157 | sv <- as.xts(returns_matrix[, c(1, 6)])
158 | 
159 | head(sv)
160 | ##               SPY.Close    VXX.Close
161 | ## 2009-02-02 -0.003022794 -0.003160468
162 | ## 2009-02-03  0.013949240 -0.047941603
163 | ## 2009-02-04 -0.004908132  0.003716543
164 | ## 2009-02-05  0.014770965 -0.006134680
165 | 
166 | cor(sv)
167 | ##            SPY.Close  VXX.Close
168 | ## SPY.Close  1.0000000 -0.4603908
169 | ## VXX.Close -0.4603908  1.0000000
170 | 
171 | ##################
172 | # Filtering data #
173 | ##################
174 | # Find the outliers
175 | outliers <- which(sv[, 2] > 1.0)
176 | 
177 | # If any outliers exist, remove them
178 | if(length(outliers) > 0) {
179 |   sv <- sv[-outliers, ]
180 | }
181 | 
182 | cor(sv)
183 | ##            SPY.Close  VXX.Close
184 | ## SPY.Close  1.0000000 -0.8066466
185 | ## VXX.Close -0.8066466  1.0000000
186 | 
187 | ##############
188 | # R formulas #
189 | ##############
190 | # Create a formula
191 | my_formula <- as.formula("y ~ x")
192 | 
193 | # What is the output?
194 | my_formula
195 | ## y ~ x
196 | 
197 | # What is the class of my_formula?
198 | class(my_formula)
199 | ## [1] "formula"
200 | 
201 | # Create a linear regression object
202 | reg <- lm(VXX.Close ~ SPY.Close, data = sv)
203 | 
204 | # Here is the output
205 | summary(reg)
206 | 
207 | ## Call:
208 | ## lm(formula = VXX.Close ~ SPY.Close, data = sv)
209 | 
210 | ## Residuals:
211 | ##    Min        1Q    Median        3Q       Max
212 | ## -0.085607 -0.012830 -0.000865  0.012188  0.116349
213 | 
214 | ## Coefficients:
215 | ##            Estimate Std. Error t value Pr(>|t|)
216 | ## (Intercept) -0.0024365  0.0006641  -3.669 0.000254 ***
217 | ## SPY.Close   -2.5848492  0.0552193 -46.811  < 2e-16 ***
218 | ## ---
219 | ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1
220 | 
221 | ## Residual standard error: 0.02287 on 1187 degrees of freedom
222 | ## Multiple R-squared:  0.6486,Adjusted R-squared:  0.6483
223 | ## F-statistic:  2191 on 1 and 1187 DF,  p-value: < 2.2e-16
224 | 
225 | b <- reg$coefficients[1]
226 | a <- reg$coefficients[2]
227 | 
228 | par(mfrow = c(2, 2))
229 | plot(reg$residuals,
230 |   main = "Residuals through time",
231 |   xlab = "Days", ylab = "Residuals")
232 | hist(reg$residuals, breaks = 100,
233 |   main = "Distribution of residuals",
234 |   xlab = "Residuals")
235 | qqnorm(reg$residuals)
236 | qqline(reg$residuals)
237 | acf(reg$residuals, main = "Autocorrelation")
238 | 
239 | 
240 | vxx_lag_1 <- lag(VXX$VXX.Close, k = 1)
241 | 
242 | head(vxx_lag_1)
243 | ##               VXX.Close
244 | ## 2009-01-30    NA
245 | ## 2009-02-02    104.58
246 | ## 2009-02-03    104.25
247 | ## 2009-02-04    99.37
248 | ## 2009-02-05    99.74
249 | ## 2009-02-06    99.13
250 | 
251 | head(VXX$VXX.Close)
252 | ##               VXX.Close
253 | ## 2009-01-30    104.58
254 | ## 2009-02-02    104.25
255 | ## 2009-02-03    99.37
256 | ## 2009-02-04    99.74
257 | ## 2009-02-05    99.13
258 | ## 2009-02-06    97.70
259 | 
260 | # Merge returns with lagged returns
261 | sv <- merge(sv, lag(sv))
262 | 
263 | # Scatter plot of lagged SPY vs. VXX
264 | plot(as.numeric(sv[, 3]), as.numeric(sv[, 2]),
265 |   main = "Scatter plot SPY lagged vs. VXX.",
266 |   xlab = "SPY lagged",
267 |   ylab = "VXX"
268 |   cex.main = 0.8,
269 |   cex.axis = 0.8,
270 |   cex.lab = 0.8)
271 | grid()
272 | 
273 | reg2 <- lm(VXX.Close ~ SPY.Close.1, data = sv)
274 | 
275 | summary(reg2)
276 | ## Coefficients:
277 | ##           Estimate Std. Error t value Pr(>|t|)
278 | ## (Intercept) -0.004140   0.001121  -3.694 0.000231 ***
279 | ## SPY.Close.1  0.104119   0.093154   1.118 0.263918
280 | 
281 | ## Residual standard error: 0.03857 on 1186 degrees of freedom
282 | ## (1 observation deleted due to missingness)
283 | ## Multiple R-squared:  0.001052,Adjusted R-squared:  0.00021
284 | ## F-statistic: 1.249 on 1 and 1186 DF,  p-value: 0.2639
285 | 
286 | ccf(as.numeric(sv[, 1]), as.numeric(sv[, 2]),
287 |   main = "Cross correlation between SPY and VXX",
288 |   ylab = "Cross correlation", xlab = "Lag", cex.main = 0.8,
289 |   cex.lab = 0.8, cex.axis = 0.8)
290 | 
291 | ###################################
292 | # The linear in linear regression #
293 | ###################################
294 | x <- seq(1:100)
295 | y <- x ^ 2
296 | 
297 | # Generate the plot
298 | plot(x, y)
299 | 
300 | # Fit the regression
301 | reg_parabola <- lm(y ~ x)
302 | 
303 | # Superimpose the best fit line on the plot
304 | abline(reg_parabola, lwd = 2)
305 | 
306 | # Look at the results
307 | summary(reg_parabola)
308 | ## Coefficients:
309 | ##               Estimate    Std. Error t value Pr(>|t|)
310 | ## (Intercept)  -1717.000   151.683  -11.32   <2e-16 ***
311 | ## x              101.000     2.608   38.73   <2e-16 ***
312 | ## ---
313 | ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1
314 | 
315 | ## Residual standard error: 752.7 on 98 degrees of freedom
316 | ## Multiple R-squared:  0.9387,Adjusted R-squared:  0.9381
317 | ## F-statistic:  1500 on 1 and 98 DF,  p-value: < 2.2e-16
318 | 
319 | plot(x, sqrt(y))
320 | reg_transformed <- lm(sqrt(y) ~ x)
321 | abline(reg_transformed)
322 | 
323 | summary(reg_transformed)
324 | ## Coefficients:
325 | ##               Estimate Std. Error  t value     Pr(>|t|)
326 | ## (Intercept) -5.684e-14  5.598e-15 -1.015e+01   <2e-16 ***
327 | ## x            1.000e+00  9.624e-17  1.039e+16   <2e-16 ***
328 | ## ---
329 | ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1
330 | 
331 | ## Residual standard error: 2.778e-14 on 98 degrees of freedom
332 | ## Multiple R-squared:      1,Adjusted R-squared:      1
333 | ## F-statistic: 1.08e+32 on 1 and 98 DF,  p-value: < 2.2e-16
334 | 
335 | ##############
336 | # Volatility #
337 | ##############
338 | # Generate 1000 IID numbers from a normal distribution.
339 | z <- rnorm(1000, 0, 1)
340 | 
341 | # Autocorrelation of returns and squared returns
342 | par(mfrow = c(2, 1))
343 | acf(z, main = "returns", cex.main = 0.8,
344 |   cex.lab = 0.8, cex.axis = 0.8)
345 | 
346 | grid()
347 | acf(z ^ 2, main = "returns squared",
348 |   cex.lab = 0.8, cex.axis = 0.8)
349 | grid()
350 | 
351 | par(mfrow = c(1, 1))
352 | acf(sv[, 1] ^ 2, main = "Actual returns squared",
353 |   cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)
354 | grid()
355 | 
356 | par(mfrow = c(1, 2))
357 | acf(sv[, 1]^3)
358 | acf(abs(sv[, 1])
359 | 
360 | 
361 | 
362 | 
363 | 
364 | 
365 | 
366 | 
367 | 
368 | 
369 | 
370 | 
371 | 
372 | 
373 | 
374 | 
375 | 
376 | 
377 | 
378 | 
379 | 
380 | 
381 | 
382 | 
383 | 
384 | 
385 | 
386 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_09/code.R:
--------------------------------------------------------------------------------
  1 | # CHAPTER 9
  2 | # Options
  3 | 
  4 | ########################
  5 | # Valuation of options #
  6 | ########################
  7 | install.packages("RQuantLib")
  8 | library(RQuantLib)
  9 | 
 10 | lsf.str("package:RQuantLib")
 11 | ## adjust : function (calendar = "TARGET", dates = Sys.Date(),
 12 | ##   bdc = 0)
 13 | ## advance : function (calendar = "TARGET", dates = Sys.Date(),
 14 | ##   n, timeUnit, period, bdc = 0, emr = 0)
 15 | ## AmericanOption : function (type, underlying, strike, dividendYield,
 16 | ##   riskFreeRate, maturity, volatility, timeSteps = 150,
 17 | ##   gridPoints = 149, engine = "BaroneAdesiWhaley")
 18 | ## AmericanOption.default : function (type, underlying, strike,
 19 | ##   dividendYield, riskFreeRate, maturity, volatility,
 20 | ##   timeSteps = 150, gridPoints = 149, engine = "BaroneAdesiWhaley")
 21 | ## AmericanOptionImpliedVolatility : function (type, value,
 22 | ##   underlying, strike, dividendYield, riskFreeRate,
 23 | ##   maturity, volatility, timeSteps = 150, gridPoints = 151)
 24 | ## AmericanOptionImpliedVolatility.default : function (type, value,
 25 | ##   underlying, strike, dividendYield, riskFreeRate, maturity,
 26 | ##   volatility, timeSteps = 150, gridPoints = 151)
 27 | ## ...
 28 | 
 29 | call_value <- EuropeanOption(type = "call", underlying = 100,
 30 |   strike = 100, dividendYield = 0, riskFreeRate = 0.03,
 31 |   maturity = 1.0, volatility = 0.30)
 32 | 
 33 | ## Concise summary of valuation for EuropeanOption
 34 | ## value      delta    gamma     vega    theta
 35 | ## 13.2833   0.5987   0.0129  38.6668  -7.1976
 36 | ## rho   divRho
 37 | ## 46.5873 -59.8706
 38 | 
 39 | class(call_value)
 40 | ## [1] "EuropeanOption" "Option"
 41 | 
 42 | type <- "call"
 43 | underlying <- 20:180
 44 | strike <- 100
 45 | dividendYield <- 0
 46 | riskFreeRate <- 0.03
 47 | maturity <- 1.0
 48 | volatility <- 0.10
 49 | 
 50 | # Function to create plots of option values and Greeks.
 51 | option_values <- function(type, underlying, strike,
 52 |   dividendYield, riskFreeRate, maturity, volatility) {
 53 | 
 54 |   # Output list with option values and Greeks
 55 |   out <- list()
 56 |   for(i in seq_along(underlying)) {
 57 |       out[[i]] <- EuropeanOption(type = type, underlying = i,
 58 |         strike = strike, dividendYield = dividendYield,
 59 |         riskFreeRate = riskFreeRate, maturity = maturity,
 60 |         volatility = volatility)
 61 |   }
 62 | 
 63 |   # Set up the plot window
 64 |   par(mfrow = c(3, 2))
 65 |   names <- c("Value", "Delta", "Gamma",
 66 |     "Vega", "Theta", "Rho")
 67 | 
 68 |   for(i in 1:6) {
 69 |     plot(unlist(lapply(out, "[", i)) , type = "l",
 70 |       main = paste(names[i], "vs. Underlying"),
 71 |       xlab = "Underlying", ylab = names[i])
 72 |       grid()
 73 |       abline(v = strike, col = "red")
 74 |   }
 75 |   return(out)
 76 | }
 77 | 
 78 | option_values(type, underlying, strike, dividendYield,
 79 |   riskFreeRate, maturity, volatility)
 80 | ## ...
 81 | ## [[96]]
 82 | ## Concise summary of valuation for EuropeanOption
 83 | ## value    delta    gamma     vega    theta
 84 | ## 3.3493   0.4768   0.0415  38.2336  -3.1843
 85 | ## rho   divRho
 86 | ## 42.4222 -45.7715
 87 | 
 88 | ## [[97]]
 89 | ## Concise summary of valuation for EuropeanOption
 90 | ## value    delta    gamma     vega    theta
 91 | ## 3.8468   0.5181   0.0411  38.6575  -3.3252
 92 | ## rho   divRho
 93 | ## 46.4098 -50.2566
 94 | 
 95 | ## [[98]]
 96 | ## Concise summary of valuation for EuropeanOption
 97 | ## value    delta    gamma     vega    theta
 98 | ## 4.3853   0.5588   0.0403  38.6707  -3.4449
 99 | ## rho   divRho
100 | ## 50.3788 -54.7642
101 | 
102 | option_values(type, underlying, strike, dividendYield,
103 |   riskFreeRate, maturity = 0.1, volatility)
104 | 
105 | ################################
106 | # Exploring options trade data #
107 | ################################
108 | # Create a vector of filenames
109 | folder <- "path/Options/SPY_20130415_T/"
110 | available_files <- list.files(folder)
111 | 
112 | # Explore the first few lines of the file
113 | temp <- read.csv(file = paste0(folder, available_files[1]),
114 |   header = FALSE, stringsAsFactors = FALSE)
115 | 
116 | column_names <- c("date", "time", "trade_indicator",
117 |   "sequence_number", "option_exchange_code",
118 |   "option_condition_code", "sale_price", "sale_size",
119 |   "underlying_last_trade_price",
120 |   "underling_last_trade_size",
121 |   "stock_exchange_code", "stock_condition_code",
122 |   "underlying_bid_price", "underlying_bid_size",
123 |   "underlying_ask_price", "underlying_ask_size")
124 | names(temp) <- column_names
125 | 
126 | output <- list()
127 | for(i in 1:length(available_files)) {
128 |   file_name <- available_files[i]
129 | 
130 |   type <- substr(file_name, 5, 5)
131 |   date <- substr(file_name, 7, 14)
132 |   date <- as.Date(date, format = "%Y%m%d")
133 |   strike <- substr(file_name, 16, 26)
134 |   strike <- strsplit(strike, "_XX")[[1]][1]
135 | 
136 |   temp <- read.csv(file = paste0(folder, file_name),
137 |     header = FALSE, stringsAsFactors = FALSE)
138 |   names(temp) <- column_names
139 | 
140 |   number_of_trades <- nrow(temp)
141 |   avg_trade_price <- round(mean(temp$sale_price,
142 |     na.rm = TRUE), 3)
143 | 
144 |   if(number_of_trades <= 1) {
145 |     sd_trade_price <- 0
146 |   } else {
147 |     sd_trade_price <- round(sd(temp$sale_price,
148 |       na.rm = TRUE), 3)
149 |   }
150 | 
151 |   total_volume <- sum(temp$sale_size, na.rm = TRUE)
152 |   avg_underlying_price <- round(mean(
153 |     temp$underlying_bid_price, na.rm = TRUE), 2)
154 |   underlying_range <- max(temp$underlying_ask_price) -
155 |     min(temp$underlying_bid_price)
156 | 
157 |   output[[i]] <- data.frame(symbol = 'SPY', date = date,
158 |     type = type, strike = strike,
159 |     trades = number_of_trades,
160 |     volume = total_volume,
161 |     avg_price = avg_trade_price,
162 |     sd_price = sd_trade_price,
163 |     avg_stock_price = avg_underlying_price,
164 |     stock_range = underlying_range,
165 |     stringsAsFactors = FALSE)
166 | }
167 | 
168 | # Convert the list into a table
169 | results <- do.call(rbind, output)
170 | 
171 | head(results)
172 | ##  symbol       date type strike trades volume
173 | ## 1   SPY 2013-04-20  C  120.00   12    33000
174 | ## 2   SPY 2013-04-20  C  124.00    1       15
175 | ## 3   SPY 2013-04-20  C  130.00    1        2
176 | ## 4   SPY 2013-04-20  C  133.00    1        1
177 | ## 5   SPY 2013-04-20  C  140.00    1       95
178 | ## 6   SPY 2013-04-20  C  142.00    1        6
179 | 
180 | ## avg_price sd_price avg_stock_price stock_range
181 | ## 35.973    0.261      155.74      0.68
182 | ## 31.380    0.000      155.44      0.01
183 | ## 26.210    0.000      156.24      0.02
184 | ## 24.600    0.000      157.58      0.01
185 | ## 16.465    0.751      156.44      1.51
186 | ## 13.920    0.000      155.87      0.01
187 | 
188 | unique_maturities <- unique(results$date)
189 | 
190 | today <- as.Date("2013-04-15")
191 | days_to_expiration <- as.Date(unique_maturities[1]) - today
192 | 
193 | # Extract only the relevant maturity range
194 | single_maturity_table <- results[results$date ==
195 |   unique_maturities[1], ]
196 | 
197 | # Look at the calls and puts separately
198 | calls <- single_maturity_table[
199 |   single_maturity_table$type == "C", ]
200 | puts <- single_maturity_table[
201 |   single_maturity_table$type == "P", ]
202 | 
203 | par(mfrow = c(2, 1))
204 | plot(calls$strike, calls$volume,
205 |   xlab = "Strike", ylab = "Volume",
206 |   main = "Call volume", cex.main = 0.9)
207 | abline(v = mean(calls$avg_stock_price), lty = 2)
208 | grid()
209 | 
210 | plot(puts$strike, puts$volume,
211 |   xlab = "Strike", ylab = "Volume",
212 |   main = "Put volume", cex.main = 0.9)
213 | abline(v = mean(puts$avg_stock_price), lty = 2)
214 | grid()
215 | 
216 | ######################
217 | # Implied volatility #
218 | ######################
219 | # Create a vector of filenames
220 | folder <- "Chapter_09/SPY_20130410_QT/"
221 | 
222 | available_files <- list.files(folder)
223 | 
224 | # Explore the first few lines of the file
225 | temp <- read.csv(file = paste0(folder, available_files[1]),
226 | header = FALSE, stringsAsFactors = FALSE)
227 | head(temp)
228 | # Output omitted for clarity
229 | 
230 | column_names <- c("date", "time", "trade_indicator",
231 |   "sequence_number", "option_exchange_code",
232 |   "option_condition_code", "bid_price", "bid_size",
233 |   "ask_price", "ask_size", "stock_exchange_code",
234 |   "stock_condition_code", "underlying_bid_price",
235 |   "underlying_bid_size", "underlying_ask_price",
236 |   "underlying_ask_size")
237 | 
238 | # Find files for July 20, 2013 expiry
239 | files_to_use <- available_files[grep("20130720",
240 |   available_files)]
241 | 
242 | length(files_to_use)
243 | ## [1] 142
244 | 
245 | strikes <- sapply(strsplit(files_to_use, "_"), "[", 4)
246 | type <- sapply(strsplit(files_to_use, "_"), "[", 2)
247 | 
248 | # Extract relevant columns of data
249 | quote_list <- list()
250 | 
251 | for(i in 1:length(files_to_use)) {
252 |   temp <- read.csv(file = paste0(folder, files_to_use[i]),
253 |     header = FALSE, stringsAsFactors = FALSE)
254 |   names(temp) <- column_names
255 | 
256 |   # Extract quotes from CBOE only
257 |   filter <- temp$trade_indicator == "Q" &
258 |     temp$option_exchange_code == "C"
259 | 
260 |   data <- temp[filter, ]
261 | 
262 |   # Create xts object
263 |   require(xts)
264 |   time_index <- as.POSIXct(paste(data$date, data$time),
265 |     format = "%m/%d/%Y %H:%M:%OS")
266 |   data_filtered <- data[, c("bid_price", "ask_price",
267 |     "underlying_bid_price", "underlying_ask_price")]
268 |   data_filtered$type <- type[i]
269 |   data_filtered$strike <- strikes[i]
270 |   xts_prices <- xts(data_filtered, time_index)
271 | 
272 |   quote_list[[i]] <- xts_prices
273 | }
274 | 
275 | data <- quote_list[[49]]
276 | spread <- as.numeric(data$ask_price) -
277 |   as.numeric(data$bid_price)
278 | plot(xts(spread, index(data)),
279 |   main = "SPY | Expiry = July 20, 2013 | K = 158",
280 |   cex.main = 0.8, ylab = "Quote bid-ask spread")
281 | 
282 | time_of_interest <- "2013-04-10 10:30:00::
283 |   2013-04-10 10:30:10"
284 | 
285 | strike_list <- list()
286 | for(i in 1:length(quote_list)) {
287 |   data <- quote_list[[i]][time_of_interest]
288 |   if(nrow(data) > 0) {
289 |     mid_quote <- (as.numeric(data$bid_price) +
290 |       as.numeric(data$ask_price)) / 2
291 |     mid_underlying <- (as.numeric(data$underlying_bid_price) +
292 |       as.numeric(data$underlying_ask_price)) / 2
293 |     strike_list[[i]] <- c(as.character(index(data[1])),
294 |       data$type[1], data$strike[1], names(quote_list[i]),
295 |       mid_quote[1], mid_underlying[1])
296 |   }
297 | }
298 | 
299 | # Aggregate the columns
300 | df <- as.data.frame(do.call(rbind, strike_list),
301 |   stringsAsFactors = FALSE)
302 | names(df) <- c("time", "type", "strike",
303 |   "mid_quote", "mid_underlying")
304 | 
305 | head(df)
306 | # Output omitted for clarity
307 | 
308 | plot(as.numeric(df$strike), as.numeric(df$mid_quote),
309 |   main = "Option Price vs. Strike for Calls and Puts",
310 |   ylab = "Premium",
311 |   xlab = "Strike",
312 |   cex.main = 0.8)
313 | grid()
314 | 
315 | # Filter the otm options
316 | otm_calls <- df$type == "C" & df$mid_underlying <= df$strike
317 | otm_puts <- df$type == "P" & df$mid_underlying > df$strike
318 | otm <- df[otm_calls | otm_puts, ]
319 | 
320 | # Order by strike
321 | otm <- otm[order(otm[, "strike"]), ]
322 | plot(otm$strike, otm$mid_quote,
323 |   main = "OTM prices",
324 |   xlab = "Strike",
325 |   ylab = "Premium",
326 |   cex.main = 0.8)
327 | grid()
328 | 
329 | # Compute the implied vols for otm options
330 | otm$iv <- NA
331 | for(i in 1:nrow(otm)) {
332 |   type <- ifelse(otm$type[i] == "C", "call", "put")
333 |   value <- as.numeric(otm$mid_quote[i])
334 |   underlying <- as.numeric(otm$mid_underlying[i])
335 |   strike <- as.numeric(otm$strike[i])
336 |   dividendYield <- 0.03
337 |   riskFreeRate <- 0.02
338 |   maturity <- 101/252
339 |   volatility <- 0.15
340 |   otm$iv[i] <- AmericanOptionImpliedVolatility(type,
341 |     value, underlying, strike,dividendYield,
342 |     riskFreeRate, maturity, volatility)$impliedVol
343 | }
344 | # Generate plot
345 | plot(otm$strike, otm$iv,
346 |   main = "Implied Volatility skew for SPY on April 10, 2013 10:30 am",
347 |   xlab = "Strike",
348 |   ylab = "Implied Volatility",
349 |   cex.main = 0.8)
350 | grid()
351 | 
352 | 
353 | 
354 | 
355 | 
356 | 
357 | 
358 | 
359 | 
360 | 
361 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_10/code.R:
--------------------------------------------------------------------------------
  1 | # CHAPTER 10
  2 | # Optimization
  3 | 
  4 | ###########################
  5 | # The motivating parabola #
  6 | ###########################
  7 | # Create the function
  8 | f <- function(x) {
  9 |  return((1 + x) ^ 2)
 10 | }
 11 | 
 12 | # Create the derivative
 13 | fp <- function(x) {
 14 |  return(2 * (1 + x))
 15 | }
 16 | 
 17 | # Plot the function and its derivative
 18 | x <- seq(-5, 5, 0.1)
 19 | plot(x, f(x), type = 'l', lwd = 2,
 20 |   main = "f(x) and f'(x)",
 21 |   cex.main = 0.8,
 22 |   cex.lab = 0.8,
 23 |   cex.axis = 0.8)
 24 | grid()
 25 | lines(x, fp(x), lty = 3, lwd = 2)
 26 | abline(h = 0)
 27 | abline(v = 0)
 28 | 
 29 | ###################
 30 | # Newton's method #
 31 | ###################
 32 | f <- function(x) {
 33 |     return(x ^ 2 - 4 * x + 1)
 34 | }
 35 | 
 36 | uniroot(f, c(-8, -1))
 37 | ## $root
 38 | ## [1] -4.236068
 39 | 
 40 | ## $f.root
 41 | ## [1] -2.568755e-07
 42 | 
 43 | ## $iter
 44 | ## [1] 8
 45 | 
 46 | ## $estim.prec
 47 | ## [1] 6.103516e-05
 48 | 
 49 | uniroot(f, c(-1, 2))
 50 | ## $root
 51 | ## [1] 0.236044
 52 | 
 53 | ## $f.root
 54 | ## [1] -0.0001070205
 55 | 
 56 | ## $iter
 57 | ## [1] 6
 58 | 
 59 | ## $estim.prec
 60 | ## [1] 6.103516e-05
 61 | 
 62 | # Newton's method with a first order approximation
 63 | newton <- function(f, tol = 1E-12, x0 = 1, N = 20) {
 64 |   # N = total number of iterations
 65 |   # x0 = initial guess
 66 |   # tol = abs(xn+1 - xn)
 67 |   # f = function to be evaluated for a root
 68 | 
 69 |   h <- 0.001
 70 |   i <- 1; x1 <- x0
 71 |   p <- numeric(N)
 72 |   while (i <= N) {
 73 |     df_dx <- (f(x0 + h) - f(x0)) / h
 74 |     x1 <- (x0 - (f(x0) / df_dx))
 75 |     p[i] <- x1
 76 |     i <- i + 1
 77 |     if (abs(x1 - x0) < tol) {
 78 |       break
 79 |     }
 80 |     x0 <- x1
 81 |   }
 82 |   return(p[1:(i-1)])
 83 | }
 84 | 
 85 | newton(f, x0 = -10)
 86 | ## [1] -6.312270 -4.735693 -4.281609 -4.236513
 87 | ## [5] -4.236068 -4.236068 -4.236068 -4.236068
 88 | 
 89 | newton(f, x0 = 10)
 90 | ## [1] 4.2085746 1.5071738 0.4665599 0.2468819
 91 | ## [5] 0.2360964 0.2360680 0.2360680 0.2360680
 92 | 
 93 | options(digits = 14)
 94 | newton(f, x0 = 10)
 95 | ## [1] 4.20857464272283 1.50717378796250 0.46655991958819
 96 | ## [5] 0.24688186711921 0.23609640037269 0.23606798403444
 97 | ## [8] 0.23606797750125 0.23606797749979 0.23606797749979
 98 | 
 99 | newton(f, x0 = 0.25)
100 | ## [1] 0.23611419684515 0.23606798830978 0.23606797750221
101 | ## [4] 0.23606797749979 0.23606797749979
102 | 
103 | # Create an expression
104 | e <- expression(sin(x))
105 | 
106 | # Compute the derivative
107 | D(e, "x")
108 | ## cos(x)
109 | 
110 | f_expr <- expression(x ^ 2 + 4 * x - 1)
111 | 
112 | eval(f_expr, list(x = 2))
113 | ## [1] 11
114 | 
115 | newton_alternate <- function(f, tol = 1E-12, x0 = 1, N = 20) {
116 |   # N = total number of iterations
117 |   # x0 = initial guess
118 |   # tol = abs(xn+1 - xn)
119 |   # f = expression to be evaluated for a root
120 | 
121 |   # Compute the symbolic derivative
122 |   df_dx = D(f, "x")
123 | 
124 |   i <- 1; x1 <- x0
125 |   p <- numeric(N)
126 |   while (i <= N) {
127 |     x1 <- (x0 - eval(f, list(x = x0)) /
128 |       eval(df_dx, list(x = x0)))
129 |     p[i] <- x1
130 |     i <- i + 1
131 |     if (abs(x1 - x0) < tol) {
132 |       break
133 |     }
134 |     x0 <- x1
135 |   }
136 |   return(p[1:(i-1)])
137 | }
138 | 
139 | newton_alternate(f_expr, x0 = 10)
140 |  ## [1] 4.20833333333333 1.50685123042506 0.46631585084907
141 |  ## [4] 0.24681560399775 0.23609368309733 0.23606797764754
142 |  ## [7] 0.23606797749979 0.23606797749979
143 | 
144 | newton_alternate(f_expr, x0 = -10)
145 | ## [1] -6.3125000000000 -4.7359601449275 -4.2817360731259
146 | ## [4] -4.2365249924418 -4.2360680241934 -4.2360679774998
147 | ## [7] -4.2360679774998
148 | 
149 | ############################
150 | # The brute-force approach #
151 | ############################
152 | # Create a set of random points x
153 | set.seed(123)
154 | x <- rnorm(100, 0, 1)
155 | 
156 | # Make y a function of x
157 | y <- 3.2 + 2.9 * x + rnorm(100, 0, 0.1)
158 | 
159 | plot(x, y)
160 | 
161 | objective_function <- function(y, x, a, b) {
162 |   value <- sum((y - (a * x + b)) ^ 2)
163 |   return(value)
164 | }
165 | 
166 | # Create a range of a and b values and loop through all of them
167 | a <- seq(-10, 10, 0.25)
168 | b <- seq(-10, 10, 0.25)
169 | 
170 | output <- list()
171 | z <- 1
172 | for(i in 1:length(a)) {
173 |   for(j in 1:length(b)) {
174 |     output[[z]] <- c(objective_function(y, x, a[i], b[j]),
175 |       a[i], b[j])
176 |     z <- z + 1
177 |   }
178 | }
179 | 
180 | # Create a matrix out of the list and find the minimum value
181 | mat <- do.call(rbind, output)
182 | colnames(mat) <- c("obj", "a", "b")
183 | 
184 | smallest <- which(mat[, "obj"] == min(mat[, "obj"]))
185 | 
186 | mat[smallest, ]
187 | ##   obj      a      b
188 | ## 2.16076 3.00000 3.25000
189 | 
190 | a = seq(-5, 5, 0.1)
191 | b = seq(-5, 5, 0.1)
192 | 
193 | ##   obj       a      b
194 | ## 0.9077592 2.9000 3.2000
195 | 
196 | ###########################
197 | # R optimization routines #
198 | ###########################
199 | args(optim)
200 | function (par, fn, gr = NULL, ...,
201 |   method = c("Nelder-Mead",
202 |   "BFGS", "CG", "L-BFGS-B", "SANN", "Brent"),
203 |   lower = -Inf,
204 |   upper = Inf,
205 |   control = list(),
206 |   hessian = FALSE)
207 | 
208 | ############################
209 | # A curve-fitting exercise #
210 | ############################
211 | # Create fictitious yields
212 | rates = c(0.025, 0.03, 0.034, 0.039, 0.04,
213 |   0.045, 0.05, 0.06, 0.07, 0.071,
214 |   0.07, 0.069, 0.07, 0.071, 0.072,
215 |   0.074, 0.076, 0.082, 0.088, 0.09)
216 | maturities = 1:20
217 | 
218 | plot(maturities, rates, xlab = "years",
219 |   main = "Yields",
220 |   cex.main = 0.8,
221 |   cex.lab = 0.8,
222 |   cex.axis = 0.8)
223 | grid()
224 | 
225 | poly_5 <- function(x, p) {
226 |   f <- p[1] + p[2] * x + p[3] * x^2 +
227 |     p[4] * x^3 + p[5] * x^4 + p[6] * x^5
228 |   return(f)
229 | }
230 | 
231 | obj_5 <- function(x, y, p) {
232 |   error <- (y - poly_5(x, p)) ^ 2
233 |   return(sum(error))
234 | }
235 | 
236 | # Fit the parameters. Assume 0 for all initial values
237 | out_5 = optim(obj_5, par = c(0, 0, 0, 0, 0, 0),
238 |   x = maturities, y = rates)
239 | 
240 | out
241 | ## $par
242 | ## [1]  2.4301235956099e-02  1.3138951147963e-03
243 | ## [3]  5.5229326602931e-04  7.5685740385076e-07
244 | ## [5] -4.2119475163787e-06  1.5330958809806e-07
245 | 
246 | ## $value
247 | ## [1] 0.00017311660458207
248 | 
249 | ## $counts
250 | ## function gradient
251 | ##    501       NA
252 | 
253 | ## $convergence
254 | ## [1] 1
255 | 
256 | ## $message
257 | ## NULL
258 | 
259 | lines(poly_5(maturities, out_5$par), lwd = 1.5, lty = 2)
260 | 
261 | poly_7 <- function(x, p) {
262 |   f <- p[1] + p[2] * x + p[3] * x^2 +
263 |     p[4] * x^3 + p[5] * x^4 +
264 |     p[6] * x^5 + p[6] * x^6 +
265 |     p[7] * x^7
266 |   return(f)
267 | }
268 | 
269 | obj_7 <- function(x, y, p) {
270 |   error <- (y - poly_7(x, p)) ^ 2
271 |   return(sum(error))
272 | }
273 | 
274 | # Fit the parameters. Assume 0 for all initial values
275 | out_7 <- optim(obj_7, par = c(0, 0, 0, 0, 0, 0, 0, 0),
276 |   x = maturities, y = rates)
277 | 
278 | lines(poly_7(maturities, out_7$par), lwd = 1.5, lty = 3)
279 | 
280 | # Specify two polynomials to be used for fitting purposes
281 | poly_5 <- function(x, a) {
282 |   f <- a[1] + a[2] * x + a[3] * x ^ 2 +
283 |     a[4] * x ^ 3 + a[5] * x ^ 4 +
284 |     a[6] * x ^ 5
285 | return(f)
286 | }
287 | 
288 | poly_3 <- function(x, offset, intercept, b) {
289 |   f <- intercept + b[1] * (x - offset) +
290 |     b[2] * (x - offset) ^ 2 +
291 |     b[3] * (x - offset) ^ 3
292 |   return(f)
293 | }
294 | 
295 | obj_3_5 <- function(x, y, offset, p) {
296 | 
297 |   # All points are at infinity initially
298 |   fit <- rep(Inf, length(x))
299 |   ind_5 <- x <= offset
300 |   ind_3 <- x > offset
301 | 
302 |   fit[ind_5] <- poly_5(x[ind_5], p[1:6])
303 |   fit[ind_3] <- poly_3(x[ind_3], offset,
304 |     poly_5(offset, p[1:6]), p[7:9])
305 | 
306 |   error <- (y - fit) ^ 2
307 |   return(sum(error))
308 | }
309 | 
310 | # Fit the parameters.  Assume 0 for all initial values
311 | offset <- 9
312 | out_3_5 <- optim(obj_3_5, par = rep(0, 9),
313 |   x = maturities, y = rates, offset = offset)
314 | 
315 | plot(maturities, rates, xlab = "years",
316 |   main = "Yields",
317 |   cex.main = 0.8,
318 |   cex.lab = 0.8,
319 |   cex.axis = 0.8)
320 | grid()
321 | lines(poly_5(maturities[maturities <= offset],
322 |   out_3_5$par[1:6]), lwd = 2)
323 | lines(c(rep(NA, offset),
324 |   poly_3(maturities[maturities > offset], offset,
325 |   poly_5(offset, out_3_5$par[1:6]),
326 |   out_3_5$par[7:9])), lwd = 2)
327 | abline(v = offset)
328 | 
329 | # Fit loess to the data
330 | obj <- loess(rates ~ maturities, span = 0.5)
331 | 
332 | # Plot the data and the fit
333 | plot(maturities, rates, main = "Rates", cex.main = 0.8)
334 | lines(predict(obj), lty = 2)
335 | 
336 | ##########################
337 | # Portfolio optimization #
338 | ##########################
339 | install.packages("DEoptim")
340 | 
341 | require(DEoptim)
342 | 
343 | # Drawdown function
344 | compute_drawdown <- function(x, returns_default = TRUE,
345 |   geometric = TRUE) {
346 |   # x = Vector of raw pnl or returns
347 |   # If returns_default = FALSE, the geometric
348 |   # argument is ignored and the pnl is used.
349 |   # Output = the maximum drawdown
350 |   if(returns_default) {
351 |     # Cumulative return calculation
352 |     if(geometric) {
353 |       cumulative_return <- cumprod(1 + x)
354 |     } else {
355 |       cumulative_return <- 1 + cumsum(x)
356 |     }
357 |     max_cumulative_return <- cummax(c(1, cumulative_return))[-1]
358 |     drawdown <- -(cumulative_return / max_cumulative_return - 1)
359 |   } else {
360 |     # PnL vector is used
361 |     cumulative_pnl <- c(0, cumsum(x))
362 |     drawdown <- cummax(cumulative_pnl) - cumulative_pnl
363 |     drawdown <- drawdown[-1]
364 |   }
365 |   # Drawdown vector for either pnl or returns
366 |   return(drawdown)
367 | }
368 | 
369 | obj_max_drawdown <- function(w, r_matrix, small_weight) {
370 |   # w is the weight of every stock
371 |   # r_matrix is the returns matrix of all stocks
372 | 
373 |   # Portfolio return
374 |   portfolio_return <- r_matrix %*% w
375 | 
376 |   # Max drawdown
377 |   drawdown_penalty <- max(compute_drawdown(portfolio_return))
378 | 
379 |   # Create penalty component for sum of weights
380 |   weight_penalty <- 100 * (1 - sum(w)) ^ 2
381 | 
382 |   # Create a penalty component for negative weights
383 |   negative_penalty <- -sum(w[w < 0])
384 | 
385 |   # Create penalty component for small weights
386 |   small_weight_penalty <- 100 * sum(w[w < small_weight])
387 | 
388 |   # Objective function to minimize
389 |   obj <- drawdown_penalty + weight_penalty +
390 |     negative_penalty + small_weight_penalty
391 |   return(obj)
392 | }
393 | 
394 | # Calculate a returns matrix for multiple stocks
395 | symbol_names <- c("AXP", "BA", "CAT", "CVX",
396 |   "DD", "DIS", "GE", "HD", "IBM",
397 |   "INTC", "KO", "MMM", "MRK",
398 |   "PG", "T", "UTX", "VZ")
399 | 
400 | # Load these prices into memory
401 | price_matrix <- NULL
402 | for(name in symbol_names) {
403 |   # Extract the adjusted close price vector
404 |   price_matrix <- cbind(price_matrix, get(name)[, 6])
405 | }
406 | 
407 | colnames(price_matrix) <- symbol_names
408 | 
409 | # Compute returns
410 | returns_matrix <- apply(price_matrix, 2, function(x) diff(log(x)))
411 | 
412 | # Specify a small weight below which the allocation should be 0%
413 | small_weight_value <- 0.02
414 | 
415 | # Specify lower and upper bounds for the weights
416 | lower <- rep(0, ncol(returns_matrix))
417 | upper <- rep(1, ncol(returns_matrix))
418 | 
419 | optim_result <- DEoptim(obj_max_drawdown, lower, upper,
420 |   control = list(NP = 400, itermax = 300, F = 0.25, CR = 0.75),
421 |   returns_matrix, small_weight_value)
422 | 
423 | weights <- optim_result$optim$bestmem
424 | 
425 | sum(weights)
426 | ## 0.9978
427 | 
428 | weights <- weights / sum(weights)
429 | 
430 | # Equally weighted portfolio
431 | equal_weights <- rep(1 / 17, 17)
432 | equal_portfolio <- returns_matrix %*% equal_weights
433 | equal_portfolio_cumprod <- cumprod(1 + equal_portfolio)
434 | 
435 | # Optimal max drawdown portfolio
436 | optimized_portfolio <- returns_matrix %*% weights
437 | drawdown_portfolio_cumprod <- cumprod(1 + optimized_portfolio)
438 | 
439 | main_title <- "Equal vs. Optimized Weights"
440 | plot(drawdown_portfolio_cumprod, type = 'l', xaxt = 'n',
441 |   main = main_title, xlab = "", ylab = "cumprod(1 + r)")
442 | lines(equal_portfolio_cumprod, lty = 3)
443 | grid(col = 'black')
444 | 
445 | # Set x-axis labels
446 | label_location <- seq(1, length(drawdown_portfolio_cumprod),
447 |   by = 90)
448 | labels <- rownames(returns_matrix)[label_location]
449 | axis(side = 1, at = label_location, labels = labels,
450 |   las = 2, cex.axis= 0.8)
451 | 
452 | # Equal weighted
453 | max(compute_drawdown(equal_portfolio))
454 | ## [1] 0.597
455 | 
456 | # Optimized for the smallest max drawdown
457 | max(compute_drawdown(optimized_portfolio))
458 | ## [1] 0.515
459 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_08/code.R:
--------------------------------------------------------------------------------
  1 | # CHAPTER 8
  2 | # High-Frequency Data
  3 | 
  4 | #########################
  5 | # High-frequency quotes #
  6 | #########################
  7 | 
  8 | # The following data is not available for public use. 
  9 | # Those interested in using tick data can contact Tick Data, Inc.
 10 | spy_file <- "Chapter_08/SPY_2013_04_15_X_Q.asc"
 11 | spy_quotes <- read.csv(file = spy_file, header = FALSE,
 12 |   stringsAsFactors = FALSE)
 13 | 
 14 | head(spy_quotes, 3)
 15 | ##           V1               V2  V3      V4   V5 V6 V7
 16 | ## 1 04/15/2013 04:00:00.065  T 156.60   0.00  1  0  R
 17 | ## 2 04/15/2013 04:00:00.626  P   0.00   0.00  0  0  R
 18 | ## 3 04/15/2013 04:00:00.633  P 158.25 158.90  1 47  R
 19 | 
 20 | ## V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21
 21 | ## NA   1   T   T   1   2   A   C    NA   NA   NA   NA
 22 | ## NA   2   P   P   0   2   A   C    NA   NA   NA   NA
 23 | ## NA   3   P   P   1   2   A   C    NA   NA   NA   NA
 24 | 
 25 | names(spy_quotes) <- c("date", "time", "exchange", "bid_price",
 26 |   "ask_price", "bid_size", "ask_size", "quote_condition",
 27 |   "mode", "market_maker_id", "sequence_number", "bid_exchange",
 28 |   "ask_exchange",  "national_bbo_indicator",
 29 |   "nasdaq_bbo_indicator", "quote_cancel_correction",
 30 |   "quote_source", "short_sale_restriction_indicator",
 31 |   "limit_up_down_bbo_indicator_cqs",
 32 |   "limit_up_down_bbo_indicator_utp",
 33 |   "finra_adf_mpid_indicator")
 34 | 
 35 | spy_quotes_arca <- spy_quotes[spy_quotes$exchange %in% c("P"),
 36 |   c("date", "time", "bid_price",
 37 |   "ask_price", "bid_size", "ask_size")]
 38 | 
 39 | require(xts)
 40 | 
 41 | # Setting to allow us to view the millisecond precision
 42 | options(digits.secs = 3)
 43 | 
 44 | time_index <- as.POSIXct(paste(spy_quotes_arca$date,
 45 |   spy_quotes_arca$time), format = "%m/%d/%Y %H:%M:%OS")
 46 | spy <- xts(spy_quotes_arca[, -c(1, 2)], time_index)
 47 | rm(time_index)
 48 | 
 49 | plot(spy$bid_price, type = 'l',
 50 |   main = "SPY bid price",
 51 |   cex.main = 0.8,
 52 |   cex.lab = 0.8,
 53 |   cex.axis = 0.8)
 54 | 
 55 | spy_filtered <- spy[spy$bid_price > 0, ]
 56 | 
 57 | rows_removed <- nrow(spy) - nrow(spy_filtered)
 58 | 
 59 | rows_removed
 60 | ## [1] 2
 61 | 
 62 | plot(spy_filtered$bid_price, type = 'l',
 63 |   main = "SPY filtered bid price",
 64 |   cex.main = 0.8,
 65 |   cex.lab = 0.8,
 66 |   cex.axis = 0.8)
 67 | 
 68 | summary(as.numeric(spy_filtered$ask_price))
 69 |   ## Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
 70 |   ## 154.3   156.1   156.7   156.8   157.7   158.9
 71 | 
 72 | summary(as.numeric(spy_filtered$bid_size))
 73 | ## Min. 1st Qu.  Median  Mean 3rd Qu.  Max.
 74 | ## 1.00 22.00 52.00 94.93 100.00 1565.00
 75 | 
 76 | summary(as.numeric(spy_filtered$ask_size))
 77 | ## Min. 1st Qu. Median Mean 3rd Qu. Max.
 78 | ## 1.0  24.0  59.0  110.8 118.0 1412.0
 79 | 
 80 | # Boolean vector where bid price >= the ask price
 81 | crossed_prices <- spy_filtered$bid_price >=
 82 |   spy_filtered$ask_price
 83 | 
 84 | any(crossed_prices)
 85 | ## [1] FALSE
 86 | 
 87 | #############################
 88 | # Inter-quote arrival times #
 89 | #############################
 90 | # Extract the time index.
 91 | quote_times <- index(spy_filtered)
 92 | 
 93 | # Compute the consecutive time differences
 94 | time_differences <- as.numeric(diff(quote_times))
 95 | 
 96 | summary(time_differences)
 97 | ## Min.   1st Qu. Median   Mean   3rd Qu.    Max.
 98 | ## 0.0000 0.0000  0.0010  0.0645  0.0100  1010.0000
 99 | 
100 | # Identify times that seem abnormal
101 | long_times <- which(time_differences > 1000)
102 | long_times
103 | ## [1] 884312
104 | 
105 | # Show the data around the abnormally long time
106 | spy_abnormal <- spy_filtered[(long_times - 2):(long_times + 2), ]
107 | ##                            bid_price ask_price bid_size ask_size
108 | ## 2013-04-15 16:13:00.987    155.00    155.02      289      270
109 | ## 2013-04-15 16:13:01.256    155.00    155.02      290      270
110 | ## 2013-04-15 16:13:01.282    155.00    155.02      295      270
111 | ## 2013-04-15 16:29:50.869    154.76    154.79        3        3
112 | ## 2013-04-15 16:29:50.887    154.76    154.77        3       10
113 | 
114 | #################################
115 | # Identifying liquidity regimes #
116 | #################################
117 | # Calculate the bid-ask spread.
118 | bid_ask_spread <- spy_filtered$ask_price -
119 |   spy_filtered$bid_price
120 | 
121 | # Filter out abnormal value
122 | outliers <- which(bid_ask_spread > 0.25)
123 | if(length(outliers) > 0) {
124 |   bid_ask_spread <- bid_ask_spread[-outliers, ]
125 | }
126 | 
127 | # Plot the spread.
128 | plot(bid_ask_spread, type = "l",
129 |   main = "Bid ask spread",
130 |   cex.main = 0.8,
131 |   cex.lab = 0.8,
132 |   cex.axis = 0.8)
133 | 
134 | # Create three time partitions for the SPY data
135 | early_morning <- "2013-04-15 04:00:00::2013-04-15 08:29:00"
136 | regular_trading <- "2013-04-15 08:30:00::2013-04-15 16:15:00"
137 | after_hours <- "2013-04-15 16:15:01::2013-04-15 20:00:00"
138 | 
139 | # Create a histogram of the bid-ask spread for each period
140 | par(mfrow = c(3, 1))
141 | 
142 | # Morning
143 | data <- bid_ask_spread[early_morning]
144 | hist(data, main = early_morning, breaks = 1000,
145 |   xlim = c(0, 0.1))
146 | abline(v = mean(data), lwd = 2, lty = 2)
147 | 
148 | # Afternoon
149 | data <- bid_ask_spread[regular_trading]
150 | hist(data, main = regular_trading, breaks = 1000,
151 |   xlim = c(0, 0.1))
152 | abline(v = mean(data), lwd = 2, lty = 2)
153 | 
154 | # Evening
155 | data <- bid_ask_spread[after_hours]
156 | hist(data, main = after_hours, breaks = 1000,
157 |   xlim = c(0, 0.1))
158 | abline(v = mean(data), lwd = 2, lty = 2)
159 | 
160 | spy_day <- spy_filtered[regular_trading]
161 | 
162 | ###################
163 | # The micro-price #
164 | ###################
165 | spy_micro_price <- (spy_day$bid_price * spy_day$ask_size +
166 |   spy_day$ask_price * spy_day$bid_size) /
167 |   (spy_day$bid_size + spy_day$ask_size)
168 | 
169 | par(mfrow = c(1, 1))
170 | range <- 10000:10100
171 | title <- "Micro-price between bid-ask prices"
172 | 
173 | plot(spy_day$ask_price[range],
174 |   ylim = c(min(spy_day$bid_price[range]),
175 |   max(spy_day$ask_price[range])),
176 |   main = title,
177 |   cex.main = 0.8,
178 |   cex.lab = 0.8,
179 |   cex.axis = 0.8)
180 | lines(spy_day$bid_price[range])
181 | lines(spy_micro_price[range], lty = 2)
182 | 
183 | ######################################
184 | # Distributions and autocorrelations #
185 | ######################################
186 | spy_returns <- diff(log(spy_micro_price))
187 | 
188 | par(mfrow = c(2, 1))
189 | plot(spy_returns,
190 |   main = "Time series plot of micro-price returns",
191 |   cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)
192 | hist(spy_returns, breaks = 1000,
193 |   main = "Micro-price distribution",
194 |   cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)
195 | 
196 | par(mfrow = c(1, 1))
197 | mean_spy <- mean(as.numeric(spy_returns), na.rm = TRUE)
198 | sd_spy <- sd(as.numeric(spy_returns), na.rm = TRUE)
199 | 
200 | hist(spy_returns, breaks = 10000, prob = TRUE,
201 |   xlim = c(-0.00003, 0.00003),
202 |   main = "Micro-price distribution vs. Normal",
203 |   cex.main = 0.8,
204 |   cex.lab = 0.8,
205 |   cex.axis = 0.8)
206 | 
207 | curve(dnorm(x, mean_spy, sd_spy), add = TRUE,
208 |   yaxt = "n", lwd = 3, lty = 3)
209 | 
210 | spy_acf <- acf(as.numeric(spy_returns),
211 |   na.action = na.pass,
212 |   main = "Autocorrelation",
213 |   cex.main = 0.8,
214 |   cex.lab = 0.8,
215 |   cex.axis = 0.8)
216 | 
217 | # Get the SPY traded prices from our tick data file
218 | spy_trades_file <- "path/SPY_2013_04_15_X_T.asc"
219 | spy_trades <- read.csv(file = spy_trades_file,
220 |   header = FALSE, stringsAsFactors = FALSE)
221 | 
222 | names(spy_trades) <- c("date", "time", "price",
223 |   "volume", "exchange", "sales_condition",
224 |   "correction_indicator", "sequence_number",
225 |   "trade_stop_indicator", "source_of_trade", "trf",
226 |   "exclude_record_flag", "filtered_price")
227 | 
228 | # Extract only the ARCA trades
229 | spy_trades_arca <- spy_trades[spy_trades$exchange %in% c("P"),
230 |   c("date", "time", "price", "volume", "correction_indicator",
231 |   "filtered_price")]
232 | 
233 | # Check if any filtered prices exist
234 | any(!is.na(spy_trades_arca$filtered_price))
235 | ## [1] FALSE
236 | 
237 | # Check if there are any special correction indicators present
238 | unique(spy_trades_arca$correction_indicator)
239 | ## [1] 0
240 | 
241 | # Drop the last two columns from the data frame
242 | spy_trades_arca <- spy_trades_arca[, 1:4]
243 | 
244 | # Convert to an xts object for subsequent analysis
245 | time_index <- as.POSIXct(paste(spy_trades_arca$date,
246 |   spy_trades_arca$time), format = "%m/%d/%Y %H:%M:%OS")
247 | 
248 | spy_t <- xts(spy_trades_arca[, -c(1, 2)], time_index)
249 | rm(time_index)
250 | 
251 | # First 6 entries
252 | head(spy_t)
253 | ##                          price volume
254 | ## 2013-04-15 04:00:00.697 158.25    100
255 | ## 2013-04-15 04:00:00.697 158.24    200
256 | ## 2013-04-15 04:00:00.697 158.15    150
257 | ## 2013-04-15 04:01:42.190 158.06    200
258 | ## 2013-04-15 04:07:16.545 157.94    100
259 | ## 2013-04-15 04:12:45.265 157.92  10000
260 | 
261 | # Subset to regular trading hour range
262 | regular_trading <- "2013-04-15 08:30:00::2013-04-15 16:15:00"
263 | spy_t_day <- spy_t[regular_trading]
264 | 
265 | # Look at the number of trade entries
266 | dim(spy_t_day)
267 | ## [1] 93197     2
268 | 
269 | # Look at the amount of memory taken up
270 | object.size(spy_t_day)
271 | ## [1] 2239080 bytes
272 | 
273 | # Compute returns
274 | spy_t_day_returns <- diff(log(spy_t_day$price))[-1]
275 | 
276 | # Plot the distribution and the autocorrelation plot
277 | par(mfrow = c(2, 1))
278 | plot(spy_t_day_returns, main = "SPY returns on trades",
279 |   cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)
280 | acf(as.numeric(spy_t_day_returns), main = "SPY trades acf",
281 |   cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)
282 | 
283 | # Distribution of trade returns
284 | par(mfrow = c(1, 1))
285 | hist(spy_t_day_returns, breaks = 1000, prob = TRUE,
286 |   xlim = c(-0.0001, 0.0001),
287 |   main = "Distribution of SPY trade returns",
288 |   cex.main = 0.8,
289 |   cex.lab = 0.8,
290 |   cex.axis = 0.8)
291 | 
292 | curve(dnorm(x, mean(spy_t_day_returns),
293 |   sd(spy_t_day_returns)),
294 |   add = TRUE,
295 |   yaxt = "n",
296 |   lwd = 3,
297 |   lty = 3)
298 | 
299 | # Use the rle() function to find price sequences
300 | prices_rle <- rle(as.numeric(spy_t_day$price))
301 | 
302 | # Here are the row indexes we want to keep
303 | end_indexes <- cumsum(prices_rle$lengths)
304 | 
305 | # Here are the start indexes we want to sum the volumes from
306 | start_indexes <- end_indexes - prices_rle$lengths + 1
307 | 
308 | # Create a vector of total volumes for each price
309 | volume_list <- list()
310 | volume_vector <- as.numeric(spy_t_day$volume)
311 | for (i in 1:length(end_indexes)) {
312 |   volume_list[[i]] <- sum(volume_vector[start_indexes[i]:
313 |     end_indexes[i]], na.rm = TRUE)
314 | }
315 | 
316 | # Create a reduced data set with distinct trade prices
317 | spy_t_day_reduced <- spy_t_day[end_indexes, ]
318 | spy_t_day_reduced$volume <- unlist(volume_list)
319 | 
320 | head(spy_t_day_reduced, 10)
321 | ##                          price  volume
322 | ## 2013-04-15 08:30:01.964 158.17   510
323 | ## 2013-04-15 08:30:02.783 158.15   1000
324 | ## 2013-04-15 08:30:04.930 158.14   340
325 | ## 2013-04-15 08:30:11.964 158.12   100
326 | ## 2013-04-15 08:30:23.763 158.11   1100
327 | ## 2013-04-15 08:30:29.739 158.10   1720
328 | ## 2013-04-15 08:30:31.963 158.09   200
329 | ## 2013-04-15 08:30:45.164 158.08   4995
330 | ## 2013-04-15 08:30:46.888 158.07   100
331 | ## 2013-04-15 08:30:46.970 158.06   3330
332 | 
333 | head(spy_t_day, 10)
334 | ##                         price volume
335 | ## 2013-04-15 08:30:01.964 158.17   100
336 | ## 2013-04-15 08:30:01.964 158.17   410
337 | ## 2013-04-15 08:30:02.783 158.15   1000
338 | ## 2013-04-15 08:30:04.930 158.14   340
339 | ## 2013-04-15 08:30:11.964 158.12   100
340 | ## 2013-04-15 08:30:23.763 158.11   1000
341 | ## 2013-04-15 08:30:23.763 158.11   100
342 | ## 2013-04-15 08:30:28.153 158.10   400
343 | ## 2013-04-15 08:30:28.529 158.10   320
344 | ## 2013-04-15 08:30:29.739 158.10   180
345 | 
346 | # Identify the most traded prices throughout the day
347 | hist(as.numeric(spy_t_day_reduced$price), breaks = 200,
348 |   main = "Histogram of traded prices for SPY on 2013-04-15",
349 |   cex.main = 0.8,
350 |   cex.lab = 0.8,
351 |   cex.axis = 0.8)
352 | 
353 | acf(diff(log(as.numeric(spy_t_day_reduced$price))),
354 |   main = "Autocorrelation of trades",
355 |   cex.main = 0.8,
356 |   cex.lab = 0.8,
357 |   cex.axis = 0.8)
358 | 
359 | # Random return cloud with lag 1
360 | n <- rnorm(50, 0, .20)
361 | n_lag1 <- c(NA, n[-length(n)])
362 | plot(n_lag1, n)
363 | 
364 | # Create arrows between the points
365 | s <- seq(length(n)-1)
366 | arrows(n_lag1[s], n[s], n_lag1[s+1], n[s+1])
367 | 
368 | # SPY return cloud with lag 1
369 | spy_t_returns <- diff(log(as.numeric(
370 |   spy_t_day_reduced$price[100:150])))
371 | spy_t_returns_lag1 <- c(NA, spy_t_returns[
372 |   -length(spy_t_returns)])
373 | plot(spy_t_returns_lag1, spy_t_returns)
374 | 
375 | s <- seq(length(spy_t_returns)-1)
376 | arrows(spy_t_returns_lag1[s], spy_t_returns[s],
377 |   spy_t_returns_lag1[s+1], spy_t_returns[s+1])
378 | 
379 | #############################
380 | # The highfrequency package #
381 | #############################
382 | library("devtools")
383 | install_github("highfrequency", "jonathancornelissen")
384 | 
385 | 
386 | 
387 | 
388 | 
389 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_02/code.R:
--------------------------------------------------------------------------------
  1 | ##########################
  2 | # Getting started with R #
  3 | ##########################
  4 | 1 + 1
  5 | sqrt(2)
  6 | 20 + (26.8 * 23.4) / 2 + exp(1.34) * cos(1)
  7 | sin(1)
  8 | 5^4
  9 | sqrt(-1 + 0i)
 10 | 
 11 | integrand <- function(x) 1 / ((x + 1) * sqrt(x))
 12 | integrate(integrand, lower = 0, upper = Inf)
 13 | 
 14 | x <- 3
 15 | x <- x + 1
 16 | z <- x ^ 2
 17 | z <- "hello quants"
 18 | y <- "a"
 19 | Z <- sqrt(2)
 20 | new.X <- 2.3
 21 | 
 22 | 5+4
 23 | ## [1] 9
 24 | 
 25 | ##################
 26 | # The c() object #
 27 | ##################
 28 | first_vector  <- c(1, 2, 3, 4, 5, 6)
 29 | second_vector <- c("a", "b", "hello")
 30 | third_vector  <- c("a", 2, 23)
 31 | 
 32 | first_vector
 33 | ## [1] 1 2 3 4 5 6
 34 | 
 35 | third_vector
 36 | ## [1] "a" "2" "23"
 37 | 
 38 | new_vector <- c(first_vector, 7, 8, 9)
 39 | new_vector
 40 | ## [1] 1 2 3 4 5 6 7 8 9
 41 | 
 42 | # Extract the 4th element
 43 | example_1 <- new_vector[4]
 44 | 
 45 | # Extract the 5th and the 8th elements
 46 | example_2 <- new_vector[c(5, 8)]
 47 | 
 48 | example_2
 49 | ## [1] 5 8
 50 | 
 51 | x <-c(1,5,10,15,20)
 52 | ## [1] 1 5 10 15 20
 53 | 
 54 | x2 <- 2 * x
 55 | ## [1] 2 10 20 30 40
 56 | 
 57 | x3 <- x ^ 2
 58 | ## [1] 1 25 100 225 400
 59 | 
 60 | x4 <- x / x2
 61 | ## [1] 0.5 0.5 0.5 0.5 0.5
 62 | 
 63 | x5 <- round(x * (x / x2) ^ 3.5 + sqrt(x4), 3)
 64 | ## [1] 0.795 1.149 1.591 2.033 2.475
 65 | 
 66 | x6 <- round(c(c(x2[2:4], x3[1:2]), x5[4]), 2)
 67 | ## [1] 10.00 20.00 30.00 1.00 25.00 2.03
 68 | 
 69 | #######################
 70 | # The matrix() object #
 71 | #######################
 72 | my_matrix <- matrix(c(1, 2, 3, 4, 5, 6),
 73 |     nrow = 2, ncol = 3)
 74 | 
 75 | my_matrix
 76 | ##    [,1] [,2] [,3]
 77 | ## [1,] 1 3 5
 78 | ## [2,] 2 4 6
 79 | 
 80 | my_matrix <- matrix(c(1, 2, 3, 4, 5, 6),
 81 |     nrow = 2, ncol = 3, byrow = TRUE)
 82 | 
 83 | my_matrix
 84 | ## [,1] [,2] [,3]
 85 | ## [1,] 1 2 3
 86 | ## [2,] 4 5 6
 87 | 
 88 | dimnames(my_matrix) <- list(c("one", "hello"),
 89 |     c("column1", "column2", "c3"))
 90 | 
 91 | my_matrix
 92 | ## column1 column2 c3
 93 | ## one   1 2 3
 94 | ## hello 4 5 6
 95 | 
 96 | attributes(my_matrix)
 97 | ## $dim
 98 | ## [1] 2 3
 99 | 
100 | ## $dimnames
101 | ## $dimnames[[1]]
102 | ## [1] "one"   "hello"
103 | 
104 | ## $dimnames[[2]]
105 | ## [1] "column1" "column2" "c3"
106 | 
107 | ans <- my_matrix[1, 3]
108 | ans
109 | ## [1] 3
110 | 
111 | new_matrix_1 <- my_matrix * my_matrix
112 | 
113 | new_matrix_1
114 | ## [,1] [,2] [,3]
115 | ## [1,] 1 4 9
116 | ## [2,] 16 25 36
117 | 
118 | new_matrix_2 <- sqrt(my_matrix)
119 | 
120 | new_matrix_2
121 | ## [,1]    [,2]    [,3]
122 | ## [1,] 1  1.414214 1.732051
123 | ## [2,] 2  2.236068 2.449490
124 | 
125 | mat1 <- matrix(rnorm(1000), nrow = 100)
126 | round(mat1[1:5, 2:6], 3)
127 | ##        [,1]   [,2]   [,3]   [,4]   [,5]
128 | ## [1,] -1.544  1.281  1.397  0.407 -0.459
129 | ## [2,]  0.483  0.046 -1.817 -0.289  0.597
130 | ## [3,]  0.405  1.045 -0.726 -0.163  0.258
131 | ## [4,]  0.141 -0.294 -1.225 -0.217 -0.771
132 | ## [5,] -0.537  0.226  0.126 -1.584 -1.237
133 | 
134 | mat2 <- mat1[1:25, ] ^ 2
135 | head(round(mat2, 0), 9)[,1:7]        
136 | ## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
137 | ## [1,]   1    2    2    2    0    0    7
138 | ## [2,]   0    0    0    3    0    0    0
139 | ## [3,]   0    0    1    1    0    0    1
140 | ## [4,]   0    0    0    2    0    1    4
141 | ## [5,]   1    0    0    0    3    2    1
142 | ## [6,]   2    1    3    1    1    1    1
143 | ## [7,]   0    0    0    0    0    1    0
144 | ## [8,]   1    2    0    0    1    2    0
145 | ## [9,]   0    0    3    0    2    2    0
146 | 
147 | ###########################
148 | # The data.frame() object #
149 | ###########################
150 | df <- data.frame(price  = c(89.2, 23.2, 21.2),
151 |   symbol = c("MOT", "AAPL", "IBM"),
152 |   action = c("Buy", "Sell", "Buy"))
153 | 
154 | df
155 | ##   price symbol action
156 | ## 1  89.2    MOT    Buy
157 | ## 2  23.2   AAPL   Sell
158 | ## 3  21.2    IBM    Buy
159 | 
160 | df3 <-data.frame(price  = c(89.2, 23.2, 21.2),
161 |   symbol = c("MOT", "AAPL", "IBM"),
162 |   action = c("Buy", "Sell", "Buy"),
163 |   stringsAsFactors = FALSE)
164 | 
165 | class(df3$symbol)
166 | ## [1] "character"
167 | 
168 | price <- df[1, 1]
169 | 
170 | price
171 | ## [1] 89.2
172 | 
173 | df2 <- data.frame(col1 = c(1, 2, 3),
174 |   col2 = c(1, 2, 3, 4))
175 | 
176 | ## Error in data.frame(col1 = c(1,2,3),
177 | ## col2 = c(1,2,3,4)) : arguments imply
178 | ## differing number of rows: 3, 4
179 | 
180 | symbols <- df$symbol
181 | 
182 | symbols
183 | ## [1] MOT  AAPL IBM
184 | ## Levels: AAPL IBM MOT
185 | 
186 | class(symbols)
187 | ## [1] "factor"
188 | 
189 | symbols <- df3$symbol
190 | 
191 | symbols
192 | ## [1] "MOT"  "AAPL" "IBM"
193 | 
194 | #####################
195 | # The list() object #
196 | #####################
197 | my_list <- list(a = c(1, 2, 3, 4, 5),
198 |   b = matrix(1:10, nrow = 2, ncol = 5),
199 |   c = data.frame(price = c(89.3, 98.2, 21.2),
200 |   stock = c("MOT", "IBM", "CSCO")))
201 | 
202 | my_list
203 | ## $a
204 | ## [1] 1 2 3 4 5
205 | 
206 | ## $b
207 | ## [,1] [,2] [,3] [,4] [,5]
208 | ## [1,] 1 3 5 7 9
209 | ## [2,] 2 4 6 8 10
210 | 
211 | ## $c
212 | ## price stock
213 | ## 1 89.3 MOT
214 | ## 2 98.2 IBM
215 | ## 3 21.2 CSCO
216 | 
217 | first_element <- my_list[[1]]
218 | 
219 | first_element
220 | ## [1] 1 2 3 4 5
221 | 
222 | class(first_element)
223 | ## [1] "numeric"
224 | 
225 | second_element <- my_list[["b"]]
226 | second_element
227 | ## [,1] [,2] [,3] [,4] [,5]
228 | ## [1,] 1 3 5 7 9
229 | ## [2,] 2 4 6 8 10
230 | 
231 | class(second_element)
232 | ## [1] "matrix"
233 | 
234 | part_of_list <- my_list[c(1, 3)]
235 | 
236 | part_of_list
237 | ## $a
238 | ## [1] 1 2 3 4 5
239 | 
240 | ## $c
241 | ## price stock
242 | ## 1 89.3 MOT
243 | ## 2 98.2 IBM
244 | ## 3 21.2 CSCO
245 | 
246 | class(part_of_list)
247 | ## [1] "list"
248 | 
249 | size_of_list <- length(my_list)
250 | 
251 | size_of_list
252 | ## [1] 3
253 | 
254 | ########################
255 | # The new.env() object #
256 | ########################
257 | env <- new.env()
258 | env[["first"]] <- 5
259 | env[["second"]] <- 6
260 | env$third <- 7
261 | 
262 | env
263 | ## <environment: 0x101ef2f18>
264 | 
265 | ls(env)
266 | ## [1] "first"  "second" "third"
267 | 
268 | get("first", envir = env)
269 | ## 5
270 | 
271 | rm("second", envir = env)
272 | ls(env)
273 | ## [1] "first" "third"
274 | 
275 | env_2 <- env
276 | env_2$third <- 42
277 | 
278 | get("third", envir = env)
279 | ## [1] 42
280 | 
281 | #############################
282 | # Using the plot() function #
283 | #############################
284 | # Create a vector of numbers x and plot them
285 | x <- c(1, 2, 3.2, 4, 3, 2.1, 9, 19)
286 | plot(x)
287 | 
288 | # Convert the graph into a line plot
289 | plot(x, type = "l")
290 | 
291 | # Set up the canvas
292 | plot(rnorm(1000), main = "Some returns", cex.main = 0.9,
293 |   xlab = "Time", ylab = "Returns")
294 | 
295 | # Superimpose a basic grid
296 | grid()
297 | 
298 | # Create a few vertical and horizontal lines
299 | abline(v = 400, lwd = 2, lty = 1)
300 | abline(h = 2, lwd = 3, lty = 3)
301 | 
302 | # Create a 2-row, 2-column format
303 | par(mfrow = c(2, 2))
304 | 
305 | # First plot (points).
306 | plot(rnorm(100), main = "Graph 1")
307 | 
308 | # Second plot (lines).
309 | plot(rnorm(100), main = "Graph 2", type = "l")
310 | 
311 | # Third plot (steps) with a vertical line
312 | plot(rnorm(100), main = "Graph 3", type = "s")
313 | abline(v = 50, lwd = 4)
314 | 
315 | # Fourth plot
316 | plot(rnorm(100), type = "h", main = "Graph 4")
317 | 
318 | # Reset the plot window
319 | par(mfrow = c(1, 1))
320 | 
321 | plot(rnorm(100), main = "A line plot",
322 |   cex.main = 0.8,
323 |   xlab = "x-axis",
324 |   ylab = "y-axis",
325 |   type = "l")
326 | 
327 | # Extra text
328 | mtext("Some text at the top", side = 3)
329 | 
330 | # At x = 40 and y = -1 coordinates
331 | legend(40, -1, "A legend")
332 | 
333 | formals(plot.default)
334 | ## $x
335 | 
336 | ## $y
337 | ## NULL
338 | 
339 | ## $type
340 | ## [1] "p"
341 | ## ...
342 | 
343 | ##########################
344 | # Functional programming #
345 | ##########################
346 | ans <- sum(1:100)
347 | 
348 | ans
349 | ## [1] 5050
350 | 
351 | answer <- 0
352 | for(i in 1:100) {
353 |   answer <- answer + i
354 | }
355 | answer
356 | ## [1] 5050
357 | 
358 | crack_eggs <- function(number_of_eggs) {
359 |   # Code that determines whether eggs have been cracked.
360 |   # If they have, set have_all_eggs_been_cracked <- TRUE,
361 |   # otherwise, set to FALSE
362 |   
363 |   return(have_all_eggs_been_cracked)
364 | }
365 | 
366 | # Create 100 standard normals
367 | x <- rnorm(100, mean = 0, sd = 1)
368 | 
369 | # Find the length of the vector x.
370 | length(x)
371 | 
372 | # Compute the mean of x
373 | mean(x)
374 | 
375 | # Compute the standard deviation of x
376 | sd(x)
377 | 
378 | # Compute the median value of the vector x
379 | median(x)
380 | 
381 | # Compute the range (min, max) of a variable
382 | range(x)
383 | 
384 | # Find the sum of all the numbers in x
385 | sum(x)
386 | 
387 | # Do a cumulative sum of the values in x
388 | cumsum(x)
389 | 
390 | # Display the first 3 elements of x
391 | head(x, 3)
392 | 
393 | # Display summary statistics on x
394 | summary(x)
395 | 
396 | # Sort x from largest to smallest.
397 | sort(x, decreasing = TRUE)
398 | 
399 | # Compute the successive difference in x
400 | diff(x)
401 | 
402 | # Create an integer sequence from 1 to 10
403 | 1:10
404 | 
405 | # A sequence from 1 to 10 in steps of 0.1
406 | seq(1, 10, 0.1)
407 | 
408 | # Print the string hello to the screen
409 | print("hello")
410 | 
411 | #########################
412 | # Branching and looping #
413 | #########################
414 | 
415 | # Define a boolean variable
416 | my_boolean <- 1 == 2
417 | 
418 | if (my_boolean) {
419 |   print("not correct")
420 | } else {
421 |   print("XYZ")
422 | }
423 | 
424 | for(i in 1:5) {
425 |     cat(i, "\n")
426 | }
427 | ## 1
428 | ## 2
429 | ## 3
430 | ## 4
431 | ## 5
432 | 
433 | some_list <- list()
434 | for(z in c("hello", "goodbye")) {
435 |   some_list[[z]] <- z
436 | }
437 | 
438 | some_list
439 | ## $hello
440 | ## [1] "hello"
441 | 
442 | ## $goodbye
443 | ## [1] "goodbye"
444 | 
445 | #############################
446 | # A recommended style guide #
447 | #############################
448 | #sum numbers
449 | x<-0;for(i in 1:10){x=x+1};x
450 | 
451 | # Sum numbers
452 | x <- 0
453 | for(i in 1:10) {
454 |   x <- x + 1
455 | }
456 | x
457 | 
458 | ##################################
459 | # A pairwise correlation example #
460 | ##################################
461 | filter_and_sort_symbols <- function(symbols) {
462 |   # Name: filter_symbols
463 |   # Purpose: Convert to upper case if not
464 |   # and remove any non valid symbols
465 |   # Input: symbols = vector of stock tickers
466 |   # Output: filtered_symbols = filtered symbols
467 |   
468 |   # Convert symbols to uppercase
469 |   symbols <- toupper(symbols)
470 |   
471 |   # Validate the symbol names
472 |   valid <- regexpr("^[A-Z]{2,4}$", symbols)
473 |   
474 |   # Return only the valid ones
475 |   return(sort(symbols[valid == 1]))
476 | }
477 | 
478 | filter_and_sort_symbols(c("MOT", "cvx", "123", "Gog2", "XLe"))
479 | ## "MOT" "CVX" "XLE"
480 | 
481 | extract_prices <- function(filtered_symbols, file_path) {
482 |   # Name: extract_prices
483 |   # Purpose: Read price data from specified file
484 |   # Inputs: filtered_symbols = vector of symbols,
485 |   #         file_path = location of price data
486 |   # Output: prices = data.frame of prices per symbol
487 | 
488 |   # Read in the .csv price file
489 |   all_prices <- read.csv(file = file_path, header = TRUE,
490 |     stringsAsFactors = FALSE)
491 | 
492 |   # Make the dates row names
493 |   rownames(all_prices) <- all_prices$Date
494 | 
495 |   # Remove the original Date column
496 |   all_prices$Date <- NULL
497 | 
498 |   # Extract only the relevant data columns
499 |   valid_columns <- colnames(all_prices) %in% filtered_symbols
500 | 
501 |   return(all_prices[, valid_columns])
502 | }
503 | 
504 | A <- c(1, 2, 5, 6, 9)
505 | B <- c(0, 3, 6, 9, 10)
506 | 
507 | A %in% B
508 | ## [1] FALSE FALSE FALSE  TRUE  TRUE
509 | 
510 | filter_prices <- function(prices) {
511 |   # Name: filter_prices
512 |   # Purpose: Identify the rows with missing values
513 |   # Inputs: prices = data.frame of prices
514 |   # Output: missing_rows = vector of indexes where
515 |   # data is missing in any of the columns
516 | 
517 |   # Returns a boolean vector of good or bad rows
518 |   valid_rows <- complete.cases(prices)
519 | 
520 |   # Identify the index of the missing rows
521 |   missing_rows <- which(valid_rows == FALSE)
522 | 
523 |   return(missing_rows)
524 | }
525 | 
526 | compute_pairwise_correlations <- function(prices) {
527 |   # Name: compute_pairwise_correlations
528 |   # Purpose: Calculates pairwise correlations of returns
529 |   # and plots the pairwise relationships
530 |   # Inputs: prices = data.frame of prices
531 |   # Output: correlation_matrix = A correlation matrix
532 | 
533 |   # Convert prices to returns
534 |   returns <- apply(prices, 2, function(x) diff(log(x)))
535 |   
536 |   # Plot all the pairwise relationships
537 |   pairs(returns, main = "Pairwise return scatter plot")
538 |   
539 |   # Compute the pairwise correlations
540 |   correlation_matrix <- cor(returns, use = "complete.obs")
541 | 
542 |   return(correlation_matrix)
543 | }
544 | 
545 | # Stock tickers entered by user
546 | symbols <- c("IBM", "XOM", "2SG", "TEva",
547 |   "G0og", "CVX", "AAPL", "BA")
548 | 
549 | # Location of our database of prices
550 | file_path <- "Chapter_02/prices.csv"
551 | 
552 | # Filter and sort the symbols
553 | filtered_symbols <- filter_and_sort_symbols(symbols)
554 | filtered_symbols
555 | ## [1] "AAPL" "BA"   "CVX"  "IBM"  "TEVA" "XOM"
556 | 
557 | # Extract prices
558 | prices <- extract_prices(filtered_symbols, file_path)
559 | 
560 | # Filter prices
561 | missing_rows <- filter_prices(prices)
562 | missing_rows
563 | ## integer(0)
564 | 
565 | # Compute correlations
566 | correlation_matrix <- compute_pairwise_correlations(prices)
567 | correlation_matrix
568 | 
569 | 
570 | 
571 | 
572 | 
573 | 
574 | 
575 | 
576 | 
577 | 
578 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_06/code.R:
--------------------------------------------------------------------------------
  1 | # CHAPTER 6
  2 | # Spreads, Betas and Risk
  3 | 
  4 | #############################
  5 | # Defining the stock spread #
  6 | #############################
  7 | pepsi <- getSymbols('PEP', from = '2013-01-01',
  8 |   to = '2014-01-01', adjust = T, auto.assign = FALSE)
  9 | 
 10 | coke <- getSymbols('COKE', from = '2013-01-01',
 11 |   to = '2014-01-01', adjust = T, auto.assign = FALSE)
 12 | 
 13 | Sys.setenv(TZ = "UTC")
 14 | 
 15 | prices <- cbind(pepsi[, 6], coke[, 6])
 16 | price_changes <- apply(prices, 2, diff)
 17 | plot(price_changes[, 1], price_changes[, 2],
 18 |   xlab = "Coke price changes",
 19 |   ylab = "Pepsi price changes",
 20 |   main = "Pepsi vs. Coke",
 21 |   cex.main = 0.8,
 22 |   cex.lab = 0.8,
 23 |   cex.axis = 0.8)
 24 | grid()
 25 | 
 26 | ans <- lm(price_changes[, 1] ~ price_changes[, 2])
 27 | beta <- ans$coefficients[2]
 28 | 
 29 | ans2 <- lm(price_changes[, 2] ~ price_changes[, 1])
 30 | beta2 <- ans2$coefficients[2]
 31 | 
 32 | beta
 33 | ## [1] 0.2614627
 34 | 
 35 | beta2
 36 | ## [1] 0.2539855
 37 | 
 38 | #####################################################
 39 | # Ordinary Least Squares versus Total Least Squares #
 40 | #####################################################
 41 | # Get the data
 42 | SPY <- getSymbols('SPY', from = '2011-01-01',
 43 |   to = '2012-12-31', adjust = T, auto.assign = FALSE)
 44 | 
 45 | AAPL <- getSymbols('AAPL', from = '2011-01-01',
 46 |   to = '2012-12-31', adjust = T, auto.assign = FALSE)
 47 | 
 48 | # Compute price differences
 49 | x <- diff(as.numeric(SPY[, 4]))
 50 | y <- diff(as.numeric(AAPL[, 4]))
 51 | 
 52 | plot(x, y, main = "Scatter plot of returns. SPY vs. AAPL",
 53 |   cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)
 54 | abline(lm(y ~ x))
 55 | abline(lm(x ~ y), lty = 2)
 56 | grid()
 57 | 
 58 | # Total least squares regression
 59 | r <- prcomp( ~ x + y )
 60 | slope <- r$rotation[2, 1]  /  r$rotation[1, 1]
 61 | intercept <- r$center[2] - slope * r$center[1]
 62 | 
 63 | # Show the first principle component on the plot
 64 | abline(a = intercept, b = slope, lty = 3)
 65 | 
 66 | ###########################
 67 | # Constructing the spread #
 68 | ###########################
 69 | # Function to calculate the spread
 70 |   calculate_spread <- function(x, y, beta) {
 71 |     return(y - beta * x)
 72 |   }
 73 | 
 74 | # Function to calculate the beta and level
 75 | # given start and end dates
 76 | calculate_beta_and_level <- function(x, y,
 77 |   start_date, end_date) {
 78 |   require(xts)
 79 | 
 80 |   time_range <- paste(start_date, "::",
 81 |     end_date, sep = "")
 82 |   x <- x[time_range]
 83 |   y <- y[time_range]
 84 | 
 85 |   dx <- diff(x[time_range])
 86 |   dy <- diff(y[time_range])
 87 |   r <- prcomp( ~ dx + dy)
 88 | 
 89 |   beta <- r$rotation[2, 1] / r$rotation[1, 1]
 90 |   spread <- calculate_spread(x, y, beta)
 91 |   names(spread) <- "spread"
 92 |   level <- mean(spread, na.rm = TRUE)
 93 | 
 94 |   outL <- list()
 95 |   outL$spread <- spread
 96 |   outL$beta <- beta
 97 |   outL$level <- level
 98 | 
 99 |   return(outL)
100 | }
101 | 
102 | # Function to calculate buy and sell signals
103 | # with upper and lower threshold
104 | calculate_buy_sell_signals <- function(spread, beta,
105 |   level, lower_threshold, upper_threshold) {
106 | 
107 |   buy_signals <- ifelse(spread <= level -
108 |    lower_threshold, 1, 0)
109 |   sell_signals <- ifelse(spread >= level +
110 |     upper_threshold, 1, 0)
111 | 
112 |   # bind these vectors into a matrix
113 |   output <- cbind(spread, buy_signals,
114 |     sell_signals)
115 |   colnames(output) <- c("spread", "buy_signals",
116 |     "sell_signals")
117 | 
118 |   return(output)
119 | }
120 | 
121 | # Implementation
122 | # Pick an in-sample date range
123 | start_date <- "2009-01-01"
124 | end_date <- "2011-12-31"
125 | x <- SPY[, 6]
126 | y <- AAPL[, 6]
127 | 
128 | results <- calculate_beta_and_level(x, y,
129 |   start_date, end_date)
130 | 
131 | results$beta
132 | ## [1] 4.923278
133 | 
134 | results$level
135 | ## [1] -239.0602
136 | 
137 | plot(results$spread, ylab = "Spread Value",
138 |   main = "AAPL - beta * SPY",
139 |   cex.main = 0.8,
140 |   cex.lab = 0.8,
141 |   cex.axis = 0.8)
142 | 
143 | # Out of sample start and end dates
144 | start_date_out_sample <- "2012-01-01"
145 | end_date_out_sample <- "2012-10-22"
146 | range <- paste(start_date_out_sample, "::",
147 |   end_date_out_sample, sep = "")
148 | 
149 | # Out of sample analysis
150 | spread_out_of_sample <- calculate_spread(x[range],
151 |   y[range], results$beta)
152 | 
153 | plot(spread_out_of_sample, main = "AAPL - beta * SPY",
154 |   cex.main = 0.8,
155 |   cex.lab = 0.8,
156 |   cex.axis = 0.8)
157 | abline(h = results$level, lwd = 2)
158 | 
159 | 
160 | ####################################
161 | # Signal generation and validation #
162 | ####################################
163 | # Rolling window of trading days
164 | window_length <- 10
165 | 
166 | # Time range
167 | start_date <- "2011-01-01"
168 | end_date <- "2011-12-31"
169 | range <- paste(start_date, "::",
170 |   end_date, sep = "")
171 | 
172 | # Our stock pair
173 | x <- SPY[range, 6]
174 | y <- AAPL[range, 6]
175 | 
176 | dF <- cbind(x, y)
177 | names(dF) <- c("x", "y")
178 | 
179 | # Function that we will use to calculate betas
180 | run_regression <- function(dF) {
181 |   return(coef(lm(y ~ x - 1, data = as.data.frame(dF))))
182 | }
183 | 
184 | rolling_beta <- function(z, width) {
185 |   rollapply(z, width = width, FUN = run_regression,
186 |   by.column = FALSE, align = "right")
187 | }
188 | 
189 | betas <- rolling_beta(diff(dF), 10)
190 | 
191 | data <- merge(betas, dF)
192 | data$spread <- data$y - lag(betas, 1) * data$x
193 | 
194 | returns <- diff(dF) / dF
195 | return_beta <- rolling_beta(returns, 10)
196 | data$spreadR <- diff(data$y) / data$y -
197 |   return_beta * diff(data$x) / data$x
198 | 
199 | tail(data)
200 | ##            betas    x      y      spread     spreadR
201 | ## 2011-12-22 2.770586 119.60 383.07 138.70795 -0.002322110
202 | ## 2011-12-23 3.094533 120.67 387.66  53.33343  0.003311904
203 | ## 2011-12-27 3.450416 120.76 390.74  17.04417  0.007083611
204 | ## 2011-12-28 3.364819 119.18 387.00 -24.22055  0.004194527
205 | ## 2011-12-29 3.004804 120.41 389.38 -15.77781 -0.003361064
206 | 
207 | threshold <- sd(data$spread, na.rm = TRUE)
208 | 
209 | threshold
210 | ## [1] 143.7734
211 | 
212 | plot(data$spread, main = "AAPL vs. SPY In-Sample",
213 |   cex.main = 0.8,
214 |   cex.lab = 0.8,
215 |   cex.axis = 0.8)
216 | abline(h = threshold, lty = 2)
217 | abline(h = -threshold, lty = 2)
218 | 
219 | 
220 | # Construct the out of sample spread
221 | # Keep the same 10 day rolling window
222 | window_length <- 10
223 | 
224 | # Time range
225 | start_date <- "2012-01-01"
226 | end_date <- "2013-12-31"
227 | range <- paste(start_date, "::",
228 |   end_date, sep = "")
229 | 
230 | # Our stock pair
231 | x <- SPY[range, 6]
232 | y <- AAPL[range, 6]
233 | 
234 | # Bind these together into a matrix
235 | dF <- cbind(x, y)
236 | names(dF) <- c("x", "y")
237 | 
238 | # Calculate the out of sample rolling beta
239 | beta_out_of_sample <- rolling_beta(diff(dF), 10)
240 | 
241 | # Buy and sell threshold
242 | data_out <- merge(beta_out_of_sample, dF)
243 | data_out$spread <- data_out$y -
244 |   lag(beta_out_of_sample, 1) * data_out$x
245 | 
246 | # Plot the spread with in-sample bands
247 | plot(data_out$spread, main = "AAPL vs. SPY out of sample",
248 |   cex.main = 0.8,
249 |   cex.lab = 0.8,
250 |   cex.axis = 0.8)
251 | abline(h = threshold, lwd = 2)
252 | abline(h = -threshold, lwd = 2)
253 | 
254 | # Generate sell and buy signals
255 | buys <- ifelse(data_out$spread > threshold, 1, 0)
256 | sells <- ifelse(data_out$spread < -threshold, -1, 0)
257 | data_out$signal <- buys + sells
258 | 
259 | plot(data_out$spread, main = "AAPL vs. SPY out of sample",
260 |  cex.main = 0.8,
261 |  cex.lab = 0.8,
262 |  cex.axis = 0.8)
263 | abline(h = threshold, lty = 2)
264 | abline(h = -threshold, lty = 2)
265 | 
266 | point_type <- rep(NA, nrow(data_out))
267 | buy_index <- which(data_out$signal == 1)
268 | sell_index <- which(data_out$signal == -1)
269 | 
270 | point_type[buy_index] <- 21
271 | point_type[sell_index] <- 24
272 | points(data_out$spread, pch = point_type)
273 | 
274 | num_of_buy_signals <- sum(buys, na.rm = TRUE)
275 | num_of_sell_signals <- sum(abs(sells), na.rm = TRUE)
276 | 
277 | num_of_buy_signals
278 | ## [1] 303
279 | num_of_sell_signals
280 | ## [1] 189
281 | 
282 | ######################
283 | # Trading the spread #
284 | ######################
285 | ##     beta_out_of_sample     x      y    spread signal
286 | ## 2011-01-13         NA 128.37 345.68        NA     NA
287 | ## 2011-01-14  1.7511157 129.30 348.48        NA     NA
288 | ## 2011-01-18  1.1630714 129.52 340.65 113.84550      1
289 | ## 2011-01-19  1.2803161 128.25 338.84 189.67609      1
290 | ## 2011-01-20  1.2286891 128.08 332.68 168.69711      1
291 | ## 2011-01-21  0.8045108 128.37 326.72 168.99319      1
292 | ## 2011-01-24  2.4936855 129.10 337.45 233.58766      1
293 | ## 2011-01-25  2.7762163 129.17 341.40  19.29065      0
294 | ## 2011-01-26  3.0802946 129.67 343.85 -16.14196      0
295 | 
296 | prev_x_qty <- 0
297 | position <- 0
298 | trade_size <- 100
299 | signal <- as.numeric(data_out$signal)
300 | signal[is.na(signal)] <- 0
301 | beta <- as.numeric(data_out$beta_out_of_sample)
302 | 
303 | qty_x <- rep(0, length(signal))
304 | qty_y <- rep(0, length(signal))
305 | 
306 | for(i in 1:length(signal)) {
307 |   if(signal[i] == 1 && position == 0) {
308 |     # buy the spread
309 |     prev_x_qty <- round(beta[i] * trade_size)
310 |     qty_x[i] <- -prev_x_qty
311 |     qty_y[i] <- trade_size
312 |     position <- 1
313 |   }
314 | 
315 |   if(signal[i] == -1 && position == 0) {
316 |     # sell the spread initially
317 |     prev_x_qty <- round(beta[i] * trade_size)
318 |     qty_x[i] <- prev_x_qty
319 |     qty_y[i] <- -trade_size
320 |     position <- -1
321 |   }
322 | 
323 |   if(signal[i] == 1 && position == -1) {
324 |   # we are short the spread and need to buy
325 |   qty_x[i] <- -(round(beta[i] * trade_size) +
326 |     prev_x_qty)
327 |   prev_x_qty <- round(beta[i] * trade_size)
328 |   qty_y[i] <- 2 * trade_size
329 |   position <- 1
330 |   }
331 | 
332 |   if(signal[i] == -1 && position == 1) {
333 |     # we are long the spread and need to sell
334 |     qty_x[i] <- round(beta[i] * trade_size) + prev_x_qty
335 |     prev_x_qty <- round(beta[i] * trade_size)
336 |     qty_y[i] <- -2 * trade_size
337 |     position <- -1
338 |   }
339 | }
340 | 
341 | qty_x[length(qty_x)] <- -sum(qty_x)
342 | qty_y[length(qty_y)] <- -sum(qty_y)
343 | 
344 | data_out$qty_x <- qty_x
345 | data_out$qty_y <- qty_y
346 | 
347 | data_out[1:3, ]
348 | ##  beta_out_of_sample        x      y  spread signal qty_x qty_y
349 | ## 2012-01-17  2.1511279 123.48 408.20     NA     NA    0     0
350 | ## 2012-01-18  2.5890817 124.85 412.44  143.87168  1 -259   100
351 | ## 2012-01-19  2.0711505 125.51 411.13   86.17435  0    0     0
352 | 
353 | tail(data_out, 3)
354 | ##   beta_out_of_sample       x      y  spread signal qty_x qty_y
355 | ## 2012-12-27  6.5051194 138.15 499.45 -404.90307 -1    0     0
356 | ## 2012-12-28  5.6770827 136.66 494.14 -394.84962 -1    0     0
357 | ## 2012-12-31  6.3934172 138.98 516.04 -272.96095 -1 -668   100
358 | 
359 | # function for computing the equity curve
360 | compute_equity_curve <- function(qty, price) {
361 | 
362 |   cash_buy <- ifelse(sign(qty) == 1,
363 |     qty * price, 0)
364 |   cash_sell <- ifelse(sign(qty) == -1,
365 |     -qty * price, 0)
366 |   position <- cumsum(qty)
367 |   cumulative_buy <- cumsum(cash_buy)
368 |   cumulative_sell <- cumsum(cash_sell)
369 | 
370 |   equity <- cumulative_sell - cumulative_buy +
371 |     position * price
372 |   return(equity)
373 | }
374 | 
375 | # Add the equity curve columns to the data_out table
376 | data_out$equity_curve_x <- compute_equity_curve(
377 |   data_out$qty_x, data_out$x)
378 | data_out$equity_curve_y <- compute_equity_curve(
379 |   data_out$qty_y, data_out$y)
380 | 
381 | plot(data_out$equity_curve_x +
382 |   data_out$equity_curve_y, type = 'l',
383 |   main = "AAPL / SPY spread", ylab = "P&L",
384 |   cex.main = 0.8,
385 |   cex.axis = 0.8,
386 |   cex.lab = 0.8)
387 | 
388 | ############################
389 | # More on the equity curve #
390 | ############################
391 | # Calculates the Sharpe ratio
392 | sharpe_ratio <- function(x, rf) {
393 |   sharpe <- (mean(x, na.rm = TRUE) - rf) /
394 |     sd(x, na.rm = TRUE)
395 |   return(sharpe)
396 | }
397 | 
398 | # Calculates the maximum drawdown profile
399 | drawdown <- function(x) {
400 |   cummax(x) - x
401 | }
402 | 
403 | par(mfrow = c(2, 1))
404 | equity_curve <- data_out$equity_curve_x + data_out$equity_curve_y
405 | 
406 | plot(equity_curve, main = "Equity Curve",
407 |   cex.main = 0.8,
408 |   cex.lab = 0.8,
409 |   cex.axis = 0.8)
410 | 
411 | plot(drawdown(equity_curve), main = "Drawdown of equity curve",
412 |  cex.main = 0.8,
413 |  cex.lab = 0.8,
414 |  cex.axis = 0.8)
415 | 
416 | equity <- as.numeric(equity_curve[, 1])
417 | equity_curve_returns <- diff(equity) / equity[-length(equity)]
418 | 
419 | # Remove any infinities and NaN
420 | invalid_values <- is.infinite(equity_curve_returns)
421 |   | is.nan(equity_curve_returns)
422 | 
423 | sharpe_ratio(equity_curve_returns[!invalid_values], 0.03)
424 | [1] 0.0658528
425 | 
426 | omega_ratio <- function(r, T) {
427 |   omega <- mean(pmax(r - T, 0)) / mean(pmax(T - r, 0))
428 |   return(omega)
429 | }
430 | 
431 | #######################
432 | # Strategy attributes #
433 | #######################
434 | # Find out where the trades occur
435 | trade_dates <- data_out$qty_x[data_out$qty_x != 0]
436 | 
437 | # The trade_dates object is an xts object whose index
438 | # contains the necessary time information
439 | duration <- as.numeric(diff(index(trade_dates)))
440 | 
441 | # Summary statistics
442 | summary(duration)
443 | ## Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
444 | ## 1.00   13.50   21.00   31.84   44.00  128.00
445 | 
446 | # Histogram of trade duration
447 | hist(duration, breaks = 20,
448 |   main = "Histogram of trade durations",
449 |   cex.main = 0.8,
450 |   cex.lab = 0.8,
451 |   cex.axis = 0.8)
452 | 
453 | 
454 | 
455 | 
456 | 
457 | 
458 | 
459 | 
460 | 
461 | 
462 | 
463 | 
464 | 
465 | 
466 | 
467 | 
468 | 
469 | 
470 | 
471 | 
472 | 
473 | 
474 | 
475 | 
476 | 
477 | 
478 | 
479 | 
480 | 
481 | 
482 | 
483 | 
484 | 
485 | 
486 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_03/code.R:
--------------------------------------------------------------------------------
  1 | # CHAPTER 3
  2 | # Working with Data
  3 | 
  4 | #######################
  5 | # Getting data into R #
  6 | #######################
  7 | # In Windows
  8 | aapl <- read.table("clipboard")
  9 | 
 10 | # On Mac/Linux
 11 | aapl <- read.table(pipe("pbpaste"))
 12 | 
 13 | head(aapl)
 14 | ##     V1
 15 | ## 1 104.08
 16 | ## 2 110.26
 17 | ## 3  96.80
 18 | ## 4  88.74
 19 | ## 5  89.79
 20 | ## 6  89.16
 21 | 
 22 | class(aapl)
 23 | ## [1] "data.frame"
 24 | 
 25 | aapl <- aapl[rev(rownames(aapl)), , drop = FALSE]
 26 | 
 27 | prices <- aapl$V1
 28 | plot(prices, main = "AAPL plot", type = 'l')
 29 | 
 30 | # Load the .csv file
 31 | aapl_2 <- read.csv(file = "Chapter_03/aapl.csv", header = TRUE, stringsAsFactors = FALSE)
 32 | 
 33 | # Reverse the entries
 34 | aapl_2 <- aapl_2[rev(rownames(aapl_2)), ]
 35 | 
 36 | aapl_close <- aapl_2[, "Close"]
 37 | 
 38 | summary(aapl_close)
 39 | ## Min.   1st Qu. Median   Mean   3rd Qu.  Max.
 40 | ## 12.94   24.69   38.13   46.80   53.61  199.80
 41 | 
 42 | ############################
 43 | # Installing packages in R #
 44 | ############################
 45 | install.packages(pkgs, lib, repos = getOption("repos"),
 46 |   contriburl = contrib.url(repos, type),
 47 |   method, available = NULL, destdir = NULL,
 48 |   dependencies = NA, type = getOption("pkgType"),
 49 |   configure.args = getOption("configure.args"),
 50 |   configure.vars = getOption("configure.vars"),
 51 |   clean = FALSE, Ncpus = getOption("Ncpus", 1L),
 52 |   verbose = getOption("verbose"),
 53 |   libs_only = FALSE, INSTALL_opts, quiet = FALSE,
 54 |   keep_outputs = FALSE, ...)
 55 | 
 56 | #################################
 57 | # Storing and transmitting data #
 58 | #################################
 59 | {
 60 |   "CVX":
 61 |   {
 62 |     "Currency": "USD",
 63 |     "Sector": "Basic Materials",
 64 |     "Industry": "Major Integrated Oil & Gas"
 65 |   },
 66 |   "GOOG":
 67 |   {
 68 |     "Currency": "USD",
 69 |     "Sector": "Technology",
 70 |     "Industry": "Internet Information Providers"
 71 |   } 
 72 | }
 73 | 
 74 | # Install and load the package
 75 | install.packages("RJSONIO")
 76 | library(RJSONIO)
 77 | 
 78 | # Read the file
 79 | out <- fromJSON(content = "Chapter_03/sample_json_file.json" )
 80 | 
 81 | # Look at the structure of the resulting object
 82 | str(out)
 83 | ## List of 2
 84 | ## $ CVX : Named chr [1:3] "USD" "Basic Materials...
 85 | ##  ..- attr(*, "names")= chr [1:3] "Currency"...
 86 | ## $ GOOG: Named chr [1:3] "USD" "Technology"...
 87 | ##  ..- attr(*, "names")= chr [1:3] "Currency"...
 88 | 
 89 | write.csv(aapl_2, file = "Chapter_03/aapl_2.csv")
 90 | 
 91 | save(aapl_2, file = "Chapter_03/aapl_2.rdata")
 92 | 
 93 | aapl_old <- aapl_2
 94 | rm(aapl_2)
 95 | load(file = "Chapter_03/aapl_2.rdata")
 96 | 
 97 | identical(aapl_old, aapl_2)
 98 | ## [1] TRUE
 99 | 
100 | ######################################
101 | # Extracting data from a spreadsheet #
102 | ######################################
103 | library(XLConnect)
104 | # Create a workbook object
105 | # Needs Java 6 runtime as a default
106 | 
107 | book <- loadWorkbook("Chapter_03/strategy.xlsx")
108 | 
109 | # Convert it into a data frame
110 | signals <- readWorksheet(book, sheet = "signals", header = TRUE)
111 | 
112 | signals
113 | ##     time signal1 signal2
114 | ## 1 08:30:00  0.43   -0.20
115 | ## 2 08:31:00  0.54    0.33
116 | ## 3 08:32:00  0.32   -0.21
117 | 
118 | strength <- readWorksheet(book, sheet = "strength", header = TRUE)
119 | 
120 | strength
121 | ## intensity score
122 | ## 1 2 7.5
123 | ## 2 3 8.4
124 | ## 3 6 5.4
125 | 
126 | # Setup a new spreadsheet
127 | book <- loadWorkbook("demo_sheet.xlsx", create = TRUE)
128 | 
129 | # Create a sheet called stock1
130 | createSheet(book, name = "stock1")
131 | 
132 | # Creating a sheet called stock2
133 | createSheet(book, name = "stock2")
134 | 
135 | # Load data into workbook
136 | df <- data.frame(a = c(1, 2, 3), b = c(4, 5, 6))
137 | writeWorksheet (book, data=df, sheet="stock1", header = TRUE)
138 | 
139 | # Save the workbook
140 | saveWorkbook(book, file = "Chapter_03/demo_sheet.xlsx")
141 | 
142 | ########################
143 | # Accessing a database #
144 | ########################
145 | # Load the RODBC package
146 | require(RODBC)
147 | 
148 | # Establish a connection to MySQL
149 | con <- odbcConnect("rfortraders")
150 | 
151 | # Choose the database name and table name
152 | database_name <- "OptionsData"
153 | table_name <- "ATMVolatilities"
154 | symbol <- "SPY"
155 | 
156 | sql_command <- paste0("SELECT Symbol, Date, Maturity,
157 |   Delta, CallPut, ImpliedVolatility
158 |   FROM ", database_name, ".", table_name,
159 |   " WHERE Maturity = 91
160 |   AND Symbol IN ('", symbol, "');")
161 | 
162 | iv <- sqlQuery(con, sql_command)
163 | 
164 | # disconnect from database
165 | odbcClose(con)
166 | 
167 | head(iv)
168 | ## Symbol Date Maturity Delta CallPut ImpliedVolatility
169 | ## SPY 6/9/2014       91    55       C          0.115925
170 | ## SPY 6/9/2014       91    60       C          0.119577
171 | ## SPY 6/9/2014       91    65       C          0.123468
172 | ## SPY 6/9/2014       91    70       C          0.127629
173 | ## SPY 6/9/2014       91    75       C          0.132094
174 | ## SPY 6/9/2014       91    80       C          0.136776
175 | 
176 | 
177 | # Load the necessary package
178 | require(RMySQL)
179 | 
180 | # Establish a connection
181 | con <- dbConnect(MySQL(),
182 |   user="your_login",
183 |   password="your_password",
184 |   dbname="OptionsData",
185 |   host="location_of_database")
186 | 
187 | # List the tables and fields
188 | dbListTables(con)
189 | 
190 | # Define the command and extract a data frame
191 | sql_command <- paste0("SELECT Symbol, Date, Maturity,
192 |   Delta, CallPut, ImpliedVolatility FROM ",
193 |   database_name, ".", table_name,
194 |   " WHERE Maturity = 91
195 |   AND Symbol IN ('", symbol, "');")
196 | 
197 | result <- dbGetQuery(con, sql_command)
198 | 
199 | # Close the connection
200 | dbDisconnect(con)
201 | 
202 | results <- dbSendQuery(con, sql_command)
203 | partial_results <- fetch(results, n = 100)
204 | 
205 | #####################
206 | # The dplyr package #
207 | #####################
208 | # Get the CRAN version
209 | install.packages("dplyr")
210 | require(dplyr)
211 | 
212 | # Or, first load devtools
213 | install.packages("devtools")
214 | require(devtools)
215 | 
216 | # Get the github version
217 | devtools::install_github("hadley/dplyr")
218 | require(dplyr)
219 | 
220 | # Load the flight database that comes with dplyr
221 | library(hflights)
222 | 
223 | # Look at number of rows and columns
224 | dim(hflights)
225 | ## [1] 227496     21
226 | 
227 | # First, coerce the data into a data.table
228 | flights_dt <- tbl_dt(hflights)
229 | 
230 | # What type of object is this?
231 | class(flights_dt)
232 | ## [1] "tbl_dt"  "tbl"   "data.table"  "data.frame"
233 | 
234 | # Create a grouping by carrier
235 | carrier_group <- group_by(flights_dt, UniqueCarrier)
236 | 
237 | # Now compute the summary statistics
238 | summarise(carrier_group, avg_delay = mean(ArrDelay, na.rm = TRUE))
239 | 
240 | # load the library xts
241 | library(xts)
242 | 
243 | # Load a small dataset that comes along with xts.
244 | # We could have used our original .csv file as well.
245 | data(sample_matrix)
246 | 
247 | # Look at the data
248 | head(sample_matrix)
249 | ## [1] "matrix"
250 | 
251 | # What is the type of this object?
252 | class(sample_matrix)
253 | ## [1] "matrix"
254 | 
255 | # Use the str() command to get more details about this object.
256 | str(sample_matrix)
257 | ## num [1:180, 1:4] 50 50.2 50.4 50.4 50.2 ...
258 | ## - attr(*, "dimnames")=List of 2
259 | ## ..$ : chr [1:180] "2007-01-02" "2007-01-03"
260 | ## "2007-01-04" "2007-01-05" ...
261 | ## ..$ : chr [1:4] "Open" "High" "Low" "Close"
262 | 
263 | xts_matrix <- as.xts(sample_matrix, descr = 'my new xts object')
264 | 
265 | str(xts_matrix}
266 | ## An 'xts' object on 2007-01-02/2007-06-30 containing: ## Data: num [1:180, 1:4] 50 50.2 50.4 50.4 50.2 ...
267 | ## - attr(*, "dimnames")=List of 2
268 | ## ..$ : NULL
269 | ## ..$ : chr [1:4] "Open" "High" "Low" "Close"
270 | ## Indexed by objects of class: [POSIXct,POSIXt] TZ: ## xts Attributes:
271 | ## List of 3
272 | ## $ tclass: chr [1:2] "POSIXct" "POSIXt"
273 | ## $tzone:chr""
274 | ## $ descr : chr "my new xts object"
275 | 
276 | # Simple plot
277 | plot(xts_matrix[,1], main = "Our first xts plot",
278 |  cex.main = 0.8)
279 | 
280 | # Or we can try something fancier.
281 | plot(xts_matrix, main = "Candle plot on xts object",
282 |  cex.main = 0.8, type = "candles")
283 | 
284 | plot(xts_matrix["2007-01-01::2007-02-12"],
285 |   main = "An xts candle plot with subsetting",
286 |   cex.main = 0.8, type = "candles")
287 | 
288 | range <- "2007-03-15::2007-06-15"
289 |   plot(xts_matrix(range))
290 | 
291 | start_date <- "2007-05-05"
292 | end_date <- "2007-12-31"
293 | 
294 | plot(xts_matrix[paste(start_date, "::",
295 |   end_date, sep = "")])
296 | 
297 | # Defaults to space separator
298 | paste("Hello", "World", "in R")
299 | ## [1] "Hello World in R"
300 | 
301 | paste("Hello", "Again", sep = "**")
302 | ## [1] "Hello**Again"
303 | 
304 | paste(c(1,2,3,4,5), collapse = "oooo")
305 | ## [1] "1oooo2oooo3oooo4oooo5"
306 | 
307 | # Create a vector of 10 fictitious stock prices along with
308 | # a time index in microsecond resolution.
309 | price_vector <- c(101.02, 101.03, 101.03, 101.04, 101.05,
310 |   101.03, 101.02, 101.01, 101.00, 100.99)
311 | 
312 | dates <- c("03/12/2013 08:00:00.532123",
313 |   "03/12/2013 08:00:01.982333",
314 |   "03/12/2013 08:00:01.650321",
315 |   "03/12/2013 08:00:02.402321",
316 |   "03/12/2013 08:00:02.540432",
317 |   "03/12/2013 08:00:03.004554",
318 |   "03/12/2013 08:00:03.900213",
319 |   "03/12/2013 08:00:04.050323",
320 |   "03/12/2013 08:00:04.430345",
321 |   "03/12/2013 08:00:05.700123")
322 | 
323 | # Allow the R console to display the microsecond field
324 | options(digits.secs = 6)
325 | 
326 | # Create the time index with the correct format
327 | time_index <- strptime(dates, format = "%d/%m/%Y %H:%M:%OS")
328 | 
329 | # Pass the time index into the its object
330 | xts_price_vector <- xts(price_vector, time_index)
331 | 
332 | # Plot the price of the fictitious stock
333 | plot(xts_price_vector, main = "Fictitious price series",
334 |   cex.main = 0.8)
335 | 
336 | # Add a horizontal line where the mean value is
337 | abline(h = mean(xts_price_vector), lwd = 2)
338 | 
339 | # Add a vertical blue line at a specified time stamp
340 | my_time <- as.POSIXct("03/12/2013 08:00:03.004554",
341 |   format = "%d/%m/%Y %H:%M:%OS")
342 | 
343 | abline(v = my_time, lwd = 2, lty = 2)
344 | 
345 | es_price <- c(1700.00, 1700.25, 1700.50, 1700.00, 1700.75,
346 |     1701.25, 1701.25, 1701.25, 1700.75, 1700.50)
347 | 
348 | es_time  <- c("09/12/2013 08:00:00.532123",
349 |   "09/12/2013 08:00:01.982333",
350 |   "09/12/2013 08:00:05.650321",
351 |   "09/12/2013 08:10:02.402321",
352 |   "09/12/2013 08:12:02.540432",
353 |   "09/12/2013 08:12:03.004554",
354 |   "09/12/2013 08:14:03.900213",
355 |   "09/12/2013 08:15:07.090323",
356 |   "09/12/2013 08:16:04.430345",
357 |   "09/12/2013 08:18:05.700123")
358 | 
359 | # create an xts time series object
360 | xts_es <- xts(es_price, as.POSIXct(es_time,
361 |   format = "%d/%m/%Y %H:%M:%OS"))
362 | 
363 | names(xts_es) <- c("price")
364 | 
365 | time_diff <- difftime(index(xts_es)[2], index(xts_es)[1],
366 |   units = "secs")
367 | 
368 | time_diff
369 | ## Time difference of 1.45021 secs
370 | 
371 | diffs <- c()
372 | for(i in 2:length(index(xts_es))) {
373 |   diffs[i] <- difftime(index(xts_es)[i], index(xts_es)[i - 1],
374 |     units = "secs")
375 | }
376 | 
377 | diffs <- index(xts_es)[-1] - index(xts_es)[-length(index(xts_es))]
378 | 
379 | diffs
380 | ## Time differences in secs
381 | ## [1]   1.4502099   3.6679881 596.7520001
382 | ## [4] 120.1381109   0.4641221 120.8956590
383 | ## [7]  63.1901100  57.3400221 121.2697780
384 | ## attr(,"tzone")
385 | 
386 | class(diffs)
387 | ## [1] "difftime"
388 | 
389 | es_times <- index(xts_es)
390 | diffs <- es_times[-1] - es_times[-length(es_times)]
391 | 
392 | diffs
393 | ## Time differences in secs
394 | ## [1]   1.4502099   3.6679881 596.7520001
395 | ## [4] 120.1381109   0.4641221 120.8956590
396 | ## [7]  63.1901100  57.3400221 121.2697780
397 | ## attr(,"tzone")
398 | 
399 | par(mfrow = c(2, 1))
400 | diffs <- as.numeric(diffs)
401 | plot(diffs, main = "Time difference in seconds for ES trades",
402 |   xlab = "", ylab = "Time differences",
403 |   cex.lab = 0.8,
404 |   cex.main = 0.8)
405 | grid()
406 | 
407 | hist(diffs, main = "Time difference in seconds for ES trades",
408 |   xlab = "Time difference (secs)", ylab = "Observations",
409 |   breaks = 20,
410 |   cex.lab = 0.8,
411 |   cex.main = 0.8)
412 | grid()
413 | 
414 | ##############################
415 | # Using the quantmod package #
416 | ##############################
417 | # Load the quantmod packages after installing it locally.
418 | library(quantmod)
419 | 
420 | AAPL <- getSymbols("AAPL", auto.assign=FALSE)
421 | head(AAPL)
422 | 
423 | ##########################
424 | # Charting with quantmod #
425 | ##########################
426 | # Adding some technical indicators on top of the original plot
427 | chartSeries(AAPL, subset='2010::2010-04',
428 |   theme = chartTheme('white'),
429 |   TA = "addVo(); addBBands()")
430 | 
431 | reChart(subset='2009-01-01::2009-03-03')
432 | 
433 | chartSeries(AAPL, subset='2011::2012',
434 |   theme = chartTheme('white'),
435 |   TA = "addBBands(); addDEMA()")
436 | 
437 | addVo()
438 | addDPO()
439 | 
440 | # Initial chart plot with no indicators
441 | chartSeries(AAPL, theme = chartTheme('white'), TA = NULL)
442 | 
443 | # Custom function creation
444 | my_indicator <- function(x) {
445 |    return(x + 90)
446 | }
447 | 
448 | add_my_indicator <- newTA(FUN = my_indicator, preFUN=Cl,
449 |   legend.name = "My Fancy Indicator", on = 1)
450 | 
451 | add_my_indicator()
452 | 
453 | #########################
454 | # Graphing wiht ggplot2 #
455 | #########################
456 | # Create a matrix with price and volume
457 | df <- AAPL[, c("AAPL.Adjusted", "AAPL.Volume")]
458 | names(df) <- c("price", "volume")
459 | 
460 | # Create
461 | df$return <- diff(log(df[, 1]))
462 | df <- df[-1, ]
463 | 
464 | df$cuts <- cut(abs(df$return),
465 |   breaks = c(0, 0.02, 0.04, 0.25),
466 |   include.lowest = TRUE)
467 | 
468 | # Create another column for the mean
469 | df$means <- NA
470 | for(i in 1:3) {
471 |   group <- which(df$cuts == i)
472 |   if(length(group) > 0) {
473 |     df$means[group] <- mean(df$volume[group])
474 |   }
475 | }
476 | 
477 | # Load ggplot2
478 | library(ggplot2)
479 | 
480 | ggplot(df) +
481 | geom_histogram(aes(x=volume)) +
482 | facet_grid(cuts ~ .) +
483 | geom_vline(aes(xintercept=means), linetype="dashed", size=1)
484 | 
485 | 
486 | 


--------------------------------------------------------------------------------
/bar_charts/bar_charts.R:
--------------------------------------------------------------------------------
  1 | rm(list=ls(all=TRUE))
  2 | setwd("~/Desktop/ggplot2/bar_charts")
  3 | 
  4 | library(quantmod)
  5 | library(ggplot2)
  6 | library(gtable)
  7 | library(scales)
  8 | library(grid)
  9 | 
 10 | readData <- read.csv('GCQ 2014-07-03.csv',stringsAsFactors=FALSE,header=TRUE,sep=',')
 11 | revData <- apply(readData,2,rev)
 12 | dfData <- data.frame(readData)
 13 | xtsData <- xts(dfData[,2:5],as.Date(dfData[,1]))
 14 | 
 15 | 
 16 | #2013-12-26
 17 | subsetDF<-xtsData["2013-12-18/"] 
 18 | plotDF<- data.frame(subsetDF[,1:4])
 19 | plotDF$xaxis <- as.Date(index(subsetDF))
 20 | 
 21 | 
 22 | # Y-Axis 
 23 | plotMIN <- min(plotDF[,1:4], na.rm=TRUE)
 24 | plotMAX <- max(plotDF[,1:4], na.rm=TRUE)
 25 | 
 26 | minDigits <- nchar(round(plotMIN))-1
 27 | maxDigits <- nchar(round(plotMAX))-1
 28 | 
 29 | evenMIN <- as.numeric(paste(substring(as.character(plotMIN*100), 1, minDigits),0,sep=""))
 30 | evenMAX <- as.numeric(paste(substring(as.character(plotMAX*100), 1, maxDigits),0,sep="")) 
 31 | 
 32 | roundUpNice <- function(x, nice=c(1,2,4,5,6,8,10)) {
 33 |   if(length(x) != 1) stop("'x' must be of length 1")
 34 |   10^floor(log10(x)) * nice[[which(x <= 10^floor(log10(x)) * nice)[[1]]]]
 35 | }
 36 | 
 37 | niceLimit <- roundUpNice(evenMAX)/100
 38 | ySequence <- seq(evenMIN-niceLimit,evenMAX+niceLimit,by=niceLimit)
 39 | #ySequence <- c(1700,1750,1800,1850,1900,1950)
 40 | 
 41 | # X-Axis
 42 | date1 <- first(index(subsetDF))
 43 | date2 <- last(index(subsetDF))
 44 | dateLength <- length(seq(from=date1, to=date2, by='month'))*2 # -1 for complete months
 45 | 
 46 | xSequence <- seq(2,dateLength-1,2)
 47 | 
 48 | 
 49 | ###############################################################################################
 50 | # CQG
 51 | ###############################################################################################
 52 | # Draw OHLC Bars
 53 | p <- ggplot(plotDF)
 54 | p <- p+geom_segment(aes(x=xaxis,xend=xaxis,y=plotDF[,3],yend=plotDF[,2]),size=.80)
 55 | p <- p+geom_segment(aes(x=xaxis-0.4,xend=xaxis,y=plotDF[,1],yend=plotDF[,1]),size=.90)
 56 | p <- p+geom_segment(aes(x=xaxis,xend=xaxis+0.4,y=plotDF[,4],yend=plotDF[,4]),size=.90)
 57 | # -------------------------------------------------------------------------------#
 58 | # Setup 1
 59 | setup1 <- which(rownames(plotDF) == "2014-02-18")
 60 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1,])), y = plotDF[setup1,"high"]+5, label = "9", colour='blue',size=6)
 61 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-1,])), y = plotDF[setup1-1,"high"]+3, label = "8", colour='blue',size=4)
 62 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-2,])), y = plotDF[setup1-2,"high"]+3, label = "7", colour='blue',size=4)
 63 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-3,])), y = plotDF[setup1-3,"high"]+3, label = "6", colour='blue',size=4)
 64 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-4,])), y = plotDF[setup1-4,"high"]+3, label = "5", colour='blue',size=4)
 65 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-5,])), y = plotDF[setup1-5,"high"]+3, label = "4", colour='blue',size=4)
 66 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-6,])), y = plotDF[setup1-6,"high"]+3, label = "3", colour='blue',size=4)
 67 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-7,])), y = plotDF[setup1-7,"high"]+3, label = "2", colour='blue',size=4)
 68 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup1-8,])), y = plotDF[setup1-8,"high"]+3, label = "1", colour='blue',size=4)
 69 | 
 70 | # Setup 2
 71 | setup2<- which(rownames(plotDF) == "2014-03-28")
 72 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2,])), y = plotDF[setup2,"low"]-5, label = "9", colour='blue',size=6)
 73 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-1,])), y = plotDF[setup2-1,"low"]-3, label = "8", colour='blue',size=4)
 74 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-2,])), y = plotDF[setup2-2,"low"]-3, label = "7", colour='blue',size=4)
 75 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-3,])), y = plotDF[setup2-3,"low"]-3, label = "6", colour='blue',size=4)
 76 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-4,])), y = plotDF[setup2-4,"low"]-3, label = "5", colour='blue',size=4)
 77 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-5,])), y = plotDF[setup2-5,"low"]-3, label = "4", colour='blue',size=4)
 78 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-6,])), y = plotDF[setup2-6,"low"]-3, label = "3", colour='blue',size=4)
 79 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-7,])), y = plotDF[setup2-7,"low"]-3, label = "2", colour='blue',size=4)
 80 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup2-8,])), y = plotDF[setup2-8,"low"]-3, label = "1", colour='blue',size=4)
 81 | 
 82 | # Setup 3
 83 | setup3 <- which(rownames(plotDF) == "2014-06-17")
 84 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3,])), y = plotDF[setup3,"high"]+5, label = "9", colour='blue',size=6)
 85 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-1,])), y = plotDF[setup3-1,"high"]+3, label = "8", colour='blue',size=4)
 86 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-2,])), y = plotDF[setup3-2,"high"]+3, label = "7", colour='blue',size=4)
 87 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-3,])), y = plotDF[setup3-3,"high"]+3, label = "6", colour='blue',size=4)
 88 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-4,])), y = plotDF[setup3-4,"high"]+3, label = "5", colour='blue',size=4)
 89 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-5,])), y = plotDF[setup3-5,"high"]+3, label = "4", colour='blue',size=4)
 90 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-6,])), y = plotDF[setup3-6,"high"]+3, label = "3", colour='blue',size=4)
 91 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-7,])), y = plotDF[setup3-7,"high"]+3, label = "2", colour='blue',size=4)
 92 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup3-8,])), y = plotDF[setup3-8,"high"]+3, label = "1", colour='blue',size=4)
 93 | 
 94 | # Setup 4
 95 | setup4 <- which(rownames(plotDF) == "2014-07-01")
 96 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4,])), y = plotDF[setup4,"high"]+5, label = "9", colour='blue',size=6)
 97 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-1,])), y = plotDF[setup4-1,"high"]+3, label = "8", colour='blue',size=4)
 98 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-2,])), y = plotDF[setup4-2,"high"]+3, label = "7", colour='blue',size=4)
 99 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-3,])), y = plotDF[setup4-3,"high"]+3, label = "6", colour='blue',size=4)
100 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-4,])), y = plotDF[setup4-4,"high"]+3, label = "5", colour='blue',size=4)
101 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-5,])), y = plotDF[setup4-5,"high"]+3, label = "4", colour='blue',size=4)
102 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-6,])), y = plotDF[setup4-6,"high"]+3, label = "3", colour='blue',size=4)
103 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-7,])), y = plotDF[setup4-7,"high"]+3, label = "2", colour='blue',size=4)
104 | p <- p+annotate("text", x = as.Date(rownames(plotDF[setup4-8,])), y = plotDF[setup4-8,"high"]+3, label = "1", colour='blue',size=4)
105 | 
106 | # 13 in March and May
107 | countdown1 <- which(rownames(plotDF) == "2014-05-30")
108 | countdown2 <- which(rownames(plotDF) == "2014-03-14")
109 | countdown3 <- which(rownames(plotDF) == "2013-12-20")
110 | p <- p+annotate("text", x = as.Date(rownames(plotDF[countdown1,])), y = plotDF[countdown1,"low"]-(niceLimit/2), label = "13", colour='magenta',size=18)
111 | p <- p+annotate("text", x = as.Date(rownames(plotDF[countdown2,])), y = plotDF[countdown2,"high"]+(niceLimit/2), label = "13", colour='magenta',size=18)
112 | p <- p+annotate("text", x = as.Date(rownames(plotDF[countdown3,])), y = plotDF[countdown3,"low"]-(niceLimit/2), label = "13", colour='magenta',size=18)
113 | 
114 | # Countdown 1
115 | p <- p+annotate("text", x = as.Date("2014-03-13"), y = plotDF["2014-03-13","high"]+3, label = "12", colour='red',size=4)
116 | p <- p+annotate("text", x = as.Date("2014-03-12"), y = plotDF["2014-03-12","high"]+3, label = "11", colour='red',size=4)
117 | p <- p+annotate("text", x = as.Date("2014-03-06"), y = plotDF["2014-03-06","high"]+3, label = "10", colour='red',size=4)
118 | p <- p+annotate("text", x = as.Date("2014-03-03"), y = plotDF["2014-03-03","high"]+3, label = "9", colour='red',size=4)
119 | p <- p+annotate("text", x = as.Date("2014-02-25"), y = plotDF["2014-02-25","high"]+3, label = "8", colour='red',size=4)
120 | p <- p+annotate("text", x = as.Date("2014-02-24"), y = plotDF["2014-02-24","high"]+3, label = "7", colour='red',size=4)
121 | p <- p+annotate("text", x = as.Date("2014-02-18"), y = plotDF["2014-02-18","high"]+10, label = "6", colour='red',size=4)
122 | p <- p+annotate("text", x = as.Date("2014-02-14"), y = plotDF["2014-02-14","high"]+8, label = "5", colour='red',size=4)
123 | p <- p+annotate("text", x = as.Date("2014-02-13"), y = plotDF["2014-02-13","high"]+8, label = "4", colour='red',size=4)
124 | p <- p+annotate("text", x = as.Date("2014-02-12"), y = plotDF["2014-02-12","high"]+8, label = "3", colour='red',size=4)
125 | p <- p+annotate("text", x = as.Date("2014-02-11"), y = plotDF["2014-02-11","high"]+8, label = "2", colour='red',size=4)
126 | p <- p+annotate("text", x = as.Date("2014-02-10"), y = plotDF["2014-02-10","high"]+8, label = "1", colour='red',size=4)
127 | 
128 | # Countdown 2
129 | p <- p+annotate("text", x = as.Date("2014-05-29"), y = plotDF["2014-05-29","low"]-2, label = "12", colour='red',size=4)
130 | p <- p+annotate("text", x = as.Date("2014-05-28"), y = plotDF["2014-05-28","low"]-2, label = "11", colour='red',size=4)
131 | p <- p+annotate("text", x = as.Date("2014-05-27"), y = plotDF["2014-05-27","low"]-2, label = "10", colour='red',size=4)
132 | p <- p+annotate("text", x = as.Date("2014-04-01"), y = plotDF["2014-04-01","low"]-2, label = "9", colour='red',size=4)
133 | p <- p+annotate("text", x = as.Date("2014-03-31"), y = plotDF["2014-03-31","low"]-2, label = "8", colour='red',size=4)
134 | p <- p+annotate("text", x = as.Date("2014-03-28"), y = plotDF["2014-03-28","low"]-10, label = "7", colour='red',size=4)
135 | p <- p+annotate("text", x = as.Date("2014-03-27"), y = plotDF["2014-03-27","low"]-8, label = "6", colour='red',size=4)
136 | p <- p+annotate("text", x = as.Date("2014-03-26"), y = plotDF["2014-03-26","low"]-8, label = "5", colour='red',size=4)
137 | p <- p+annotate("text", x = as.Date("2014-03-24"), y = plotDF["2014-03-24","low"]-8, label = "4", colour='red',size=4)
138 | p <- p+annotate("text", x = as.Date("2014-03-20"), y = plotDF["2014-03-20","low"]-8, label = "3", colour='red',size=4)
139 | p <- p+annotate("text", x = as.Date("2014-03-19"), y = plotDF["2014-03-19","low"]-8, label = "2", colour='red',size=4)
140 | p <- p+annotate("text", x = as.Date("2014-03-18"), y = plotDF["2014-03-18","low"]-8, label = "1", colour='red',size=4)
141 | 
142 | # Countdown 3
143 | p <- p+annotate("text", x = as.Date("2014-07-02"), y = plotDF["2014-07-02","high"]+3, label = "12", colour='red',size=4)
144 | p <- p+annotate("text", x = as.Date("2014-07-01"), y = plotDF["2014-07-01","high"]+10, label = "11", colour='red',size=4)
145 | p <- p+annotate("text", x = as.Date("2014-06-25"), y = plotDF["2014-06-25","high"]+8, label = "10", colour='red',size=4)
146 | p <- p+annotate("text", x = as.Date("2014-06-24"), y = plotDF["2014-06-24","high"]+7, label = "9", colour='red',size=4)
147 | p <- p+annotate("text", x = as.Date("2014-06-20"), y = plotDF["2014-06-20","high"]+8, label = "8", colour='red',size=4)
148 | p <- p+annotate("text", x = as.Date("2014-06-19"), y = plotDF["2014-06-19","high"]+8, label = "7", colour='red',size=4)
149 | p <- p+annotate("text", x = as.Date("2014-06-16"), y = plotDF["2014-06-16","high"]+8, label = "6", colour='red',size=4)
150 | p <- p+annotate("text", x = as.Date("2014-06-13"), y = plotDF["2014-06-13","high"]+8, label = "5", colour='red',size=4)
151 | p <- p+annotate("text", x = as.Date("2014-06-12"), y = plotDF["2014-06-12","high"]+8, label = "4", colour='red',size=4)
152 | p <- p+annotate("text", x = as.Date("2014-06-11"), y = plotDF["2014-06-11","high"]+8, label = "3", colour='red',size=4)
153 | p <- p+annotate("text", x = as.Date("2014-06-10"), y = plotDF["2014-06-10","high"]+8, label = "2", colour='red',size=4)
154 | p <- p+annotate("text", x = as.Date("2014-06-05"), y = plotDF["2014-06-05","high"]+8, label = "1", colour='red',size=4)
155 | 
156 | # Text: "GCQ combo version 1"
157 | p <- p+annotate("text", x = as.Date("2014-05-13"), y = 1390, label = "GCQ", colour='magenta',size=10)
158 | p <- p+annotate("text", x = as.Date("2014-05-13"), y = 1375, label = "combo version 1", colour='magenta',size=10)
159 | 
160 | # Format the graph axis and panels
161 | p <- p+scale_y_continuous(scale_y_log10(),breaks=c(ySequence),limits=c(evenMIN,evenMAX+niceLimit))
162 | p <- p+theme(axis.title=element_blank(),
163 |              panel.background=element_rect(fill='#FFFFFF'),
164 |              panel.grid.minor =   element_line(colour = "#000000",size=0.1, linetype = "19"),
165 |              panel.grid.major =   element_line(colour = "#000000",size=0.3, linetype = "15"),
166 |              axis.text.x=element_text(colour="black"),
167 |              axis.text.y=element_text(colour="black"))
168 | p
169 | 
170 | g <- ggplot_gtable(ggplot_build(p))
171 | 
172 | # axis tweaks
173 | ia <- which(g$layout$name == "axis-l")
174 | ax <- g$grobs[[ia]]$children[[2]]
175 | ax$widths <- rev(ax$widths)
176 | ax$grobs <- rev(ax$grobs)
177 | ax$grobs[[1]]$x <- ax$grobs[[1]]$x - unit(1, "npc") + unit(0.15, "cm")
178 | pp <- c(subset(g$layout, name == "panel", select = t:r))
179 | g <- gtable_add_cols(g, g$widths[g$layout[ia, ]$l], length(g$widths) - 1)
180 | g <-  gtable_add_grob(g, ax, pp$t, length(g$widths) - 1, pp$b)
181 | g$grobs[[ia]]$children[[2]] <- NULL
182 | ################################################################################
183 | ia <- which(g$layout$name == "ylab")
184 | ylab <- g$grobs[[ia]]
185 | g <- gtable_add_cols(g, g$widths[g$layout[ia, ]$l], length(g$widths) - 1)
186 | g <-  gtable_add_grob(g, ylab, pp$t, length(g$widths) - 1, pp$b)
187 | g$grobs[[ia]]$label = ''
188 | grid.draw(g)
189 | 
190 | 
191 | 
192 | 
193 | # THANK YOU ---------------------------------------------------------------
194 | 
195 | # Method to Move Axis to the Right Side
196 | # http://stackoverflow.com/questions/15334494/how-to-change-positions-of-x-and-y-axis-in-ggplot2
197 | 
198 | # x-axis breaks
199 | # http://r.789695.n4.nabble.com/ggplot2-scaling-and-tick-mark-of-x-axis-td857540.html
200 | 
201 | # Arrow Annotation
202 | # http://stackoverflow.com/questions/17032393/how-to-draw-arrow-in-ggplot2-with-annotation
203 | 
204 | # Round to the nearest factor
205 | # http://stackoverflow.com/questions/6461209/how-to-round-up-to-the-nearest-10-or-100-or-x/6463946#6463946
206 | 
207 | # Hide Missing Dates from the X-axis
208 | # http://stackoverflow.com/questions/5169366/r-ggplot2-how-to-hide-missing-dates-from-x-axis
209 | 
210 | 
211 | 
212 | 
213 | 


--------------------------------------------------------------------------------
/rfortraders/Chapter_07/code.R:
--------------------------------------------------------------------------------
  1 | # CHAPTER 7
  2 | # Backtesting with Quantstrat
  3 | 
  4 | #################
  5 | # Initial setup #
  6 | #################
  7 | 
  8 | # Suppresses warnings
  9 | options("getSymbols.warning4.0" = FALSE)
 10 | 
 11 | # Do some house cleaning
 12 | rm(list = ls(.blotter), envir = .blotter)
 13 | 
 14 | # Set the currency and the timezone
 15 | currency('USD')
 16 | Sys.setenv(TZ = "UTC")
 17 | 
 18 | # Define symbols of interest
 19 | symbols <- c("XLB", #SPDR Materials sector
 20 |   "XLE", #SPDR Energy sector
 21 |   "XLF", #SPDR Financial sector
 22 |   "XLP", #SPDR Consumer staples sector
 23 |   "XLI", #SPDR Industrial sector
 24 |   "XLU", #SPDR Utilities sector
 25 |   "XLV", #SPDR Healthcare sector
 26 |   "XLK", #SPDR Tech sector
 27 |   "XLY", #SPDR Consumer discretionary sector
 28 |   "RWR", #SPDR Dow Jones REIT ETF
 29 |   "EWJ", #iShares Japan
 30 |   "EWG", #iShares Germany
 31 |   "EWU", #iShares UK
 32 |   "EWC", #iShares Canada
 33 |   "EWY", #iShares South Korea
 34 |   "EWA", #iShares Australia
 35 |   "EWH", #iShares Hong Kong
 36 |   "EWS", #iShares Singapore
 37 |   "IYZ", #iShares U.S. Telecom
 38 |   "EZU", #iShares MSCI EMU ETF
 39 |   "IYR", #iShares U.S. Real Estate
 40 |   "EWT", #iShares Taiwan
 41 |   "EWZ", #iShares Brazil
 42 |   "EFA", #iShares EAFE
 43 |   "IGE", #iShares North American Natural Resources
 44 |   "EPP", #iShares Pacific Ex Japan
 45 |   "LQD", #iShares Investment Grade Corporate Bonds
 46 |   "SHY", #iShares 1-3 year TBonds
 47 |   "IEF", #iShares 3-7 year TBonds
 48 |   "TLT" #iShares 20+ year Bonds
 49 | )
 50 | 
 51 | # SPDR ETFs first, iShares ETFs afterwards
 52 | if(!"XLB" %in% ls()) {
 53 |   # If data is not present, get it from yahoo
 54 |   suppressMessages(getSymbols(symbols, from = from,
 55 |     to = to,  src = "yahoo", adjust = TRUE))
 56 | }
 57 | 
 58 | # Define the instrument type
 59 | stock(symbols, currency = "USD", multiplier = 1)
 60 | 
 61 | ###############################################
 62 | # The first strategy: A simple trend follower #
 63 | ###############################################
 64 | "lagATR" <- function(HLC, n = 14, maType, lag = 1, ...) {
 65 |   ATR <- ATR(HLC, n = n, maType = maType, ...)
 66 |   ATR <- lag(ATR, lag)
 67 |   out <- ATR$atr
 68 |   colnames(out) <- "atr"
 69 |   return(out)
 70 | }
 71 | 
 72 | "osDollarATR" <- function(orderside, tradeSize, pctATR,
 73 |   maxPctATR = pctATR,  data, timestamp,
 74 |   symbol, prefer = "Open", portfolio, integerQty = TRUE,
 75 |   atrMod = "", rebal = FALSE, ...) {
 76 |   if(tradeSize > 0 & orderside == "short"){
 77 |     tradeSize <- tradeSize * -1
 78 |   }
 79 | 
 80 |   pos <- getPosQty(portfolio, symbol, timestamp)
 81 |   atrString <- paste0("atr", atrMod)
 82 |   atrCol <- grep(atrString, colnames(mktdata))
 83 | 
 84 |   if(length(atrCol) == 0) {
 85 |     stop(paste("Term", atrString,
 86 |     "not found in mktdata column names."))
 87 |   }
 88 | 
 89 |   atrTimeStamp <- mktdata[timestamp, atrCol]
 90 |   if(is.na(atrTimeStamp) | atrTimeStamp == 0) {
 91 |     stop(paste("ATR corresponding to", atrString,
 92 |     "is invalid at this point in time.  Add a logical
 93 |     operator to account for this."))
 94 |   }
 95 |    
 96 |   dollarATR <- pos * atrTimeStamp
 97 | 
 98 |   desiredDollarATR <- pctATR * tradeSize
 99 |   remainingRiskCapacity <- tradeSize *
100 |     maxPctATR - dollarATR
101 | 
102 |   if(orderside == "long"){
103 |     qty <- min(tradeSize * pctATR / atrTimeStamp,
104 |       remainingRiskCapacity / atrTimeStamp)
105 |   } else {
106 |     qty <- max(tradeSize * pctATR / atrTimeStamp,
107 |       remainingRiskCapacity / atrTimeStamp)
108 |   }
109 | 
110 |   if(integerQty) {
111 |     qty <- trunc(qty)
112 |   }
113 |   if(!rebal) {
114 |     if(orderside == "long" & qty < 0) {
115 |       qty <- 0
116 |     }
117 |     if(orderside == "short" & qty > 0) {
118 |   qty <- 0 }
119 |   }
120 |   if(rebal) {
121 |     if(pos == 0) {
122 |       qty <- 0
123 |     } 
124 |   }
125 |   return(qty)
126 | }
127 | 
128 | require(quantstrat)
129 | require(PerformanceAnalytics)
130 | 
131 | initDate = "1990-01-01"
132 | from = "2003-01-01"
133 | to = "2013-12-31"
134 | options(width = 70)
135 | 
136 | # To rerun the strategy, rerun everything below this line
137 | # demoData.R contains all of the data-related boilerplate.
138 | source("demoData.R")
139 | 
140 | # Trade sizing and initial equity settings
141 | tradeSize <- 10000
142 | initEq <- tradeSize * length(symbols)
143 | 
144 | strategy.st <- "Clenow_Simple"
145 | portfolio.st <- "Clenow_Simple"
146 | account.st <- "Clenow_Simple"
147 | rm.strat(portfolio.st)
148 | rm.strat(strategy.st)
149 | 
150 | initPortf(portfolio.st, symbols = symbols,
151 |   initDate = initDate, currency = 'USD')
152 | 
153 | initAcct(account.st, portfolios = portfolio.st,
154 |   initDate = initDate, currency = 'USD', initEq = initEq)
155 | 
156 | initOrders(portfolio.st, initDate = initDate)
157 | 
158 | strategy(strategy.st, store=TRUE)
159 | 
160 | ##################################
161 | # Backtesting the first strategy #
162 | ##################################
163 | nLag = 252
164 | pctATR = 0.02
165 | period = 10
166 | 
167 | namedLag <- function(x, k = 1, na.pad = TRUE, ...) {
168 |   out <- lag(x, k = k, na.pad = na.pad, ...)
169 |   out[is.na(out)] <- x[is.na(out)]
170 |   colnames(out) <- "namedLag"
171 |   return(out)
172 | }
173 | 
174 | add.indicator(strategy.st, name = "namedLag",
175 |   arguments = list(x = quote(Cl(mktdata)), k = nLag),
176 |   label = "ind")
177 | 
178 | add.indicator(strategy.st, name = "lagATR",
179 |   arguments = list(HLC = quote(HLC(mktdata)), n = period),
180 |   label = "atrX")
181 | 
182 | test <- applyIndicators(strategy.st, mktdata = OHLC(XLB))
183 | head(round(test, 2), 253)
184 | 
185 | # Signals
186 | add.signal(strategy.st, name = "sigCrossover",
187 |   arguments = list(columns = c("Close", "namedLag.ind"),
188 |   relationship = "gt"),
189 |   label = "coverOrBuy")
190 | 
191 | add.signal(strategy.st, name = "sigCrossover",
192 |   arguments = list(columns = c("Close", "namedLag.ind"),
193 |   relationship = "lt"),
194 |   label = "sellOrShort")
195 | 
196 | # Long rules
197 | add.rule(strategy.st, name = "ruleSignal",
198 |   arguments = list(sigcol = "coverOrBuy",
199 |   sigval = TRUE, ordertype = "market",
200 |   orderside = "long", replace = FALSE,
201 |   prefer = "Open", osFUN = osDollarATR,
202 |   tradeSize = tradeSize, pctATR = pctATR,
203 |   atrMod = "X"), type = "enter", path.dep = TRUE)
204 | 
205 | add.rule(strategy.st, name = "ruleSignal",
206 |   arguments = list(sigcol = "sellOrShort",
207 |   sigval = TRUE, orderqty = "all",
208 |   ordertype = "market", orderside = "long",
209 |   replace = FALSE, prefer = "Open"),
210 |   type = "exit", path.dep = TRUE)
211 | 
212 | # Short rules
213 | add.rule(strategy.st, name = "ruleSignal",
214 |   arguments = list(sigcol = "sellOrShort",
215 |   sigval = TRUE, ordertype = "market",
216 |   orderside = "short", replace = FALSE,
217 |   prefer = "Open", osFUN = osDollarATR,
218 |   tradeSize = -tradeSize, pctATR = pctATR,
219 |   atrMod = "X"), type = "enter", path.dep = TRUE)
220 | 
221 | add.rule(strategy.st, name = "ruleSignal",
222 |   arguments = list(sigcol = "coverOrBuy",
223 |   sigval = TRUE, orderqty = "all",
224 |   ordertype = "market", orderside = "short",
225 |   replace = FALSE, prefer = "Open"),
226 |   type = "exit", path.dep = TRUE)
227 | 
228 | # Get begin time
229 | t1 <- Sys.time()
230 | out <- applyStrategy(strategy = strategy.st,
231 |   portfolios = portfolio.st)
232 | 
233 | # Record end time
234 | t2 <- Sys.time()
235 | print(t2 - t1)
236 | 
237 | applyStrategy()
238 | ## [1] "2007-10-22 00:00:00 XLY -655 @ 32.3578893111826"
239 | ## [1] "2007-10-22 00:00:00 XLY -393 @ 32.3578893111826"
240 | ## [1] "2007-10-23 00:00:00 XLY 393 @ 33.1349846702336"
241 | ## [1] "2007-10-23 00:00:00 XLY 358 @ 33.1349846702336"
242 | ## [1] "2007-10-25 00:00:00 XLY -358 @ 32.8639048938205"
243 | ## [1] "2007-10-25 00:00:00 XLY -333 @ 32.8639048938205"
244 | ## [1] "2009-09-30 00:00:00 XLY 333 @ 25.9947501843176"
245 | ## [1] "2009-09-30 00:00:00 XLY 449 @ 25.9947501843176"
246 | ## [1] "2009-10-02 00:00:00 XLY -449 @ 24.8800203565938"
247 | 
248 | ##############################
249 | # Evaluating the performance #
250 | ##############################
251 | updatePortf(portfolio.st)
252 | dateRange <- time(getPortfolio(portfolio.st)$summary)[-1]
253 | updateAcct(portfolio.st, dateRange)
254 | updateEndEq(account.st)
255 | 
256 | tStats <- tradeStats(Portfolios = portfolio.st, use = "trades",
257 |   inclZeroDays = FALSE)
258 | tStats[, 4:ncol(tStats)] <- round(tStats[, 4:ncol(tStats)], 2)
259 |   
260 | print(data.frame(t(tStats[,-c(1,2)])))
261 | aggPF <- sum(tStats$Gross.Profits) / -sum(tStats$Gross.Losses)
262 | aggCorrect <- mean(tStats$Percent.Positive)
263 | numTrades <- sum(tStats$Num.Trades)
264 | meanAvgWLR <- mean(tStats$Avg.WinLoss.Ratio[
265 |   tStats$Avg.WinLoss.Ratio < Inf], na.rm = TRUE)
266 | 
267 | tStats
268 | 
269 | aggPF <- sum(tStats$Gross.Profits) / -sum(tStats$Gross.Losses)
270 | ## [1] 3.663545
271 | 
272 | aggCorrect <- mean(tStats$Percent.Positive)
273 | ## [1] 36.00233
274 | 
275 | numTrades <- sum(tStats$Num.Trades)
276 | ## [1] 1134
277 | 
278 | meanAvgWLR <- mean(tStats$Avg.WinLoss.Ratio[
279 |   tStats$Avg.WinLoss.Ratio < Inf], na.rm = TRUE)
280 | ## [1] 9.871333
281 | 
282 | instRets <- PortfReturns(account.st)
283 | portfRets <- xts(rowMeans(instRets) * ncol(instRets),
284 |   order.by = index(instRets))
285 | portfRets <- portfRets[!is.na(portfRets)]
286 | cumPortfRets <- cumprod(1 + portfRets)
287 | firstNonZeroDay <- as.character(index(portfRets)[
288 |   min(which(portfRets != 0))])
289 | 
290 | # Obtain symbol
291 | getSymbols("SPY", from = firstNonZeroDay, to = to)
292 | SPYrets <- diff(log(Cl(SPY)))[-1]
293 | cumSPYrets <- cumprod(1 + SPYrets)
294 | comparison <- cbind(cumPortfRets, cumSPYrets)
295 | colnames(comparison)  <- c("strategy", "SPY")
296 | chart.TimeSeries(comparison, legend.loc = "topleft",
297 |   colors=c("green", "red"))
298 | 
299 | # Calculate risk metrics
300 | SharpeRatio.annualized(portfRets)
301 | Return.annualized(portfRets)
302 | maxDrawdown(portfRets)
303 | 
304 | SharpeRatio.annualized(portfRets)
305 | ## [1] 0.6424366
306 | 
307 | Return.annualized(portfRets)
308 | ## [1] 0.1392711
309 | 
310 | maxDrawdown(portfRets)
311 | ## [1] 0.278221
312 | 
313 | chart.Posn(portfolio.st, "XLB")
314 | tmp <- namedLag(Cl(XLB), k = nLag)
315 | add_TA(tmp$namedLag, col = "blue", on = 1)
316 | 
317 | ###############################################
318 | # The second strategy: Cumulative Connors RSI #
319 | ###############################################
320 | # Compute Connor's RSI, depends on RSI TTR function
321 | connorsRSI <- function(price, nRSI = 3, nStreak = 2,
322 |   nPercentLookBack = 100 ) {
323 |   priceRSI <- RSI(price, nRSI)
324 |   streakRSI <- RSI(computeStreak(price), nStreak)
325 |   percents <- round(runPercentRank(x = diff(log(price)),
326 |     n = 100, cumulative = FALSE, exact.multiplier = 1) * 100)
327 |   ret <- (priceRSI + streakRSI + percents) / 3
328 |   colnames(ret) <- "connorsRSI"
329 |   return(ret)
330 | }
331 | 
332 | # Computes a running streak of positives and
333 | # negatives of price changes
334 | computeStreak <- function(priceSeries) {
335 |   signs <- sign(diff(priceSeries))
336 |   posDiffs <- negDiffs <- rep(0,length(signs))
337 |   posDiffs[signs == 1] <- 1
338 |   negDiffs[signs == -1] <- -1
339 | 
340 |   # Create vector of cumulative sums and cumulative
341 |   # sums not incremented during streaks.
342 |   # Zero out any leading NAs after na.locf
343 |   posCum <- cumsum(posDiffs)
344 |   posNAcum <- posCum
345 |   posNAcum[posDiffs == 1] <- NA
346 |   posNAcum <- na.locf(posNAcum, na.rm = FALSE)
347 |   posNAcum[is.na(posNAcum)] <- 0
348 |   posStreak <- posCum - posNAcum
349 | 
350 |   # Repeat for negative cumulative sums
351 |   negCum <- cumsum(negDiffs)
352 |   negNAcum <- negCum
353 |   negNAcum[negDiffs == -1] <- NA
354 |   negNAcum <- na.locf(negNAcum, na.rm = FALSE)
355 |   negNAcum[is.na(negNAcum)] <- 0
356 |   negStreak <- negCum - negNAcum
357 | 
358 |   streak <- posStreak + negStreak
359 |   streak <- xts(streak, order.by = index(priceSeries))
360 |   return (streak)
361 | }
362 | 
363 | sigAND <- function(label, data=mktdata,
364 |   columns,  cross = FALSE) {
365 |   ret_sig = NULL
366 |   colNums <- rep(0, length(columns))
367 |   for(i in 1:length(columns)) {
368 |     colNums[i] <- match.names(columns[i], colnames(data))
369 |   }
370 |   ret_sig <- data[, colNums[1]]
371 |   for(i in 2:length(colNums)) {
372 |     ret_sig <- ret_sig & data[, colNums[i]]
373 |   }
374 |   ret_sig <- ret_sig * 1
375 |   if (isTRUE(cross))
376 |     ret_sig <- diff(ret_sig) == 1
377 |   colnames(ret_sig) <- label
378 |   return(ret_sig)
379 | }
380 | 
381 | cumCRSI <- function(price, nCum = 2, ...) {
382 |   CRSI <- connorsRSI(price, ...)
383 |   out <- runSum(CRSI, nCum)
384 |   colnames(out) <- "cumCRSI"
385 |   out
386 | }
387 | 
388 | rm(list = ls(.blotter), envir = .blotter)
389 |   initDate = '1990-01-01'
390 |   from = "2003-01-01"
391 |   to = "2013-12-31"
392 |   initEq = 10000
393 | 
394 | currency('USD')
395 | Sys.setenv(TZ="UTC")
396 | source("demoData.R")
397 | 
398 | strategy.st <- "CRSIcumStrat"
399 | portfolio.st <- "CRSIcumStrat"
400 | account.st <- "CRSIcumStrat"
401 | 
402 | rm.strat(portfolio.st)
403 | rm.strat(strategy.st)
404 | 
405 | initPortf(portfolio.st, symbols = symbols,
406 |   initDate = initDate, currency = 'USD')
407 | 
408 | initAcct(account.st, portfolios = portfolio.st,
409 |   initDate = initDate, currency = 'USD',
410 |   initEq = initEq)
411 | 
412 | initOrders(portfolio.st, initDate = initDate)
413 | strategy(strategy.st, store = TRUE)
414 | 
415 | # Parameters
416 | cumThresh <- 40
417 | exitThresh <- 75
418 | nCum <- 2
419 | nRSI <- 3
420 | nStreak <- 2
421 | nPercentLookBack <- 100
422 | nSMA <- 200
423 | pctATR <- .02
424 | period <- 10
425 | 
426 | # Indicators
427 | add.indicator(strategy.st, name = "cumCRSI",
428 |   arguments = list(price = quote(Cl(mktdata)), nCum = nCum,
429 |   nRSI = nRSI, nStreak = nStreak,
430 |   nPercentLookBack = nPercentLookBack),
431 |   label = "CRSIcum")
432 | 
433 | add.indicator(strategy.st, name = "connorsRSI",
434 |   arguments = list(price = quote(Cl(mktdata)), nRSI = nRSI,
435 |   nStreak = nStreak,
436 |   nPercentLookBack = nPercentLookBack),
437 |   label = "CRSI")
438 | 
439 | add.indicator(strategy.st, name = "SMA",
440 |   arguments = list(x = quote(Cl(mktdata)), n = nSMA),
441 |   label = "sma")
442 | 
443 | add.indicator(strategy.st, name = "lagATR",
444 |     arguments = list(HLC = quote(HLC(mktdata)), n = period),
445 |     label = "atrX")
446 | 
447 | # Signals
448 | add.signal(strategy.st, name = "sigThreshold",
449 |   arguments = list(column = "cumCRSI.CRSIcum",
450 |   threshold = cumThresh, relationship = "lt", cross = FALSE),
451 |   label="cumCRSI.lt.thresh")
452 | 
453 | add.signal(strategy.st, name = "sigComparison",
454 |   arguments = list(columns = c("Close", "SMA.sma"),
455 |   relationship = "gt"), label = "Cl.gt.SMA")
456 | 
457 | add.signal(strategy.st, name = "sigAND",
458 |   arguments = list(columns = c("cumCRSI.lt.thresh",
459 |   "Cl.gt.SMA"), cross = TRUE), label = "longEntry")
460 | 
461 | add.signal(strategy.st, name = "sigThreshold",
462 |   arguments = list(column = "connorsRSI.CRSI",
463 |   threshold = exitThresh, relationship = "gt",
464 |   cross = TRUE), label = "longExit")
465 | 
466 | # Rules
467 | add.rule(strategy.st, name = "ruleSignal",
468 |   arguments = list(sigcol = "longEntry", sigval = TRUE,
469 |   ordertype = "market", orderside  ="long", replace = FALSE,
470 |   prefer = "Open", osFUN = osDollarATR, tradeSize = tradeSize,
471 |   pctATR = pctATR, atrMod = "X"), type = "enter", path.dep = TRUE)
472 | 
473 | add.rule(strategy.st, name = "ruleSignal",
474 |   arguments = list(sigcol = "longExit", sigval = TRUE,
475 |   orderqty = "all", ordertype = "market", orderside = "long",
476 |   replace = FALSE, prefer = "Open"), type = "exit", path.dep = TRUE)
477 | 
478 | # Apply Strategy
479 | t1 <- Sys.time()
480 | out <- applyStrategy(strategy = strategy.st,
481 |   portfolios = portfolio.st)
482 | t2 <- Sys.time()
483 | print(t2 - t1)
484 | 
485 | # Set up analytics
486 | updatePortf(portfolio.st)
487 | dateRange <- time(getPortfolio(portfolio.st)$summary)[-1]
488 | updateAcct(portfolio.st,dateRange)
489 | updateEndEq(account.st)
490 | 
491 | ##########################################
492 | # Evaluating the mean-reverting strategy #
493 | ##########################################
494 | aggPF <- sum(tStats$Gross.Profits)/-sum(tStats$Gross.Losses)
495 | ## [1] 1.699368
496 | 
497 | aggCorrect <- mean(tStats$Percent.Positive)
498 | ## [1] 71.608
499 | 
500 | numTrades <- sum(tStats$Num.Trades)
501 | ## [1] 1500
502 | 
503 | meanAvgWLR <- mean(tStats$Avg.WinLoss.Ratio[
504 |   tStats$Avg.WinLoss.Ratio < Inf], na.rm = TRUE)
505 | ## [1] 0.725
506 | 
507 | dStats <- dailyStats(Portfolios = portfolio.st, use = "Equity")
508 | rownames(dStats) <- gsub(".DailyEndEq", "", rownames(dStats))
509 | print(data.frame(t(dStats)))
510 | 
511 | durationStatistics <- function(Portfolio, Symbols,
512 |   includeOpenTrade = FALSE, ...) {
513 | 
514 |   tmp <- list()
515 |   length(tmp) <- length(Symbols)
516 |   for(Symbol in Symbols) {
517 |     pts <- perTradeStats(Portfolio = Portfolio,
518 |       Symbol = Symbol, includeOpenTrade = includeOpenTrade)
519 |     pts$diff <- pts$End - pts$Start
520 | 
521 |     durationSummary <- summary(as.numeric(pts$diff))
522 |     winDurationSummary <- summary(as.numeric(
523 |       pts$diff[pts$Net.Trading.PL > 0]))
524 |     lossDurationSummary <- summary(as.numeric(
525 |       pts$diff[pts$Net.Trading.PL <= 0]))
526 |     names(durationSummary) <-
527 |       c("Min", "Q1", "Med", "Mean", "Q3", "Max")
528 |     names(winDurationSummary) <-
529 |       c("Min", "Q1", "Med", "Mean", "Q3", "Max")
530 |     names(lossDurationSummary) <-
531 |       c("Min", "Q1", "Med", "Mean", "Q3", "Max")
532 |     names(winDurationSummary) <-
533 |       paste0("W", names(winDurationSummary))
534 |     names(lossDurationSummary) <-
535 |       paste0("L", names(lossDurationSummary))
536 |     dataRow <- data.frame(cbind(t(round(durationSummary)),
537 |       t(round(winDurationSummary)),
538 |       t(round(lossDurationSummary))))
539 |     tmp[[Symbol]] <- dataRow
540 |   }
541 |   out <- do.call(rbind, tmp)
542 |   return(out)
543 | }
544 | 
545 | durStats <- durationStatistics(Portfolio=portfolio.st,
546 |   Symbols=sort(symbols))
547 | print(t(durStats))
548 | 
549 | # Market exposure
550 | tmp <- list()
551 | length(tmp) <- length(symbols)
552 | for(i in 1:nrow(dStats)) {
553 |   totalDays <- nrow(get(rownames(dStats)[i]))
554 |   mktExposure <- dStats$Total.Days[i] / totalDays
555 |   tmp[[i]] <- c(rownames(dStats)[i], round(mktExposure, 3))
556 | }
557 | mktExposure <- data.frame(do.call(rbind, tmp))
558 | colnames(mktExposure) <- c("Symbol", "MktExposure")
559 | 
560 | print(mktExposure)
561 | 
562 | print(mean(as.numeric(as.character(mktExposure$MktExposure))))
563 | ## [1] 0.1257
564 | 
565 | SharpeRatio.annualized(portfRets)
566 | ## [1]  0.6973019
567 | 
568 | Return.annualized(portfRets)
569 | ## [1] 0.03370045
570 | 
571 | maxDrawdown(portfRets)
572 | ## [1] 0.09120687
573 | 
574 | chart.Posn(portfolio.st, "XLB")
575 | TA_CRSI <- connorsRSI(Cl(XLB), nRSI = nRSI,
576 |   nStreak = nStreak, nPercentLookBack = nPercentLookBack)
577 | add_TA(TA_CRSI, col = "red")
578 | 
579 | TA_cumCRSI <- cumCRSI(price = Cl(XLB),
580 |   nCum = nCum, nRSI = nRSI, nStreak = nStreak,
581 |   nPercentLookBack = nPercentLookBack)
582 | add_TA(TA_cumCRSI, col = "blue")
583 | 
584 | TA_lagATR <- lagATR(HLC = HLC(XLB), n = period)
585 | add_TA(TA_lagATR, col = "purple")
586 | 
587 | TA_SMA <- SMA(Cl(XLB), n = nSMA)
588 | add_TA(TA_SMA, col = "blue", lwd = 2, on = 1)
589 | 
590 | 
591 | 


--------------------------------------------------------------------------------
/backtesting/.Rhistory:
--------------------------------------------------------------------------------
  1 | return(correlationIndicator)
  2 | }
  3 | # current portfolio -------------------------------------------------------
  4 | year2006 <- read.csv("2006.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE)
  5 | year2007 <- read.csv("2007.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE)
  6 | year2008 <- read.csv("2008.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE)
  7 | year2009 <- read.csv("2009.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE)
  8 | year2010 <- read.csv("2010.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE)
  9 | year2011 <- read.csv("2011.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE)
 10 | year2012 <- read.csv("2012.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE)
 11 | year2013 <- read.csv("2013.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE)
 12 | year2014 <- read.csv("2014.csv", as.is=TRUE, header = TRUE, stringsAsFactors = FALSE)
 13 | total_portfolio <- rbind(year2006[1:2, ],
 14 | year2007[1:16, ],
 15 | year2008[1:14, ],
 16 | year2009[1:24, ],
 17 | year2010[1:34, ],
 18 | year2011[1:26, ],
 19 | year2012[1:26, ],
 20 | year2013[1:26, ],
 21 | year2014[1:22, ])
 22 | mom_tickers <- sort(unique(total_portfolio[, 3]))
 23 | all_dates   <- seq(as.Date("2012-12-01"), as.Date("2014-12-01"), by = 1)
 24 | initial_portfolio <- data.frame(AAPL  = rep(0, length(all_dates)),
 25 | AGG   = rep(0, length(all_dates)),
 26 | ATVI  = rep(0, length(all_dates)),
 27 | DLB   = rep(0, length(all_dates)),
 28 | FORD  = rep(0, length(all_dates)),
 29 | GOOGL = rep(0, length(all_dates)),
 30 | HAS   = rep(0, length(all_dates)),
 31 | LF    = rep(0, length(all_dates)),
 32 | LUV   = rep(0, length(all_dates)),
 33 | SPY   = rep(0, length(all_dates)),
 34 | TIBX  = rep(0, length(all_dates)))
 35 | # fill positions
 36 | initial_portfolio["2012-12-10","AAPL"]                  <- 1
 37 | initial_portfolio[c("2006-12-12","2007-01-09","2007-02-13",
 38 | "2007-03-13","2007-04-10","2007-05-08",
 39 | "2007-06-12","2007-07-10","2008-07-08",
 40 | "2008-08-12","2008-09-09","2008-10-14",
 41 | "2008-11-11","2008-12-09","2009-01-13",
 42 | "2009-02-10","2009-03-10","2009-04-14",
 43 | "2009-05-12","2009-06-09","2009-07-14",
 44 | "2009-08-11","2009-09-08","2009-10-13",
 45 | "2009-11-10","2009-12-08","2010-01-12",
 46 | "2010-02-09","2010-03-09","2010-04-13",
 47 | "2010-05-11","2010-06-08","2010-07-13",
 48 | "2010-08-10","2010-09-14","2010-10-12",
 49 | "2010-11-09","2010-12-14","2011-01-11",
 50 | "2011-02-08","2011-03-08","2011-04-12",
 51 | "2011-05-10","2011-06-14","2011-07-12",
 52 | "2011-08-09","2011-09-13","2011-10-11",
 53 | "2011-11-08","2011-12-07","2011-12-13",
 54 | "2012-01-10","2012-02-14","2012-03-13",
 55 | "2012-04-10","2012-05-08","2012-06-12",
 56 | "2012-08-14","2012-09-11","2012-10-09",
 57 | "2012-11-13","2012-12-11","2013-01-08",
 58 | "2013-02-12","2013-03-12","2013-04-09",
 59 | "2013-05-14","2013-06-11","2013-07-09",
 60 | "2013-08-13","2013-09-10","2013-10-08",
 61 | "2013-11-12","2013-12-10","2014-01-14",
 62 | "2014-02-11","2014-03-11","2014-04-08",
 63 | "2014-05-13","2014-06-10","2014-07-08",
 64 | "2014-08-12","2014-09-09","2014-10-14",
 65 | "2014-11-11"), "AGG"]               <- 1
 66 | initial_portfolio[c("2010-04-30","2010-05-20"),"ATVI"]  <- 1
 67 | initial_portfolio[c("2010-04-30","2010-05-05",
 68 | "2010-05-21"), "DLB"]               <- 1
 69 | initial_portfolio[c("2010-08-24"), "GOOGL"]             <- 1
 70 | initial_portfolio[c("2010-04-30"), "HAS"]               <- 1
 71 | "2007-11-08",
 72 | "2007-12-10","2008-02-04","2012-07-10","2012-12-10","2013-12-06",
 73 | initial_xts <- xts(initial_portfolio, order.by = all_dates)
 74 | # backtesting assumptions -------------------------------------------------
 75 | ## Define frictions (as percentage of trade)
 76 | commission_pct <- c(.0003)
 77 | slippage_pct <- c(.0005)
 78 | friction_pct  <- commission_pct + slippage_pct
 79 | ## Data dates
 80 | start_date  <- as.Date("2007-04-01")
 81 | end_date    <- as.Date("2014-12-01")
 82 | # download data -----------------------------------------------------------
 83 | #universe <- c("SPY", "MDY", "IWM",         # Large-Mid-Small Cap equities
 84 | #              "EFA", "EEM",                # Intl.and emerging markets
 85 | #              "AGG", "TIP", "TLT", "LQD",  # Bonds
 86 | #              "GSG", "DBE", "GLD",         # Commodities, Energy, Gold
 87 | #              "RWR", "RWX", "VNQ", "MBB")  # Real Estate
 88 | universe <- c("SPY", "MDY", "IWM",
 89 | "EFA", "EEM", "AGG",
 90 | "TIP", "GSG", "RWR",
 91 | "RWX", "TLT", "LQD",
 92 | "MBB")
 93 | # "SHV"
 94 | cash_ticker <- c("VFISX")
 95 | all_symbols <- c(universe,cash_ticker)
 96 | # first download
 97 | getSymbols(all_symbols, from=start_date, to=end_date)
 98 | for(i in all_symbols)
 99 | assign(i, get(i))
100 | close_prices  <- do.call(merge, lapply(all_symbols, function(x) Cl(get(x))))
101 | adj_prices    <- do.call(merge, lapply(all_symbols, function(x) Ad(get(x))))
102 | returns_matrix1   <-  na.omit(diff(adj_prices)/lag(adj_prices, k = 1))
103 | returns_monthly1  <-  returns_matrix1[endpoints(index(returns_matrix1),
104 | on = "months"), ]
105 | # second download
106 | getSymbols(mom_tickers, from=start_date, to=end_date)
107 | for(i in mom_tickers)
108 | assign(i, get(i))
109 | close_prices2  <- do.call(merge, lapply(mom_tickers, function(x) Cl(get(x))))
110 | adj_prices2    <- do.call(merge, lapply(mom_tickers, function(x) Ad(get(x))))
111 | returns_matrix2 <- na.omit(diff(adj_prices2)/lag(adj_prices2, k = 1))
112 | returns_monthly2  <-  returns_matrix2[endpoints(index(returns_matrix2),
113 | on = "months"), ]
114 | # model dates
115 | model_dates <- index(adj_prices)[85:nrow(adj_prices)]
116 | # indicators --------------------------------------------------------------
117 | roc1DP <- ROC(close_prices[,1:ncol(close_prices)-1], n=1,
118 | type="discrete")*100
119 | ## Highest Momentum (ROC),
120 | ## Lowest Volatility (SD),
121 | ## Lowest Avg. Correlation
122 | ## 4-month look back
123 | momentumIndicator <- xts(apply(roc1DP,2,SMA,n=84), index(close_prices))
124 | volatilityIndicator <- rollapply(roc1DP[,1:ncol(close_prices)-1], 84, sd, fill=NA)
125 | correlationIndicator <- corIndicator(universe = universe, data = roc1DP,n=84)
126 | momentumRank    <- ranking(momentumIndicator)
127 | volatilityRank  <- ranking(-volatilityIndicator)
128 | correlationRank <- ranking(-correlationIndicator)
129 | combineRankWeights <- (.50*momentumRank) + (.25*volatilityRank) + (.25*correlationRank)
130 | compositeRank <- ranking(combineRankWeights)
131 | colnames(compositeRank) <- universe
132 | compositeRank[compositeRank < 9] <- 0
133 | compositeRank[compositeRank >= 9] <- 1
134 | # Filter: - ON - OFF Filters
135 | momentumFilterBUY <- xts(ifelse(as.matrix(momentumIndicator) < 0, 0, 1), index(adj_prices))
136 | # MVC portfolio -----------------------------------------------------------
137 | strategy_nameBH <- c("Jonnie:MVC")
138 | strategy <- as.matrix(compositeRank)*as.matrix(momentumFilterBUY) *.20
139 | prelimallocmatrix <- xts(strategy, order.by = index(adj_prices))
140 | prelimallocmatrix <- prelimallocmatrix[endpoints(index(prelimallocmatrix), on = "months"), ]
141 | colnames(prelimallocmatrix) <- universe
142 | ## allocation matrix + cash
143 | cashBH <- ifelse(rowSums(prelimallocmatrix) == 0, 1, 1 - rowSums(prelimallocmatrix))
144 | allocmatrixBH <- cbind(prelimallocmatrix, cashBH)
145 | colnames(allocmatrixBH) <- all_symbols
146 | ## trade matrices
147 | prelimtradematrixBH <- allocmatrixBH
148 | prelimtradematrixBH[is.na(prelimtradematrixBH)] <- 0
149 | tradematrixBH <- prelimtradematrixBH - lag(prelimtradematrixBH, 1)
150 | ## friction matrices
151 | constituenttradematrixBH <- tradematrixBH[,1:length(universe)]
152 | frictionmatrixBH <- cbind(abs(constituenttradematrixBH) * friction_pct, 0)
153 | ## Create matrix of (monthly) returns for the portfolios
154 | wheninvested_retsBH <- as.numeric(returns_monthly1) * lag(allocmatrixBH, 1) - frictionmatrixBH
155 | ## Calculate portfolio returns vector
156 | port_retsBH3 <- rowSums(wheninvested_retsBH)
157 | ## Create final portfolio returns by coercing port_rets to xts object and remove NAs
158 | port_rets_xtsBH3 <- xts(port_retsBH3, order.by = index(prelimallocmatrix))
159 | port_returnsBH3 <- port_rets_xtsBH3[is.na(port_rets_xtsBH3) == FALSE]
160 | colnames(port_returnsBH) <- strategy_nameBH
161 | ## Create equity curve and additional portfolio returns
162 | port_equity_curveBH <- cumprod(1 + port_returnsBH)
163 | port_returns_monthlyBH <- monthlyReturn(port_equity_curveBH)
164 | # Buy and Hold (my portfolio) ---------------------------------------------
165 | strategy_nameBH <- c("Jonnie")
166 | bh_allocation <- 1/length(universe)
167 | bh_initial <- matrix(1, nrow = nrow(returns_matrix1), ncol = ncol(returns_matrix1)-1)
168 | bh_weights <- bh_initial*bh_allocation
169 | prelimallocmatrixBH <- xts(bh_weights, order.by = index(returns_matrix1))
170 | prelimallocmatrixBH <- prelimallocmatrixBH[endpoints(index(prelimallocmatrixBH), on = "months"), ]
171 | colnames(prelimallocmatrixBH) <- universe
172 | ## allocation matrix + cash
173 | cashBH <- ifelse(rowSums(prelimallocmatrixBH) == 0, 1, 1 - rowSums(prelimallocmatrixBH))
174 | allocmatrixBH <- cbind(prelimallocmatrixBH, cashBH)
175 | colnames(allocmatrixBH) <- all_symbols
176 | ## trade matrices
177 | prelimtradematrixBH <- allocmatrixBH
178 | prelimtradematrixBH[is.na(prelimtradematrixBH)] <- 0
179 | tradematrixBH <- prelimtradematrixBH - lag(prelimtradematrixBH, 1)
180 | ## friction matrices
181 | constituenttradematrixBH <- tradematrixBH[,1:length(universe)]
182 | frictionmatrixBH <- cbind(abs(constituenttradematrixBH) * friction_pct, 0)
183 | ## Create matrix of (monthly) returns for the portfolios
184 | wheninvested_retsBH <- as.numeric(returns_monthly1) * lag(allocmatrixBH, 1) - frictionmatrixBH
185 | ## Calculate portfolio returns vector
186 | port_retsBH <- rowSums(wheninvested_retsBH)
187 | ## Create final portfolio returns by coercing port_rets to xts object and remove NAs
188 | port_rets_xtsBH <- xts(port_retsBH, order.by = index(prelimallocmatrixBH))
189 | port_returnsBH <- port_rets_xtsBH[is.na(port_rets_xtsBH) == FALSE]
190 | colnames(port_returnsBH) <- strategy_nameBH
191 | ## Create equity curve and additional portfolio returns
192 | port_equity_curveBH <- cumprod(1 + port_returnsBH)
193 | port_returns_monthlyBH <- monthlyReturn(port_equity_curveBH)
194 | # buy and hold (mom's portfolio) ------------------------------------------
195 | strategy_nameBH <- c("Mom")
196 | bh_allocation <- 1/length(mom_tickers)
197 | bh_initial <- matrix(1, nrow = nrow(returns_matrix2), ncol = ncol(returns_matrix2))
198 | bh_weights <- bh_initial*bh_allocation
199 | prelimallocmatrixBH <- xts(bh_weights, order.by = index(returns_matrix2))
200 | prelimallocmatrixBH <- prelimallocmatrixBH[endpoints(index(prelimallocmatrixBH), on = "months"), ]
201 | colnames(prelimallocmatrixBH) <- mom_tickers
202 | ## allocation matrix + cash
203 | allocmatrixBH <- prelimallocmatrixBH
204 | ## trade matrices
205 | prelimtradematrixBH <- allocmatrixBH
206 | prelimtradematrixBH[is.na(prelimtradematrixBH)] <- 0
207 | tradematrixBH <- prelimtradematrixBH - lag(prelimtradematrixBH, 1)
208 | ## friction matrices
209 | constituenttradematrixBH <- tradematrixBH[,1:length(mom_tickers)]
210 | frictionmatrixBH <- cbind(abs(constituenttradematrixBH) * friction_pct)
211 | ## Create matrix of (monthly) returns for the portfolios
212 | wheninvested_retsBH <- as.numeric(returns_monthly2) * lag(allocmatrixBH, 1) - frictionmatrixBH
213 | ## Calculate portfolio returns vector
214 | port_retsBH <- rowSums(wheninvested_retsBH)
215 | ## Create final portfolio returns by coercing port_rets to xts object and remove NAs
216 | port_rets_xtsBH2 <- xts(port_retsBH, order.by = index(prelimallocmatrixBH))
217 | port_returnsBH2 <- port_rets_xtsBH2[is.na(port_rets_xtsBH2) == FALSE]
218 | colnames(port_returnsBH) <- strategy_nameBH
219 | ## Create equity curve and additional portfolio returns
220 | port_equity_curveBH2 <- cumprod(1 + port_returnsBH)
221 | port_returns_monthlyBH2 <- monthlyReturn(port_equity_curveBH)
222 | chart.CumReturns(cbind(port_returnsBH,port_returnsBH2,port_returnsBH3), wealth.index = TRUE)
223 | rm(list = ls(all = TRUE))
224 | options(scipen=999)
225 | setwd("~/Dropbox/setup/quant_trading/mom_portfolio/mvc_20150109")
226 | #setwd("C:/Users/jlappen/Dropbox/setup/mom_portfolio/mvc_20150109")
227 | library(xts)
228 | library(plyr)
229 | library(quantmod)
230 | library(PerformanceAnalytics)
231 | # load custom functions ---------------------------------------------------
232 | source("backtest_function.R")
233 | source("run_corr_function.R")
234 | source("rank_function.R")
235 | # backtest parameters -----------------------------------------------------
236 | start_date  <- as.Date("2007-04-01")
237 | end_date    <- as.Date("2014-12-31")
238 | ## Define frictions (as percentage of trade)
239 | commission_pct <- c(.0003)
240 | slippage_pct <- c(.0005)
241 | friction_pct <- commission_pct + slippage_pct
242 | ## Portfolio Symbols
243 | data_source <- c("yahoo")
244 | universe <- c("SPY", "MDY", "IWM",      # Large-Mid-Small Cap equities
245 | "EFA", "EEM",             # Intl.and emerging markets
246 | "AGG","TIP","TLT", "LQD", # Bonds
247 | "GSG",                    # Commodities
248 | "RWR", "RWX",  "MBB")     # Real Estateads
249 | ## Cash
250 | cash_symbol <- c("SHV")
251 | portfolio_symbols <- c(universe, cash_symbol)
252 | # download data and compute returns ---------------------------------------
253 | getSymbols(universe, src = data_source, from = start_date, to = end_date)
254 | getSymbols(cash_symbol, src = data_source, from = start_date, to = end_date)
255 | raw_close <- do.call(merge, lapply(portfolio_symbols, function(x) Cl(get(x))))
256 | adj_close <- do.call(merge, lapply(portfolio_symbols, function(x) Ad(get(x))))
257 | ## Convert Daily to Monthly
258 | monthly_raw  <- raw_close[endpoints(index(raw_close), on = "months"), ]
259 | monthly_adj  <- adj_close[endpoints(index(adj_close), on = "months"), ]
260 | monthly_rets <- diff(monthly_adj)/lag(monthly_adj, k = 1)
261 | # market timing -----------------------------------------------------------
262 | roc1DP <- ROC(raw_close[,1:length(universe)], n=1, type="discrete")*100
263 | ## Highest Momentum (ROC),
264 | ## Lowest Volatility (SD),
265 | ## Lowest Avg. Correlation
266 | ## 4-month look back
267 | momentum    <- xts(apply(roc1DP, 2, SMA, n = 84), index(adj_close))
268 | volatility  <- rollapply(roc1DP[, 1:length(universe)], 84, sd, fill = NA)
269 | correlation <- corIndicator(universe = universe, data = roc1DP, n = 84)
270 | moment_rank <- ranking(momentum)
271 | vol_rank    <- ranking(-volatility)
272 | corr_rank   <- ranking(-correlation)
273 | all_ranks <- (.50 * moment_rank) + (.25 * vol_rank) + (.25 * corr_rank)
274 | composite_rank <- ranking(all_ranks)
275 | colnames(composite_rank) <- universe
276 | composite_rank[composite_rank < 9]  <- 0
277 | composite_rank[composite_rank >= 9] <- 1
278 | ## Filters
279 | moment_filter <- xts(ifelse(as.matrix(momentum) < 0, 0, 1), index(adj_close))
280 | # system 1: mvc + momentum filter -----------------------------------------
281 | strategy1 <- (as.matrix(composite_rank) * as.matrix(moment_filter)) #* .20
282 | strategy1 <- strategy1 * (1 / rowSums(strategy1))
283 | strategy1 <- xts(strategy1, index(adj_close))
284 | strategy1 <- strategy1[endpoints(index(strategy1), on = "months"), ]
285 | # strategy 2: risk parity -------------------------------------------------
286 | rp_buy <- as.matrix(composite_rank) * as.matrix(moment_filter)
287 | kt_buy <- 1/rowSums(rp_buy * volatility)
288 | kt_buy[kt_buy == Inf] <- 0
289 | rp_weights <- kt_buy * volatility
290 | strategy2  <- xts(rp_buy * rp_weights, index(adj_close))
291 | strategy2  <- strategy2[endpoints(index(strategy2), on = "months"), ]
292 | # Buy and Hold ------------------------------------------------------------
293 | bh_allocation <- 1/length(universe)
294 | bh_initial <- matrix(1, nrow = nrow(monthly_rets), ncol = ncol(monthly_rets) - 1)
295 | bh_weights <- bh_initial * bh_allocation
296 | bh_weights <- xts(bh_weights, order.by = index(monthly_rets))
297 | bh_weights <- bh_weights[endpoints(index(bh_weights), on = "months"), ]
298 | colnames(bh_weights) <- universe
299 | # output ------------------------------------------------------------------
300 | strat1_returns <- backtest_engine(strategy1, "MVC_filter", monthly_rets)
301 | strat2_returns <- backtest_engine(strategy2, "risk_parity", monthly_rets)
302 | bh_returns     <- backtest_engine(bh_weights, "buy_hold", monthly_rets)
303 | port_returns_report <- cbind(strat1_returns$portfolio_returns,
304 | bh_returns$portfolio_returns,
305 | strat2_returns$portfolio_returns)
306 | aa  <- table.AnnualizedReturns(cbind(port_returns_report))
307 | bb  <- table.DownsideRisk(cbind(port_returns_report))
308 | a   <- table.Stats(cbind(port_returns_report))
309 | b   <- table.Variability(cbind(port_returns_report))
310 | c   <- table.Distributions(cbind(port_returns_report))
311 | d   <- table.DrawdownsRatio(cbind(port_returns_report))
312 | e   <- table.DownsideRiskRatio(cbind(port_returns_report))
313 | f   <- table.TrailingPeriods(cbind(port_returns_report))
314 | rowBinding <- rbind(aa, bb, a, b, c, d, e, f)
315 | charts.PerformanceSummary(port_returns_report, wealth.index = TRUE)
316 | drawdowns_list <- list(strat1 = table.Drawdowns(strat1_returns$portfolio_returns),
317 | strat2 = table.Drawdowns(strat2_returns$portfolio_returns),
318 | bh     = table.Drawdowns(bh_returns$portfolio_returns))
319 | monthly_ret_list <- list(strat1 = table.CalendarReturns(strat1_returns$portfolio_returns, digits = 2),
320 | strat2 = table.CalendarReturns(strat2_returns$portfolio_returns, digits = 2),
321 | bh     = table.CalendarReturns(bh_returns$portfolio_returns, digits = 2))
322 | # final numeric tables printed to the console
323 | rowBinding
324 | drawdowns_list
325 | monthly_ret_list
326 | universe <- c("SPY", "QQQ", "IWM", "EEM", "EFA",
327 | "TLT", "LQD", "DBC", "GLD")
328 | ## Cash
329 | cash_symbol <- c("SHV")
330 | portfolio_symbols <- c(universe, cash_symbol)
331 | # download data and compute returns ---------------------------------------
332 | getSymbols(universe, src = data_source, from = start_date, to = end_date)
333 | getSymbols(cash_symbol, src = data_source, from = start_date, to = end_date)
334 | raw_close <- do.call(merge, lapply(portfolio_symbols, function(x) Cl(get(x))))
335 | adj_close <- do.call(merge, lapply(portfolio_symbols, function(x) Ad(get(x))))
336 | ## Convert Daily to Monthly
337 | monthly_raw  <- raw_close[endpoints(index(raw_close), on = "months"), ]
338 | monthly_adj  <- adj_close[endpoints(index(adj_close), on = "months"), ]
339 | monthly_rets <- diff(monthly_adj)/lag(monthly_adj, k = 1)
340 | # market timing -----------------------------------------------------------
341 | roc1DP <- ROC(raw_close[,1:length(universe)], n=1, type="discrete")*100
342 | ## Highest Momentum (ROC),
343 | ## Lowest Volatility (SD),
344 | ## Lowest Avg. Correlation
345 | ## 4-month look back
346 | momentum    <- xts(apply(roc1DP, 2, SMA, n = 84), index(adj_close))
347 | volatility  <- rollapply(roc1DP[, 1:length(universe)], 84, sd, fill = NA)
348 | correlation <- corIndicator(universe = universe, data = roc1DP, n = 84)
349 | moment_rank <- ranking(momentum)
350 | vol_rank    <- ranking(-volatility)
351 | corr_rank   <- ranking(-correlation)
352 | all_ranks <- (.50 * moment_rank) + (.25 * vol_rank) + (.25 * corr_rank)
353 | composite_rank <- ranking(all_ranks)
354 | colnames(composite_rank) <- universe
355 | composite_rank[composite_rank < 9]  <- 0
356 | composite_rank[composite_rank >= 9] <- 1
357 | ## Filters
358 | moment_filter <- xts(ifelse(as.matrix(momentum) < 0, 0, 1), index(adj_close))
359 | # system 1: mvc + momentum filter -----------------------------------------
360 | strategy1 <- (as.matrix(composite_rank) * as.matrix(moment_filter)) #* .20
361 | strategy1 <- strategy1 * (1 / rowSums(strategy1))
362 | strategy1 <- xts(strategy1, index(adj_close))
363 | strategy1 <- strategy1[endpoints(index(strategy1), on = "months"), ]
364 | # strategy 2: risk parity -------------------------------------------------
365 | rp_buy <- as.matrix(composite_rank) * as.matrix(moment_filter)
366 | kt_buy <- 1/rowSums(rp_buy * volatility)
367 | kt_buy[kt_buy == Inf] <- 0
368 | rp_weights <- kt_buy * volatility
369 | strategy2  <- xts(rp_buy * rp_weights, index(adj_close))
370 | strategy2  <- strategy2[endpoints(index(strategy2), on = "months"), ]
371 | # Buy and Hold ------------------------------------------------------------
372 | bh_allocation <- 1/length(universe)
373 | bh_initial <- matrix(1, nrow = nrow(monthly_rets), ncol = ncol(monthly_rets) - 1)
374 | bh_weights <- bh_initial * bh_allocation
375 | bh_weights <- xts(bh_weights, order.by = index(monthly_rets))
376 | bh_weights <- bh_weights[endpoints(index(bh_weights), on = "months"), ]
377 | colnames(bh_weights) <- universe
378 | # output ------------------------------------------------------------------
379 | strat1_returns <- backtest_engine(strategy1, "MVC_filter", monthly_rets)
380 | strat2_returns <- backtest_engine(strategy2, "risk_parity", monthly_rets)
381 | bh_returns     <- backtest_engine(bh_weights, "buy_hold", monthly_rets)
382 | port_returns_report <- cbind(strat1_returns$portfolio_returns,
383 | bh_returns$portfolio_returns,
384 | strat2_returns$portfolio_returns)
385 | aa  <- table.AnnualizedReturns(cbind(port_returns_report))
386 | bb  <- table.DownsideRisk(cbind(port_returns_report))
387 | a   <- table.Stats(cbind(port_returns_report))
388 | b   <- table.Variability(cbind(port_returns_report))
389 | c   <- table.Distributions(cbind(port_returns_report))
390 | d   <- table.DrawdownsRatio(cbind(port_returns_report))
391 | e   <- table.DownsideRiskRatio(cbind(port_returns_report))
392 | f   <- table.TrailingPeriods(cbind(port_returns_report))
393 | rowBinding <- rbind(aa, bb, a, b, c, d, e, f)
394 | charts.PerformanceSummary(port_returns_report, wealth.index = TRUE)
395 | drawdowns_list <- list(strat1 = table.Drawdowns(strat1_returns$portfolio_returns),
396 | strat2 = table.Drawdowns(strat2_returns$portfolio_returns),
397 | bh     = table.Drawdowns(bh_returns$portfolio_returns))
398 | monthly_ret_list <- list(strat1 = table.CalendarReturns(strat1_returns$portfolio_returns, digits = 2),
399 | strat2 = table.CalendarReturns(strat2_returns$portfolio_returns, digits = 2),
400 | bh     = table.CalendarReturns(bh_returns$portfolio_returns, digits = 2))
401 | # final numeric tables printed to the console
402 | rowBinding
403 | drawdowns_list
404 | monthly_ret_list
405 | head(adj_close)
406 | universe <- c("SPY", "QQQ", "IWM", "EEM", "EFA",
407 | "TLT", "LQD", "DBC", "GLD", "AGG",
408 | "RWR","RWX","MBB")
409 | ## Cash
410 | cash_symbol <- c("SHV")
411 | portfolio_symbols <- c(universe, cash_symbol)
412 | # download data and compute returns ---------------------------------------
413 | getSymbols(universe, src = data_source, from = start_date, to = end_date)
414 | getSymbols(cash_symbol, src = data_source, from = start_date, to = end_date)
415 | raw_close <- do.call(merge, lapply(portfolio_symbols, function(x) Cl(get(x))))
416 | adj_close <- do.call(merge, lapply(portfolio_symbols, function(x) Ad(get(x))))
417 | ## Convert Daily to Monthly
418 | monthly_raw  <- raw_close[endpoints(index(raw_close), on = "months"), ]
419 | monthly_adj  <- adj_close[endpoints(index(adj_close), on = "months"), ]
420 | monthly_rets <- diff(monthly_adj)/lag(monthly_adj, k = 1)
421 | # market timing -----------------------------------------------------------
422 | roc1DP <- ROC(raw_close[,1:length(universe)], n=1, type="discrete")*100
423 | ## Highest Momentum (ROC),
424 | ## Lowest Volatility (SD),
425 | ## Lowest Avg. Correlation
426 | ## 4-month look back
427 | momentum    <- xts(apply(roc1DP, 2, SMA, n = 84), index(adj_close))
428 | volatility  <- rollapply(roc1DP[, 1:length(universe)], 84, sd, fill = NA)
429 | correlation <- corIndicator(universe = universe, data = roc1DP, n = 84)
430 | moment_rank <- ranking(momentum)
431 | vol_rank    <- ranking(-volatility)
432 | corr_rank   <- ranking(-correlation)
433 | all_ranks <- (.50 * moment_rank) + (.25 * vol_rank) + (.25 * corr_rank)
434 | composite_rank <- ranking(all_ranks)
435 | colnames(composite_rank) <- universe
436 | composite_rank[composite_rank < 9]  <- 0
437 | composite_rank[composite_rank >= 9] <- 1
438 | ## Filters
439 | moment_filter <- xts(ifelse(as.matrix(momentum) < 0, 0, 1), index(adj_close))
440 | # system 1: mvc + momentum filter -----------------------------------------
441 | strategy1 <- (as.matrix(composite_rank) * as.matrix(moment_filter)) #* .20
442 | strategy1 <- strategy1 * (1 / rowSums(strategy1))
443 | strategy1 <- xts(strategy1, index(adj_close))
444 | strategy1 <- strategy1[endpoints(index(strategy1), on = "months"), ]
445 | # strategy 2: risk parity -------------------------------------------------
446 | rp_buy <- as.matrix(composite_rank) * as.matrix(moment_filter)
447 | kt_buy <- 1/rowSums(rp_buy * volatility)
448 | kt_buy[kt_buy == Inf] <- 0
449 | rp_weights <- kt_buy * volatility
450 | strategy2  <- xts(rp_buy * rp_weights, index(adj_close))
451 | strategy2  <- strategy2[endpoints(index(strategy2), on = "months"), ]
452 | # Buy and Hold ------------------------------------------------------------
453 | bh_allocation <- 1/length(universe)
454 | bh_initial <- matrix(1, nrow = nrow(monthly_rets), ncol = ncol(monthly_rets) - 1)
455 | bh_weights <- bh_initial * bh_allocation
456 | bh_weights <- xts(bh_weights, order.by = index(monthly_rets))
457 | bh_weights <- bh_weights[endpoints(index(bh_weights), on = "months"), ]
458 | colnames(bh_weights) <- universe
459 | # output ------------------------------------------------------------------
460 | strat1_returns <- backtest_engine(strategy1, "MVC_filter", monthly_rets)
461 | strat2_returns <- backtest_engine(strategy2, "risk_parity", monthly_rets)
462 | bh_returns     <- backtest_engine(bh_weights, "buy_hold", monthly_rets)
463 | port_returns_report <- cbind(strat1_returns$portfolio_returns,
464 | bh_returns$portfolio_returns,
465 | strat2_returns$portfolio_returns)
466 | aa  <- table.AnnualizedReturns(cbind(port_returns_report))
467 | bb  <- table.DownsideRisk(cbind(port_returns_report))
468 | a   <- table.Stats(cbind(port_returns_report))
469 | b   <- table.Variability(cbind(port_returns_report))
470 | c   <- table.Distributions(cbind(port_returns_report))
471 | d   <- table.DrawdownsRatio(cbind(port_returns_report))
472 | e   <- table.DownsideRiskRatio(cbind(port_returns_report))
473 | f   <- table.TrailingPeriods(cbind(port_returns_report))
474 | rowBinding <- rbind(aa, bb, a, b, c, d, e, f)
475 | charts.PerformanceSummary(port_returns_report, wealth.index = TRUE)
476 | drawdowns_list <- list(strat1 = table.Drawdowns(strat1_returns$portfolio_returns),
477 | strat2 = table.Drawdowns(strat2_returns$portfolio_returns),
478 | bh     = table.Drawdowns(bh_returns$portfolio_returns))
479 | monthly_ret_list <- list(strat1 = table.CalendarReturns(strat1_returns$portfolio_returns, digits = 2),
480 | strat2 = table.CalendarReturns(strat2_returns$portfolio_returns, digits = 2),
481 | bh     = table.CalendarReturns(bh_returns$portfolio_returns, digits = 2))
482 | # final numeric tables printed to the console
483 | rowBinding
484 | drawdowns_list
485 | monthly_ret_list
486 | rm(list = ls(all = TRUE))
487 | gbm_function <- function(x, omega, mu, sigma, n) {
488 | # Geometric Brownian Motion:
489 | # x     = initial value of the process at t0
490 | # alpha = start time
491 | # omega = final time
492 | # mu    = rate of return (drift)
493 | # sigma = volatility
494 | # n     = number of intervals in which to split [t0, t1]
495 | dt  <- omega/n
496 | t   <- seq(0, omega, length = n + 1)
497 | bm  <- ts(cumsum(c(0, rnorm(n) * sqrt(dt))), start = 0, deltat = dt)
498 | gbm <- x * exp((mu - sigma ^ 2 / 2) * time(bm) + sigma * as.numeric(bm))
499 | out <- ts(gbm, start = 0, deltat = deltat(bm))
500 | }
501 | model1 <- gbm_function(x     = 5,
502 | omega = 1,
503 | mu    = 0.09,
504 | sigma = 0.3,
505 | n     = 5000)
506 | plot(x    = seq(1, length(model1), by = 1),
507 | y    = model1,
508 | type = "l",
509 | lwd  = 1.5,
510 | col  = "#31a354",
511 | ylab = "$",
512 | xlab = "# of Observations")
513 | 


--------------------------------------------------------------------------------