├── .gitignore ├── README.md ├── license.md ├── quantitative_stock_analysis_tutorial.Rproj └── sp500_analysis_tutorial.R /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .Rproj 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quantitative Stock Analysis Tutorial 2 | 3 | 4 | The stock analysis `.R` file for computing stock returns and correlations for the S&P500 stock listing, which complements [Quantitative Stock Analysis Tutorial: Screening the Returns for Every S&P500 Stock in Less than 5 Minutes](http://www.mattdancho.com/investments/2016/10/23/SP500_Analysis.html). 5 | -------------------------------------------------------------------------------- /license.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Matt Dancho 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /quantitative_stock_analysis_tutorial.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | -------------------------------------------------------------------------------- /sp500_analysis_tutorial.R: -------------------------------------------------------------------------------- 1 | # R Code for S&P500 Stock Analysis 2 | # Author: Matt Dancho 3 | # Date: 2016-10-23 4 | 5 | 6 | # Prerequisites ---------------------------------------------------------------- 7 | library(quantmod) # get stock prices; useful stock analysis functions 8 | library(xts) # working with extensible time series 9 | library(rvest) # web scraping 10 | library(tidyverse) # ggplot2, purrr, dplyr, tidyr, readr, tibble 11 | library(stringr) # working with strings 12 | library(forcats) # working with factors 13 | library(lubridate) # working with dates in tibbles / data frames 14 | library(plotly) # Interactive plots 15 | library(corrplot) # Visualize correlation plots 16 | 17 | 18 | # Web Scraping: Get the List of S&P500 Stocks ---------------------------------- 19 | 20 | # Web-scrape S&P500 stock list 21 | sp_500 <- read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies") %>% 22 | html_node("table.wikitable") %>% 23 | html_table() %>% 24 | select(`Ticker symbol`, Security, `GICS Sector`, `GICS Sub Industry`) %>% 25 | as_tibble() 26 | 
# Format names
# Lower-case the scraped column headers and turn them into syntactically valid
# R names (spaces become dots, e.g. `Ticker symbol` -> `ticker.symbol`), which
# is how the columns are referenced throughout the rest of the script.
names(sp_500) <- sp_500 %>%
    names() %>%
    str_to_lower() %>%
    make.names()


# Creating Functions to Map ----------------------------------------------------

# Convert an xts object into a tibble whose first column, `Date`, holds the
# time index.
#
# The conversion goes through as.data.frame() first: xts's data-frame method
# stores the index in the row names, whereas calling as_tibble() directly on
# the xts object discards row names in current tibble releases, which would
# leave rownames_to_column() with nothing but row numbers to work with.
#
# x: an xts object with a date index.
# Returns a tibble with a `Date` column followed by the columns of `x`.
xts_to_tibble <- function(x) {
    x %>%
        as.data.frame() %>%
        rownames_to_column(var = "Date") %>%
        as_tibble() %>%
        mutate(Date = ymd(Date))
}

# Download the price history for a single ticker.
#
# ticker:        symbol passed to quantmod::getSymbols().
# return_format: "tibble" (default) returns a tibble with a leading `Date`
#                column; any other value returns the xts object unchanged.
# ...:           forwarded to getSymbols() (e.g. `from`, `to`).
#
# Returns the prices with columns Open, High, Low, Close, Volume, Adjusted.
get_stock_prices <- function(ticker, return_format = "tibble", ...) {
    # Get stock prices
    stock_prices_xts <- getSymbols(Symbols = ticker, auto.assign = FALSE, ...)
    # Rename: drop the ticker prefix so every stock shares the same columns
    names(stock_prices_xts) <- c("Open", "High", "Low", "Close", "Volume", "Adjusted")
    # Return in xts format if tibble is not specified
    if (return_format == "tibble") {
        stock_prices <- xts_to_tibble(stock_prices_xts)
    } else {
        stock_prices <- stock_prices_xts
    }
    stock_prices
}

# Compute log returns from the `Adjusted` price column.
#
# x:             xts object, or a data frame / tibble whose first column is
#                `Date` (as produced by get_stock_prices()).
# return_format: "tibble" (default) returns a tibble with `Date` and
#                `Log.Returns`; any other value returns an xts object.
# period:        return period passed to quantmod::periodReturn().
# ...:           forwarded to periodReturn().
get_log_returns <- function(x, return_format = "tibble", period = 'daily', ...) {
    # Convert tibble to xts (first column is the date, the rest are prices)
    if (!is.xts(x)) {
        x <- xts(x[,-1], order.by = x$Date)
    }
    # Get log returns
    log_returns_xts <- periodReturn(x = x$Adjusted, type = 'log', period = period, ...)
    # Rename
    names(log_returns_xts) <- "Log.Returns"
    # Return in xts format if tibble is not specified
    if (return_format == "tibble") {
        log_returns <- xts_to_tibble(log_returns_xts)
    } else {
        log_returns <- log_returns_xts
    }
    log_returns
}


# Mapping the Functions --------------------------------------------------------
# For every ticker: download prices, derive log returns, then summarise each
# stock by mean / standard deviation of log returns and number of price rows.
from <- "2007-01-01"
to   <- today()
sp_500 <- sp_500 %>%
    mutate(
        stock.prices = map(ticker.symbol,
                           function(.x) get_stock_prices(.x,
                                                         return_format = "tibble",
                                                         from = from,
                                                         to   = to)
                           ),
        log.returns  = map(stock.prices,
                           function(.x) get_log_returns(.x, return_format = "tibble")),
        mean.log.returns = map_dbl(log.returns, ~ mean(.$Log.Returns)),
        sd.log.returns   = map_dbl(log.returns, ~ sd(.$Log.Returns)),
        n.trade.days = map_dbl(stock.prices, nrow)
        )


# Visualizing the Results with Plotly ------------------------------------------

# Risk (sd of log returns) vs reward (mean of log returns); marker colour and
# size both encode the number of trading days available for the stock.
plot_ly(data   = sp_500,
        type   = "scatter",
        mode   = "markers",
        x      = ~ sd.log.returns,
        y      = ~ mean.log.returns,
        color  = ~ n.trade.days,
        colors = "Blues",
        size   = ~ n.trade.days,
        # "<br>" is the line-break tag plotly understands in hover text
        text   = ~ str_c(security, "<br>",
                         "Ticker: ", ticker.symbol, "<br>",
                         "Sector: ", gics.sector, "<br>",
                         "Sub Sector: ", gics.sub.industry, "<br>",
                         "No. of Trading Days: ", n.trade.days),
        marker = list(opacity = 0.8,
                      symbol = 'circle',
                      sizemode = 'diameter',
                      sizeref = 4.0,
                      line = list(width = 2, color = '#FFFFFF'))
        ) %>%
    layout(title   = 'S&P500 Analysis: Stock Risk vs Reward',
           xaxis   = list(title = 'Risk: StDev Log Returns',
                          gridcolor = 'rgb(255, 255, 255)',
                          zerolinewidth = 1,
                          ticklen = 5,
                          gridwidth = 2),
           yaxis   = list(title = 'Reward: Mean Log Returns',
                          gridcolor = 'rgb(255, 255, 255)',
                          zerolinewidth = 1,
                          ticklen = 5,
                          gridwidth = 2),
           margin = list(l = 100,
                         t = 100,
                         b = 100),
           font   = list(color = '#FFFFFF'),
           paper_bgcolor = 'rgb(0, 0, 0)',
           plot_bgcolor = 'rgb(0, 0, 0)')


# Bonus: Computing Correlations ------------------------------------------------

# Filter high performing stocks
limit              <- 30      # keep the top `limit` stocks by mean log return
min_trade_days     <- 1000    # stocks must have more than this many price rows
max_sd_log_returns <- 0.0315  # stocks must be below this sd of log returns
sp_500_hp <- sp_500 %>%
    filter(n.trade.days > min_trade_days) %>%
    filter(sd.log.returns < max_sd_log_returns) %>%
    mutate(rank = mean.log.returns %>% desc() %>% min_rank()) %>%
    filter(rank <= limit) %>%
    arrange(rank) %>%
    select(ticker.symbol, rank, mean.log.returns, sd.log.returns, log.returns)
sp_500_hp

# Unnest high performing stocks
# The list-column is named explicitly; a bare unnest() is deprecated in
# current tidyr.
sp_500_hp_unnest <- sp_500_hp %>%
    select(ticker.symbol, log.returns) %>%
    unnest(log.returns)
sp_500_hp_unnest

# Spread format conducive to cor(): one column per ticker, one row per date;
# dates missing for any ticker are dropped.
sp_500_hp_spread <- sp_500_hp_unnest %>%
    spread(key = ticker.symbol, value = Log.Returns) %>%
    na.omit()
sp_500_hp_spread

# Correlation plot
sp_500_hp_spread %>%
    select(-Date) %>%
    cor() %>%
    corrplot(order   = "hclust",
             addrect = 6)