├── .gitignore
├── README.md
├── license.md
├── quantitative_stock_analysis_tutorial.Rproj
└── sp500_analysis_tutorial.R
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .Rproj
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Quantitative Stock Analysis Tutorial
2 |
3 |
4 | This repository contains the stock analysis `.R` file for computing stock returns and correlations for the S&P500 stock listing. It complements [Quantitative Stock Analysis Tutorial: Screening the Returns for Every S&P500 Stock in Less than 5 Minutes](http://www.mattdancho.com/investments/2016/10/23/SP500_Analysis.html).
5 |
--------------------------------------------------------------------------------
/license.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Matt Dancho
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/quantitative_stock_analysis_tutorial.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 4
10 | Encoding: UTF-8
11 |
12 | RnwWeave: knitr
13 | LaTeX: XeLaTeX
14 |
--------------------------------------------------------------------------------
/sp500_analysis_tutorial.R:
--------------------------------------------------------------------------------
1 | # R Code for S&P500 Stock Analysis
2 | # Author: Matt Dancho
3 | # Date: 2016-10-23
4 |
5 |
6 | # Prerequisites ----------------------------------------------------------------
7 | library(quantmod) # get stock prices; useful stock analysis functions
8 | library(xts) # working with extensible time series
9 | library(rvest) # web scraping
10 | library(tidyverse) # ggplot2, purrr, dplyr, tidyr, readr, tibble
11 | library(stringr) # working with strings
12 | library(forcats) # working with factors
13 | library(lubridate) # working with dates in tibbles / data frames
14 | library(plotly) # Interactive plots
15 | library(corrplot) # Visualize correlation plots
16 |
17 |
18 | # Web Scraping: Get the List of S&P500 Stocks ----------------------------------
19 |
# Scrape the first "wikitable" on the Wikipedia S&P500 constituents page and
# keep only the ticker, company name, and GICS classification columns.
# NOTE(review): the back-ticked headers must match the live Wikipedia table
# exactly -- confirm they are still current before running.
sp_500 <- "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies" %>%
    read_html() %>%
    html_node("table.wikitable") %>%
    html_table() %>%
    select(`Ticker symbol`, Security, `GICS Sector`, `GICS Sub Industry`) %>%
    as_tibble()
# Lower-case the headers, then make them syntactic R names
# (e.g. `Ticker symbol` becomes ticker.symbol)
names(sp_500) <- make.names(str_to_lower(names(sp_500)))
31 |
32 |
33 | # Creating Functions to Map ----------------------------------------------------
34 |
#' Download price history for one ticker
#'
#' Calls `getSymbols()` for `ticker` and renames the six returned columns to
#' Open, High, Low, Close, Volume, Adjusted.
#'
#' @param ticker Symbol passed to `getSymbols()` as `Symbols`.
#' @param return_format "tibble" (default) returns a tibble with a leading
#'   `Date` column; any other value returns the xts object as-is.
#' @param ... Forwarded to `getSymbols()` (e.g. `from`, `to`).
#' @return A tibble or an xts object, depending on `return_format`.
get_stock_prices <- function(ticker, return_format = "tibble", ...) {
    # auto.assign = FALSE makes getSymbols() return the series so it can be
    # captured here rather than assigned into the calling environment
    stock_prices_xts <- getSymbols(Symbols = ticker, auto.assign = FALSE, ...)
    # Standardise the column names
    # NOTE(review): assumes exactly six columns come back in this order
    names(stock_prices_xts) <- c("Open", "High", "Low", "Close", "Volume", "Adjusted")
    if (return_format == "tibble") {
        # Go through a data frame first: its row names hold the dates, and
        # as_tibble() would drop them if it were applied directly to the xts
        # object, leaving nothing for rownames_to_column() to recover.
        stock_prices <- stock_prices_xts %>%
            as.data.frame() %>%
            rownames_to_column(var = "Date") %>%
            as_tibble() %>%
            mutate(Date = ymd(Date))
    } else {
        # Any other format request gets the raw xts object
        stock_prices <- stock_prices_xts
    }
    stock_prices
}
51 |
#' Compute log returns from adjusted prices
#'
#' Applies `periodReturn(type = 'log')` to the `Adjusted` column of `x` and
#' names the resulting column `Log.Returns`.
#'
#' @param x Price data: either an xts object with an `Adjusted` column, or a
#'   data frame / tibble whose first column is `Date` followed by the price
#'   columns.
#' @param return_format "tibble" (default) returns a tibble with a leading
#'   `Date` column; any other value returns the xts object as-is.
#' @param period Return period passed to `periodReturn()`.
#' @param ... Forwarded to `periodReturn()`.
#' @return A tibble or an xts object, depending on `return_format`.
get_log_returns <- function(x, return_format = "tibble", period = 'daily', ...) {
    # Convert tibble to xts; x[,-1] drops the first column, which is assumed
    # to be Date, and Date becomes the time index
    if (!is.xts(x)) {
        x <- xts(x[,-1], order.by = x$Date)
    }
    # Get log returns of the adjusted price
    log_returns_xts <- periodReturn(x = x$Adjusted, type = 'log', period = period, ...)
    # Rename
    names(log_returns_xts) <- "Log.Returns"
    if (return_format == "tibble") {
        # Go through a data frame first: its row names hold the dates, and
        # as_tibble() would drop them if it were applied directly to the xts
        # object, leaving nothing for rownames_to_column() to recover.
        log_returns <- log_returns_xts %>%
            as.data.frame() %>%
            rownames_to_column(var = "Date") %>%
            as_tibble() %>%
            mutate(Date = ymd(Date))
    } else {
        # Any other format request gets the raw xts object
        log_returns <- log_returns_xts
    }
    log_returns
}
72 |
73 |
74 | # Mapping the Functions --------------------------------------------------------
# Date window for the price download
from <- "2007-01-01"
to <- today()
# For every ticker: download prices, derive log returns, then summarise them
# (mean, standard deviation, and number of price rows) as new columns
sp_500 <- mutate(
    sp_500,
    stock.prices = map(
        ticker.symbol,
        ~ get_stock_prices(.x, return_format = "tibble", from = from, to = to)
    ),
    log.returns = map(
        stock.prices,
        ~ get_log_returns(.x, return_format = "tibble")
    ),
    mean.log.returns = map_dbl(log.returns, function(lr) mean(lr$Log.Returns)),
    sd.log.returns = map_dbl(log.returns, function(lr) sd(lr$Log.Returns)),
    n.trade.days = map_dbl(stock.prices, nrow)
)
91 |
92 |
93 | # Visualizing the Results with Plotly ------------------------------------------
94 |
# Risk vs reward scatter: one marker per stock, positioned by the standard
# deviation (x) and mean (y) of its log returns; marker colour and size both
# encode the number of trading days available.
plot_ly(data   = sp_500,
        type   = "scatter",
        mode   = "markers",
        x      = ~ sd.log.returns,
        y      = ~ mean.log.returns,
        color  = ~ n.trade.days,
        colors = "Blues",
        size   = ~ n.trade.days,
        # Hover label; plotly renders "<br>" as a line break, whereas a plain
        # newline inside the string does not produce one
        text   = ~ str_c(security, "<br>",
                         "Ticker: ", ticker.symbol, "<br>",
                         "Sector: ", gics.sector, "<br>",
                         "Sub Sector: ", gics.sub.industry, "<br>",
                         "No. of Trading Days: ", n.trade.days),
        marker = list(opacity = 0.8,
                      symbol = 'circle',
                      sizemode = 'diameter',
                      sizeref = 4.0,
                      line = list(width = 2, color = '#FFFFFF'))
        ) %>%
    layout(title   = 'S&P500 Analysis: Stock Risk vs Reward',
           xaxis   = list(title = 'Risk: StDev Log Returns',
                          gridcolor = 'rgb(255, 255, 255)',
                          zerolinewidth = 1,
                          ticklen = 5,
                          gridwidth = 2),
           # gridwidth was previously misspelled on this axis, so the y grid
           # did not get the same width setting as the x grid
           yaxis   = list(title = 'Reward: Mean Log Returns',
                          gridcolor = 'rgb(255, 255, 255)',
                          zerolinewidth = 1,
                          ticklen = 5,
                          gridwidth = 2),
           margin = list(l = 100,
                         t = 100,
                         b = 100),
           # White text on a black paper/plot background
           font   = list(color = '#FFFFFF'),
           paper_bgcolor = 'rgb(0, 0, 0)',
           plot_bgcolor = 'rgb(0, 0, 0)')
131 |
132 |
133 | # Bonus: Computing Correlations ------------------------------------------------
134 |
# Screen for high performers: more than 1000 trading days and a log-return
# standard deviation below 0.0315, then keep the `limit` best by mean log
# return (rank 1 = highest mean)
limit <- 30
sp_500_hp <- sp_500 %>%
    filter(n.trade.days > 1000, sd.log.returns < 0.0315) %>%
    mutate(rank = min_rank(desc(mean.log.returns))) %>%
    filter(rank <= limit) %>%
    arrange(rank) %>%
    select(ticker.symbol, rank, mean.log.returns, sd.log.returns, log.returns)
sp_500_hp

# Expand the nested log-return tibbles into one long table
sp_500_hp_unnest <- unnest(select(sp_500_hp, ticker.symbol, log.returns))
sp_500_hp_unnest

# One column per ticker (the layout cor() expects); rows containing any NA
# are dropped
sp_500_hp_spread <- na.omit(
    spread(sp_500_hp_unnest, key = ticker.symbol, value = Log.Returns)
)
sp_500_hp_spread

# Correlation matrix of the return columns, drawn in hierarchical-clustering
# order with 6 rectangles outlined
corrplot(
    cor(select(sp_500_hp_spread, -Date)),
    order = "hclust",
    addrect = 6
)
--------------------------------------------------------------------------------