├── .Rbuildignore ├── .github └── workflows │ └── check-r-package.yaml ├── .gitignore ├── .roxygenize.R ├── DESCRIPTION ├── LICENSE ├── Makefile ├── NAMESPACE ├── R ├── connected-graph-admm.R ├── connected-graph-heavy-tail-admm.R └── k-component-graph-heavy-tail.R ├── README.Rmd ├── README.html ├── README.md ├── examples ├── crypto │ ├── crypto-prices.rds │ ├── crypto-symbols-yahoo-finance.csv │ ├── crypto.ipynb │ └── list-of-crypto-symbols-from-yahoo-finance.ipynb └── stocks │ ├── SP500-sectors.csv │ ├── get-sp500-constituents-sector.ipynb │ ├── sp500.ipynb │ └── stock-data-2014-2018.rds ├── man ├── figures │ ├── README-plot_crypto_network-1.png │ └── README-plot_sp500_stocks_network-1.png ├── learn_connected_graph.Rd ├── learn_kcomp_heavytail_graph.Rd └── learn_regular_heavytail_graph.Rd ├── tests ├── testthat.R └── testthat │ └── test-regular.R └── vignettes ├── talk-rfinance-2023.pdf └── talk-rfinance-2023.pdf.asis /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^.*\.gz$ 3 | ^\.Rproj\.user$ 4 | ^cran-comments\.md$ 5 | ^README\.Rmd$ 6 | ^README\.html$ 7 | ^README_cache$ 8 | ^README_files$ 9 | ^R_buildignore$ 10 | ^examples$ 11 | ^.compileAttributes\.R$ 12 | ^.roxygenize\.R$ 13 | ^.git$ 14 | ^.gitignore$ 15 | ^.github$ 16 | ^Makefile$ 17 | ^LICENSE$ 18 | -------------------------------------------------------------------------------- /.github/workflows/check-r-package.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main, master] 4 | pull_request: 5 | branches: [main, master] 6 | 7 | name: test-coverage 8 | 9 | jobs: 10 | test-coverage: 11 | runs-on: ubuntu-latest 12 | env: 13 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - uses: r-lib/actions/setup-r@v2 19 | with: 20 | use-public-rspm: true 21 | 22 | - uses: r-lib/actions/setup-r-dependencies@v2 23 | with: 24 | 
extra-packages: | 25 | any::covr, any::XML, any::CVXR 26 | needs: coverage 27 | 28 | - name: Test coverage 29 | run: covr::codecov() 30 | shell: Rscript {0} 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.aux 3 | *.bbl 4 | *.blg 5 | *.fdb_latexmk 6 | *.fls 7 | *.log 8 | *.out 9 | *.synctex.gz 10 | *.toc 11 | *.ipynb_checkpoints/ 12 | *.ps 13 | ..Rcheck/ 14 | -------------------------------------------------------------------------------- /.roxygenize.R: -------------------------------------------------------------------------------- 1 | library(roxygen2) 2 | roxygenize() 3 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: fingraph 2 | Title: Learning Graphs for Financial Markets 3 | Version: 0.1.0 4 | Date: 2023-02-02 5 | Description: Learning graphs for financial markets with optimization algorithms. 6 | This package contains implementations of the algorithms described in the paper: 7 | Cardoso JVM, Ying J, and Palomar DP (2021) 8 | "Learning graphs in heavy-tailed markets", Advances in Neural Information Processing Systems (NeurIPS). 
9 | Authors@R: c( 10 | person("Ze", "Vinicius", role = c("cre", "aut"), email = "jvmirca@gmail.com"), 11 | person("Daniel", "Palomar", role = c("cre", "aut"), email = "daniel.p.palomar@gmail.com") 12 | ) 13 | URL: https://github.com/convexfi/fingraph/ 14 | BugReports: https://github.com/convexfi/fingraph/issues 15 | License: GPL-3 16 | Encoding: UTF-8 17 | Depends: spectralGraphTopology 18 | Imports: 19 | MASS, 20 | stats, 21 | progress, 22 | mvtnorm 23 | Suggests: 24 | testthat 25 | RoxygenNote: 7.1.1 26 | VignetteBuilder: 27 | knitr, 28 | rmarkdown, 29 | R.rsp 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Zé Vinícius 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | rm -v src/*.so src/*.o 3 | rm -v R/RcppExports.R 4 | rm -v src/RcppExports.cpp 5 | 6 | build: 7 | Rscript .roxygenize.R 8 | 9 | install: 10 | R CMD INSTALL ../fingraph 11 | 12 | test: 13 | Rscript -e "devtools::test()" 14 | 15 | all: 16 | make build && make install && make test 17 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(learn_connected_graph) 4 | export(learn_kcomp_heavytail_graph) 5 | export(learn_regular_heavytail_graph) 6 | import(spectralGraphTopology) 7 | -------------------------------------------------------------------------------- /R/connected-graph-admm.R: -------------------------------------------------------------------------------- 1 | library(spectralGraphTopology) 2 | 3 | #' @title Laplacian matrix of a connected graph with Gaussian data 4 | #' 5 | #' Computes the Laplacian matrix of a graph on the basis of an observed data matrix, 6 | #' where we assume the data to be Gaussian distributed. 7 | #' 8 | #' @param S a p x p covariance matrix, where p is the number of nodes in the graph 9 | #' @param w0 initial vector of graph weights. Either a vector of length p(p-1)/2 or 10 | #' a string indicating the method to compute an initial value. 11 | #' @param d the nodes' degrees. Either a vector or a single value. 12 | #' @param rho constraint relaxation hyperparameter. 13 | #' @param maxiter maximum number of iterations. 14 | #' @param reltol relative tolerance as a convergence criteria. 15 | #' @param verbose whether or not to show a progress bar during the iterations. 
#' @return A list containing possibly the following elements:
#' \item{\code{laplacian}}{estimated Laplacian matrix}
#' \item{\code{adjacency}}{estimated adjacency matrix}
#' \item{\code{theta}}{estimated Laplacian matrix slack variable}
#' \item{\code{maxiter}}{number of iterations performed; equals the
#'   \code{maxiter} argument when the relative-change test was never met}
#' \item{\code{convergence}}{boolean flag to indicate whether or not the
#'   optimization converged}
#' @import spectralGraphTopology
#' @export
learn_connected_graph <- function(S,
                                  w0 = "naive",
                                  d = 1,
                                  rho = 1,
                                  maxiter = 10000,
                                  reltol = 1e-5,
                                  verbose = TRUE) {
  # Validate inputs up front so that bad arguments fail with a clear message
  # rather than with an obscure error inside the linear-algebra helpers.
  if (!is.matrix(S) || !is.numeric(S) || nrow(S) != ncol(S)) {
    stop("`S` must be a square numeric matrix.", call. = FALSE)
  }
  if (!is.numeric(maxiter) || length(maxiter) != 1 || is.na(maxiter) ||
      maxiter < 1) {
    stop("`maxiter` must be a single number greater than or equal to 1.",
         call. = FALSE)
  }
  # number of nodes
  p <- nrow(S)
  if (!is.numeric(d) || !(length(d) %in% c(1, p))) {
    stop("`d` must be a single number or a numeric vector of length nrow(S).",
         call. = FALSE)
  }
  # w-initialization: the initial adjacency matrix is rescaled so that each
  # row sums to one before being mapped back to a weight vector.
  w <- spectralGraphTopology:::w_init(w0, MASS::ginv(S))
  A0 <- A(w)
  A0 <- A0 / rowSums(A0)
  w <- spectralGraphTopology:::Ainv(A0)
  LstarS <- Lstar(S)
  J <- matrix(1, p, p) / p
  # Theta-initialization
  Lw <- L(w)
  Theta <- Lw
  # dual variables for the Laplacian (Y) and degree (y) constraints
  Y <- matrix(0, p, p)
  y <- rep(0, p)
  # ADMM constants used by the adaptive update of rho
  mu <- 2
  tau <- 2
  if (verbose) {
    pb <- progress::progress_bar$new(format = "<:bar> :current/:total  eta: :eta",
                                     total = maxiter, clear = FALSE, width = 80)
  }
  # seq_len() (rather than 1:maxiter) never yields a decreasing sequence
  for (i in seq_len(maxiter)) {
    # update w: one projected gradient step (negative weights are set to 0)
    LstarLw <- Lstar(Lw)
    DstarDw <- Dstar(diag(Lw))
    grad <- LstarS - Lstar(Y + rho * Theta) + Dstar(y - rho * d) +
      rho * (LstarLw + DstarDw)
    eta <- 1 / (2 * rho * (2 * p - 1))
    wi <- w - eta * grad
    wi[wi < 0] <- 0
    Lwi <- L(wi)
    # update Theta from the eigendecomposition of rho * (Lwi + J) - Y
    eig <- eigen(rho * (Lwi + J) - Y, symmetric = TRUE)
    V <- eig$vectors
    gamma <- eig$values
    Thetai <- V %*% diag((gamma + sqrt(gamma^2 + 4 * rho)) / (2 * rho)) %*% t(V) - J
    # update Y
    R1 <- Thetai - Lwi
    Y <- Y + rho * R1
    # update y
    R2 <- diag(Lwi) - d
    y <- y + rho * R2
    # update rho: grow it when the primal residual dominates, shrink it when
    # the dual residual dominates
    s <- rho * norm(Lstar(Theta - Thetai), "2")
    r <- norm(R1, "F")
    if (r > mu * s) {
      rho <- rho * tau
    } else if (s > mu * r) {
      rho <- rho / tau
    }
    if (verbose) {
      pb$tick()
    }
    # relative change of the Laplacian; isTRUE() keeps a 0/0 ratio (NaN) from
    # reaching `if` as a missing value
    rel_change <- norm(Lwi - Lw, "F") / norm(Lw, "F")
    has_converged <- (i > 1) && isTRUE(rel_change < reltol)
    if (has_converged) {
      break
    }
    w <- wi
    Lw <- Lwi
    Theta <- Thetai
  }
  list(laplacian = L(wi),
       adjacency = A(wi),
       theta = Thetai,
       maxiter = i,
       convergence = has_converged)
}

# --------------------------------------------------------------------------------
# /R/connected-graph-heavy-tail-admm.R:
# --------------------------------------------------------------------------------
library(spectralGraphTopology)

#' @title Laplacian matrix of a connected graph with heavy-tailed data
#'
#' Computes the Laplacian matrix of a graph on the basis of an observed data matrix,
#' where we assume the data to be Student-t distributed.
#'
#' @param X an n x p data matrix, where n is the number of observations and p is
#'        the number of nodes in the graph
#' @param heavy_type a string which selects the statistical distribution of the data.
#'        Valid values are "gaussian" or "student".
#' @param nu the degrees of freedom of the Student-t distribution.
#'        Must be a real number greater than 2.
#' @param w0 initial vector of graph weights. Either a vector of length p(p-1)/2 or
#'        a string indicating the method to compute an initial value.
#' @param d the nodes' degrees. Either a vector or a single value.
#' @param rho constraint relaxation hyperparameter.
#' @param update_rho whether or not to update rho during the optimization.
#' @param maxiter maximum number of iterations.
#' @param reltol relative tolerance as a convergence criterion.
#' @param verbose whether or not to show a progress bar during the iterations.
#' @return A list containing possibly the following elements:
#' \item{\code{laplacian}}{estimated Laplacian matrix}
#' \item{\code{adjacency}}{estimated adjacency matrix}
#' \item{\code{theta}}{estimated Laplacian matrix slack variable}
#' \item{\code{maxiter}}{number of iterations performed; equals the
#'   \code{maxiter} argument when the relative-change test was never met}
#' \item{\code{convergence}}{boolean flag to indicate whether or not the
#'   optimization converged}
#' \item{\code{primal_lap_residual}}{primal residual for the Laplacian matrix per iteration}
#' \item{\code{primal_deg_residual}}{primal residual for the degree vector per iteration}
#' \item{\code{dual_residual}}{dual residual per iteration}
#' \item{\code{lagrangian}}{Lagrangian value per iteration}
#' \item{\code{elapsed_time}}{elapsed time, measured from the start of the
#'   iterations, recorded at the end of each iteration}
#' @import spectralGraphTopology
#' @export
learn_regular_heavytail_graph <- function(X,
                                          heavy_type = "gaussian",
                                          nu = NULL,
                                          w0 = "naive",
                                          d = 1,
                                          rho = 1,
                                          update_rho = TRUE,
                                          maxiter = 10000,
                                          reltol = 1e-5,
                                          verbose = TRUE) {
  # An unknown heavy_type used to leave the data term at zero without any
  # warning; reject it explicitly instead.
  heavy_type <- match.arg(heavy_type, c("gaussian", "student"))
  # With the default nu = NULL the Student-t weights would be zero-length and
  # silently wipe out the gradient, so require a usable value.
  if (heavy_type == "student" &&
      !(is.numeric(nu) && length(nu) == 1 && is.finite(nu) && nu > 0)) {
    stop("`nu` must be a single positive number when heavy_type = \"student\".",
         call. = FALSE)
  }
  if (!is.numeric(maxiter) || length(maxiter) != 1 || is.na(maxiter) ||
      maxiter < 1) {
    stop("`maxiter` must be a single number greater than or equal to 1.",
         call. = FALSE)
  }
  X <- as.matrix(X)
  # number of nodes
  p <- ncol(X)
  # number of observations
  n <- nrow(X)
  if (n < 2) {
    stop("`X` must contain at least two observations (rows).", call. = FALSE)
  }
  if (!is.numeric(d) || !(length(d) %in% c(1, p))) {
    stop("`d` must be a single number or a numeric vector of length ncol(X).",
         call. = FALSE)
  }
  # one Lstar-transformed outer product per observation, scaled by 1 / (n - 1)
  LstarSq <- vector(mode = "list", length = n)
  for (i in seq_len(n)) {
    LstarSq[[i]] <- Lstar(X[i, ] %*% t(X[i, ])) / (n - 1)
  }
  n_weights <- p * (p - 1) / 2
  # In the Gaussian case the data term does not depend on w, so it is summed
  # once here instead of at every iteration.
  if (heavy_type == "gaussian") {
    LstarS_gaussian <- Reduce(`+`, LstarSq, rep(0, n_weights))
  }
  # w-initialization: the initial adjacency matrix is rescaled so that each
  # row sums to one before being mapped back to a weight vector.
  w <- spectralGraphTopology:::w_init(w0, MASS::ginv(stats::cor(X)))
  A0 <- A(w)
  A0 <- A0 / rowSums(A0)
  w <- spectralGraphTopology:::Ainv(A0)
  J <- matrix(1, p, p) / p
  # Theta-initialization
  Lw <- L(w)
  Theta <- Lw
  # dual variables for the Laplacian (Y) and degree (y) constraints
  Y <- matrix(0, p, p)
  y <- rep(0, p)
  # ADMM constants used by the adaptive update of rho
  mu <- 2
  tau <- 2
  # residual vectors
  primal_lap_residual <- c()
  primal_deg_residual <- c()
  dual_residual <- c()
  # augmented lagrangian vector
  lagrangian <- c()
  if (verbose) {
    pb <- progress::progress_bar$new(format = "<:bar> :current/:total  eta: :eta",
                                     total = maxiter, clear = FALSE, width = 80)
  }
  elapsed_time <- c()
  start_time <- proc.time()[3]
  for (i in seq_len(maxiter)) {
    # update w: one projected gradient step (negative weights are set to 0)
    LstarLw <- Lstar(Lw)
    DstarDw <- Dstar(diag(Lw))
    if (heavy_type == "student") {
      # each observation is re-weighted using the current weights w
      LstarSweighted <- rep(0, n_weights)
      for (q in seq_len(n)) {
        LstarSweighted <- LstarSweighted +
          LstarSq[[q]] * compute_student_weights(w, LstarSq[[q]], p, nu)
      }
    } else {
      LstarSweighted <- LstarS_gaussian
    }
    grad <- LstarSweighted - Lstar(rho * Theta + Y) + Dstar(y - rho * d) +
      rho * (LstarLw + DstarDw)
    eta <- 1 / (2 * rho * (2 * p - 1))
    wi <- w - eta * grad
    wi[wi < 0] <- 0
    Lwi <- L(wi)
    # update Theta from the eigendecomposition of rho * (Lwi + J) - Y
    eig <- eigen(rho * (Lwi + J) - Y, symmetric = TRUE)
    V <- eig$vectors
    gamma <- eig$values
    Thetai <- V %*% diag((gamma + sqrt(gamma^2 + 4 * rho)) / (2 * rho)) %*% t(V) - J
    # update Y
    R1 <- Thetai - Lwi
    Y <- Y + rho * R1
    # update y
    R2 <- diag(Lwi) - d
    y <- y + rho * R2
    # compute primal, dual residuals, & lagrangian
    primal_lap_residual <- c(primal_lap_residual, norm(R1, "F"))
    primal_deg_residual <- c(primal_deg_residual, norm(R2, "2"))
    dual_residual <- c(dual_residual, rho * norm(Lstar(Theta - Thetai), "2"))
    lagrangian <- c(lagrangian,
                    compute_augmented_lagrangian_ht(wi, LstarSq, Thetai, J, Y, y,
                                                    d, heavy_type, n, p, rho, nu))
    # update rho: grow it when the primal residual dominates, shrink it when
    # the dual residual dominates
    if (update_rho) {
      s <- rho * norm(Lstar(Theta - Thetai), "2")
      r <- norm(R1, "F")
      if (r > mu * s) {
        rho <- rho * tau
      } else if (s > mu * r) {
        rho <- rho / tau
      }
    }
    if (verbose) {
      pb$tick()
    }
    # relative change of the Laplacian; isTRUE() keeps a 0/0 ratio (NaN) from
    # reaching `if` as a missing value
    rel_change <- norm(Lw - Lwi, "F") / norm(Lw, "F")
    has_converged <- (i > 1) && isTRUE(rel_change < reltol)
    elapsed_time <- c(elapsed_time, proc.time()[3] - start_time)
    if (has_converged) {
      break
    }
    w <- wi
    Lw <- Lwi
    Theta <- Thetai
  }
  list(laplacian = L(wi),
       adjacency = A(wi),
       theta = Thetai,
       maxiter = i,
       convergence = has_converged,
       primal_lap_residual = primal_lap_residual,
       primal_deg_residual = primal_deg_residual,
       dual_residual = dual_residual,
       lagrangian = lagrangian,
       elapsed_time = elapsed_time)
}

# Weight given to one observation in the Student-t gradient step:
# (p + nu) / (<w, LstarSq> + nu).
#
# w        current vector of graph weights
# LstarSq  Lstar-transformed outer product of a single observation, same
#          length as w
# p        number of nodes
# nu       degrees of freedom of the Student-t distribution
#
# NOTE(review): callers pass LstarSq already divided by n or n - 1, whereas the
# augmented Lagrangian multiplies the same inner product by n -- confirm that
# the scaling used here is the intended one.
compute_student_weights <- function(w, LstarSq, p, nu) {
  (p + nu) / (sum(w * LstarSq) + nu)
}

# Value of the augmented Lagrangian, recorded once per iteration by
# learn_regular_heavytail_graph().
#
# w, Theta  current graph weights and slack matrix
# LstarSq   list with one Lstar-transformed outer product per observation
# J         the p x p matrix with all entries equal to 1 / p
# Y, y      dual variables of the Laplacian and degree constraints
# d         target node degrees
# Any heavy_type other than "student" or "gaussian" leaves the data term at 0.
compute_augmented_lagrangian_ht <- function(w, LstarSq, Theta, J, Y, y, d, heavy_type, n, p, rho, nu) {
  eig <- eigen(Theta + J, symmetric = TRUE, only.values = TRUE)$values
  Lw <- L(w)
  Dw <- diag(Lw)
  # data-fitting term, averaged over the n observations
  u_func <- 0
  if (heavy_type == "student") {
    for (q in seq_len(n)) {
      u_func <- u_func + (p + nu) * log(1 + n * sum(w * LstarSq[[q]]) / nu)
    }
  } else if (heavy_type == "gaussian") {
    for (q in seq_len(n)) {
      u_func <- u_func + sum(n * w * LstarSq[[q]])
    }
  }
  u_func <- u_func / n
  log_det_term <- sum(log(eig))
  degree_dual_term <- sum(y * (Dw - d))
  laplacian_dual_term <- sum(diag(Y %*% (Theta - Lw)))
  penalty_term <- .5 * rho * (norm(Dw - d, "2")^2 + norm(Lw - Theta, "F")^2)
  u_func - log_det_term + degree_dual_term + laplacian_dual_term + penalty_term
}

# --------------------------------------------------------------------------------
# /R/k-component-graph-heavy-tail.R:
# --------------------------------------------------------------------------------
library(spectralGraphTopology)

#' @title Laplacian matrix of a k-component graph with heavy-tailed data
#'
#' Computes the Laplacian matrix of a graph on the basis of an observed data matrix,
#' where we assume the data to be Student-t distributed.
#'
#' @param X an n x p data matrix, where n is the number of observations and p is
#'        the number of nodes in the graph.
#' @param k the number of components of the graph.
#' @param heavy_type a string which selects the statistical distribution of the data.
#'        Valid values are "gaussian" or "student".
#' @param nu the degrees of freedom of the Student-t distribution.
#'        Must be a real number greater than 2.
#' @param w0 initial vector of graph weights. Either a vector of length p(p-1)/2 or
#'        a string indicating the method to compute an initial value.
#' @param beta hyperparameter that controls the regularization to obtain a
#'        k-component graph
#' @param update_beta whether to update beta during the optimization.
#' @param early_stopping whether to stop the iterations as soon as the rank
#'        constraint is satisfied. Only has an effect when \code{update_beta}
#'        is \code{TRUE}.
#' @param d the nodes' degrees. Either a vector or a single value.
#' @param rho constraint relaxation hyperparameter.
#' @param update_rho whether or not to update rho during the optimization.
#' @param maxiter maximum number of iterations.
#' @param reltol relative tolerance as a convergence criterion.
#' @param verbose whether to show a progress bar during the iterations.
#' @param record_objective whether to record the objective function per iteration.
#'        Currently not read by the implementation: the Lagrangian is recorded
#'        at every iteration regardless of this flag.
#' @return A list containing possibly the following elements:
#' \item{\code{laplacian}}{estimated Laplacian matrix}
#' \item{\code{adjacency}}{estimated adjacency matrix}
#' \item{\code{theta}}{estimated Laplacian matrix slack variable}
#' \item{\code{maxiter}}{number of iterations performed; equals the
#'   \code{maxiter} argument when no stopping rule was met}
#' \item{\code{convergence}}{boolean flag to indicate whether or not the
#'   optimization converged}
#' \item{\code{beta_seq}}{sequence of values taken by the hyperparameter beta until convergence}
#' \item{\code{primal_lap_residual}}{primal residual for the Laplacian matrix per iteration}
#' \item{\code{primal_deg_residual}}{primal residual for the degree vector per iteration}
#' \item{\code{dual_residual}}{dual residual per iteration}
#' \item{\code{lagrangian}}{Lagrangian value per iteration}
#' \item{\code{elapsed_time}}{elapsed time, measured from the start of the
#'   iterations, recorded at the end of each iteration}
#' @import spectralGraphTopology
#' @export
learn_kcomp_heavytail_graph <- function(X,
                                        k = 1,
                                        heavy_type = "gaussian",
                                        nu = NULL,
                                        w0 = "naive",
                                        d = 1,
                                        beta = 1e-8,
                                        update_beta = TRUE,
                                        early_stopping = FALSE,
                                        rho = 1,
                                        update_rho = FALSE,
                                        maxiter = 10000,
                                        reltol = 1e-5,
                                        verbose = TRUE,
                                        record_objective = FALSE) {
  # An unknown heavy_type used to leave the data term at zero without any
  # warning; reject it explicitly instead.
  heavy_type <- match.arg(heavy_type, c("gaussian", "student"))
  # With the default nu = NULL the Student-t weights would be zero-length and
  # silently wipe out the gradient, so require a usable value.
  if (heavy_type == "student" &&
      !(is.numeric(nu) && length(nu) == 1 && is.finite(nu) && nu > 0)) {
    stop("`nu` must be a single positive number when heavy_type = \"student\".",
         call. = FALSE)
  }
  if (!is.numeric(maxiter) || length(maxiter) != 1 || is.na(maxiter) ||
      maxiter < 1) {
    stop("`maxiter` must be a single number greater than or equal to 1.",
         call. = FALSE)
  }
  X <- scale(as.matrix(X))
  # number of nodes
  p <- ncol(X)
  # number of observations
  n <- nrow(X)
  # k indexes eigenvector columns (p - k + 1):p and 1:(p - k), so it has to
  # leave at least one column on each side.
  if (!is.numeric(k) || length(k) != 1 || is.na(k) || k != round(k) ||
      k < 1 || k >= p) {
    stop("`k` must be an integer between 1 and ncol(X) - 1.", call. = FALSE)
  }
  if (!is.numeric(d) || !(length(d) %in% c(1, p))) {
    stop("`d` must be a single number or a numeric vector of length ncol(X).",
         call. = FALSE)
  }
  # one Lstar-transformed outer product per observation, scaled by 1 / n
  LstarSq <- vector(mode = "list", length = n)
  for (i in seq_len(n)) {
    LstarSq[[i]] <- Lstar(X[i, ] %*% t(X[i, ])) / n
  }
  n_weights <- p * (p - 1) / 2
  # In the Gaussian case the data term does not depend on w, so it is summed
  # once here instead of at every iteration.
  if (heavy_type == "gaussian") {
    LstarS_gaussian <- Reduce(`+`, LstarSq, rep(0, n_weights))
  }
  # w-initialization: the initial adjacency matrix is rescaled so that each
  # row sums to one before being mapped back to a weight vector.
  w <- spectralGraphTopology:::w_init(w0, MASS::ginv(stats::cor(X)))
  A0 <- A(w)
  A0 <- A0 / rowSums(A0)
  w <- spectralGraphTopology:::Ainv(A0)
  # Theta-initialization
  Lw <- L(w)
  Theta <- Lw
  # last k eigenvector columns of the Laplacian; drop = FALSE keeps U a matrix
  # when k == 1
  tail_cols <- (p - k + 1):p
  head_cols <- seq_len(p - k)
  U <- eigen(Lw, symmetric = TRUE)$vectors[, tail_cols, drop = FALSE]
  # dual variables for the Laplacian (Y) and degree (y) constraints
  Y <- matrix(0, p, p)
  y <- rep(0, p)
  # ADMM constants used by the adaptive update of rho
  mu <- 2
  tau <- 2
  # residual vectors
  primal_lap_residual <- c()
  primal_deg_residual <- c()
  dual_residual <- c()
  # augmented lagrangian vector
  lagrangian <- c()
  beta_seq <- c()
  if (verbose) {
    pb <- progress::progress_bar$new(format = "<:bar> :current/:total  eta: :eta",
                                     total = maxiter, clear = FALSE, width = 80)
  }
  elapsed_time <- c()
  start_time <- proc.time()[3]
  for (i in seq_len(maxiter)) {
    # update w: one projected gradient step (negative weights are set to 0)
    LstarLw <- Lstar(Lw)
    DstarDw <- Dstar(diag(Lw))
    if (heavy_type == "student") {
      # each observation is re-weighted using the current weights w
      LstarSweighted <- rep(0, n_weights)
      for (q in seq_len(n)) {
        LstarSweighted <- LstarSweighted +
          LstarSq[[q]] * compute_student_weights(w, LstarSq[[q]], p, nu)
      }
    } else {
      LstarSweighted <- LstarS_gaussian
    }
    grad <- LstarSweighted + Lstar(beta * crossprod(t(U)) - Y - rho * Theta) +
      Dstar(y - rho * d) + rho * (LstarLw + DstarDw)
    eta <- 1 / (2 * rho * (2 * p - 1))
    wi <- w - eta * grad
    wi[wi < 0] <- 0
    Lwi <- L(wi)
    # update U
    U <- eigen(Lwi, symmetric = TRUE)$vectors[, tail_cols, drop = FALSE]
    # update Theta from the first p - k eigenpairs of rho * Lwi - Y.
    # drop = FALSE and diag(..., nrow = ) keep the p - k == 1 case a proper
    # 1-column / 1 x 1 matrix (diag() of a bare scalar builds an identity).
    eig <- eigen(rho * Lwi - Y, symmetric = TRUE)
    V <- eig$vectors[, head_cols, drop = FALSE]
    gamma <- eig$values[head_cols]
    Thetai <- V %*%
      diag((gamma + sqrt(gamma^2 + 4 * rho)) / (2 * rho), nrow = length(gamma)) %*%
      t(V)
    # update Y
    R1 <- Thetai - Lwi
    Y <- Y + rho * R1
    # update y
    R2 <- diag(Lwi) - d
    y <- y + rho * R2
    # compute primal, dual residuals, & lagrangian
    primal_lap_residual <- c(primal_lap_residual, norm(R1, "F"))
    primal_deg_residual <- c(primal_deg_residual, norm(R2, "2"))
    dual_residual <- c(dual_residual, rho * norm(Lstar(Theta - Thetai), "2"))
    lagrangian <- c(lagrangian,
                    compute_augmented_lagrangian_kcomp_ht(wi, LstarSq, Thetai, U, Y,
                                                          y, d, heavy_type, n, p, k,
                                                          rho, beta, nu))
    # update rho: grow it when the primal residual dominates, shrink it when
    # the dual residual dominates
    if (update_rho) {
      s <- rho * norm(Lstar(Theta - Thetai), "2")
      r <- norm(R1, "F") # + norm(R2, "2")
      if (r > mu * s) {
        rho <- rho * tau
      } else if (s > mu * r) {
        rho <- rho / tau
      }
    }
    # update beta: halve it when there are more than k (numerically) zero
    # eigenvalues, double it when there are fewer
    rank_constraint_met <- FALSE
    if (update_beta) {
      eig_vals <- spectralGraphTopology:::eigval_sym(Lwi)
      n_zero_eigenvalues <- sum(eig_vals < 1e-9)
      if (k < n_zero_eigenvalues) {
        beta <- .5 * beta
      } else if (k > n_zero_eigenvalues) {
        beta <- 2 * beta
      } else {
        rank_constraint_met <- TRUE
      }
      beta_seq <- c(beta_seq, beta)
    }
    if (verbose) {
      pb$tick()
    }
    # Early stopping is folded into the convergence flag so that beta_seq,
    # elapsed_time and the progress bar are also updated for the final
    # iteration; all per-iteration vectors then share the same length.
    rel_change <- norm(Lwi - Lw, "F") / norm(Lw, "F")
    has_converged <- (early_stopping && rank_constraint_met) ||
      ((i > 1) && isTRUE(rel_change < reltol))
    elapsed_time <- c(elapsed_time, proc.time()[3] - start_time)
    if (has_converged) {
      break
    }
    w <- wi
    Lw <- Lwi
    Theta <- Thetai
  }
  list(laplacian = L(wi),
       adjacency = A(wi),
       theta = Thetai,
       maxiter = i,
       convergence = has_converged,
       beta_seq = beta_seq,
       primal_lap_residual = primal_lap_residual,
       primal_deg_residual = primal_deg_residual,
       dual_residual = dual_residual,
       lagrangian = lagrangian,
       elapsed_time = elapsed_time)
}

# Value of the augmented Lagrangian, recorded once per iteration by
# learn_kcomp_heavytail_graph().
#
# w, Theta  current graph weights and slack matrix
# LstarSq   list with one Lstar-transformed outer product per observation
# U         eigenvector columns used in the beta-weighted regularization term
# Y, y      dual variables of the Laplacian and degree constraints
# d         target node degrees
# Only the first p - k eigenvalues of Theta enter the log term.
# Any heavy_type other than "student" or "gaussian" leaves the data term at 0.
compute_augmented_lagrangian_kcomp_ht <- function(w, LstarSq, Theta, U, Y, y, d, heavy_type, n, p, k, rho, beta, nu) {
  eig <- eigen(Theta, symmetric = TRUE, only.values = TRUE)$values[1:(p - k)]
  Lw <- L(w)
  Dw <- diag(Lw)
  # data-fitting term, averaged over the n observations
  u_func <- 0
  if (heavy_type == "student") {
    for (q in seq_len(n)) {
      u_func <- u_func + (p + nu) * log(1 + n * sum(w * LstarSq[[q]]) / nu)
    }
  } else if (heavy_type == "gaussian") {
    for (q in seq_len(n)) {
      u_func <- u_func + sum(n * w * LstarSq[[q]])
    }
  }
  u_func <- u_func / n
  log_det_term <- sum(log(eig))
  degree_dual_term <- sum(y * (Dw - d))
  laplacian_dual_term <- sum(diag(Y %*% (Theta - Lw)))
  penalty_term <- .5 * rho * (norm(Dw - d, "2")^2 + norm(Lw - Theta, "F")^2)
  rank_term <- beta * sum(w * Lstar(crossprod(t(U))))
  u_func - log_det_term + degree_dual_term + laplacian_dual_term +
    penalty_term + rank_term
}

-------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "fingraph README" 3 | output: 4 | html_document: 5 | keep_md: true 6 | --- 7 | 8 | ```{r, echo = FALSE} 9 | library(knitr) 10 | opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>", 13 | fig.path = "man/figures/README-", 14 | fig.align = "center", 15 | fig.retina = 2, 16 | out.width = "75%", 17 | dpi = 96 18 | ) 19 | knit_hooks$set(pngquant = hook_pngquant) 20 | ``` 21 | 22 | # fingraph 23 | [![codecov](https://codecov.io/gh/convexfi/fingraph/branch/main/graph/badge.svg?token=OhreF1p2Yt)](https://app.codecov.io/gh/convexfi/fingraph) 24 | 25 | 26 | This repo contains ADMM implementations to estimate weighted undirected graphs 27 | (Markov random fields) under Student-t assumptions with applications to financial 28 | markets. 29 | 30 | ## Installation 31 | 32 | **fingraph** depends on the development version of **spectralGraphTopology**, 33 | which can be installed as: 34 | ```{r, eval = FALSE} 35 | > devtools::install_github("convexfi/spectralGraphTopology") 36 | ``` 37 | 38 | The stable version of **fingraph** can be installed directly from CRAN: 39 | ```{r, eval = FALSE} 40 | > install.packages("fingraph") 41 | ``` 42 | 43 | #### Microsoft Windows 44 | On MS Windows environments, make sure to install the most recent version of ``Rtools``. 
45 | 46 | ## Usage 47 | 48 | ### Learning a graph of cryptocurrencies 49 | ```{r plot_crypto_network, message=FALSE} 50 | library(igraph) 51 | library(fingraph) 52 | library(fitHeavyTail) 53 | library(xts) 54 | set.seed(123) 55 | 56 | # load crypto prices into an xts table 57 | crypto_prices <- readRDS("examples/crypto/crypto-prices.rds") 58 | colnames(crypto_prices) 59 | 60 | # compute log-returns 61 | log_returns <- diff(log(crypto_prices), na.pad = FALSE) 62 | 63 | # estimate a weighted, undirected graph (markov random field) 64 | graph_mrf <- learn_kcomp_heavytail_graph(scale(log_returns), 65 | k = 8, 66 | heavy_type = "student", 67 | nu = fit_mvt(scale(log_returns))$nu, 68 | verbose = FALSE) 69 | 70 | # plot network 71 | net <- graph_from_adjacency_matrix(graph_mrf$adjacency, 72 | mode = "undirected", 73 | weighted = TRUE) 74 | cfg <- cluster_fast_greedy(as.undirected(net)) 75 | la_kcomp <- layout_nicely(net) 76 | V(net)$label.cex = 1 77 | plot(cfg, net, vertex.label = colnames(crypto_prices), 78 | layout = la_kcomp, 79 | vertex.size = 4.5, 80 | col = "black", 81 | edge.color = c("#686de0"), 82 | vertex.label.family = "Helvetica", 83 | vertex.label.color = "black", 84 | vertex.label.dist = 1.25, 85 | vertex.shape = "circle", 86 | edge.width = 20*E(net)$weight, 87 | edge.curved = 0.1) 88 | ``` 89 | 90 | 91 | ### Learning a network of S&P500 stocks 92 | ```{r plot_sp500_stocks_network, message=FALSE} 93 | library(xts) 94 | library(igraph) 95 | library(fingraph) 96 | library(fitHeavyTail) 97 | library(readr) 98 | set.seed(123) 99 | 100 | # load table w/ stocks and their sectors 101 | SP500 <- read_csv("examples/stocks/SP500-sectors.csv") 102 | 103 | # load stock prices into an xts table 104 | stock_prices <- readRDS("examples/stocks/stock-data-2014-2018.rds") 105 | colnames(stock_prices) 106 | 107 | # compute log-returns 108 | log_returns <- diff(log(stock_prices), na.pad = FALSE) 109 | 110 | # estimate a weighted, undirected graph (markov random field) 111 | 
graph_mrf <- learn_kcomp_heavytail_graph(scale(log_returns), 112 | rho = 10, 113 | k = 3, 114 | heavy_type = "student", 115 | nu = fit_mvt(scale(log_returns))$nu, 116 | verbose = FALSE) 117 | 118 | # map stock names and sectors 119 | stock_sectors <- c(SP500$GICS.Sector[SP500$Symbol %in% colnames(stock_prices)]) 120 | stock_sectors_index <- as.numeric(as.factor(stock_sectors)) 121 | 122 | # plot network 123 | net <- graph_from_adjacency_matrix(graph_mrf$adjacency, 124 | mode = "undirected", 125 | weighted = TRUE) 126 | la_kcomp <- layout_nicely(net) 127 | V(net)$label.cex = 1 128 | colors <- c("#FD7272", "#55E6C1", "#25CCF7") 129 | V(net)$color <- colors[stock_sectors_index] 130 | V(net)$type <- stock_sectors_index 131 | V(net)$cluster <- stock_sectors_index 132 | E(net)$color <- apply(as.data.frame(get.edgelist(net)), 1, 133 | function(x) ifelse(V(net)$cluster[x[1]] == V(net)$cluster[x[2]], 134 | colors[V(net)$cluster[x[1]]], 'grey')) 135 | plot(net, vertex.label = colnames(stock_prices), 136 | layout = la_kcomp, 137 | vertex.size = 4.5, 138 | vertex.label.family = "Helvetica", 139 | vertex.label.dist = 1.25, 140 | vertex.label.color = "black", 141 | vertex.shape = "circle", 142 | edge.width = 20*E(net)$weight, 143 | edge.curved = 0.1) 144 | ``` 145 | 146 | ## Citation 147 | If you made use of this software please consider citing: 148 | 149 | - [Cardoso JVM](https://mirca.github.io), [Ying J](https://github.com/jxying), 150 | [Palomar DP](https://www.danielppalomar.com) (2021). 151 | [Graphical Models in Heavy-Tailed Markets](https://papers.nips.cc/paper/2021/hash/a64a034c3cb8eac64eb46ea474902797-Abstract.html). 152 | [Advances in Neural Information Processing Systems](https://neurips.cc/Conferences/2021) (NeurIPS’21). 
153 | 154 | ## Links 155 | - [RFinance'23 Slides](https://github.com/mirca/rfinance-talk/blob/main/rfinance.pdf) 156 | - [NeurIPS’21 Slides](https://palomar.home.ece.ust.hk/papers/2021/CardosoYingPalomar-NeurIPS2021-slides.pdf) 157 | - [NeurIPS'21 Poster](https://palomar.home.ece.ust.hk/papers/2021/CardosoYingPalomar-NeurIPS2021-poster.png) 158 | - [NeurIPS'21 Supplementary Material](https://palomar.home.ece.ust.hk/papers/2021/CardosoYingPalomar-NeurIPS2021-supplemental.pdf) 159 | - [CRAN Package](https://cran.r-project.org/package=fingraph) 160 | 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fingraph 2 | [![codecov](https://codecov.io/gh/convexfi/fingraph/branch/main/graph/badge.svg?token=OhreF1p2Yt)](https://app.codecov.io/gh/convexfi/fingraph) 3 | 4 | 5 | This repo contains ADMM implementations to estimate weighted undirected graphs 6 | (Markov random fields) under Student-t assumptions with applications to financial 7 | markets. 8 | 9 | ## Installation 10 | 11 | **fingraph** depends on the development version of **spectralGraphTopology**, 12 | which can be installed as: 13 | 14 | ```r 15 | > devtools::install_github("convexfi/spectralGraphTopology") 16 | ``` 17 | 18 | The stable version of **fingraph** can be installed directly from CRAN: 19 | 20 | ```r 21 | > install.packages("fingraph") 22 | ``` 23 | 24 | #### Microsoft Windows 25 | On MS Windows environments, make sure to install the most recent version of ``Rtools``. 
26 | 27 | ## Usage 28 | 29 | ### Learning a graph of cryptocurrencies 30 | 31 | ```r 32 | library(igraph) 33 | library(fingraph) 34 | library(fitHeavyTail) 35 | library(xts) 36 | set.seed(123) 37 | 38 | # load crypto prices into an xts table 39 | crypto_prices <- readRDS("examples/crypto/crypto-prices.rds") 40 | colnames(crypto_prices) 41 | #> [1] "BTC" "ETH" "USDT" "BNB" "USDC" "XRP" 42 | #> [7] "ADA" "HEX" "DOGE" "SOL" "MATIC" "DOT" 43 | #> [13] "TRX" "LTC" "BUSD" "SHIB" "AVAX" "DAI" 44 | #> [19] "LEO" "LINK" "ATOM" "UNI7083" "XMR" "OKB" 45 | #> [25] "ETC" "TON11419" "XLM" "BCH" "ICP" "CNX" 46 | #> [31] "TUSD" "FIL" "HBAR" "CRO" "LDO" "NEAR" 47 | #> [37] "VET" "QNT" "ALGO" "USDP" "FTM" "GRT6719" 48 | 49 | # compute log-returns 50 | log_returns <- diff(log(crypto_prices), na.pad = FALSE) 51 | 52 | # estimate a weighted, undirected graph (markov random field) 53 | graph_mrf <- learn_kcomp_heavytail_graph(scale(log_returns), 54 | k = 8, 55 | heavy_type = "student", 56 | nu = fit_mvt(scale(log_returns))$nu, 57 | verbose = FALSE) 58 | 59 | # plot network 60 | net <- graph_from_adjacency_matrix(graph_mrf$adjacency, 61 | mode = "undirected", 62 | weighted = TRUE) 63 | cfg <- cluster_fast_greedy(as.undirected(net)) 64 | la_kcomp <- layout_nicely(net) 65 | V(net)$label.cex = 1 66 | plot(cfg, net, vertex.label = colnames(crypto_prices), 67 | layout = la_kcomp, 68 | vertex.size = 4.5, 69 | col = "black", 70 | edge.color = c("#686de0"), 71 | vertex.label.family = "Helvetica", 72 | vertex.label.color = "black", 73 | vertex.label.dist = 1.25, 74 | vertex.shape = "circle", 75 | edge.width = 20*E(net)$weight, 76 | edge.curved = 0.1) 77 | ``` 78 | 79 | 80 | 81 | 82 | ### Learning a network of S&P500 stocks 83 | 84 | ```r 85 | library(xts) 86 | library(igraph) 87 | library(fingraph) 88 | library(fitHeavyTail) 89 | library(readr) 90 | set.seed(123) 91 | 92 | # load table w/ stocks and their sectors 93 | SP500 <- read_csv("examples/stocks/SP500-sectors.csv") 94 | 95 | # load stock 
prices into an xts table 96 | stock_prices <- readRDS("examples/stocks/stock-data-2014-2018.rds") 97 | colnames(stock_prices) 98 | #> [1] "AEE" "AEP" "AES" "AIV" "AMT" "ARE" "ATO" "ATVI" "AVB" 99 | #> [10] "AWK" "BXP" "CBRE" "CCI" "CHTR" "CMCSA" "CMS" "CNP" "CTL" 100 | #> [19] "D" "DIS" "DISCA" "DISCK" "DISH" "DLR" "DRE" "DTE" "DUK" 101 | #> [28] "EA" "ED" "EIX" "EQIX" "EQR" "ES" "ESS" "ETR" "EVRG" 102 | #> [37] "EXC" "EXR" "FB" "FE" "FRT" "GOOG" "GOOGL" "HST" "IPG" 103 | #> [46] "IRM" "KIM" "LNT" "LYV" "MAA" "NEE" "NFLX" "NI" "NRG" 104 | #> [55] "NWS" "NWSA" "O" "OMC" "PEAK" "PEG" "PLD" "PNW" "PPL" 105 | #> [64] "PSA" "REG" "SBAC" "SLG" "SO" "SPG" "SRE" "T" "TMUS" 106 | #> [73] "TTWO" "TWTR" "UDR" "VNO" "VTR" "VZ" "WEC" "WELL" "WY" 107 | #> [82] "XEL" 108 | 109 | # compute log-returns 110 | log_returns <- diff(log(stock_prices), na.pad = FALSE) 111 | 112 | # estimate a weighted, undirected graph (markov random field) 113 | graph_mrf <- learn_kcomp_heavytail_graph(scale(log_returns), 114 | rho = 10, 115 | k = 3, 116 | heavy_type = "student", 117 | nu = fit_mvt(scale(log_returns))$nu, 118 | verbose = FALSE) 119 | #> Warning in tclass.xts(x): index does not have a 'tclass' attribute 120 | 121 | #> Warning in tclass.xts(x): index does not have a 'tclass' attribute 122 | 123 | # map stock names and sectors 124 | stock_sectors <- c(SP500$GICS.Sector[SP500$Symbol %in% colnames(stock_prices)]) 125 | stock_sectors_index <- as.numeric(as.factor(stock_sectors)) 126 | 127 | # plot network 128 | net <- graph_from_adjacency_matrix(graph_mrf$adjacency, 129 | mode = "undirected", 130 | weighted = TRUE) 131 | la_kcomp <- layout_nicely(net) 132 | V(net)$label.cex = 1 133 | colors <- c("#FD7272", "#55E6C1", "#25CCF7") 134 | V(net)$color <- colors[stock_sectors_index] 135 | V(net)$type <- stock_sectors_index 136 | V(net)$cluster <- stock_sectors_index 137 | E(net)$color <- apply(as.data.frame(get.edgelist(net)), 1, 138 | function(x) ifelse(V(net)$cluster[x[1]] == 
V(net)$cluster[x[2]], 139 | colors[V(net)$cluster[x[1]]], 'grey')) 140 | plot(net, vertex.label = colnames(stock_prices), 141 | layout = la_kcomp, 142 | vertex.size = 4.5, 143 | vertex.label.family = "Helvetica", 144 | vertex.label.dist = 1.25, 145 | vertex.label.color = "black", 146 | vertex.shape = "circle", 147 | edge.width = 20*E(net)$weight, 148 | edge.curved = 0.1) 149 | ``` 150 | 151 | 152 | 153 | ## Citation 154 | If you made use of this software please consider citing: 155 | 156 | - [Cardoso JVM](https://mirca.github.io), [Ying J](https://github.com/jxying), 157 | [Palomar DP](https://www.danielppalomar.com) (2021). 158 | [Graphical Models in Heavy-Tailed Markets](https://papers.nips.cc/paper/2021/hash/a64a034c3cb8eac64eb46ea474902797-Abstract.html). 159 | [Advances in Neural Information Processing Systems](https://neurips.cc/Conferences/2021) (NeurIPS’21). 160 | 161 | ## Links 162 | - [RFinance'23 Slides](https://github.com/mirca/rfinance-talk/blob/main/rfinance.pdf) 163 | - [NeurIPS’21 Slides](https://palomar.home.ece.ust.hk/papers/2021/CardosoYingPalomar-NeurIPS2021-slides.pdf) 164 | - [NeurIPS'21 Poster](https://palomar.home.ece.ust.hk/papers/2021/CardosoYingPalomar-NeurIPS2021-poster.png) 165 | - [NeurIPS'21 Supplementary Material](https://palomar.home.ece.ust.hk/papers/2021/CardosoYingPalomar-NeurIPS2021-supplemental.pdf) 166 | - [CRAN Package](https://cran.r-project.org/package=fingraph) 167 | 168 | -------------------------------------------------------------------------------- /examples/crypto/crypto-prices.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/convexfi/fingraph/78a3f9e5f81c2b6ca6cfed896f0decffa391cf9f/examples/crypto/crypto-prices.rds -------------------------------------------------------------------------------- /examples/crypto/crypto-symbols-yahoo-finance.csv: -------------------------------------------------------------------------------- 1 | ,Symbols 2 | 
0,BTC-USD 3 | 1,ETH-USD 4 | 2,USDT-USD 5 | 3,BNB-USD 6 | 4,USDC-USD 7 | 5,XRP-USD 8 | 6,ADA-USD 9 | 7,HEX-USD 10 | 8,STETH-USD 11 | 9,DOGE-USD 12 | 10,SOL-USD 13 | 11,MATIC-USD 14 | 12,WTRX-USD 15 | 13,DOT-USD 16 | 14,TRX-USD 17 | 15,LTC-USD 18 | 16,BUSD-USD 19 | 17,SHIB-USD 20 | 18,AVAX-USD 21 | 19,DAI-USD 22 | 20,WBTC-USD 23 | 21,LEO-USD 24 | 22,LINK-USD 25 | 23,ATOM-USD 26 | 24,UNI7083-USD 27 | 25,XMR-USD 28 | 26,OKB-USD 29 | 27,ETC-USD 30 | 28,TON11419-USD 31 | 29,XLM-USD 32 | 30,BCH-USD 33 | 31,ICP-USD 34 | 32,CNX-USD 35 | 33,TUSD-USD 36 | 34,FIL-USD 37 | 35,HBAR-USD 38 | 36,WHBAR-USD 39 | 37,CRO-USD 40 | 38,APT21794-USD 41 | 39,LDO-USD 42 | 40,BTCB-USD 43 | 41,NEAR-USD 44 | 42,ARB11841-USD 45 | 43,VET-USD 46 | 44,QNT-USD 47 | 45,APE18876-USD 48 | 46,ALGO-USD 49 | 47,USDP-USD 50 | 48,FTM-USD 51 | 49,GRT6719-USD 52 | -------------------------------------------------------------------------------- /examples/crypto/list-of-crypto-symbols-from-yahoo-finance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from requests_html import HTMLSession\n", 10 | "from bs4 import BeautifulSoup\n", 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "number_of_tokens = 50" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "url = \"https://finance.yahoo.com/crypto/?offset=0&count={}\".format(number_of_tokens)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "'https://finance.yahoo.com/crypto/?offset=0&count=50'" 41 | ] 42 | }, 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "output_type": 
"execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "url" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 5, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "session = HTMLSession()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 6, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "page = session.get(url)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 7, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "soup = BeautifulSoup(page.content, 'html.parser')" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 8, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "symbols = [s['href'] for s in soup.find_all('a') if s['href'].endswith('USD') and s['href'].startswith('/quote/')]" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 9, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "data": { 95 | "text/plain": [ 96 | "16" 97 | ] 98 | }, 99 | "execution_count": 9, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "symbols[0].find('=')" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 10, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "['/quote/BTC-USD?p=BTC-USD',\n", 117 | " '/quote/ETH-USD?p=ETH-USD',\n", 118 | " '/quote/USDT-USD?p=USDT-USD',\n", 119 | " '/quote/BNB-USD?p=BNB-USD',\n", 120 | " '/quote/USDC-USD?p=USDC-USD',\n", 121 | " '/quote/XRP-USD?p=XRP-USD',\n", 122 | " '/quote/ADA-USD?p=ADA-USD',\n", 123 | " '/quote/HEX-USD?p=HEX-USD',\n", 124 | " '/quote/STETH-USD?p=STETH-USD',\n", 125 | " '/quote/DOGE-USD?p=DOGE-USD',\n", 126 | " '/quote/SOL-USD?p=SOL-USD',\n", 127 | " '/quote/MATIC-USD?p=MATIC-USD',\n", 128 | " '/quote/WTRX-USD?p=WTRX-USD',\n", 129 | " '/quote/DOT-USD?p=DOT-USD',\n", 130 | " '/quote/TRX-USD?p=TRX-USD',\n", 131 | " '/quote/LTC-USD?p=LTC-USD',\n", 132 | " 
'/quote/BUSD-USD?p=BUSD-USD',\n", 133 | " '/quote/SHIB-USD?p=SHIB-USD',\n", 134 | " '/quote/AVAX-USD?p=AVAX-USD',\n", 135 | " '/quote/DAI-USD?p=DAI-USD',\n", 136 | " '/quote/WBTC-USD?p=WBTC-USD',\n", 137 | " '/quote/LEO-USD?p=LEO-USD',\n", 138 | " '/quote/LINK-USD?p=LINK-USD',\n", 139 | " '/quote/ATOM-USD?p=ATOM-USD',\n", 140 | " '/quote/UNI7083-USD?p=UNI7083-USD',\n", 141 | " '/quote/XMR-USD?p=XMR-USD',\n", 142 | " '/quote/OKB-USD?p=OKB-USD',\n", 143 | " '/quote/ETC-USD?p=ETC-USD',\n", 144 | " '/quote/TON11419-USD?p=TON11419-USD',\n", 145 | " '/quote/XLM-USD?p=XLM-USD',\n", 146 | " '/quote/BCH-USD?p=BCH-USD',\n", 147 | " '/quote/ICP-USD?p=ICP-USD',\n", 148 | " '/quote/CNX-USD?p=CNX-USD',\n", 149 | " '/quote/TUSD-USD?p=TUSD-USD',\n", 150 | " '/quote/FIL-USD?p=FIL-USD',\n", 151 | " '/quote/HBAR-USD?p=HBAR-USD',\n", 152 | " '/quote/WHBAR-USD?p=WHBAR-USD',\n", 153 | " '/quote/CRO-USD?p=CRO-USD',\n", 154 | " '/quote/APT21794-USD?p=APT21794-USD',\n", 155 | " '/quote/LDO-USD?p=LDO-USD',\n", 156 | " '/quote/BTCB-USD?p=BTCB-USD',\n", 157 | " '/quote/NEAR-USD?p=NEAR-USD',\n", 158 | " '/quote/ARB11841-USD?p=ARB11841-USD',\n", 159 | " '/quote/VET-USD?p=VET-USD',\n", 160 | " '/quote/QNT-USD?p=QNT-USD',\n", 161 | " '/quote/APE18876-USD?p=APE18876-USD',\n", 162 | " '/quote/ALGO-USD?p=ALGO-USD',\n", 163 | " '/quote/USDP-USD?p=USDP-USD',\n", 164 | " '/quote/FTM-USD?p=FTM-USD',\n", 165 | " '/quote/GRT6719-USD?p=GRT6719-USD']" 166 | ] 167 | }, 168 | "execution_count": 10, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "symbols" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 11, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "symbols_ = [s[s.find('=')+1:] for s in symbols]" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 12, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/plain": [ 194 | "50" 195 | ] 196 | }, 197 | 
"execution_count": 12, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "len(symbols_)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 13, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "df = pd.DataFrame({'Symbols': symbols_})" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 14, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/html": [ 223 | "
\n", 224 | "\n", 237 | "\n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " 
\n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | "
Symbols
0BTC-USD
1ETH-USD
2USDT-USD
3BNB-USD
4USDC-USD
5XRP-USD
6ADA-USD
7HEX-USD
8STETH-USD
9DOGE-USD
10SOL-USD
11MATIC-USD
12WTRX-USD
13DOT-USD
14TRX-USD
15LTC-USD
16BUSD-USD
17SHIB-USD
18AVAX-USD
19DAI-USD
20WBTC-USD
21LEO-USD
22LINK-USD
23ATOM-USD
24UNI7083-USD
25XMR-USD
26OKB-USD
27ETC-USD
28TON11419-USD
29XLM-USD
30BCH-USD
31ICP-USD
32CNX-USD
33TUSD-USD
34FIL-USD
35HBAR-USD
36WHBAR-USD
37CRO-USD
38APT21794-USD
39LDO-USD
40BTCB-USD
41NEAR-USD
42ARB11841-USD
43VET-USD
44QNT-USD
45APE18876-USD
46ALGO-USD
47USDP-USD
48FTM-USD
49GRT6719-USD
\n", 447 | "
" 448 | ], 449 | "text/plain": [ 450 | " Symbols\n", 451 | "0 BTC-USD\n", 452 | "1 ETH-USD\n", 453 | "2 USDT-USD\n", 454 | "3 BNB-USD\n", 455 | "4 USDC-USD\n", 456 | "5 XRP-USD\n", 457 | "6 ADA-USD\n", 458 | "7 HEX-USD\n", 459 | "8 STETH-USD\n", 460 | "9 DOGE-USD\n", 461 | "10 SOL-USD\n", 462 | "11 MATIC-USD\n", 463 | "12 WTRX-USD\n", 464 | "13 DOT-USD\n", 465 | "14 TRX-USD\n", 466 | "15 LTC-USD\n", 467 | "16 BUSD-USD\n", 468 | "17 SHIB-USD\n", 469 | "18 AVAX-USD\n", 470 | "19 DAI-USD\n", 471 | "20 WBTC-USD\n", 472 | "21 LEO-USD\n", 473 | "22 LINK-USD\n", 474 | "23 ATOM-USD\n", 475 | "24 UNI7083-USD\n", 476 | "25 XMR-USD\n", 477 | "26 OKB-USD\n", 478 | "27 ETC-USD\n", 479 | "28 TON11419-USD\n", 480 | "29 XLM-USD\n", 481 | "30 BCH-USD\n", 482 | "31 ICP-USD\n", 483 | "32 CNX-USD\n", 484 | "33 TUSD-USD\n", 485 | "34 FIL-USD\n", 486 | "35 HBAR-USD\n", 487 | "36 WHBAR-USD\n", 488 | "37 CRO-USD\n", 489 | "38 APT21794-USD\n", 490 | "39 LDO-USD\n", 491 | "40 BTCB-USD\n", 492 | "41 NEAR-USD\n", 493 | "42 ARB11841-USD\n", 494 | "43 VET-USD\n", 495 | "44 QNT-USD\n", 496 | "45 APE18876-USD\n", 497 | "46 ALGO-USD\n", 498 | "47 USDP-USD\n", 499 | "48 FTM-USD\n", 500 | "49 GRT6719-USD" 501 | ] 502 | }, 503 | "execution_count": 14, 504 | "metadata": {}, 505 | "output_type": "execute_result" 506 | } 507 | ], 508 | "source": [ 509 | "df" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 15, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "df.to_csv(\"crypto-symbols-yahoo-finance.csv\")" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [] 527 | } 528 | ], 529 | "metadata": { 530 | "kernelspec": { 531 | "display_name": "Python 3", 532 | "language": "python", 533 | "name": "python3" 534 | }, 535 | "language_info": { 536 | "codemirror_mode": { 537 | "name": "ipython", 538 | "version": 3 539 | }, 540 | "file_extension": ".py", 541 | "mimetype": 
"text/x-python", 542 | "name": "python", 543 | "nbconvert_exporter": "python", 544 | "pygments_lexer": "ipython3", 545 | "version": "3.8.3" 546 | } 547 | }, 548 | "nbformat": 4, 549 | "nbformat_minor": 4 550 | } 551 | -------------------------------------------------------------------------------- /examples/stocks/SP500-sectors.csv: -------------------------------------------------------------------------------- 1 | "","Symbol","GICS.Sector" 2 | "12","A","Health Care" 3 | "29","AAL","Industrials" 4 | "9","AAP","Consumer Discretionary" 5 | "47","AAPL","Information Technology" 6 | "3","ABBV","Health Care" 7 | "36","ABC","Health Care" 8 | "4","ABMD","Health Care" 9 | "2","ABT","Health Care" 10 | "5","ACN","Information Technology" 11 | "7","ADBE","Information Technology" 12 | "40","ADI","Information Technology" 13 | "50","ADM","Consumer Staples" 14 | "57","ADP","Information Technology" 15 | "56","ADSK","Information Technology" 16 | "28","AEE","Utilities" 17 | "30","AEP","Utilities" 18 | "10","AES","Utilities" 19 | "11","AFL","Financials" 20 | "32","AIG","Financials" 21 | "46","AIV","Real Estate" 22 | "53","AIZ","Financials" 23 | "52","AJG","Financials" 24 | "14","AKAM","Information Technology" 25 | "16","ALB","Materials" 26 | "19","ALGN","Health Care" 27 | "15","ALK","Industrials" 28 | "22","ALL","Financials" 29 | "20","ALLE","Industrials" 30 | "18","ALXN","Health Care" 31 | "48","AMAT","Information Technology" 32 | "27","AMCR","Materials" 33 | "8","AMD","Information Technology" 34 | "37","AME","Industrials" 35 | "38","AMGN","Health Care" 36 | "35","AMP","Financials" 37 | "33","AMT","Real Estate" 38 | "26","AMZN","Consumer Discretionary" 39 | "51","ANET","Information Technology" 40 | "41","ANSS","Information Technology" 41 | "42","ANTM","Health Care" 42 | "43","AON","Financials" 43 | "44","AOS","Industrials" 44 | "45","APA","Energy" 45 | "13","APD","Materials" 46 | "39","APH","Information Technology" 47 | "49","APTV","Consumer Discretionary" 48 | "17","ARE","Real 
Estate" 49 | "55","ATO","Utilities" 50 | "6","ATVI","Communication Services" 51 | "59","AVB","Real Estate" 52 | "78","AVGO","Information Technology" 53 | "60","AVY","Materials" 54 | "34","AWK","Utilities" 55 | "31","AXP","Financials" 56 | "58","AZO","Consumer Discretionary" 57 | "72","BA","Industrials" 58 | "63","BAC","Financials" 59 | "65","BAX","Health Care" 60 | "68","BBY","Consumer Discretionary" 61 | "66","BDX","Health Care" 62 | "206","BEN","Financials" 63 | "80","BF.B","Consumer Staples" 64 | "70","BIIB","Health Care" 65 | "69","BIO","Health Care" 66 | "64","BK","Financials" 67 | "73","BKNG","Consumer Discretionary" 68 | "61","BKR","Energy" 69 | "71","BLK","Financials" 70 | "62","BLL","Materials" 71 | "77","BMY","Health Care" 72 | "79","BR","Information Technology" 73 | "67","BRK.B","Financials" 74 | "76","BSX","Health Care" 75 | "74","BWA","Consumer Discretionary" 76 | "75","BXP","Real Estate" 77 | "110","C","Financials" 78 | "121","CAG","Consumer Staples" 79 | "86","CAH","Health Care" 80 | "89","CARR","Industrials" 81 | "90","CAT","Industrials" 82 | "104","CB","Financials" 83 | "91","CBOE","Financials" 84 | "92","CBRE","Real Estate" 85 | "132","CCI","Real Estate" 86 | "88","CCL","Consumer Discretionary" 87 | "83","CDNS","Information Technology" 88 | "93","CDW","Information Technology" 89 | "94","CE","Materials" 90 | "98","CERN","Health Care" 91 | "99","CF","Materials" 92 | "111","CFG","Financials" 93 | "105","CHD","Consumer Staples" 94 | "81","CHRW","Industrials" 95 | "101","CHTR","Communication Services" 96 | "106","CI","Health Care" 97 | "107","CINF","Financials" 98 | "118","CL","Consumer Staples" 99 | "113","CLX","Consumer Staples" 100 | "120","CMA","Financials" 101 | "119","CMCSA","Communication Services" 102 | "114","CME","Financials" 103 | "103","CMG","Consumer Discretionary" 104 | "134","CMI","Industrials" 105 | "115","CMS","Utilities" 106 | "95","CNC","Health Care" 107 | "96","CNP","Utilities" 108 | "85","COF","Financials" 109 | "82","COG","Energy" 
110 | "126","COO","Health Care" 111 | "123","COP","Energy" 112 | "130","COST","Consumer Staples" 113 | "131","COTY","Consumer Staples" 114 | "84","CPB","Consumer Staples" 115 | "127","CPRT","Industrials" 116 | "409","CRM","Information Technology" 117 | "109","CSCO","Information Technology" 118 | "133","CSX","Industrials" 119 | "108","CTAS","Industrials" 120 | "97","CTL","Communication Services" 121 | "117","CTSH","Information Technology" 122 | "129","CTVA","Materials" 123 | "112","CTXS","Information Technology" 124 | "135","CVS","Health Care" 125 | "102","CVX","Energy" 126 | "122","CXO","Energy" 127 | "153","D","Utilities" 128 | "141","DAL","Industrials" 129 | "160","DD","Materials" 130 | "140","DE","Industrials" 131 | "147","DFS","Financials" 132 | "151","DG","Consumer Discretionary" 133 | "392","DGX","Health Care" 134 | "136","DHI","Consumer Discretionary" 135 | "137","DHR","Health Care" 136 | "482","DIS","Communication Services" 137 | "148","DISCA","Communication Services" 138 | "149","DISCK","Communication Services" 139 | "150","DISH","Communication Services" 140 | "146","DLR","Real Estate" 141 | "152","DLTR","Consumer Discretionary" 142 | "155","DOV","Industrials" 143 | "156","DOW","Materials" 144 | "154","DPZ","Consumer Discretionary" 145 | "159","DRE","Real Estate" 146 | "138","DRI","Consumer Discretionary" 147 | "157","DTE","Utilities" 148 | "158","DUK","Utilities" 149 | "139","DVA","Health Care" 150 | "143","DVN","Energy" 151 | "161","DXC","Information Technology" 152 | "144","DXCM","Health Care" 153 | "169","EA","Communication Services" 154 | "165","EBAY","Consumer Discretionary" 155 | "166","ECL","Materials" 156 | "124","ED","Utilities" 157 | "173","EFX","Industrials" 158 | "167","EIX","Utilities" 159 | "177","EL","Consumer Staples" 160 | "163","EMN","Materials" 161 | "170","EMR","Industrials" 162 | "172","EOG","Energy" 163 | "174","EQIX","Real Estate" 164 | "175","EQR","Real Estate" 165 | "179","ES","Utilities" 166 | "176","ESS","Real Estate" 167 | 
"162","ETFC","Financials" 168 | "164","ETN","Industrials" 169 | "171","ETR","Utilities" 170 | "178","EVRG","Utilities" 171 | "168","EW","Health Care" 172 | "181","EXC","Utilities" 173 | "183","EXPD","Industrials" 174 | "182","EXPE","Consumer Discretionary" 175 | "184","EXR","Real Estate" 176 | "200","F","Consumer Discretionary" 177 | "145","FANG","Energy" 178 | "188","FAST","Industrials" 179 | "187","FB","Communication Services" 180 | "203","FBHS","Industrials" 181 | "207","FCX","Materials" 182 | "190","FDX","Industrials" 183 | "193","FE","Utilities" 184 | "186","FFIV","Information Technology" 185 | "191","FIS","Information Technology" 186 | "195","FISV","Information Technology" 187 | "192","FITB","Financials" 188 | "197","FLIR","Information Technology" 189 | "198","FLS","Industrials" 190 | "196","FLT","Information Technology" 191 | "199","FMC","Materials" 192 | "205","FOX","Communication Services" 193 | "204","FOXA","Communication Services" 194 | "194","FRC","Financials" 195 | "189","FRT","Real Estate" 196 | "438","FTI","Energy" 197 | "201","FTNT","Information Technology" 198 | "202","FTV","Industrials" 199 | "211","GD","Industrials" 200 | "212","GE","Industrials" 201 | "216","GILD","Health Care" 202 | "213","GIS","Consumer Staples" 203 | "217","GL","Financials" 204 | "128","GLW","Information Technology" 205 | "214","GM","Consumer Discretionary" 206 | "24","GOOG","Communication Services" 207 | "23","GOOGL","Communication Services" 208 | "215","GPC","Consumer Discretionary" 209 | "218","GPN","Information Technology" 210 | "208","GPS","Consumer Discretionary" 211 | "209","GRMN","Consumer Discretionary" 212 | "219","GS","Financials" 213 | "220","GWW","Industrials" 214 | "222","HAL","Energy" 215 | "225","HAS","Consumer Discretionary" 216 | "242","HBAN","Financials" 217 | "223","HBI","Consumer Discretionary" 218 | "226","HCA","Health Care" 219 | "235","HD","Consumer Discretionary" 220 | "230","HES","Energy" 221 | "233","HFC","Energy" 222 | "224","HIG","Financials" 223 
| "243","HII","Industrials" 224 | "232","HLT","Consumer Discretionary" 225 | "234","HOLX","Health Care" 226 | "236","HON","Industrials" 227 | "231","HPE","Information Technology" 228 | "240","HPQ","Information Technology" 229 | "221","HRB","Consumer Discretionary" 230 | "237","HRL","Consumer Staples" 231 | "228","HSIC","Health Care" 232 | "238","HST","Real Estate" 233 | "229","HSY","Consumer Staples" 234 | "241","HUM","Health Care" 235 | "239","HWM","Industrials" 236 | "253","IBM","Information Technology" 237 | "252","ICE","Financials" 238 | "245","IDXX","Health Care" 239 | "244","IEX","Industrials" 240 | "256","IFF","Materials" 241 | "248","ILMN","Health Care" 242 | "249","INCY","Health Care" 243 | "246","INFO","Industrials" 244 | "251","INTC","Information Technology" 245 | "257","INTU","Information Technology" 246 | "254","IP","Materials" 247 | "255","IPG","Communication Services" 248 | "260","IPGP","Information Technology" 249 | "261","IQV","Health Care" 250 | "250","IR","Industrials" 251 | "262","IRM","Real Estate" 252 | "258","ISRG","Health Care" 253 | "210","IT","Information Technology" 254 | "247","ITW","Industrials" 255 | "259","IVZ","Financials" 256 | "264","J","Industrials" 257 | "265","JBHT","Industrials" 258 | "268","JCI","Industrials" 259 | "263","JKHY","Information Technology" 260 | "267","JNJ","Health Care" 261 | "270","JNPR","Information Technology" 262 | "269","JPM","Financials" 263 | "272","K","Consumer Staples" 264 | "273","KEY","Financials" 265 | "274","KEYS","Information Technology" 266 | "280","KHC","Consumer Staples" 267 | "276","KIM","Real Estate" 268 | "278","KLAC","Information Technology" 269 | "275","KMB","Consumer Staples" 270 | "277","KMI","Energy" 271 | "87","KMX","Consumer Discretionary" 272 | "116","KO","Consumer Staples" 273 | "281","KR","Consumer Staples" 274 | "279","KSS","Consumer Discretionary" 275 | "271","KSU","Industrials" 276 | "297","L","Financials" 277 | "282","LB","Consumer Discretionary" 278 | "289","LDOS","Information 
Technology" 279 | "288","LEG","Consumer Discretionary" 280 | "290","LEN","Consumer Discretionary" 281 | "284","LH","Health Care" 282 | "283","LHX","Industrials" 283 | "293","LIN","Materials" 284 | "295","LKQ","Consumer Discretionary" 285 | "291","LLY","Health Care" 286 | "296","LMT","Industrials" 287 | "292","LNC","Financials" 288 | "21","LNT","Utilities" 289 | "298","LOW","Consumer Discretionary" 290 | "285","LRCX","Information Technology" 291 | "422","LUV","Industrials" 292 | "287","LVS","Consumer Discretionary" 293 | "286","LW","Consumer Staples" 294 | "299","LYB","Materials" 295 | "294","LYV","Communication Services" 296 | "308","MA","Information Technology" 297 | "321","MAA","Real Estate" 298 | "304","MAR","Consumer Discretionary" 299 | "307","MAS","Industrials" 300 | "311","MCD","Consumer Discretionary" 301 | "318","MCHP","Information Technology" 302 | "312","MCK","Health Care" 303 | "326","MCO","Financials" 304 | "324","MDLZ","Consumer Staples" 305 | "313","MDT","Health Care" 306 | "315","MET","Financials" 307 | "317","MGM","Consumer Discretionary" 308 | "322","MHK","Consumer Discretionary" 309 | "309","MKC","Consumer Staples" 310 | "303","MKTX","Financials" 311 | "306","MLM","Materials" 312 | "305","MMC","Financials" 313 | "1","MMM","Industrials" 314 | "325","MNST","Consumer Staples" 315 | "25","MO","Consumer Staples" 316 | "328","MOS","Materials" 317 | "302","MPC","Energy" 318 | "314","MRK","Health Care" 319 | "301","MRO","Energy" 320 | "327","MS","Financials" 321 | "330","MSCI","Financials" 322 | "320","MSFT","Information Technology" 323 | "329","MSI","Information Technology" 324 | "300","MTB","Financials" 325 | "316","MTD","Health Care" 326 | "319","MU","Information Technology" 327 | "310","MXIM","Information Technology" 328 | "331","MYL","Health Care" 329 | "344","NBL","Energy" 330 | "349","NCLH","Consumer Discretionary" 331 | "332","NDAQ","Financials" 332 | "340","NEE","Utilities" 333 | "337","NEM","Materials" 334 | "335","NFLX","Communication 
Services" 335 | "343","NI","Utilities" 336 | "342","NKE","Consumer Discretionary" 337 | "348","NLOK","Information Technology" 338 | "341","NLSN","Industrials" 339 | "347","NOC","Industrials" 340 | "333","NOV","Energy" 341 | "415","NOW","Information Technology" 342 | "350","NRG","Utilities" 343 | "345","NSC","Industrials" 344 | "334","NTAP","Information Technology" 345 | "346","NTRS","Financials" 346 | "351","NUE","Materials" 347 | "352","NVDA","Information Technology" 348 | "353","NVR","Consumer Discretionary" 349 | "336","NWL","Consumer Discretionary" 350 | "339","NWS","Communication Services" 351 | "338","NWSA","Communication Services" 352 | "396","O","Real Estate" 353 | "356","ODFL","Industrials" 354 | "358","OKE","Energy" 355 | "357","OMC","Communication Services" 356 | "359","ORCL","Information Technology" 357 | "354","ORLY","Consumer Discretionary" 358 | "360","OTIS","Industrials" 359 | "355","OXY","Energy" 360 | "365","PAYC","Information Technology" 361 | "364","PAYX","Information Technology" 362 | "368","PBCT","Financials" 363 | "361","PCAR","Industrials" 364 | "227","PEAK","Real Estate" 365 | "385","PEG","Utilities" 366 | "369","PEP","Consumer Staples" 367 | "372","PFE","Health Care" 368 | "380","PFG","Financials" 369 | "381","PG","Consumer Staples" 370 | "382","PGR","Financials" 371 | "363","PH","Industrials" 372 | "387","PHM","Consumer Discretionary" 373 | "362","PKG","Materials" 374 | "370","PKI","Health Care" 375 | "383","PLD","Real Estate" 376 | "373","PM","Consumer Staples" 377 | "377","PNC","Financials" 378 | "367","PNR","Industrials" 379 | "375","PNW","Utilities" 380 | "378","PPG","Materials" 381 | "379","PPL","Utilities" 382 | "371","PRGO","Health Care" 383 | "384","PRU","Financials" 384 | "386","PSA","Real Estate" 385 | "374","PSX","Energy" 386 | "388","PVH","Consumer Discretionary" 387 | "390","PWR","Industrials" 388 | "376","PXD","Energy" 389 | "366","PYPL","Information Technology" 390 | "391","QCOM","Information Technology" 391 | 
"389","QRVO","Information Technology" 392 | "407","RCL","Consumer Discretionary" 393 | "180","RE","Financials" 394 | "397","REG","Real Estate" 395 | "398","REGN","Health Care" 396 | "399","RF","Financials" 397 | "402","RHI","Industrials" 398 | "394","RJF","Financials" 399 | "393","RL","Consumer Discretionary" 400 | "401","RMD","Health Care" 401 | "403","ROK","Industrials" 402 | "404","ROL","Industrials" 403 | "405","ROP","Industrials" 404 | "406","ROST","Consumer Discretionary" 405 | "400","RSG","Industrials" 406 | "395","RTX","Industrials" 407 | "410","SBAC","Real Estate" 408 | "424","SBUX","Consumer Discretionary" 409 | "100","SCHW","Financials" 410 | "413","SEE","Materials" 411 | "416","SHW","Materials" 412 | "428","SIVB","Financials" 413 | "266","SJM","Consumer Staples" 414 | "411","SLB","Energy" 415 | "419","SLG","Real Estate" 416 | "420","SNA","Industrials" 417 | "430","SNPS","Information Technology" 418 | "421","SO","Utilities" 419 | "417","SPG","Real Estate" 420 | "408","SPGI","Financials" 421 | "414","SRE","Utilities" 422 | "426","STE","Health Care" 423 | "425","STT","Financials" 424 | "412","STX","Information Technology" 425 | "125","STZ","Consumer Staples" 426 | "423","SWK","Industrials" 427 | "418","SWKS","Information Technology" 428 | "429","SYF","Financials" 429 | "427","SYK","Health Care" 430 | "431","SYY","Consumer Staples" 431 | "54","T","Communication Services" 432 | "323","TAP","Consumer Staples" 433 | "448","TDG","Industrials" 434 | "439","TDY","Industrials" 435 | "437","TEL","Information Technology" 436 | "450","TFC","Financials" 437 | "440","TFX","Health Care" 438 | "436","TGT","Consumer Discretionary" 439 | "444","TIF","Consumer Discretionary" 440 | "445","TJX","Consumer Discretionary" 441 | "443","TMO","Health Care" 442 | "432","TMUS","Communication Services" 443 | "435","TPR","Consumer Discretionary" 444 | "433","TROW","Financials" 445 | "449","TRV","Financials" 446 | "446","TSCO","Consumer Discretionary" 447 | "453","TSN","Consumer 
Staples" 448 | "447","TT","Industrials" 449 | "434","TTWO","Communication Services" 450 | "451","TWTR","Communication Services" 451 | "441","TXN","Information Technology" 452 | "442","TXT","Industrials" 453 | "452","TYL","Information Technology" 454 | "458","UA","Consumer Discretionary" 455 | "457","UAA","Consumer Discretionary" 456 | "460","UAL","Industrials" 457 | "454","UDR","Real Estate" 458 | "464","UHS","Health Care" 459 | "455","ULTA","Consumer Discretionary" 460 | "461","UNH","Health Care" 461 | "465","UNM","Financials" 462 | "459","UNP","Industrials" 463 | "462","UPS","Industrials" 464 | "463","URI","Industrials" 465 | "456","USB","Financials" 466 | "475","V","Information Technology" 467 | "468","VAR","Health Care" 468 | "466","VFC","Consumer Discretionary" 469 | "474","VIAC","Communication Services" 470 | "467","VLO","Energy" 471 | "477","VMC","Materials" 472 | "476","VNO","Real Estate" 473 | "471","VRSK","Industrials" 474 | "470","VRSN","Information Technology" 475 | "473","VRTX","Health Care" 476 | "469","VTR","Real Estate" 477 | "472","VZ","Communication Services" 478 | "479","WAB","Industrials" 479 | "484","WAT","Health Care" 480 | "481","WBA","Consumer Staples" 481 | "489","WDC","Information Technology" 482 | "485","WEC","Utilities" 483 | "487","WELL","Real Estate" 484 | "486","WFC","Financials" 485 | "493","WHR","Consumer Discretionary" 486 | "495","WLTW","Financials" 487 | "483","WM","Industrials" 488 | "494","WMB","Energy" 489 | "480","WMT","Consumer Staples" 490 | "478","WRB","Financials" 491 | "491","WRK","Materials" 492 | "488","WST","Health Care" 493 | "490","WU","Information Technology" 494 | "492","WY","Real Estate" 495 | "496","WYNN","Consumer Discretionary" 496 | "497","XEL","Utilities" 497 | "499","XLNX","Information Technology" 498 | "185","XOM","Energy" 499 | "142","XRAY","Health Care" 500 | "498","XRX","Information Technology" 501 | "500","XYL","Industrials" 502 | "501","YUM","Consumer Discretionary" 503 | "503","ZBH","Health Care" 504 | 
"502","ZBRA","Information Technology" 505 | "504","ZION","Financials" 506 | "505","ZTS","Health Care" 507 | -------------------------------------------------------------------------------- /examples/stocks/get-sp500-constituents-sector.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "library(RCurl)\n", 10 | "library(XML)\n", 11 | "library(rlist)\n", 12 | "\n", 13 | "url <- getURL(\"https://en.wikipedia.org/wiki/List_of_S%26P_500_companies\")\n", 14 | "table <- readHTMLTable(url)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "table <- table$constituents\n", 24 | "names(table) <- as.character(unlist(table[1,]))\n", 25 | "table <- table[-1,]" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/html": [ 36 | "\n", 37 | "\n", 38 | "\n", 39 | "\t\n", 40 | "\t\n", 41 | "\n", 42 | "\n", 43 | "\t\n", 44 | "\t\n", 45 | "\t\n", 46 | "\t\n", 47 | "\t\n", 48 | "\t\n", 49 | "\n", 50 | "
A data.frame: 6 × 9
SymbolSecuritySEC filingsGICS SectorGICS Sub-IndustryHeadquarters LocationDate first addedCIKFounded
<chr><chr><chr><chr><chr><chr><chr><chr><chr>
2MMM 3M reportsIndustrials Industrial Conglomerates Saint Paul, Minnesota 1976-08-0900000667401902
3AOS A. O. SmithreportsIndustrials Building Products Milwaukee, Wisconsin 2017-07-2600000911421916
4ABT Abbott reportsHealth Care Health Care Equipment North Chicago, Illinois1964-03-3100000018001888
5ABBVAbbVie reportsHealth Care Pharmaceuticals North Chicago, Illinois2012-12-3100015511522013 (1888)
6ABMDAbiomed reportsHealth Care Health Care Equipment Danvers, Massachusetts 2018-05-3100008150941981
7ACN Accenture reportsInformation TechnologyIT Consulting & Other ServicesDublin, Ireland 2011-07-0600014673731989
\n" 51 | ], 52 | "text/latex": [ 53 | "A data.frame: 6 × 9\n", 54 | "\\begin{tabular}{r|lllllllll}\n", 55 | " & Symbol & Security & SEC filings & GICS Sector & GICS Sub-Industry & Headquarters Location & Date first added & CIK & Founded\\\\\n", 56 | " & & & & & & & & & \\\\\n", 57 | "\\hline\n", 58 | "\t2 & MMM & 3M & reports & Industrials & Industrial Conglomerates & Saint Paul, Minnesota & 1976-08-09 & 0000066740 & 1902 \\\\\n", 59 | "\t3 & AOS & A. O. Smith & reports & Industrials & Building Products & Milwaukee, Wisconsin & 2017-07-26 & 0000091142 & 1916 \\\\\n", 60 | "\t4 & ABT & Abbott & reports & Health Care & Health Care Equipment & North Chicago, Illinois & 1964-03-31 & 0000001800 & 1888 \\\\\n", 61 | "\t5 & ABBV & AbbVie & reports & Health Care & Pharmaceuticals & North Chicago, Illinois & 2012-12-31 & 0001551152 & 2013 (1888)\\\\\n", 62 | "\t6 & ABMD & Abiomed & reports & Health Care & Health Care Equipment & Danvers, Massachusetts & 2018-05-31 & 0000815094 & 1981 \\\\\n", 63 | "\t7 & ACN & Accenture & reports & Information Technology & IT Consulting \\& Other Services & Dublin, Ireland & 2011-07-06 & 0001467373 & 1989 \\\\\n", 64 | "\\end{tabular}\n" 65 | ], 66 | "text/markdown": [ 67 | "\n", 68 | "A data.frame: 6 × 9\n", 69 | "\n", 70 | "| | Symbol <chr> | Security <chr> | SEC filings <chr> | GICS Sector <chr> | GICS Sub-Industry <chr> | Headquarters Location <chr> | Date first added <chr> | CIK <chr> | Founded <chr> |\n", 71 | "|---|---|---|---|---|---|---|---|---|---|\n", 72 | "| 2 | MMM | 3M | reports | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1976-08-09 | 0000066740 | 1902 |\n", 73 | "| 3 | AOS | A. O. 
Smith | reports | Industrials | Building Products | Milwaukee, Wisconsin | 2017-07-26 | 0000091142 | 1916 |\n", 74 | "| 4 | ABT | Abbott | reports | Health Care | Health Care Equipment | North Chicago, Illinois | 1964-03-31 | 0000001800 | 1888 |\n", 75 | "| 5 | ABBV | AbbVie | reports | Health Care | Pharmaceuticals | North Chicago, Illinois | 2012-12-31 | 0001551152 | 2013 (1888) |\n", 76 | "| 6 | ABMD | Abiomed | reports | Health Care | Health Care Equipment | Danvers, Massachusetts | 2018-05-31 | 0000815094 | 1981 |\n", 77 | "| 7 | ACN | Accenture | reports | Information Technology | IT Consulting & Other Services | Dublin, Ireland | 2011-07-06 | 0001467373 | 1989 |\n", 78 | "\n" 79 | ], 80 | "text/plain": [ 81 | " Symbol Security SEC filings GICS Sector \n", 82 | "2 MMM 3M reports Industrials \n", 83 | "3 AOS A. O. Smith reports Industrials \n", 84 | "4 ABT Abbott reports Health Care \n", 85 | "5 ABBV AbbVie reports Health Care \n", 86 | "6 ABMD Abiomed reports Health Care \n", 87 | "7 ACN Accenture reports Information Technology\n", 88 | " GICS Sub-Industry Headquarters Location Date first added\n", 89 | "2 Industrial Conglomerates Saint Paul, Minnesota 1976-08-09 \n", 90 | "3 Building Products Milwaukee, Wisconsin 2017-07-26 \n", 91 | "4 Health Care Equipment North Chicago, Illinois 1964-03-31 \n", 92 | "5 Pharmaceuticals North Chicago, Illinois 2012-12-31 \n", 93 | "6 Health Care Equipment Danvers, Massachusetts 2018-05-31 \n", 94 | "7 IT Consulting & Other Services Dublin, Ireland 2011-07-06 \n", 95 | " CIK Founded \n", 96 | "2 0000066740 1902 \n", 97 | "3 0000091142 1916 \n", 98 | "4 0000001800 1888 \n", 99 | "5 0001551152 2013 (1888)\n", 100 | "6 0000815094 1981 \n", 101 | "7 0001467373 1989 " 102 | ] 103 | }, 104 | "metadata": {}, 105 | "output_type": "display_data" 106 | } 107 | ], 108 | "source": [ 109 | "head(table)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": {}, 116 | "outputs": [], 117 | 
"source": [ 118 | "sector_table <- data.frame(c(table[\"Symbol\"], table[\"GICS Sector\"]))" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "sector_table <- sector_table[order(sector_table$Symbol),]" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "write.csv(sector_table, \"SP500-sectors.csv\")" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 7, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "SP500 <- read.csv(\"SP500-sectors.csv\")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 8, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/html": [ 156 | "\n", 157 | "\n", 158 | "\n", 159 | "\t\n", 160 | "\t\n", 161 | "\n", 162 | "\n", 163 | "\t\n", 164 | "\t\n", 165 | "\t\n", 166 | "\t\n", 167 | "\t\n", 168 | "\t\n", 169 | "\t\n", 170 | "\t\n", 171 | "\t\n", 172 | "\t\n", 173 | "\t\n", 174 | "\t\n", 175 | "\t\n", 176 | "\t\n", 177 | "\t\n", 178 | "\t\n", 179 | "\t\n", 180 | "\t\n", 181 | "\t\n", 182 | "\t\n", 183 | "\t\n", 184 | "\t\n", 185 | "\t\n", 186 | "\t\n", 187 | "\t\n", 188 | "\t\n", 189 | "\t\n", 190 | "\t\n", 191 | "\t\n", 192 | "\t\n", 193 | "\t\n", 194 | "\t\n", 195 | "\t\n", 196 | "\t\n", 197 | "\t\n", 198 | "\t\n", 199 | "\t\n", 200 | "\t\n", 201 | "\t\n", 202 | "\t\n", 203 | "\t\n", 204 | "\t\n", 205 | "\t\n", 206 | "\t\n", 207 | "\t\n", 208 | "\t\n", 209 | "\t\n", 210 | "\t\n", 211 | "\t\n", 212 | "\t\n", 213 | "\t\n", 214 | "\t\n", 215 | "\t\n", 216 | "\t\n", 217 | "\t\n", 218 | "\t\n", 219 | "\t\n", 220 | "\t\n", 221 | "\t\n", 222 | "\t\n", 223 | "\t\n", 224 | "\n", 225 | "
A data.frame: 503 × 3
XSymbolGICS.Sector
<int><chr><chr>
14A Health Care
31AAL Industrials
11AAP Consumer Discretionary
46AAPLInformation Technology
4ABBVHealth Care
38ABC Health Care
5ABMDHealth Care
3ABT Health Care
6ACN Information Technology
9ADBEInformation Technology
42ADI Information Technology
8ADM Consumer Staples
10ADP Information Technology
54ADSKInformation Technology
30AEE Utilities
32AEP Utilities
12AES Utilities
13AFL Financials
34AIG Financials
51AIZ Financials
50AJG Financials
16AKAMInformation Technology
18ALB Materials
20ALGNHealth Care
17ALK Industrials
23ALL Financials
21ALLEIndustrials
47AMATInformation Technology
28AMCRMaterials
29AMD Information Technology
468VTR Real Estate
474VTRSHealth Care
471VZ Communication Services
479WAB Industrials
484WAT Health Care
480WBA Consumer Staples
482WBD Communication Services
489WDC Information Technology
485WEC Utilities
487WELLReal Estate
486WFC Financials
492WHR Consumer Discretionary
483WM Industrials
493WMB Energy
481WMT Consumer Staples
64WRB Financials
490WRK Materials
488WST Health Care
494WTW Financials
491WY Real Estate
496WYNNConsumer Discretionary
497XEL Utilities
191XOM Energy
145XRAYHealth Care
498XYL Industrials
499YUM Consumer Discretionary
501ZBH Health Care
500ZBRAInformation Technology
502ZIONFinancials
503ZTS Health Care
\n" 226 | ], 227 | "text/latex": [ 228 | "A data.frame: 503 × 3\n", 229 | "\\begin{tabular}{lll}\n", 230 | " X & Symbol & GICS.Sector\\\\\n", 231 | " & & \\\\\n", 232 | "\\hline\n", 233 | "\t 14 & A & Health Care \\\\\n", 234 | "\t 31 & AAL & Industrials \\\\\n", 235 | "\t 11 & AAP & Consumer Discretionary\\\\\n", 236 | "\t 46 & AAPL & Information Technology\\\\\n", 237 | "\t 4 & ABBV & Health Care \\\\\n", 238 | "\t 38 & ABC & Health Care \\\\\n", 239 | "\t 5 & ABMD & Health Care \\\\\n", 240 | "\t 3 & ABT & Health Care \\\\\n", 241 | "\t 6 & ACN & Information Technology\\\\\n", 242 | "\t 9 & ADBE & Information Technology\\\\\n", 243 | "\t 42 & ADI & Information Technology\\\\\n", 244 | "\t 8 & ADM & Consumer Staples \\\\\n", 245 | "\t 10 & ADP & Information Technology\\\\\n", 246 | "\t 54 & ADSK & Information Technology\\\\\n", 247 | "\t 30 & AEE & Utilities \\\\\n", 248 | "\t 32 & AEP & Utilities \\\\\n", 249 | "\t 12 & AES & Utilities \\\\\n", 250 | "\t 13 & AFL & Financials \\\\\n", 251 | "\t 34 & AIG & Financials \\\\\n", 252 | "\t 51 & AIZ & Financials \\\\\n", 253 | "\t 50 & AJG & Financials \\\\\n", 254 | "\t 16 & AKAM & Information Technology\\\\\n", 255 | "\t 18 & ALB & Materials \\\\\n", 256 | "\t 20 & ALGN & Health Care \\\\\n", 257 | "\t 17 & ALK & Industrials \\\\\n", 258 | "\t 23 & ALL & Financials \\\\\n", 259 | "\t 21 & ALLE & Industrials \\\\\n", 260 | "\t 47 & AMAT & Information Technology\\\\\n", 261 | "\t 28 & AMCR & Materials \\\\\n", 262 | "\t 29 & AMD & Information Technology\\\\\n", 263 | "\t ⋮ & ⋮ & ⋮\\\\\n", 264 | "\t 468 & VTR & Real Estate \\\\\n", 265 | "\t 474 & VTRS & Health Care \\\\\n", 266 | "\t 471 & VZ & Communication Services\\\\\n", 267 | "\t 479 & WAB & Industrials \\\\\n", 268 | "\t 484 & WAT & Health Care \\\\\n", 269 | "\t 480 & WBA & Consumer Staples \\\\\n", 270 | "\t 482 & WBD & Communication Services\\\\\n", 271 | "\t 489 & WDC & Information Technology\\\\\n", 272 | "\t 485 & WEC & Utilities \\\\\n", 273 | "\t 487 & 
WELL & Real Estate \\\\\n", 274 | "\t 486 & WFC & Financials \\\\\n", 275 | "\t 492 & WHR & Consumer Discretionary\\\\\n", 276 | "\t 483 & WM & Industrials \\\\\n", 277 | "\t 493 & WMB & Energy \\\\\n", 278 | "\t 481 & WMT & Consumer Staples \\\\\n", 279 | "\t 64 & WRB & Financials \\\\\n", 280 | "\t 490 & WRK & Materials \\\\\n", 281 | "\t 488 & WST & Health Care \\\\\n", 282 | "\t 494 & WTW & Financials \\\\\n", 283 | "\t 491 & WY & Real Estate \\\\\n", 284 | "\t 496 & WYNN & Consumer Discretionary\\\\\n", 285 | "\t 497 & XEL & Utilities \\\\\n", 286 | "\t 191 & XOM & Energy \\\\\n", 287 | "\t 145 & XRAY & Health Care \\\\\n", 288 | "\t 498 & XYL & Industrials \\\\\n", 289 | "\t 499 & YUM & Consumer Discretionary\\\\\n", 290 | "\t 501 & ZBH & Health Care \\\\\n", 291 | "\t 500 & ZBRA & Information Technology\\\\\n", 292 | "\t 502 & ZION & Financials \\\\\n", 293 | "\t 503 & ZTS & Health Care \\\\\n", 294 | "\\end{tabular}\n" 295 | ], 296 | "text/markdown": [ 297 | "\n", 298 | "A data.frame: 503 × 3\n", 299 | "\n", 300 | "| X <int> | Symbol <chr> | GICS.Sector <chr> |\n", 301 | "|---|---|---|\n", 302 | "| 14 | A | Health Care |\n", 303 | "| 31 | AAL | Industrials |\n", 304 | "| 11 | AAP | Consumer Discretionary |\n", 305 | "| 46 | AAPL | Information Technology |\n", 306 | "| 4 | ABBV | Health Care |\n", 307 | "| 38 | ABC | Health Care |\n", 308 | "| 5 | ABMD | Health Care |\n", 309 | "| 3 | ABT | Health Care |\n", 310 | "| 6 | ACN | Information Technology |\n", 311 | "| 9 | ADBE | Information Technology |\n", 312 | "| 42 | ADI | Information Technology |\n", 313 | "| 8 | ADM | Consumer Staples |\n", 314 | "| 10 | ADP | Information Technology |\n", 315 | "| 54 | ADSK | Information Technology |\n", 316 | "| 30 | AEE | Utilities |\n", 317 | "| 32 | AEP | Utilities |\n", 318 | "| 12 | AES | Utilities |\n", 319 | "| 13 | AFL | Financials |\n", 320 | "| 34 | AIG | Financials |\n", 321 | "| 51 | AIZ | Financials |\n", 322 | "| 50 | AJG | Financials |\n", 323 | "| 16 | 
AKAM | Information Technology |\n", 324 | "| 18 | ALB | Materials |\n", 325 | "| 20 | ALGN | Health Care |\n", 326 | "| 17 | ALK | Industrials |\n", 327 | "| 23 | ALL | Financials |\n", 328 | "| 21 | ALLE | Industrials |\n", 329 | "| 47 | AMAT | Information Technology |\n", 330 | "| 28 | AMCR | Materials |\n", 331 | "| 29 | AMD | Information Technology |\n", 332 | "| ⋮ | ⋮ | ⋮ |\n", 333 | "| 468 | VTR | Real Estate |\n", 334 | "| 474 | VTRS | Health Care |\n", 335 | "| 471 | VZ | Communication Services |\n", 336 | "| 479 | WAB | Industrials |\n", 337 | "| 484 | WAT | Health Care |\n", 338 | "| 480 | WBA | Consumer Staples |\n", 339 | "| 482 | WBD | Communication Services |\n", 340 | "| 489 | WDC | Information Technology |\n", 341 | "| 485 | WEC | Utilities |\n", 342 | "| 487 | WELL | Real Estate |\n", 343 | "| 486 | WFC | Financials |\n", 344 | "| 492 | WHR | Consumer Discretionary |\n", 345 | "| 483 | WM | Industrials |\n", 346 | "| 493 | WMB | Energy |\n", 347 | "| 481 | WMT | Consumer Staples |\n", 348 | "| 64 | WRB | Financials |\n", 349 | "| 490 | WRK | Materials |\n", 350 | "| 488 | WST | Health Care |\n", 351 | "| 494 | WTW | Financials |\n", 352 | "| 491 | WY | Real Estate |\n", 353 | "| 496 | WYNN | Consumer Discretionary |\n", 354 | "| 497 | XEL | Utilities |\n", 355 | "| 191 | XOM | Energy |\n", 356 | "| 145 | XRAY | Health Care |\n", 357 | "| 498 | XYL | Industrials |\n", 358 | "| 499 | YUM | Consumer Discretionary |\n", 359 | "| 501 | ZBH | Health Care |\n", 360 | "| 500 | ZBRA | Information Technology |\n", 361 | "| 502 | ZION | Financials |\n", 362 | "| 503 | ZTS | Health Care |\n", 363 | "\n" 364 | ], 365 | "text/plain": [ 366 | " X Symbol GICS.Sector \n", 367 | "1 14 A Health Care \n", 368 | "2 31 AAL Industrials \n", 369 | "3 11 AAP Consumer Discretionary\n", 370 | "4 46 AAPL Information Technology\n", 371 | "5 4 ABBV Health Care \n", 372 | "6 38 ABC Health Care \n", 373 | "7 5 ABMD Health Care \n", 374 | "8 3 ABT Health Care \n", 375 | "9 6 ACN 
Information Technology\n", 376 | "10 9 ADBE Information Technology\n", 377 | "11 42 ADI Information Technology\n", 378 | "12 8 ADM Consumer Staples \n", 379 | "13 10 ADP Information Technology\n", 380 | "14 54 ADSK Information Technology\n", 381 | "15 30 AEE Utilities \n", 382 | "16 32 AEP Utilities \n", 383 | "17 12 AES Utilities \n", 384 | "18 13 AFL Financials \n", 385 | "19 34 AIG Financials \n", 386 | "20 51 AIZ Financials \n", 387 | "21 50 AJG Financials \n", 388 | "22 16 AKAM Information Technology\n", 389 | "23 18 ALB Materials \n", 390 | "24 20 ALGN Health Care \n", 391 | "25 17 ALK Industrials \n", 392 | "26 23 ALL Financials \n", 393 | "27 21 ALLE Industrials \n", 394 | "28 47 AMAT Information Technology\n", 395 | "29 28 AMCR Materials \n", 396 | "30 29 AMD Information Technology\n", 397 | "⋮ ⋮ ⋮ ⋮ \n", 398 | "474 468 VTR Real Estate \n", 399 | "475 474 VTRS Health Care \n", 400 | "476 471 VZ Communication Services\n", 401 | "477 479 WAB Industrials \n", 402 | "478 484 WAT Health Care \n", 403 | "479 480 WBA Consumer Staples \n", 404 | "480 482 WBD Communication Services\n", 405 | "481 489 WDC Information Technology\n", 406 | "482 485 WEC Utilities \n", 407 | "483 487 WELL Real Estate \n", 408 | "484 486 WFC Financials \n", 409 | "485 492 WHR Consumer Discretionary\n", 410 | "486 483 WM Industrials \n", 411 | "487 493 WMB Energy \n", 412 | "488 481 WMT Consumer Staples \n", 413 | "489 64 WRB Financials \n", 414 | "490 490 WRK Materials \n", 415 | "491 488 WST Health Care \n", 416 | "492 494 WTW Financials \n", 417 | "493 491 WY Real Estate \n", 418 | "494 496 WYNN Consumer Discretionary\n", 419 | "495 497 XEL Utilities \n", 420 | "496 191 XOM Energy \n", 421 | "497 145 XRAY Health Care \n", 422 | "498 498 XYL Industrials \n", 423 | "499 499 YUM Consumer Discretionary\n", 424 | "500 501 ZBH Health Care \n", 425 | "501 500 ZBRA Information Technology\n", 426 | "502 502 ZION Financials \n", 427 | "503 503 ZTS Health Care " 428 | ] 429 | }, 430 | "metadata": 
{}, 431 | "output_type": "display_data" 432 | } 433 | ], 434 | "source": [ 435 | "SP500" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [] 444 | } 445 | ], 446 | "metadata": { 447 | "kernelspec": { 448 | "display_name": "R", 449 | "language": "R", 450 | "name": "ir" 451 | }, 452 | "language_info": { 453 | "codemirror_mode": "r", 454 | "file_extension": ".r", 455 | "mimetype": "text/x-r-source", 456 | "name": "R", 457 | "pygments_lexer": "r", 458 | "version": "4.1.2" 459 | } 460 | }, 461 | "nbformat": 4, 462 | "nbformat_minor": 4 463 | } 464 | -------------------------------------------------------------------------------- /examples/stocks/stock-data-2014-2018.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/convexfi/fingraph/78a3f9e5f81c2b6ca6cfed896f0decffa391cf9f/examples/stocks/stock-data-2014-2018.rds -------------------------------------------------------------------------------- /man/figures/README-plot_crypto_network-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/convexfi/fingraph/78a3f9e5f81c2b6ca6cfed896f0decffa391cf9f/man/figures/README-plot_crypto_network-1.png -------------------------------------------------------------------------------- /man/figures/README-plot_sp500_stocks_network-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/convexfi/fingraph/78a3f9e5f81c2b6ca6cfed896f0decffa391cf9f/man/figures/README-plot_sp500_stocks_network-1.png -------------------------------------------------------------------------------- /man/learn_connected_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/connected-graph-admm.R 3 | 
\name{learn_connected_graph} 4 | \alias{learn_connected_graph} 5 | \title{Laplacian matrix of a connected graph with Gaussian data 6 | 7 | Computes the Laplacian matrix of a graph on the basis of an observed data matrix, 8 | where we assume the data to be Gaussian distributed.} 9 | \usage{ 10 | learn_connected_graph( 11 | S, 12 | w0 = "naive", 13 | d = 1, 14 | rho = 1, 15 | maxiter = 10000, 16 | reltol = 1e-05, 17 | verbose = TRUE 18 | ) 19 | } 20 | \arguments{ 21 | \item{S}{a p x p covariance matrix, where p is the number of nodes in the graph} 22 | 23 | \item{w0}{initial vector of graph weights. Either a vector of length p(p-1)/2 or 24 | a string indicating the method to compute an initial value.} 25 | 26 | \item{d}{the nodes' degrees. Either a vector or a single value.} 27 | 28 | \item{rho}{constraint relaxation hyperparameter.} 29 | 30 | \item{maxiter}{maximum number of iterations.} 31 | 32 | \item{reltol}{relative tolerance as a convergence criteria.} 33 | 34 | \item{verbose}{whether or not to show a progress bar during the iterations.} 35 | } 36 | \value{ 37 | A list containing possibly the following elements: 38 | \item{\code{laplacian}}{estimated Laplacian matrix} 39 | \item{\code{adjacency}}{estimated adjacency matrix} 40 | \item{\code{theta}}{estimated Laplacian matrix slack variable} 41 | \item{\code{maxiter}}{number of iterations taken to reach convergence} 42 | \item{\code{convergence}}{boolean flag to indicate whether or not the optimization converged} 43 | } 44 | \description{ 45 | Laplacian matrix of a connected graph with Gaussian data 46 | 47 | Computes the Laplacian matrix of a graph on the basis of an observed data matrix, 48 | where we assume the data to be Gaussian distributed. 
49 | } 50 | -------------------------------------------------------------------------------- /man/learn_kcomp_heavytail_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/k-component-graph-heavy-tail.R 3 | \name{learn_kcomp_heavytail_graph} 4 | \alias{learn_kcomp_heavytail_graph} 5 | \title{Laplacian matrix of a k-component graph with heavy-tailed data 6 | 7 | Computes the Laplacian matrix of a graph on the basis of an observed data matrix, 8 | where we assume the data to be Student-t distributed.} 9 | \usage{ 10 | learn_kcomp_heavytail_graph( 11 | X, 12 | k = 1, 13 | heavy_type = "gaussian", 14 | nu = NULL, 15 | w0 = "naive", 16 | d = 1, 17 | beta = 1e-08, 18 | update_beta = TRUE, 19 | early_stopping = FALSE, 20 | rho = 1, 21 | update_rho = FALSE, 22 | maxiter = 10000, 23 | reltol = 1e-05, 24 | verbose = TRUE, 25 | record_objective = FALSE 26 | ) 27 | } 28 | \arguments{ 29 | \item{X}{an n x p data matrix, where n is the number of observations and p is 30 | the number of nodes in the graph.} 31 | 32 | \item{k}{the number of components of the graph.} 33 | 34 | \item{heavy_type}{a string which selects the statistical distribution of the data. 35 | Valid values are "gaussian" or "student".} 36 | 37 | \item{nu}{the degrees of freedom of the Student-t distribution. 38 | Must be a real number greater than 2.} 39 | 40 | \item{w0}{initial vector of graph weights. Either a vector of length p(p-1)/2 or 41 | a string indicating the method to compute an initial value.} 42 | 43 | \item{d}{the nodes' degrees.
Either a vector or a single value.} 44 | 45 | \item{beta}{hyperparameter that controls the regularization to obtain a 46 | k-component graph} 47 | 48 | \item{update_beta}{whether to update beta during the optimization.} 49 | 50 | \item{early_stopping}{whether to stop the iterations as soon as the rank 51 | constraint is satisfied.} 52 | 53 | \item{rho}{constraint relaxation hyperparameter.} 54 | 55 | \item{update_rho}{whether or not to update rho during the optimization.} 56 | 57 | \item{maxiter}{maximum number of iterations.} 58 | 59 | \item{reltol}{relative tolerance as a convergence criteria.} 60 | 61 | \item{verbose}{whether to show a progress bar during the iterations.} 62 | 63 | \item{record_objective}{whether to record the objective function per iteration.} 64 | } 65 | \value{ 66 | A list containing possibly the following elements: 67 | \item{\code{laplacian}}{estimated Laplacian matrix} 68 | \item{\code{adjacency}}{estimated adjacency matrix} 69 | \item{\code{theta}}{estimated Laplacian matrix slack variable} 70 | \item{\code{maxiter}}{number of iterations taken to reach convergence} 71 | \item{\code{convergence}}{boolean flag to indicate whether or not the optimization converged} 72 | \item{\code{beta_seq}}{sequence of values taken by the hyperparameter beta until convergence} 73 | \item{\code{primal_lap_residual}}{primal residual for the Laplacian matrix per iteration} 74 | \item{\code{primal_deg_residual}}{primal residual for the degree vector per iteration} 75 | \item{\code{dual_residual}}{dual residual per iteration} 76 | \item{\code{lagrangian}}{Lagrangian value per iteration} 77 | \item{\code{elapsed_time}}{Time taken to reach convergence} 78 | } 79 | \description{ 80 | Laplacian matrix of a k-component graph with heavy-tailed data 81 | 82 | Computes the Laplacian matrix of a graph on the basis of an observed data matrix, 83 | where we assume the data to be Student-t distributed.
84 | } 85 | -------------------------------------------------------------------------------- /man/learn_regular_heavytail_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/connected-graph-heavy-tail-admm.R 3 | \name{learn_regular_heavytail_graph} 4 | \alias{learn_regular_heavytail_graph} 5 | \title{Laplacian matrix of a connected graph with heavy-tailed data 6 | 7 | Computes the Laplacian matrix of a graph on the basis of an observed data matrix, 8 | where we assume the data to be Student-t distributed.} 9 | \usage{ 10 | learn_regular_heavytail_graph( 11 | X, 12 | heavy_type = "gaussian", 13 | nu = NULL, 14 | w0 = "naive", 15 | d = 1, 16 | rho = 1, 17 | update_rho = TRUE, 18 | maxiter = 10000, 19 | reltol = 1e-05, 20 | verbose = TRUE 21 | ) 22 | } 23 | \arguments{ 24 | \item{X}{an n x p data matrix, where n is the number of observations and p is 25 | the number of nodes in the graph} 26 | 27 | \item{heavy_type}{a string which selects the statistical distribution of the data. 28 | Valid values are "gaussian" or "student".} 29 | 30 | \item{nu}{the degrees of freedom of the Student-t distribution. 31 | Must be a real number greater than 2.} 32 | 33 | \item{w0}{initial vector of graph weights. Either a vector of length p(p-1)/2 or 34 | a string indicating the method to compute an initial value.} 35 | 36 | \item{d}{the nodes' degrees. 
Either a vector or a single value.} 37 | 38 | \item{rho}{constraint relaxation hyperparameter.} 39 | 40 | \item{update_rho}{whether or not to update rho during the optimization.} 41 | 42 | \item{maxiter}{maximum number of iterations.} 43 | 44 | \item{reltol}{relative tolerance as a convergence criteria.} 45 | 46 | \item{verbose}{whether or not to show a progress bar during the iterations.} 47 | } 48 | \value{ 49 | A list containing possibly the following elements: 50 | \item{\code{laplacian}}{estimated Laplacian matrix} 51 | \item{\code{adjacency}}{estimated adjacency matrix} 52 | \item{\code{theta}}{estimated Laplacian matrix slack variable} 53 | \item{\code{maxiter}}{number of iterations taken to reach convergence} 54 | \item{\code{convergence}}{boolean flag to indicate whether or not the optimization converged} 55 | \item{\code{primal_lap_residual}}{primal residual for the Laplacian matrix per iteration} 56 | \item{\code{primal_deg_residual}}{primal residual for the degree vector per iteration} 57 | \item{\code{dual_residual}}{dual residual per iteration} 58 | \item{\code{lagrangian}}{Lagrangian value per iteration} 59 | \item{\code{elapsed_time}}{Time taken to reach convergence} 60 | } 61 | \description{ 62 | Laplacian matrix of a connected graph with heavy-tailed data 63 | 64 | Computes the Laplacian matrix of a graph on the basis of an observed data matrix, 65 | where we assume the data to be Student-t distributed.
66 | } 67 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(fingraph) 3 | 4 | test_check("fingraph") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-regular.R: -------------------------------------------------------------------------------- 1 | set.seed(42) 2 | library(spectralGraphTopology) 3 | 4 | test_that("test learn_connected_graph", { 5 | w <- c(1, 1, 1, 1, 1, 1) / 3 6 | Laplacian <- L(w) 7 | p <- ncol(Laplacian) 8 | S <- cov(MASS::mvrnorm(p * 100, rep(0, p), MASS::ginv(Laplacian))) 9 | res <- learn_connected_graph(S, rho = 100) 10 | laplacian <- res$laplacian 11 | expect_true(res$convergence) 12 | expect_true(res$maxiter > 5) 13 | expect_true(spectralGraphTopology:::relative_error(Laplacian, laplacian) < 1e-1) 14 | expect_true(spectralGraphTopology:::fscore(Laplacian, laplacian, 1e-1) > .9) 15 | }) 16 | 17 | test_that("test learn_regular_heavytail_graph", { 18 | w <- c(1, 1, 1, 1, 1, 1) / 3 19 | Laplacian <- L(w) 20 | p <- ncol(Laplacian) 21 | X <- MASS::mvrnorm(p * 1000, rep(0, p), MASS::ginv(Laplacian)) 22 | res <- learn_regular_heavytail_graph(X, rho = 100, heavy_type = "student", nu = 1e4) 23 | laplacian <- res$laplacian 24 | expect_true(res$convergence) 25 | expect_true(res$maxiter > 5) 26 | expect_true(relative_error(Laplacian, laplacian) < 1e-1) 27 | expect_true(fscore(Laplacian, laplacian, 1e-1) > .9) 28 | }) 29 | 30 | test_that("test learn_regular_heavytail_graph", { 31 | w <- c(1, 1, 1, 1, 1, 1) / 3 32 | Laplacian <- L(w) 33 | p <- ncol(Laplacian) 34 | X <- scale(MASS::mvrnorm(p * 100, rep(0, p), MASS::ginv(Laplacian))) 35 | res_1 <- learn_regular_heavytail_graph(X, rho = 100) 36 | res_2 <- learn_regular_heavytail_graph(X, rho = 100, heavy_type = "student", nu = 1e4) 37 | res_3 <- learn_connected_graph(cor(X), rho = 100) 38 | 
expect_true(relative_error(res_1$laplacian, res_2$laplacian) < 1e-4) 39 | expect_true(relative_error(res_2$laplacian, res_3$laplacian) < 1e-4) 40 | }) 41 | 42 | test_that("test learn_regular_heavytail_graph student", { 43 | w <- c(1, 1, 1, 1, 1, 1) / 3 44 | Laplacian <- L(w) 45 | p <- ncol(Laplacian) 46 | nu <- 4 47 | X <- mvtnorm::rmvt(n = p * 500, delta = rep(0, p), sigma = ((nu-2)/nu) * MASS::ginv(Laplacian), df = nu) 48 | res <- learn_regular_heavytail_graph(X, rho = 1, heavy_type = "student", nu = nu) 49 | laplacian <- res$laplacian 50 | expect_true(res$convergence) 51 | expect_true(res$maxiter > 5) 52 | expect_true(relative_error(Laplacian, laplacian) < 1e-1) 53 | expect_true(fscore(Laplacian, laplacian, 1e-2) > .9) 54 | }) 55 | 56 | 57 | test_that("test learn_kcomp_heavytail_graph", { 58 | w1 <- c(1, 1, 1, 1, 1, 1)/3 59 | w2 <- c(1, 1, 1, 1, 1, 1)/3 60 | Laplacian <- block_diag(L(w1), L(w2)) 61 | p <- ncol(Laplacian) 62 | nu <- 4 63 | X <- mvtnorm::rmvt(n = p * 500, delta = rep(0, p), sigma = ((nu-2)/nu) * MASS::ginv(Laplacian), df = nu) 64 | res <- learn_kcomp_heavytail_graph(X, k = 2, rho = 1e2, heavy_type = "student", nu = nu, reltol = 1e-4) 65 | laplacian <- res$laplacian 66 | expect_true(res$convergence) 67 | expect_true(res$maxiter > 5) 68 | expect_true(spectralGraphTopology:::relative_error(Laplacian, laplacian) < 1e-1) 69 | expect_true(spectralGraphTopology:::fscore(Laplacian, laplacian, 1e-2) > .9) 70 | }) 71 | 72 | 73 | test_that("test learn_kcomp_heavytail_graph", { 74 | w1 <- c(1, 1, 1, 1, 1, 1)/3 75 | w2 <- c(1, 1, 1, 1, 1, 1)/3 76 | Laplacian <- block_diag(L(w1), L(w2)) 77 | p <- ncol(Laplacian) 78 | X <- MASS::mvrnorm(p * 100, rep(0, p), MASS::ginv(Laplacian)) 79 | res <- learn_kcomp_heavytail_graph(X, k = 2, rho = 100) 80 | laplacian <- res$laplacian 81 | expect_true(res$convergence) 82 | expect_true(res$maxiter > 5) 83 | expect_true(spectralGraphTopology:::relative_error(Laplacian, laplacian) < 1e-1) 84 | 
expect_true(spectralGraphTopology:::fscore(Laplacian, laplacian, 1e-1) > .9) 85 | }) 86 | -------------------------------------------------------------------------------- /vignettes/talk-rfinance-2023.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/convexfi/fingraph/78a3f9e5f81c2b6ca6cfed896f0decffa391cf9f/vignettes/talk-rfinance-2023.pdf -------------------------------------------------------------------------------- /vignettes/talk-rfinance-2023.pdf.asis: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{Talk package finbipartite at R/Finance 2023} 2 | %\VignetteEngine{R.rsp::asis} 3 | %\VignetteKeyword{graphs} 4 | %\VignetteKeyword{financial markets} 5 | --------------------------------------------------------------------------------