├── CNAME ├── .gitignore ├── r ├── vignettes │ ├── .gitignore │ └── bidask.Rmd ├── LICENSE ├── .Rbuildignore ├── NAMESPACE ├── man │ ├── rmean.Rd │ ├── rsum.Rd │ ├── ROLL.Rd │ ├── rfun.Rd │ ├── AR.Rd │ ├── CS.Rd │ ├── OHLC.Rd │ ├── bidask-package.Rd │ ├── EDGE.Rd │ ├── edge_expanding.Rd │ ├── edge_rolling.Rd │ ├── sim.Rd │ └── spread.Rd ├── tests │ ├── testthat.R │ └── testthat │ │ └── test-edge.R ├── R │ ├── roll.R │ ├── ar.R │ ├── cs.R │ ├── utils.R │ ├── ohlc.R │ ├── sim.R │ ├── spread.R │ └── edge.R ├── inst │ └── CITATION ├── .gitignore ├── DESCRIPTION └── README.md ├── sas ├── ohlc.sas7bdat ├── README.md └── edge.sas ├── _config.yml ├── python ├── bidask │ ├── __init__.py │ ├── edge_expanding.py │ ├── edge.py │ └── edge_rolling.py ├── pyproject.toml ├── LICENSE ├── .gitignore ├── tests │ └── test_edge.py └── README.md ├── c++ ├── edge.h ├── README.md └── edge.cpp ├── julia ├── Project.toml ├── test │ └── runtests.jl ├── README.md └── src │ └── BidAsk.jl ├── LICENSE ├── matlab ├── README.md └── edge.m ├── pseudocode └── README.md └── README.md /CNAME: -------------------------------------------------------------------------------- 1 | bidask.eguidotti.com -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .Rproj.user 3 | -------------------------------------------------------------------------------- /r/vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /r/LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2024 2 | COPYRIGHT HOLDER: Emanuele Guidotti -------------------------------------------------------------------------------- /sas/ohlc.sas7bdat: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/eguidotti/bidask/HEAD/sas/ohlc.sas7bdat -------------------------------------------------------------------------------- /r/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^renv$ 2 | ^renv\.lock$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^doc$ 6 | ^Meta$ 7 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | title: Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices 2 | description: David Ardia, Emanuele Guidotti, Tim A. Kroencke 3 | theme: jekyll-theme-cayman 4 | -------------------------------------------------------------------------------- /python/bidask/__init__.py: -------------------------------------------------------------------------------- 1 | from .edge import edge 2 | from .edge_rolling import edge_rolling 3 | from .edge_expanding import edge_expanding 4 | __all__ = ['edge', 'edge_rolling', 'edge_expanding'] 5 | -------------------------------------------------------------------------------- /r/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(edge) 4 | export(edge_expanding) 5 | export(edge_rolling) 6 | export(sim) 7 | export(spread) 8 | import(data.table) 9 | importFrom(stats,rbinom) 10 | importFrom(stats,rnorm) 11 | -------------------------------------------------------------------------------- /c++/edge.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | double edge( 5 | const std::vector &open, 6 | const std::vector &high, 7 | const std::vector &low, 8 | const std::vector &close, 9 | const bool sign = false); 10 | 
-------------------------------------------------------------------------------- /r/man/rmean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{rmean} 4 | \alias{rmean} 5 | \title{Rolling mean} 6 | \usage{ 7 | rmean(x, width, shift, na.rm) 8 | } 9 | \description{ 10 | Rolling mean 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /r/man/rsum.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{rsum} 4 | \alias{rsum} 5 | \title{#' Rolling sum} 6 | \usage{ 7 | rsum(x, width, shift, na.rm) 8 | } 9 | \description{ 10 | #' Rolling sum 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /r/man/ROLL.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/roll.R 3 | \name{ROLL} 4 | \alias{ROLL} 5 | \title{Roll Estimator} 6 | \usage{ 7 | ROLL(close, width, sign, na.rm) 8 | } 9 | \description{ 10 | Roll Estimator 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /r/man/rfun.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{rfun} 4 | \alias{rfun} 5 | \title{Rolling function} 6 | \usage{ 7 | rfun(froll, x, width, shift, na.rm) 8 | } 9 | \description{ 10 | Rolling function 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /r/man/AR.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ar.R 3 | \name{AR} 4 | \alias{AR} 5 | \title{Abdi-Ranaldo Estimator} 6 | \usage{ 7 | AR(high, low, close, width, method, sign, na.rm) 8 | } 9 | \description{ 10 | Abdi-Ranaldo Estimator 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /r/man/CS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cs.R 3 | \name{CS} 4 | \alias{CS} 5 | \title{Corwin-Schultz Estimator} 6 | \usage{ 7 | CS(high, low, close, width, method, sign, na.rm) 8 | } 9 | \description{ 10 | Corwin-Schultz Estimator 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /r/man/OHLC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ohlc.R 3 | \name{OHLC} 4 | \alias{OHLC} 5 | \title{OHLC Estimators} 6 | \usage{ 7 | OHLC(open, high, low, close, width, method, sign, na.rm) 8 | } 9 | \description{ 10 | OHLC Estimators 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /julia/Project.toml: -------------------------------------------------------------------------------- 1 | name = "BidAsk" 2 | uuid = "3db38d6f-c11d-46b7-88ad-5bdaea376200" 3 | version = "2.1.0" 4 | 5 | [deps] 6 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 7 | 8 | [extras] 9 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" 10 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 11 | 12 | [targets] 13 | test = ["CSV", "Test"] -------------------------------------------------------------------------------- /r/tests/testthat.R: 
#' Roll Estimator
#'
#' Rolling spread estimate computed from close prices only. The squared
#' spread is -4 times the (small-sample adjusted) covariance between two
#' consecutive close-to-close log-returns; the signed square root of that
#' quantity is returned.
#'
#' @param close numeric vector of close prices.
#' @param width window specification forwarded to \code{rmean} and \code{rsum}.
#' @param sign logical; when \code{FALSE} the absolute value of the estimate
#' is returned, otherwise the sign of the squared estimate is preserved.
#' @param na.rm forwarded to \code{rmean} and \code{rsum}.
#'
#' @return A list with a single element named \code{"ROLL"}.
#'
#' @keywords internal
#'
ROLL <- function(close, width, sign, na.rm){

  # log-prices at t, t-1 and t-2
  p0 <- log(close)
  p1 <- shift(p0, 1)
  p2 <- shift(p0, 2)

  # current and lagged log-returns
  ret <- p0 - p1
  ret_lag <- p1 - p2

  # offset handed to the rolling helpers (two lags are used above)
  lag <- 2

  # rolling means of r1, r2 and r1*r2 (columns 1, 2 and 3)
  moments <- rmean(
    data.frame(ret, ret_lag, ret * ret_lag),
    width = width, shift = lag, na.rm = na.rm
  )

  # rolling count of available lagged returns
  nobs <- rsum(!is.na(ret_lag), width = width, shift = lag, na.rm = na.rm)

  # covariance of consecutive returns, scaled by n / (n - 1)
  cov_hat <- moments[, 3] - moments[, 1] * moments[, 2]
  spread2 <- -4 * nobs / (nobs - 1) * cov_hat

  # signed square root; `base::` because the argument `sign` is a logical
  spread <- base::sign(spread2) * sqrt(abs(spread2))
  if(!sign) spread <- abs(spread)

  list("ROLL" = spread)

}
#' Abdi-Ranaldo Estimator
#'
#' Rolling spread estimates built from the previous close and the mid-range
#' (average of log-high and log-low) of the previous and the current period.
#'
#' @param high numeric vector of high prices.
#' @param low numeric vector of low prices.
#' @param close numeric vector of close prices.
#' @param width window specification forwarded to \code{rmean}.
#' @param method character vector; any of \code{"AR"} and \code{"AR2"}.
#' @param sign logical; when \code{FALSE} the absolute value of the
#' \code{"AR"} estimate is returned.
#' @param na.rm forwarded to \code{rmean}.
#'
#' @return A list with the elements \code{"AR"} and/or \code{"AR2"}, always
#' in that order and only for the requested methods.
#'
#' @keywords internal
#'
AR <- function(high, low, close, width, method, sign, na.rm){

  ok <- c("AR","AR2")
  if(length(ko <- setdiff(method, ok)))
    stop(sprintf("Method(s) '%s' not available. The available methods are '%s'.",
                 paste(ko, collapse = "', '"), paste(ok, collapse = "', '")))

  # mid-range of the current and previous period, and the previous log-close
  mid <- (log(high) + log(low)) / 2
  mid_lag <- shift(mid, 1)
  close_lag <- shift(log(close), 1)

  # per-period squared-spread proxy
  s2 <- 4 * (close_lag - mid_lag) * (close_lag - mid)

  # offset handed to the rolling helper (one lag is used above)
  lag <- 1
  ar <- ar2 <- NULL

  if("AR" %in% method){
    est <- rmean(s2, width = width, shift = lag, na.rm = na.rm)
    # `base::sign` is called explicitly, as in the other estimators, so the
    # call does not depend on R skipping the logical argument `sign`
    est <- base::sign(est) * sqrt(abs(est))
    if(!sign) est <- abs(est)
    ar <- list("AR" = est)
  }

  if("AR2" %in% method){
    # negative proxies are floored at zero before taking the square root;
    # this must stay after the "AR" branch, which needs the raw values
    s2[s2 < 0] <- 0
    ar2 <- list("AR2" = rmean(sqrt(s2), width = width, shift = lag, na.rm = na.rm))
  }

  c(ar, ar2)

}
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /python/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Emanuele Guidotti 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
#' Corwin-Schultz Estimator
#'
#' Rolling spread estimates from two-period high-low ranges. The current
#' high and low are shifted by \code{gap} when the previous close lies
#' outside the current range before the two-period range is formed.
#'
#' @param high numeric vector of high prices.
#' @param low numeric vector of low prices.
#' @param close numeric vector of close prices.
#' @param width window specification forwarded to \code{rmean}.
#' @param method character vector; any of \code{"CS"} and \code{"CS2"}.
#' @param sign logical; when \code{FALSE} the absolute value of the
#' \code{"CS"} estimate is returned.
#' @param na.rm forwarded to \code{rmean}.
#'
#' @return A list with the elements \code{"CS"} and/or \code{"CS2"}, always
#' in that order and only for the requested methods.
#'
#' @keywords internal
#'
CS <- function(high, low, close, width, method, sign, na.rm){

  ok <- c("CS","CS2")
  if(length(ko <- setdiff(method, ok)))
    stop(sprintf("Method(s) '%s' not available. The available methods are '%s'.",
                 paste(ko, collapse = "', '"), paste(ok, collapse = "', '")))

  h <- log(high)
  l <- log(low)

  c1 <- shift(log(close), 1)
  h1 <- shift(h, 1)
  l1 <- shift(l, 1)

  # adjustment applied when the previous close is above the current high
  # (positive part) or below the current low (negative part)
  gap <- pmax(0, c1 - h) + pmin(0, c1 - l)
  ah <- h + gap
  al <- l + gap

  # sum of the two one-period squared ranges, and the two-period squared range
  b <- (h - l)^2 + (h1 - l1)^2
  g <- (pmax(ah, h1) - pmin(al, l1))^2

  a <- (sqrt(2*b) - sqrt(b)) / (3 - 2*sqrt(2)) - sqrt(g / (3 - 2*sqrt(2)))
  s <- 2*(exp(a) - 1) / (1 + exp(a))

  # offset handed to the rolling helper (one lag is used above)
  lag <- 1
  cs <- cs2 <- NULL

  if("CS" %in% method){
    cs <- rmean(s, width = width, shift = lag, na.rm = na.rm)
    if(!sign) cs <- abs(cs)
    cs <- list("CS" = cs)
  }

  if("CS2" %in% method){
    # negative per-period estimates are set to zero before averaging;
    # this must stay after the "CS" branch, which needs the raw values
    s[s < 0] <- 0
    cs2 <- list("CS2" = rmean(s, width = width, shift = lag, na.rm = na.rm))
  }

  c(cs, cs2)

}

#' @keywords internal
"_PACKAGE"

#' @import data.table
#' @importFrom stats rbinom rnorm
NULL

#' Rolling function
#'
#' Applies a rolling function with the calling convention
#' \code{froll(x, n =, na.rm =, adaptive =, fill =)} to \code{x} and masks
#' the leading positions of the result.
#'
#' @param froll rolling function to apply (called with the arguments above).
#' @param x vector, or a data.frame whose columns are processed by \code{froll}.
#' @param width either a single window width or a vector. A vector whose
#' length equals the number of observations is used as per-observation
#' widths; any other vector is converted to per-observation window sizes
#' from the differences of its elements (presumably window end positions --
#' TODO confirm against the callers).
#' @param shift number of observations subtracted from \code{width} to
#' obtain the window size passed to \code{froll}.
#' @param na.rm forwarded to \code{froll}.
#'
#' @return An object with as many observations as \code{x}: a vector when
#' \code{froll} returns a vector, a data.frame when it returns a list.
#'
#' @keywords internal
#'
rfun <- function(froll, x, width, shift, na.rm){

  nw <- length(width)
  nc <- ncol(x); nr <- nrow(x)
  if(is.null(nr)) nr <- length(x)

  n <- width - shift
  if(nw != 1 && nw != nr){
    n <- rep(0, nr)
    n[width[-1]] <- diff(pmax(1, width))
  }

  # a fixed window with no usable observation: everything is missing
  if(nw == 1 && n < 1){
    if(is.null(nc)) return(rep(NA, nr))
    return(as.data.frame(matrix(data = NA, nrow = nr, ncol = nc)))
  }

  y <- froll(x, n = n, na.rm = na.rm, adaptive = nw > 1, fill = NA)
  if(is.list(y)) setDF(y)

  # mask the first `width - 1` results of a fixed window. The range is
  # clamped to the number of observations: indexing past the end would
  # otherwise silently grow the result beyond the length of the input.
  if(nw == 1 && width > 1){
    lead <- seq_len(min(width - 1, nr))
    if(is.data.frame(y)) y[lead,] <- NA
    else y[lead] <- NA
  }

  return(y)

}
#' Rolling sum
#'
#' Thin wrapper that applies \code{frollsum} through \code{rfun}.
#'
#' @param x,width,shift,na.rm passed on to \code{rfun} unchanged.
#'
#' @keywords internal
#'
rsum <- function(x, width, shift, na.rm){

  rfun(froll = frollsum, x = x, width = width, shift = shift, na.rm = na.rm)

}

#' Rolling mean
#'
#' Thin wrapper that applies \code{frollmean} through \code{rfun}.
#'
#' @param x,width,shift,na.rm passed on to \code{rfun} unchanged.
#'
#' @keywords internal
#'
rmean <- function(x, width, shift, na.rm){

  rfun(froll = frollmean, x = x, width = width, shift = shift, na.rm = na.rm)

}
10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://github.com/eguidotti/bidask} 15 | \item Report bugs at \url{https://github.com/eguidotti/bidask/issues} 16 | } 17 | 18 | } 19 | \author{ 20 | \strong{Maintainer}: Emanuele Guidotti \email{emanuele.guidotti@usi.ch} (\href{https://orcid.org/0000-0002-8961-6623}{ORCID}) 21 | 22 | Other contributors: 23 | \itemize{ 24 | \item David Ardia (\href{https://orcid.org/0000-0003-2823-782X}{ORCID}) [contributor] 25 | \item Tim Kroencke (\href{https://orcid.org/0000-0001-8700-356X}{ORCID}) [contributor] 26 | } 27 | 28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /r/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bidask 2 | Type: Package 3 | Title: Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices 4 | Version: 2.1.5 5 | Authors@R: c( 6 | person(given = "Emanuele", family = "Guidotti", email = "emanuele.guidotti@usi.ch", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8961-6623")), 7 | person(given = "David", family = "Ardia", role = c("ctb"), comment = c(ORCID = "0000-0003-2823-782X")), 8 | person(given = "Tim", family = "Kroencke", role = c("ctb"), comment = c(ORCID = "0000-0001-8700-356X")) 9 | ) 10 | Description: Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices 11 | described in Ardia, Guidotti, & Kroencke (JFE, 2024) . 12 | It also provides an implementation of the estimators described in 13 | Roll (JF, 1984) , 14 | Corwin & Schultz (JF, 2012) , 15 | and Abdi & Ranaldo (RFS, 2017) . 
16 | License: MIT + file LICENSE 17 | URL: https://github.com/eguidotti/bidask 18 | BugReports: https://github.com/eguidotti/bidask/issues 19 | Encoding: UTF-8 20 | Imports: data.table 21 | RoxygenNote: 7.2.3 22 | Suggests: 23 | xts, 24 | zoo, 25 | dplyr, 26 | crypto2, 27 | quantmod, 28 | ggplot2, 29 | knitr, 30 | rmarkdown, 31 | testthat (>= 3.0.0) 32 | Config/testthat/edition: 3 33 | VignetteBuilder: knitr 34 | -------------------------------------------------------------------------------- /r/man/EDGE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/edge.R 3 | \name{edge} 4 | \alias{edge} 5 | \title{Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices} 6 | \usage{ 7 | edge(open, high, low, close, sign = FALSE) 8 | } 9 | \arguments{ 10 | \item{open}{numeric vector of open prices.} 11 | 12 | \item{high}{numeric vector of high prices.} 13 | 14 | \item{low}{numeric vector of low prices.} 15 | 16 | \item{close}{numeric vector of close prices.} 17 | 18 | \item{sign}{whether to return signed estimates.} 19 | } 20 | \value{ 21 | The spread estimate. A value of 0.01 corresponds to a spread of 1\%. 22 | } 23 | \description{ 24 | Implements the efficient estimator of bid-ask spreads from open, high, low, 25 | and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): 26 | \doi{10.1016/j.jfineco.2024.103916} 27 | } 28 | \details{ 29 | Prices must be sorted in ascending order of the timestamp. 30 | } 31 | \examples{ 32 | # reduce number of threads to pass CRAN checks (you can ignore this) 33 | data.table::setDTthreads(1) 34 | 35 | # simulate open, high, low, and close prices with spread 1\% 36 | x <- sim(n = 1000, spread = 0.01) 37 | 38 | # estimate the spread 39 | edge(x$Open, x$High, x$Low, x$Close) 40 | 41 | } 42 | \references{ 43 | Ardia, D., Guidotti, E., Kroencke, T.A. (2024). 
Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 44 | \doi{10.1016/j.jfineco.2024.103916} 45 | } 46 | -------------------------------------------------------------------------------- /r/man/edge_expanding.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/edge.R 3 | \name{edge_expanding} 4 | \alias{edge_expanding} 5 | \title{Expanding Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices} 6 | \usage{ 7 | edge_expanding(open, high, low, close, sign = FALSE, na.rm = TRUE) 8 | } 9 | \arguments{ 10 | \item{open}{numeric vector of open prices.} 11 | 12 | \item{high}{numeric vector of high prices.} 13 | 14 | \item{low}{numeric vector of low prices.} 15 | 16 | \item{close}{numeric vector of close prices.} 17 | 18 | \item{sign}{whether to return signed estimates.} 19 | 20 | \item{na.rm}{whether to ignore missing values.} 21 | } 22 | \value{ 23 | Vector of spread estimates. 24 | A value of 0.01 corresponds to a spread of 1\%. 25 | This function always returns a result of the same length as the input prices. 26 | } 27 | \description{ 28 | Implements an expanding window calculation of the efficient estimator of bid-ask spreads 29 | from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): 30 | \doi{10.1016/j.jfineco.2024.103916}. 31 | } 32 | \details{ 33 | Prices must be sorted in ascending order of the timestamp. 
34 | } 35 | \examples{ 36 | # reduce number of threads to pass CRAN checks (you can ignore this) 37 | data.table::setDTthreads(1) 38 | 39 | # simulate open, high, low, and close prices with spread 1\% 40 | x <- sim(n = 1000, spread = 0.01) 41 | 42 | # estimate the spread using an expanding window 43 | s <- edge_expanding(x$Open, x$High, x$Low, x$Close) 44 | tail(s) 45 | # equivalent to 46 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x), na.rm = TRUE) 47 | tail(s) 48 | 49 | } 50 | \references{ 51 | Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 52 | \doi{10.1016/j.jfineco.2024.103916} 53 | } 54 | -------------------------------------------------------------------------------- /c++/README.md: -------------------------------------------------------------------------------- 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices 2 | 3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 4 | 5 | ## Installation 6 | 7 | Download the C++ source file [`edge.cpp`](https://github.com/eguidotti/bidask/tree/main/c++/edge.cpp) and the corresponding header [`edge.h`](https://github.com/eguidotti/bidask/tree/main/c++/edge.h) 8 | 9 | ## Usage 10 | 11 | Arguments: 12 | 13 | ```c++ 14 | edge(open, high, low, close, sign=false) 15 | ``` 16 | 17 | | field | description | 18 | | ------- | -------------------------------------------- | 19 | | `open` | std::vector\ of open prices. | 20 | | `high` | std::vector\ of high prices. | 21 | | `low` | std::vector\ of low prices. | 22 | | `close` | std::vector\ of close prices. | 23 | | `sign` | Whether signed estimates should be returned. 
| 24 | 25 | The input prices must be sorted in ascending order of the timestamp. 26 | 27 | The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%. 28 | 29 | ## Cite as 30 | 31 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 32 | 33 | A BibTex entry for LaTeX users is: 34 | 35 | ```bibtex 36 | @article{edge, 37 | title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices}, 38 | journal = {Journal of Financial Economics}, 39 | volume = {161}, 40 | pages = {103916}, 41 | year = {2024}, 42 | doi = {https://doi.org/10.1016/j.jfineco.2024.103916}, 43 | author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke}, 44 | } 45 | ``` 46 | 47 | -------------------------------------------------------------------------------- /matlab/README.md: -------------------------------------------------------------------------------- 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices 2 | 3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 4 | 5 | ## Installation 6 | 7 | Download the file [`edge.m`](https://github.com/eguidotti/bidask/tree/main/matlab/edge.m) into your working directory. 8 | 9 | ## Usage 10 | 11 | Import the estimator: 12 | 13 | ```matlab 14 | import edge.* 15 | ``` 16 | 17 | Arguments: 18 | 19 | ```matlab 20 | edge(open, high, low, close, sign=false) 21 | ``` 22 | 23 | | field | description | 24 | | ------- | ------------------------------------------- | 25 | | `open` | Vector of open prices with size `T` x `1`. | 26 | | `high` | Vector of high prices with size `T` x `1`. 
| 27 | | `low` | Vector of low prices with size `T` x `1`. | 28 | | `close` | Vector of close prices with size `T` x `1`. | 29 | | `sign` | Whether to return signed estimates. | 30 | 31 | The input prices must be sorted in ascending order of the timestamp. 32 | 33 | The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%. 34 | 35 | ## Example 36 | 37 | ```matlab 38 | import edge.* 39 | 40 | df = readmatrix('https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv'); 41 | edge(df(:,1), df(:,2), df(:,3), df(:,4)) 42 | ``` 43 | 44 | ## Cite as 45 | 46 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 47 | 48 | A BibTex entry for LaTeX users is: 49 | 50 | ```bibtex 51 | @article{edge, 52 | title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices}, 53 | journal = {Journal of Financial Economics}, 54 | volume = {161}, 55 | pages = {103916}, 56 | year = {2024}, 57 | doi = {https://doi.org/10.1016/j.jfineco.2024.103916}, 58 | author = {David Ardia and Emanuele Guidotti and Tim A. 
Kroencke}, 59 | } 60 | ``` 61 | 62 | -------------------------------------------------------------------------------- /julia/README.md: -------------------------------------------------------------------------------- 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices 2 | 3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 4 | 5 | ## Installation 6 | 7 | Install this package with: 8 | 9 | ```julia 10 | using Pkg 11 | Pkg.add(PackageSpec(url="https://github.com/eguidotti/bidask.git", subdir="julia/")) 12 | ``` 13 | 14 | ## Usage 15 | 16 | Import the package: 17 | 18 | ```julia 19 | using BidAsk 20 | ``` 21 | 22 | Arguments: 23 | 24 | ```julia 25 | edge(open, high, low, close, sign=false) 26 | ``` 27 | 28 | | field | description | 29 | | ------- | ----------------------------------- | 30 | | `open` | AbstractVector of open prices. | 31 | | `high` | AbstractVector of high prices. | 32 | | `low` | AbstractVector of low prices. | 33 | | `close` | AbstractVector of close prices. | 34 | | `sign` | Whether to return signed estimates. | 35 | 36 | The input prices must be sorted in ascending order of the timestamp. 37 | 38 | The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%. 39 | 40 | ## Example 41 | 42 | ```julia 43 | using BidAsk 44 | using CSV 45 | 46 | df = CSV.File(download("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")) 47 | edge(df.:Open, df.:High, df.:Low, df.:Close) 48 | ``` 49 | 50 | ## Cite as 51 | 52 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. 
#' OHLC Estimators
#'
#' Computes the generalized OHL, OHLC, CHL, and CHLO estimators of the
#' bid-ask spread (or equally weighted averages of them) from open, high,
#' low, and close prices.
#'
#' @param open,high,low,close numeric vectors of prices. They are
#'   log-transformed and lagged by one period inside the function.
#' @param width,na.rm forwarded unchanged to the package helpers `rmean()`
#'   and `rsum()` (defined elsewhere in the package).
#' @param method character vector of estimator identifiers. Each element is
#'   one of `"OHL"`, `"OHLC"`, `"CHL"`, `"CHLO"`, or several of them joined by
#'   a dot (e.g. `"OHLC.CHLO"`), in which case the squared-spread estimates
#'   of the listed estimators are averaged.
#' @param sign logical. If `TRUE`, the returned root carries the sign of the
#'   squared-spread estimate; otherwise the root of its absolute value is
#'   returned.
#'
#' @return A list with one element per entry of `method`, named by `method`.
#'
#' @keywords internal
#'
OHLC <- function(open, high, low, close, width, method, sign, na.rm){

  # "OHLC.CHLO" -> c("OHLC", "CHLO"); one character vector per requested method
  splitmethods <- strsplit(method, split = ".", fixed = TRUE)
  uniquemethods <- unique(unlist(splitmethods))
  ok <- c("OHL","OHLC","CHL","CHLO")
  ko <- setdiff(uniquemethods, ok)
  # an empty string splits into character(0): reject it here instead of
  # failing later with an unrelated error
  if(length(ko) > 0 || any(lengths(splitmethods) == 0L))
    stop(sprintf(
      "Method(s) '%s' not available. The available methods include '%s', or any combination of them, e.g. 'OHLC.CHLO'.",
      paste(ko, collapse = "', '"), paste(ok, collapse = "', '")
    ))

  # log-prices and mid-range
  o <- log(open)
  h <- log(high)
  l <- log(low)
  c <- log(close)
  m <- (h + l) / 2

  # values of the previous period
  c1 <- shift(c, 1)
  h1 <- shift(h, 1)
  l1 <- shift(l, 1)
  m1 <- shift(m, 1)

  # tau flags the periods where the high differs from the low, or the low
  # differs from the previous close; the first period has no predecessor
  if(length(c1) == 0) c1 <- rep(NA, length(h))
  tau <- ifelse(is.na(h) | is.na(l), NA, h != l | l != c1)
  tau[1] <- NA

  # value passed as `shift` to rmean()/rsum(); named `lag` so that it does
  # not shadow the shift() function used above
  lag <- 1
  pt <- rmean(tau, width = width, shift = lag, na.rm = na.rm)
  nt <- rsum(tau, width = width, shift = lag, na.rm = TRUE)

  # frequency with which the open differs from the high / the low
  if(any(c("OHL", "OHLC") %in% uniquemethods)){
    po1 <- rmean(tau * (o != h), width = width, shift = lag, na.rm = na.rm)
    po2 <- rmean(tau * (o != l), width = width, shift = lag, na.rm = na.rm)
    po <- po1 + po2
  }

  # frequency with which the previous close differs from the previous high / low
  if(any(c("CHL", "CHLO") %in% uniquemethods)){
    pc1 <- rmean(tau * (c1 != h1), width = width, shift = lag, na.rm = na.rm)
    pc2 <- rmean(tau * (c1 != l1), width = width, shift = lag, na.rm = na.rm)
    pc <- pc1 + pc2
  }

  # squared-spread estimate from a pair of returns and the probability `pi`
  s2 <- function(r1, r2, pi){
    x <- data.frame(r1*r2, r1, tau*r2); x[1,] <- NA
    avg <- rmean(x, width = width, shift = lag, na.rm = na.rm)
    # undefined with fewer than two periods with tau = 1, or when pi is zero
    avg[which(nt < 2 | pi == 0),] <- NA
    -8 / pi * (avg[,1] - (avg[,2] * avg[,3]) / pt)
  }

  # squared-spread estimates of the requested base estimators, keyed by name
  s2_by_method <- list()
  if("OHL" %in% uniquemethods)
    s2_by_method[["OHL"]] <- s2(m - o, o - m1, po)
  if("OHLC" %in% uniquemethods)
    s2_by_method[["OHLC"]] <- s2(m - o, o - c1, po)
  if("CHL" %in% uniquemethods)
    s2_by_method[["CHL"]] <- s2(m - c1, c1 - m1, pc)
  if("CHLO" %in% uniquemethods)
    s2_by_method[["CHLO"]] <- s2(o - c1, c1 - m1, pc)

  s <- lapply(splitmethods, function(parts){
    # equally weighted average of the squared estimates, then (signed) root
    s2 <- Reduce(`+`, s2_by_method[parts]) / length(parts)
    s <- sqrt(abs(s2))
    if(sign) s <- s * base::sign(s2)
    s
  })

  names(s) <- method
  return(s)

}
-------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Extra 132 | .DS_Store 133 | .idea 134 | poetry.lock 135 | -------------------------------------------------------------------------------- /r/man/edge_rolling.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/edge.R 3 | \name{edge_rolling} 4 | \alias{edge_rolling} 5 | \title{Rolling Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices} 6 | \usage{ 7 | edge_rolling(open, high, low, close, width, sign = FALSE, na.rm = FALSE) 8 | } 9 | \arguments{ 10 | \item{open}{numeric vector of open prices.} 11 | 12 | \item{high}{numeric vector of high prices.} 13 | 14 | \item{low}{numeric vector of low prices.} 15 | 16 | \item{close}{numeric vector of close prices.} 17 | 18 | \item{width}{if an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. See examples.} 19 | 20 | \item{sign}{whether to return signed estimates.} 21 | 22 | \item{na.rm}{whether to ignore missing values.} 23 | } 24 | \value{ 25 | Vector of spread estimates. 26 | A value of 0.01 corresponds to a spread of 1\%. 27 | This function always returns a result of the same length as the input prices. 
28 | } 29 | \description{ 30 | Implements a rolling window calculation of the efficient estimator of bid-ask spreads 31 | from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): 32 | \doi{10.1016/j.jfineco.2024.103916}. 33 | } 34 | \details{ 35 | Prices must be sorted in ascending order of the timestamp. 36 | } 37 | \examples{ 38 | # reduce number of threads to pass CRAN checks (you can ignore this) 39 | data.table::setDTthreads(1) 40 | 41 | # simulate open, high, low, and close prices with spread 1\% 42 | x <- sim(n = 1000, spread = 0.01) 43 | 44 | # estimate the spread using a rolling window 45 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21) 46 | tail(s) 47 | 48 | # estimate the spread using custom endpoints 49 | ep <- c(3, 35, 100) 50 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = ep) 51 | s[c(35, 100)] 52 | # equivalent to 53 | edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35]) 54 | edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100]) 55 | 56 | # estimate the spread using an expanding window 57 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x)) 58 | tail(s) 59 | # equivalent to 60 | s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE) 61 | tail(s) 62 | 63 | } 64 | \references{ 65 | Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
66 | \doi{10.1016/j.jfineco.2024.103916} 67 | } 68 | -------------------------------------------------------------------------------- /r/man/sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sim.R 3 | \name{sim} 4 | \alias{sim} 5 | \title{Simulation of Open, High, Low, and Close Prices} 6 | \usage{ 7 | sim( 8 | n = 10000, 9 | trades = 390, 10 | prob = 1, 11 | spread = 0.01, 12 | volatility = 0.03, 13 | overnight = 0, 14 | drift = 0, 15 | units = 1, 16 | sign = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{n}{the number of periods to simulate.} 21 | 22 | \item{trades}{the number of trades per period.} 23 | 24 | \item{prob}{the probability to observe a trade.} 25 | 26 | \item{spread}{the bid-ask spread.} 27 | 28 | \item{volatility}{the open-to-close volatility.} 29 | 30 | \item{overnight}{the close-to-open volatility.} 31 | 32 | \item{drift}{the expected return per period.} 33 | 34 | \item{units}{the units of the time period. One of: \code{1}, \code{sec}, \code{min}, \code{hour}, \code{day}, \code{week}, \code{month}, \code{year}.} 35 | 36 | \item{sign}{whether to return positive prices for buys and negative prices for sells.} 37 | } 38 | \value{ 39 | A data.frame of open, high, low, and close prices if \code{units=1} (default). 40 | Otherwise, an \code{xts} object is returned (requires the \code{xts} package to be installed). 41 | } 42 | \description{ 43 | This function performs simulations consisting of \code{n} periods and where each period consists of a given number of \code{trades}. 44 | For each trade, the actual price \eqn{P_t} is simulated as \eqn{P_t = P_{t-1}e^{\sigma x}}, where \eqn{\sigma} is the standard deviation per trade and \eqn{x} is a random draw from a unit normal distribution. 45 | The standard deviation per trade equals the \code{volatility} divided by the square root of the number of \code{trades}. 
module BidAsk

using Statistics

export edge

"""
    edge(open, high, low, close, sign=false)

Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices.

Implements the efficient estimator of bid-ask spreads from open, high, low,
and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
https://doi.org/10.1016/j.jfineco.2024.103916

# Arguments
- `open`: AbstractVector of open prices
- `high`: AbstractVector of high prices
- `low`: AbstractVector of low prices
- `close`: AbstractVector of close prices
- `sign`: whether to return signed estimates

# Notes
Prices must be sorted in ascending order of the timestamp.

# Returns
The spread estimate. A value of 0.01 corresponds to a spread of 1%.
`NaN` is returned when fewer than two periods have `tau = 1`, or when the
open (or previous close) never differs from the high/low.
"""
function edge(open::AbstractVector, high::AbstractVector, low::AbstractVector, close::AbstractVector, sign::Bool = false)

    # mean over the non-missing entries only
    mean_obs(x) = mean(skipmissing(x))

    # log-prices over the full sample
    log_open = log.(open)
    log_high = log.(high)
    log_low = log.(low)
    log_close = log.(close)
    log_mid = (log_high .+ log_low) ./ 2.0

    # previous-period values (observations 1 .. T-1)
    h1 = log_high[1:end-1]
    l1 = log_low[1:end-1]
    c1 = log_close[1:end-1]
    m1 = log_mid[1:end-1]

    # current-period values (observations 2 .. T)
    o = log_open[2:end]
    h = log_high[2:end]
    l = log_low[2:end]
    m = log_mid[2:end]

    # tau is missing whenever one of its inputs is missing
    tau = ifelse.(ismissing.(h) .| ismissing.(l) .| ismissing.(c1), missing, (h .!= l) .| (l .!= c1))
    open_ne_high = collect(skipmissing(tau .* (o .!= h)))
    open_ne_low = collect(skipmissing(tau .* (o .!= l)))
    close_ne_high = collect(skipmissing(tau .* (c1 .!= h1)))
    close_ne_low = collect(skipmissing(tau .* (c1 .!= l1)))

    # the estimator needs at least two periods with tau = 1
    nt = sum(skipmissing(tau), init=0)
    if nt < 2 || isempty(open_ne_high) || isempty(open_ne_low) || isempty(close_ne_high) || isempty(close_ne_low)
        return NaN
    end

    # probabilities
    pt = nt / count(!ismissing, tau)
    po = mean(open_ne_high) + mean(open_ne_low)
    pc = mean(close_ne_high) + mean(close_ne_low)
    if po == 0 || pc == 0
        return NaN
    end

    # log-returns
    r1 = m .- o
    r2 = o .- m1
    r3 = m .- c1
    r4 = c1 .- m1
    r5 = o .- c1

    # de-meaned log-returns
    d1 = r1 .- tau .* mean_obs(r1) ./ pt
    d3 = r3 .- tau .* mean_obs(r3) ./ pt
    d5 = r5 .- tau .* mean_obs(r5) ./ pt

    # input vectors of the two moment conditions
    x1 = - 4.0 ./ po .* d1 .* r2 .- 4.0 ./ pc .* d3 .* r4
    x2 = - 4.0 ./ po .* d1 .* r5 .- 4.0 ./ pc .* d5 .* r4

    # expectations
    e1 = mean_obs(x1)
    e2 = mean_obs(x2)

    # variances
    v1 = mean_obs(x1 .* x1) - e1 * e1
    v2 = mean_obs(x2 .* x2) - e2 * e2

    # variance-weighted average when the total variance is positive,
    # plain average otherwise
    vt = v1 + v2
    s2 = vt > 0 ? (v2*e1 + v1*e2) / vt : (e1 + e2) / 2

    # (signed) root of the squared spread
    s = sqrt(abs(s2))
    return (sign && s2 < 0) ? -s : s
end

end # module
function s = edge(open, high, low, close, sign)
% Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
%
% Implements the efficient estimator of bid-ask spreads from open, high, low,
% and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
% https://doi.org/10.1016/j.jfineco.2024.103916
%
% Parameters
% ----------
% - `open`: vector of open prices (Tx1 or 1xT)
% - `high`: vector of high prices (Tx1 or 1xT)
% - `low`: vector of low prices (Tx1 or 1xT)
% - `close`: vector of close prices (Tx1 or 1xT)
% - `sign`: boolean value indicating whether to return signed estimates
%           (optional, default false)
%
% Notes
% -----
% Prices must be sorted in ascending order of the timestamp.
%
% Returns
% -------
% The spread estimate. A value of 0.01 corresponds to a spread of 1%.
% NaN is returned when fewer than two periods have tau = 1, or when the
% open (or previous close) never differs from the high/low.
%
    if nargin < 5
        sign = false;
    end

    % force column orientation so that row vectors are accepted as well;
    % column-vector inputs are unaffected
    p = log([open(:), high(:), low(:), close(:)]);

    % log-prices and mid-range
    o = p(:,1);
    h = p(:,2);
    l = p(:,3);
    c = p(:,4);
    m = (h + l) / 2;

    % previous-period values (observations 1 .. T-1)
    h1 = h(1:end-1,:);
    l1 = l(1:end-1,:);
    c1 = c(1:end-1,:);
    m1 = m(1:end-1,:);

    % current-period values (observations 2 .. T)
    o = o(2:end,:);
    h = h(2:end,:);
    l = l(2:end,:);
    c = c(2:end,:);
    m = m(2:end,:);

    % indicator variables; entries stay NaN where an input is NaN
    tau = NaN(size(c));
    idx = ~(isnan(h) | isnan(l) | isnan(c1));
    tau(idx) = (h(idx) ~= l(idx)) | (l(idx) ~= c1(idx));

    phi1 = NaN(size(c));
    idx = ~(isnan(o) | isnan(h));
    phi1(idx) = tau(idx) .* (o(idx) ~= h(idx));

    phi2 = NaN(size(c));
    idx = ~(isnan(o) | isnan(l));
    phi2(idx) = tau(idx) .* (o(idx) ~= l(idx));

    phi3 = NaN(size(c));
    idx = ~(isnan(c1) | isnan(h1));
    phi3(idx) = tau(idx) .* (c1(idx) ~= h1(idx));

    phi4 = NaN(size(c));
    idx = ~(isnan(c1) | isnan(l1));
    phi4(idx) = tau(idx) .* (c1(idx) ~= l1(idx));

    % probabilities
    pt = mean(tau, "omitnan");
    po = mean(phi1, "omitnan") + mean(phi2, "omitnan");
    pc = mean(phi3, "omitnan") + mean(phi4, "omitnan");

    % return missing if fewer than two periods have tau = 1,
    % or if po or pc is zero
    if sum(tau, "omitnan") < 2 || po == 0 || pc == 0
        s = NaN;
        return;
    end

    % log-returns
    r1 = m-o;
    r2 = o-m1;
    r3 = m-c1;
    r4 = c1-m1;
    r5 = o-c1;

    % de-meaned log-returns
    d1 = r1 - tau .* mean(r1, "omitnan") / pt;
    d3 = r3 - tau .* mean(r3, "omitnan") / pt;
    d5 = r5 - tau .* mean(r5, "omitnan") / pt;

    % input vectors of the two moment conditions
    x1 = -4. / po .* d1 .* r2 - 4. / pc .* d3 .* r4;
    x2 = -4. / po .* d1 .* r5 - 4. / pc .* d5 .* r4;

    % expectations
    e1 = mean(x1, "omitnan");
    e2 = mean(x2, "omitnan");

    % variances
    v1 = mean(x1.^2, "omitnan") - e1^2;
    v2 = mean(x2.^2, "omitnan") - e2^2;

    % variance-weighted average when the total variance is positive,
    % plain average otherwise
    vt = v1 + v2;
    if vt > 0
        s2 = (v2*e1 + v1*e2) / vt;
    else
        s2 = (e1 + e2) / 2;
    end

    % (signed) root of the squared spread
    s = sqrt(abs(s2));
    if sign && s2 < 0
        s = -s;
    end

end
for(unsigned int i=0; i \ 20 | -set out <...> \ 21 | -set by <...> \ 22 | -set open <...> \ 23 | -set high <...> \ 24 | -set low <...> \ 25 | -set close <...> \ 26 | -set sign <...> 27 | ``` 28 | 29 | | field | description | 30 | | ------- | ------------------------------------------------------------ | 31 | | `in` | The path to a SAS dataset containing open, high, low, and close prices for multiple groups. | 32 | | `out` | The name of the file to output spread estimates. See [here](https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/acpcref/p1d0tocg3njhmfn1d4ld2covlwm0.htm) for supported file extensions. | 33 | | `group` | Comma separated list of column(s) to group by; e.g., `symbol` or `date,symbol`. | 34 | | `open` | The name of the column containing open prices. | 35 | | `high` | The name of the column containing high prices. | 36 | | `low` | The name of the column containing low prices. | 37 | | `close` | The name of the column containing close prices. | 38 | | `sign` | Boolean value (0/1) indicating whether to return signed estimates. | 39 | 40 | The input prices must be sorted in ascending order of the timestamp within each group. 41 | 42 | The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%. 43 | 44 | ## Example 45 | 46 | The file [`ohlc.sas7bdat`](ohlc.sas7bdat) contains simulated open, high, low, and close prices as described [here](https://github.com/eguidotti/bidask/tree/main/pseudocode) for two symbols. Download the file into your working directory. 
For instance: 47 | 48 | ```bash 49 | wget https://github.com/eguidotti/bidask/raw/main/sas/ohlc.sas7bdat 50 | ``` 51 | 52 | Estimate the spread for each symbol: 53 | 54 | ```SAS 55 | sas edge.sas \ 56 | -set in ohlc.sas7bdat \ 57 | -set out edge.csv \ 58 | -set by Symbol \ 59 | -set open Open \ 60 | -set high High \ 61 | -set low Low \ 62 | -set close Close \ 63 | -set sign 0 64 | ``` 65 | 66 | The output file `edge.csv` contains the following estimates: 67 | 68 | | Symbol | EDGE | 69 | | ------ | ------------ | 70 | | A | 0.0101849035 | 71 | | B | 0.0101849035 | 72 | 73 | ## Cite as 74 | 75 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 76 | 77 | A BibTex entry for LaTeX users is: 78 | 79 | ```bibtex 80 | @article{edge, 81 | title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices}, 82 | journal = {Journal of Financial Economics}, 83 | volume = {161}, 84 | pages = {103916}, 85 | year = {2024}, 86 | doi = {https://doi.org/10.1016/j.jfineco.2024.103916}, 87 | author = {David Ardia and Emanuele Guidotti and Tim A. 
Kroencke}, 88 | } 89 | ``` 90 | 91 | -------------------------------------------------------------------------------- /python/tests/test_edge.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import pandas as pd 4 | from bidask import edge, edge_rolling, edge_expanding 5 | 6 | 7 | df = pd.read_csv( 8 | "https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv" 9 | ) 10 | 11 | df_miss = pd.read_csv( 12 | "https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc-miss.csv" 13 | ) 14 | 15 | 16 | def test_edge(): 17 | """ 18 | Compares the `edge` function to the known test case 19 | """ 20 | estimate = edge(df.Open, df.High, df.Low, df.Close) 21 | assert estimate == pytest.approx(0.0101849034905478) 22 | 23 | estimate = edge(df.Open[0:10], df.High[0:10], df.Low[0:10], df.Close[0:10], True) 24 | assert estimate == pytest.approx(-0.016889917516422) 25 | 26 | estimate = edge(df_miss.Open, df_miss.High, df_miss.Low, df_miss.Close) 27 | assert estimate == pytest.approx(0.01013284969780197) 28 | 29 | assert np.isnan(edge( 30 | [18.21, 17.61, 17.61], 31 | [18.21, 17.61, 17.61], 32 | [17.61, 17.61, 17.61], 33 | [17.61, 17.61, 17.61] 34 | )) 35 | 36 | 37 | @pytest.mark.parametrize("window", [1, 2, 3, 4, 42, 1000]) 38 | @pytest.mark.parametrize("sign", [True, False]) 39 | @pytest.mark.parametrize("step", [1, 2, 5, 10]) 40 | def test_edge_rolling(window: int, step: int, sign: bool): 41 | """ 42 | Compares the rolling vectorized implementation to the original function. 43 | 44 | Parameters 45 | ---------- 46 | - `window` : int 47 | The rolling window size. 48 | - `step`: int 49 | Evaluate the window at every step result. 50 | - `sign`: bool 51 | Whether to use signed estimates. 
52 | """ 53 | rolling_estimates = edge_rolling(df=df, window=window, step=step, sign=sign) 54 | assert isinstance(rolling_estimates, pd.Series) 55 | 56 | expected_estimates = [] 57 | for t in range(0, len(df), step): 58 | t1 = t + 1 59 | t0 = t1 - window 60 | expected_estimates.append(edge( 61 | df.Open.values[t0:t1], 62 | df.High.values[t0:t1], 63 | df.Low.values[t0:t1], 64 | df.Close.values[t0:t1], 65 | sign=sign 66 | ) if t0 >= 0 else np.nan) 67 | 68 | np.testing.assert_allclose( 69 | actual = rolling_estimates, 70 | desired = expected_estimates, 71 | rtol=1e-8, 72 | atol=1e-8, 73 | err_msg='Rolling estimates do not match expected estimates' 74 | ) 75 | 76 | 77 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 42, 1000]) 78 | @pytest.mark.parametrize("sign", [True, False]) 79 | def test_edge_expanding(min_periods: int, sign: bool): 80 | """ 81 | Compares the expanding vectorized implementation to the original function. 82 | 83 | Parameters 84 | ---------- 85 | - `min_periods` : int 86 | Minimum number of observations in window required to have a value; otherwise, result is np.nan. 87 | - `sign`: bool 88 | Whether to use signed estimates. 
import warnings
import numpy as np


def edge(open: np.array, high: np.array, low: np.array, close: np.array, sign: bool = False) -> float:
    """
    Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices

    Implements the efficient estimator of bid-ask spreads from open, high, low,
    and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
    https://doi.org/10.1016/j.jfineco.2024.103916

    Parameters
    ----------
    - `open`: array-like
        Vector of open prices sorted in ascending order of the timestamp.
    - `high`: array-like
        Vector of high prices sorted in ascending order of the timestamp.
    - `low`: array-like
        Vector of low prices sorted in ascending order of the timestamp.
    - `close`: array-like
        Vector of close prices sorted in ascending order of the timestamp.
    - `sign`:
        Whether to return signed estimates.

    Missing prices may be given as NaN or `None`; both are ignored.

    Returns
    -------
    float
        The spread estimate. A value of 0.01 corresponds to a spread of 1%.
        NaN is returned if there are fewer than 3 observations, fewer than
        two periods with tau=1, or if po or pc is zero.

    Raises
    ------
    ValueError
        If the open, high, low, and close prices do not have the same length.
    """
    # check that the open, high, low, and close prices have the same length
    nobs = len(open)
    if len(high) != nobs or len(low) != nobs or len(close) != nobs:
        raise ValueError("Open, high, low, and close prices must have the same length")

    # return missing if there are fewer than 3 observations
    if nobs < 3:
        return np.nan

    # compute log-prices; coercing to float turns None into NaN and keeps
    # np.log / np.isnan from failing on object arrays
    o = np.log(np.asarray(open, dtype=float))
    h = np.log(np.asarray(high, dtype=float))
    l = np.log(np.asarray(low, dtype=float))
    c = np.log(np.asarray(close, dtype=float))
    m = (h + l) / 2.

    # shift log-prices by one period
    h1, l1, c1, m1 = h[:-1], l[:-1], c[:-1], m[:-1]
    o, h, l, c, m = o[1:], h[1:], l[1:], c[1:], m[1:]

    # compute log-returns
    r1 = m - o
    r2 = o - m1
    r3 = m - c1
    r4 = c1 - m1
    r5 = o - c1

    # compute indicator variables (NaN where one of the inputs is NaN)
    tau = np.where(np.isnan(h) | np.isnan(l) | np.isnan(c1), np.nan, (h != l) | (l != c1))
    po1 = tau * np.where(np.isnan(o) | np.isnan(h), np.nan, o != h)
    po2 = tau * np.where(np.isnan(o) | np.isnan(l), np.nan, o != l)
    pc1 = tau * np.where(np.isnan(c1) | np.isnan(h1), np.nan, c1 != h1)
    pc2 = tau * np.where(np.isnan(c1) | np.isnan(l1), np.nan, c1 != l1)

    # ignore warnings raised by nanmean for all-NaN slices
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)

        # compute probabilities
        pt = np.nanmean(tau)
        po = np.nanmean(po1) + np.nanmean(po2)
        pc = np.nanmean(pc1) + np.nanmean(pc2)

        # return missing if there are fewer than two periods with tau=1
        # or po or pc is zero
        if np.nansum(tau) < 2 or po == 0 or pc == 0:
            return np.nan

        # compute de-meaned log-returns
        d1 = r1 - np.nanmean(r1)/pt*tau
        d3 = r3 - np.nanmean(r3)/pt*tau
        d5 = r5 - np.nanmean(r5)/pt*tau

        # compute input vectors
        x1 = -4./po*d1*r2 + -4./pc*d3*r4
        x2 = -4./po*d1*r5 + -4./pc*d5*r4

        # compute expectations
        e1 = np.nanmean(x1)
        e2 = np.nanmean(x2)

        # compute variances
        v1 = np.nanmean(x1**2) - e1**2
        v2 = np.nanmean(x2**2) - e2**2

    # compute square spread by using a (equally) weighted
    # average if the total variance is (not) positive
    vt = v1 + v2
    s2 = (v2*e1 + v1*e2) / vt if vt > 0 else (e1 + e2) / 2.

    # compute signed root
    s = np.sqrt(np.abs(s2))
    if sign:
        s *= np.sign(s2)

    # return the spread
    return float(s)
23 | } 24 | \description{ 25 | This function implements several methods to estimate bid-ask spreads 26 | from open, high, low, and close prices and it is optimized for fast 27 | calculations over rolling and expanding windows. 28 | } 29 | \details{ 30 | The method \code{EDGE} implements the Efficient Discrete Generalized Estimator described in Ardia, Guidotti, & Kroencke (JFE, 2024). 31 | 32 | The methods \code{OHL}, \code{OHLC}, \code{CHL}, \code{CHLO} implement the generalized estimators described in Ardia, Guidotti, & Kroencke (JFE, 2024). 33 | They can be combined by concatenating their identifiers, e.g., \code{OHLC.CHLO} uses an average of the \code{OHLC} and \code{CHLO} estimators. 34 | 35 | The method \code{AR} implements the estimator described in Abdi & Ranaldo (RFS, 2017). \code{AR2} implements their 2-period version. 36 | 37 | The method \code{CS} implements the estimator described in Corwin & Schultz (JF, 2012). \code{CS2} implements their 2-period version. Both versions are adjusted for overnight (close-to-open) returns as described in the paper. 38 | 39 | The method \code{ROLL} implements the estimator described in Roll (JF, 1984). 
40 | } 41 | \examples{ 42 | # reduce number of threads to pass CRAN checks (you can ignore this) 43 | data.table::setDTthreads(1) 44 | 45 | # simulate open, high, low, and close prices with spread 1\% 46 | x <- sim(n = 1000, spread = 0.01) 47 | 48 | # estimate the spread 49 | spread(x) 50 | # equivalent to 51 | edge(x$Open, x$High, x$Low, x$Close) 52 | 53 | # estimate the spread using a rolling window of 21 periods 54 | s <- spread(x, width = 21) 55 | tail(s) 56 | # equivalent to 57 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21) 58 | tail(s) 59 | 60 | # estimate the spread using an expanding window 61 | s <- spread(x, width = 1:nrow(x)) 62 | tail(s) 63 | # equivalent to 64 | s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE) 65 | tail(s) 66 | 67 | # estimate the spread using custom endpoints 68 | ep <- c(3, 35, 100) 69 | spread(x, width = ep) 70 | # equivalent to 71 | edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35]) 72 | edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100]) 73 | 74 | # use multiple estimators 75 | spread(x, method = c("EDGE", "AR", "CS", "ROLL", "OHLC", "OHL.CHL")) 76 | 77 | } 78 | \references{ 79 | Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 80 | \doi{10.1016/j.jfineco.2024.103916} 81 | 82 | Abdi, F., & Ranaldo, A. (2017). A simple estimation of bid-ask spreads from daily close, high, and low prices. Review of Financial Studies, 30 (12), 4437-4480. 83 | \doi{10.1093/rfs/hhx084} 84 | 85 | Corwin, S. A., & Schultz, P. (2012). A simple way to estimate bid-ask spreads from daily high and low prices. Journal of Finance, 67 (2), 719-760. 86 | \doi{10.1111/j.1540-6261.2012.01729.x} 87 | 88 | Roll, R. (1984). A simple implicit measure of the effective bid-ask spread in an efficient market. Journal of Finance, 39 (4), 1127-1139. 
89 | \doi{10.1111/j.1540-6261.1984.tb03897.x} 90 | } 91 | -------------------------------------------------------------------------------- /r/R/sim.R: --------------------------------------------------------------------------------
#' Simulation of Open, High, Low, and Close Prices
#'
#' This function performs simulations consisting of \code{n} periods and where each period consists of a given number of \code{trades}.
#' For each trade, the actual price \eqn{P_t} is simulated as \eqn{P_t = P_{t-1}e^{\sigma x}}, where \eqn{\sigma} is the standard deviation per trade and \eqn{x} is a random draw from a unit normal distribution.
#' The standard deviation per trade equals the \code{volatility} divided by the square root of the number of \code{trades}.
#' Trades are assumed to be observed with a given probability \code{prob}.
#' The bid (ask) for each trade is defined as \eqn{P_t} multiplied by one minus (plus) half the \code{spread} and we assume a 50\% chance that a bid (ask) is observed.
#' High and low prices equal the highest and lowest prices observed during the period.
#' Open and Close prices equal the first and the last price observed in the period.
#' If no trade is observed for a period, then the previous Close is used as the Open, High, Low, and Close prices for that period.
#'
#' @param n the number of periods to simulate.
#' @param trades the number of trades per period.
#' @param prob the probability to observe a trade.
#' @param spread the bid-ask spread.
#' @param volatility the open-to-close volatility.
#' @param overnight the close-to-open volatility.
#' @param drift the expected return per period.
#' @param units the units of the time period. One of: \code{1}, \code{sec}, \code{min}, \code{hour}, \code{day}, \code{week}, \code{month}, \code{year}. The alias \code{minute} is accepted for \code{min}.
#' @param sign whether to return positive prices for buys and negative prices for sells. If \code{spread} is zero, the side of each trade is still drawn at random, so that signed prices are never zero.
#'
#' @return A data.frame of open, high, low, and close prices if \code{units=1} (default).
#' Otherwise, an \code{xts} object is returned (requires the \code{xts} package to be installed).
#'
#' @references
#' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916.
#' \doi{10.1016/j.jfineco.2024.103916}
#'
#' @examples
#' # reduce number of threads to pass CRAN checks (you can ignore this)
#' data.table::setDTthreads(1)
#'
#' # simulate 10 open, high, low, and close prices with spread 1%
#' sim(n = 10, spread = 0.01)
#'
#' @export
#'
sim <- function(
    n = 10000,
    trades = 390,
    prob = 1,
    spread = 0.01,
    volatility = 0.03,
    overnight = 0,
    drift = 0,
    units = 1,
    sign = FALSE){

  # sanitize units
  if(units == "minute") units <- "min"

  # check units
  valid <- c(1, "sec", "min", "hour", "day", "week", "month", "year")
  if(!(units %in% valid))
    stop(sprintf("units must be one of '%s'", paste(valid, collapse = "','")))

  # total number of observations
  m <- n*trades

  # trade-to-trade returns
  r <- rnorm(m, mean = drift/trades, sd = volatility/sqrt(trades))

  # add the close-to-open return to the first trade of each period
  idx <- 0:(n-1) * trades + 1
  r[idx] <- r[idx] + rnorm(n, sd = overnight)

  # side of each trade: +0.5 for the ask (buy), -0.5 for the bid (sell)
  side <- rbinom(m, size = 1, prob = 0.5) - 0.5

  # compute prices
  z <- spread * side
  p <- exp(cumsum(r)) * (1 + z)

  # signed prices: the sign follows the price offset, as before; when the
  # offset is zero (spread = 0) fall back to the drawn side, otherwise
  # base::sign(0) = 0 would wipe out every price
  if(sign){
    direction <- base::sign(z)
    flat <- which(direction == 0)
    direction[flat] <- base::sign(side[flat])
    p <- p * direction
  }

  # subset observations
  keep <- as.logical(rbinom(m, size = 1, prob = prob))

  # convert to OHLC
  ohlc <- matrix(nrow = n, ncol = 4)
  prev <- p[1]
  for(i in seq_len(n)){
    # indices of the i-th period
    idx <- (i-1)*trades + 1:trades
    # observed prices
    obs <- p[idx][keep[idx]]
    # if empty keep previous close
    if(!length(obs)) obs <- prev
    # index of last observation
    last <- length(obs)
    # unsigned prices (high/low are selected on the absolute price level)
    uobs <- abs(obs)
    # fill matrix
    ohlc[i,] <- obs[c(1, which.max(uobs), which.min(uobs), last)]
    # store previous close
    prev <- obs[last]
  }

  if(units == 1){
    ohlc <- as.data.frame(ohlc)
  }
  else {
    # intraday units need timestamps, coarser units need dates
    now <- Sys.time()
    if(!(units %in% c("sec", "min", "hour")))
      now <- as.Date(now)
    time <- seq(now, length.out = n, by = units)
    ohlc <- xts::xts(ohlc, order.by = time)
  }

  colnames(ohlc) <- c("Open", "High", "Low", "Close")
  return(ohlc)

}
-------------------------------------------------------------------------------- /pseudocode/README.md: -------------------------------------------------------------------------------- 1 | # Pseudocode 2 | 3 | This file provides the pseudocode to simplify implementations of the estimator in any programming language. 4 | 5 | ### Input 6 | 7 | Vectors of `open`, `high`, `low`, and `close` prices. The vectors must be sorted in ascending order of the timestamp. The function should also accept the argument `sign` specifying whether to return signed estimates. 8 | 9 | ### Output 10 | 11 | Numeric spread estimate. A value of 0.01 corresponds to a spread of 1%. 12 | 13 | ### Algorithm 14 | 15 | ```python 16 | # check that the open, high, low, and close prices have the same length 17 | nobs = len(open) 18 | if len(high) != nobs or len(low) != nobs or len(close) != nobs: 19 | raise error 20 | 21 | # return missing if there are less than 3 observations 22 | if nobs < 3: 23 | return missing 24 | 25 | # compute log-prices 26 | o = log(open) 27 | h = log(high) 28 | l = log(low) 29 | c = log(close) 30 | m = (h + l) / 2.
31 | 32 | # shift log-prices by one period 33 | h1 = lag(h) 34 | l1 = lag(l) 35 | c1 = lag(c) 36 | m1 = lag(m) 37 | 38 | # compute log-returns 39 | r1 = m - o 40 | r2 = o - m1 41 | r3 = m - c1 42 | r4 = c1 - m1 43 | r5 = o - c1 44 | 45 | # compute indicator variables 46 | tau = (h != l or l != c1) if h, l, c1 are non-missing else missing 47 | po1 = (tau and o != h) if tau, o, h are non-missing else missing 48 | po2 = (tau and o != l) if tau, o, l are non-missing else missing 49 | pc1 = (tau and c1 != h1) if tau, c1, h1 are non-missing else missing 50 | pc2 = (tau and c1 != l1) if tau, c1, l1 are non-missing else missing 51 | 52 | # compute probabilities 53 | pt = mean(tau) 54 | po = mean(po1) + mean(po2) 55 | pc = mean(pc1) + mean(pc2) 56 | 57 | # return missing if there are less than two periods with tau=1 58 | # or po or pc is zero 59 | if sum(tau) < 2 or po == 0 or pc == 0: 60 | return missing 61 | 62 | # compute de-meaned log-returns 63 | d1 = r1 - mean(r1)/pt*tau 64 | d3 = r3 - mean(r3)/pt*tau 65 | d5 = r5 - mean(r5)/pt*tau 66 | 67 | # compute input vectors 68 | x1 = -4./po*d1*r2 + -4./pc*d3*r4 69 | x2 = -4./po*d1*r5 + -4./pc*d5*r4 70 | 71 | # compute expectations 72 | e1 = mean(x1) 73 | e2 = mean(x2) 74 | 75 | # compute variances 76 | v1 = mean(x1*x1) - e1*e1 77 | v2 = mean(x2*x2) - e2*e2 78 | 79 | # compute square spread by using a (equally) weighted 80 | # average if the total variance is (not) positive 81 | vt = v1 + v2 82 | s2 = (v2*e1 + v1*e2) / vt if vt > 0 else (e1 + e2) / 2. 83 | 84 | # compute signed root 85 | s = sqrt(abs(s2)) 86 | if sign and s2 < 0: 87 | s = -s 88 | 89 | # return the spread 90 | return s 91 | ``` 92 | 93 | ### Testing 94 | 95 | To test your implementation, import the data available [here](https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv). The file contains sample OHLC simulated price data to simplify testing. 
The data have been generated by simulating a price process as described in [Ardia, Guidotti, & Kroencke (2024)](https://doi.org/10.1016/j.jfineco.2024.103916) with 390 trades per day and a 1% probability to observe a trade. The simulation uses a constant spread of 1%. By running the estimation, you should obtain a spread estimate of **0.0101849034905478**. If you obtain a different result, you may use the following table to check and debug the intermediate steps. 96 | 97 | | variable | value | 98 | | -------- | ---------------------- | 99 | | `pt` | 0.9820982098209821 | 100 | | `po` | 1.227922792279228 | 101 | | `pc` | 1.2052205220522052 | 102 | | `e1` | 0.00010702425689560482 | 103 | | `e2` | 0.000101595812797079 | 104 | | `v1` | 2.074215642985551e-06 | 105 | | `v2` | 1.3461279919743572e-06 | 106 | | `s2` | 0.00010373225911177194 | 107 | 108 | To check that your implementation correctly handles missing values, import the data available [here](https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc-miss.csv). The data have been generated by setting to missing a random subset of the previous data file. By running the estimation, you should obtain a spread estimate of **0.01013284969780197**. If you obtain a different result, you may use the following table to check and debug the intermediate steps. 109 | 110 | | variable | value | 111 | | -------- | ---------------------- | 112 | | `pt` | 0.9822078447230085 | 113 | | `po` | 1.2272254421162134 | 114 | | `pc` | 1.205827632480371 | 115 | | `e1` | 0.00010337780767834583 | 116 | | `e2` | 0.00010219271972776808 | 117 | | `v1` | 2.0045420261850617e-06 | 118 | | `v2` | 1.373839551967266e-06 | 119 | | `s2` | 0.00010267464299824543 | 120 | 121 | ### Contribute 122 | 123 | Have you implemented the estimator in a new programming language? 
If you want your implementation to be included in this repository, please open a [pull request](https://github.com/eguidotti/bidask/pulls) -------------------------------------------------------------------------------- /sas/edge.sas: -------------------------------------------------------------------------------- 1 | /* 2 | Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices 3 | 4 | Implements the efficient estimator of bid-ask spreads from open, high, low, 5 | and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): 6 | https://doi.org/10.1016/j.jfineco.2024.103916 7 | 8 | Parameters 9 | ---------- 10 | - `in`: the path to a SAS dataset containing open, high, low, and close prices for multiple groups 11 | - `out`: the name of the file to output spread estimates 12 | - `group`: comma separated list of column(s) to group by; e.g., `symbol` or `date,symbol` 13 | - `open`: the name of the column containing open prices 14 | - `high`: the name of the column containing high prices 15 | - `low`: the name of the column containing low prices 16 | - `close`: the name of the column containing close prices 17 | - `sign`: boolean value (0/1) indicating whether to return signed estimates 18 | 19 | Notes 20 | ----- 21 | Prices must be sorted in ascending order of the timestamp within each group. 22 | 23 | Returns 24 | ------- 25 | The spread estimate. A value of 0.01 corresponds to a spread of 1%. 
26 | 27 | */ 28 | 29 | %let in = %sysget(in); 30 | %let out = %sysget(out); 31 | 32 | %let by_csv = %sysget(by); 33 | %let by_lst = %sysfunc(tranwrd(%quote(&by_csv), %str(,), %str( ))); 34 | %let by_grp = %scan(%quote(&by_csv), -1, %str(,)); 35 | 36 | %let open = %sysget(open); 37 | %let high = %sysget(high); 38 | %let low = %sysget(low); 39 | %let close = %sysget(close); 40 | %let sign = %sysget(sign); 41 | 42 | 43 | data prices; 44 | 45 | set "&in"; 46 | by &by_lst; 47 | 48 | o = log(&open); 49 | h = log(&high); 50 | l = log(&low); 51 | c = log(&close); 52 | m = (h + l) / 2; 53 | 54 | h1 = lag1(h); 55 | l1 = lag1(l); 56 | m1 = lag1(m); 57 | c1 = lag1(c); 58 | 59 | r1 = m-o; 60 | r2 = o-m1; 61 | r3 = m-c1; 62 | r4 = c1-m1; 63 | r5 = o-c1; 64 | 65 | if cmiss(h, l, c1) eq 0 then tau = (h ne l) | (l ne c1); else tau = .; 66 | if cmiss(o, h, tau) eq 0 then phi1 = (o ne h) & tau; else phi1 = .; 67 | if cmiss(o, l, tau) eq 0 then phi2 = (o ne l) & tau; else phi2 = .; 68 | if cmiss(c1, h1, tau) eq 0 then phi3 = (c1 ne h1) & tau; else phi3 = .; 69 | if cmiss(c1, l1, tau) eq 0 then phi4 = (c1 ne l1) & tau; else phi4 = .; 70 | 71 | if first.&by_grp = 0; 72 | 73 | run; 74 | 75 | 76 | proc sql; 77 | 78 | CREATE TABLE agg AS 79 | 80 | SELECT 81 | &by_csv, 82 | AVG(r1*r2) AS m1, 83 | AVG(r3*r4) AS m2, 84 | AVG(r1*r5) AS m3, 85 | AVG(r5*r4) AS m4, 86 | AVG(tau) AS m5, 87 | AVG(r1) AS m6, 88 | AVG(tau*r2) AS m7, 89 | AVG(r3) AS m8, 90 | AVG(tau*r4) AS m9, 91 | AVG(r5) AS m10, 92 | AVG(r1**2*r2**2) AS m11, 93 | AVG(r3**2*r4**2) AS m12, 94 | AVG(r1**2*r5**2) AS m13, 95 | AVG(r4**2*r5**2) AS m14, 96 | AVG(r1*r2*r3*r4) AS m15, 97 | AVG(r1*r4*r5**2) AS m16, 98 | AVG(tau*r2**2) AS m17, 99 | AVG(tau*r4**2) AS m18, 100 | AVG(tau*r5**2) AS m19, 101 | AVG(tau*r1*r2**2) AS m20, 102 | AVG(tau*r3*r4**2) AS m21, 103 | AVG(tau*r1*r5**2) AS m22, 104 | AVG(tau*r5*r4**2) AS m23, 105 | AVG(tau*r1*r2*r4) AS m24, 106 | AVG(tau*r2*r3*r4) AS m25, 107 | AVG(tau*r2*r4) AS m26, 108 | AVG(tau*r1*r4*r5) AS 
m27, 109 | AVG(tau*r4*r5**2) AS m28, 110 | AVG(tau*r4*r5) AS m29, 111 | AVG(tau*r5) AS m30, 112 | AVG(phi1) AS m31, 113 | AVG(phi2) AS m32, 114 | AVG(phi3) AS m33, 115 | AVG(phi4) AS m34, 116 | SUM(tau) AS m35 117 | 118 | FROM 119 | prices 120 | 121 | GROUP BY 122 | &by_csv; 123 | 124 | quit; 125 | 126 | 127 | data edge; 128 | 129 | set agg; 130 | 131 | po = -8 / (m31 + m32); 132 | pc = -8 / (m33 + m34); 133 | 134 | if (m35 lt 2) | (po eq 0) | (pc eq 0) then do; 135 | s = .; 136 | end; 137 | 138 | else do; 139 | 140 | e1 = po/2 * (m1 - m6*m7/m5) + 141 | pc/2 * (m2 - m8*m9/m5); 142 | 143 | e2 = po/2 * (m3 - m6*m30/m5) + 144 | pc/2 * (m4 - m10*m9/m5); 145 | 146 | v1 = po**2/4 * (m11 + m6**2*m17/m5**2 - 2*m20*m6/m5) + 147 | pc**2/4 * (m12 + m8**2*m18/m5**2 - 2*m21*m8/m5) + 148 | po*pc/2 * (m15 - m24*m8/m5 - m6*m25/m5 + m6*m8*m26/m5**2) - 149 | e1**2; 150 | 151 | v2 = po**2/4 * (m13 + m6**2*m19/m5**2 - 2*m22*m6/m5) + 152 | pc**2/4 * (m14 + m10**2*m18/m5**2 - 2*m23*m10/m5) + 153 | po*pc/2 * (m16 - m27*m10/m5 - m6*m28/m5 + m6*m10*m29/m5**2) - 154 | e2**2; 155 | 156 | vt = v1 + v2; 157 | if vt gt 0 then s2 = (v2*e1 + v1*e2) / vt; else s2 = (e1 + e2) / 2; 158 | 159 | s = SQRT(ABS(s2)); 160 | if &sign & (s2 < 0) then s = -s; 161 | 162 | end; 163 | 164 | keep &by_lst s; 165 | rename s=EDGE; 166 | 167 | run; 168 | 169 | 170 | proc export data=edge 171 | outfile="&out" 172 | replace; 173 | run; 174 | -------------------------------------------------------------------------------- /r/tests/testthat/test-edge.R: --------------------------------------------------------------------------------
# Reference value published with the pseudocode test data
test_that("edge", {

  x <- read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")
  s <- edge(x$Open, x$High, x$Low, x$Close)

  expect_equal(s, 0.0101849034905478)

})

# Reference value for the test data with missing values
test_that("edge-miss", {

  x <- read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc-miss.csv")
  s <- edge(x$Open, x$High, x$Low, x$Close)

  expect_equal(s, 0.01013284969780197)

})

# A three-period sample whose last two periods are flat must yield NA
test_that("edge-na", {

  expect_true(is.na(edge(
    c(18.21, 17.61, 17.61),
    c(18.21, 17.61, 17.61),
    c(17.61, 17.61, 17.61),
    c(17.61, 17.61, 17.61)
  )))

})

# spread() with the EDGE method must agree with edge() on the full sample
test_that("edge-spread", {

  set.seed(123)
  x <- sim(prob = 0.01, units = "day")

  s1 <- as.numeric(spread(x, method = "EDGE"))
  s2 <- edge(x$Open, x$High, x$Low, x$Close)

  expect_equal(s1, s2)

})

# spread() with month endpoints must agree with edge() applied per month
test_that("edge-spread-monthly", {

  set.seed(123)
  x <- sim(prob = 0.01, units = "day")

  zoo::index(x) <- zoo::index(x) - as.integer(start(x))
  width <- xts::endpoints(x, on = "months")

  s1 <- as.numeric(spread(x, width = width, method = "EDGE"))
  # vapply keeps the result a numeric vector whatever edge() returns
  s2 <- vapply(2:length(width), function(i){
    m <- x[width[i-1]:width[i]]
    edge(m$Open, m$High, m$Low, m$Close)
  }, numeric(1))

  expect_equal(s1, s2)

})

# spread() with a fixed width must agree with a rolling application of edge()
test_that("edge-spread-rolling", {

  set.seed(123)
  x <- sim(prob = 0.01, units = "day")

  for(width in c(1, 2, 3, 4, 21, 100)){

    s1 <- spread(x, width = width, method = "EDGE")
    s2 <- zoo::rollapplyr(x, width = width, by.column = FALSE, FUN = function(x){
      edge(x$Open, x$High, x$Low, x$Close)
    })[-(1:max(1, width-1))]

    expect_equal(as.numeric(s1), as.numeric(s2), label=paste("width = ", width))

  }

})

# Same comparison as above, with signed estimates
test_that("edge-spread-sign", {

  set.seed(123)
  x <- sim(prob = 0.01, units = "day")

  width <- 21
  s1 <- spread(x, width = width, method = "EDGE", sign = TRUE)
  # `:` binds tighter than binary minus, so the index range is parenthesized
  # explicitly (same form as in "edge-spread-rolling")
  s2 <- zoo::rollapplyr(x, width = width, by.column = FALSE, FUN = function(x){
    edge(x$Open, x$High, x$Low, x$Close, sign = TRUE)
  })[-(1:max(1, width-1))]

  expect_equal(as.numeric(s1), as.numeric(s2))

})

# edge_rolling() must return one value per input row and agree with spread()
test_that("edge-rolling", {

  set.seed(123)
  for(units in c(1, "day")) for(sign in c(TRUE, FALSE)) for(width in c(2, 3, 21)){

    x <- sim(prob = 0.01, units = units)

    s1 <- spread(x, width = width, method = "EDGE", sign = sign)
    s2 <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = width, sign = sign)

    # positions of the rows of s1 within the input
    if(is.data.frame(x))
      idx <- as.integer(rownames(s1))
    else
      idx <- which(zoo::index(x) %in% zoo::index(s1))

    expect_equal(length(s2), nrow(x))
    expect_equal(as.numeric(s1[,1]), s2[idx])

  }

})

# A full-sample window and the equivalent pair of endpoints must agree
test_that("edge-rolling-na", {

  set.seed(123)
  x <- sim(n = 100)

  s1 <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = nrow(x), na.rm = TRUE)
  expect_equal(sum(!is.na(s1)), 1)

  s2 <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = c(1, nrow(x)), na.rm = TRUE)
  expect_equal(s1[!is.na(s1)], s2[!is.na(s2)])

})

# edge_expanding() must agree with spread() using widths 1, 2, ..., nrow(x)
test_that("edge-expanding", {

  set.seed(123)
  for(units in c(1, "day")) for(sign in c(TRUE, FALSE)) {

    x <- sim(prob = 0.01, units = units)

    s1 <- spread(x, width = 1:nrow(x), method = "EDGE", sign = sign)
    s2 <- edge_expanding(x$Open, x$High, x$Low, x$Close, sign = sign)

    if(is.data.frame(x))
      idx <- as.integer(rownames(s1))
    else
      idx <- which(zoo::index(x) %in% zoo::index(s1))

    expect_equal(length(s2), nrow(x))
    expect_equal(as.numeric(s1[,1]), s2[idx])

  }

})

# Regression values for every estimator, each fed only the columns it is given
test_that("spread", {

  set.seed(123)
  x <- sim(prob = 0.01)

  s <- spread(x[, c("Open", "High", "Low", "Close")], method = "EDGE")
  expect_equal(as.numeric(s), 0.011211623772355)

  s <- spread(x[, c("Open", "High", "Low", "Close")], method = "OHLC")
  expect_equal(as.numeric(s), 0.0111885179011119)

  s <- spread(x[, c("Open", "High", "Low", "Close")], method = "CHLO")
  expect_equal(as.numeric(s), 0.0109352942009762)

  s <- spread(x[, c("Open", "High", "Low")], method = "OHL", na.rm = TRUE)
  expect_equal(as.numeric(s), 0.0109503006263557)

  s <- spread(x[, c("High", "Low", "Close")], method = "CHL")
  expect_equal(as.numeric(s), 0.0113136390567206)

  s <- spread(x[, c("High", "Low", "Close")], method = "AR")
  expect_equal(as.numeric(s), 0.00874585212811397)

  s <- spread(x[, c("High", "Low", "Close")], method = "CS")
  expect_equal(as.numeric(s), 0.00273953769016127)

  s <- spread(x[, "Close", drop = FALSE], method = "ROLL")
  expect_equal(as.numeric(s), 0.0125430188215437)

})
-------------------------------------------------------------------------------- /r/README.md: -------------------------------------------------------------------------------- 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices 2 | 3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 4 | 5 | ## Installation 6 | 7 | ```R 8 | install.packages("bidask") 9 | ``` 10 | 11 | ## Usage 12 | 13 | This package implements the following functions. The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. The functions `edge_rolling` and `edge_expanding` are optimized for fast calculations over rolling and expanding windows, respectively. The function `spread` provides additional functionalities for `xts` objects and implements additional estimators. The function `sim` simulates a time series of open, high, low, and close prices. The main functions are presented below.
The full [documentation](https://CRAN.R-project.org/package=bidask/bidask.pdf) is available on [CRAN](https://cran.r-project.org/package=bidask) and a [vignette](https://cran.r-project.org/package=bidask/vignettes/bidask.html) is also available. 14 | 15 | ```R 16 | library("bidask") 17 | ``` 18 | 19 | ### Function: `edge` 20 | 21 | The input prices must be sorted in ascending order of the timestamp. The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%. 22 | 23 | ```R 24 | edge(open, high, low, close, sign=FALSE) 25 | ``` 26 | 27 | | field | description | 28 | | ------- | ----------------------------------- | 29 | | `open` | Numeric vector of open prices. | 30 | | `high` | Numeric vector of high prices. | 31 | | `low` | Numeric vector of low prices. | 32 | | `close` | Numeric vector of close prices. | 33 | | `sign` | Whether to return signed estimates. | 34 | 35 | ### Function: `edge_rolling` 36 | 37 | Implements a rolling window calculation of `edge`. The output is a vector of rolling spread estimates. A value of 0.01 corresponds to a spread of 1%. This function always returns a result of the same length as the input prices. 38 | 39 | ```R 40 | edge_rolling(open, high, low, close, width, sign=FALSE, na.rm=FALSE) 41 | ``` 42 | 43 | | field | description | 44 | | ------- | ------------------------------------------------------------ | 45 | | `open` | Numeric vector of open prices. | 46 | | `high` | Numeric vector of high prices. | 47 | | `low` | Numeric vector of low prices. | 48 | | `close` | Numeric vector of close prices. | 49 | | `width` | If an integer, the width of the rolling window. If a vector with the same length as the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. See examples. | 50 | | `sign` | Whether to return signed estimates. | 51 | | `na.rm` | Whether to ignore missing values.
| 52 | 53 | ### Function: `edge_expanding` 54 | 55 | Implements an expanding window calculation of `edge`. The output is a vector of expanding spread estimates. A value of 0.01 corresponds to a spread of 1%. This function always returns a result of the same length as the input prices. 56 | 57 | ```R 58 | edge_expanding(open, high, low, close, sign=FALSE, na.rm=TRUE) 59 | ``` 60 | 61 | | field | description | 62 | | ------- | ----------------------------------- | 63 | | `open` | Numeric vector of open prices. | 64 | | `high` | Numeric vector of high prices. | 65 | | `low` | Numeric vector of low prices. | 66 | | `close` | Numeric vector of close prices. | 67 | | `sign` | Whether to return signed estimates. | 68 | | `na.rm` | Whether to ignore missing values. | 69 | 70 | ## Examples 71 | 72 | Load the test data. 73 | 74 | ```R 75 | library("bidask") 76 | x = read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv") 77 | ``` 78 | 79 | Compute the spread estimate using all the observations. 80 | 81 | ```R 82 | edge(x$Open, x$High, x$Low, x$Close) 83 | ``` 84 | 85 | Compute rolling estimates using a window of 21 observations. 86 | 87 | ```R 88 | edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21) 89 | ``` 90 | 91 | Estimate the spread using custom endpoints. 92 | 93 | ```R 94 | edge_rolling(x$Open, x$High, x$Low, x$Close, width = c(3, 35, 100)) 95 | ``` 96 | 97 | Estimate the spread using an expanding window 98 | 99 | ```R 100 | edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE) 101 | ``` 102 | 103 | ## Cite as 104 | 105 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. 
[doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 106 | 107 | A BibTex entry for LaTeX users is: 108 | 109 | ```bibtex 110 | @article{edge, 111 | title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices}, 112 | journal = {Journal of Financial Economics}, 113 | volume = {161}, 114 | pages = {103916}, 115 | year = {2024}, 116 | doi = {https://doi.org/10.1016/j.jfineco.2024.103916}, 117 | author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke}, 118 | } 119 | ``` 120 | 121 | -------------------------------------------------------------------------------- /r/R/spread.R: --------------------------------------------------------------------------------
#' Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
#'
#' This function implements several methods to estimate bid-ask spreads
#' from open, high, low, and close prices and it is optimized for fast
#' calculations over rolling and expanding windows.
#'
#' @details
#' The method \code{EDGE} implements the Efficient Discrete Generalized Estimator described in Ardia, Guidotti, & Kroencke (JFE, 2024).
#'
#' The methods \code{OHL}, \code{OHLC}, \code{CHL}, \code{CHLO} implement the generalized estimators described in Ardia, Guidotti, & Kroencke (JFE, 2024).
#' They can be combined by concatenating their identifiers, e.g., \code{OHLC.CHLO} uses an average of the \code{OHLC} and \code{CHLO} estimators.
#'
#' The method \code{AR} implements the estimator described in Abdi & Ranaldo (RFS, 2017). \code{AR2} implements their 2-period version.
#'
#' The method \code{CS} implements the estimator described in Corwin & Schultz (JF, 2012). \code{CS2} implements their 2-period version. Both versions are adjusted for overnight (close-to-open) returns as described in the paper.
#'
#' The method \code{ROLL} implements the estimator described in Roll (JF, 1984).
#'
#' @param x tabular data with columns named \code{open}, \code{high}, \code{low}, \code{close} (case-insensitive).
#' @param width if an integer, the width of the rolling window. If a vector with the same length as the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. By default, the full sample is used to compute a single spread estimate. See examples.
#' @param method the estimators to use. See details.
#' @param sign whether to return signed estimates.
#' @param na.rm whether to ignore missing values.
#'
#' @return A data.frame of spread estimates, or an \code{xts} object if \code{x} is of class \code{xts}.
#' A value of 0.01 corresponds to a spread of 1\%.
#'
#' @references
#' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916.
#' \doi{10.1016/j.jfineco.2024.103916}
#'
#' Abdi, F., & Ranaldo, A. (2017). A simple estimation of bid-ask spreads from daily close, high, and low prices. Review of Financial Studies, 30 (12), 4437-4480.
#' \doi{10.1093/rfs/hhx084}
#'
#' Corwin, S. A., & Schultz, P. (2012). A simple way to estimate bid-ask spreads from daily high and low prices. Journal of Finance, 67 (2), 719-760.
#' \doi{10.1111/j.1540-6261.2012.01729.x}
#'
#' Roll, R. (1984). A simple implicit measure of the effective bid-ask spread in an efficient market. Journal of Finance, 39 (4), 1127-1139.
#' \doi{10.1111/j.1540-6261.1984.tb03897.x}
#'
#' @examples
#' # reduce number of threads to pass CRAN checks (you can ignore this)
#' data.table::setDTthreads(1)
#'
#' # simulate open, high, low, and close prices with spread 1%
#' x <- sim(n = 1000, spread = 0.01)
#'
#' # estimate the spread
#' spread(x)
#' # equivalent to
#' edge(x$Open, x$High, x$Low, x$Close)
#'
#' # estimate the spread using a rolling window of 21 periods
#' s <- spread(x, width = 21)
#' tail(s)
#' # equivalent to
#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21)
#' tail(s)
#'
#' # estimate the spread using an expanding window
#' s <- spread(x, width = 1:nrow(x))
#' tail(s)
#' # equivalent to
#' s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE)
#' tail(s)
#'
#' # estimate the spread using custom endpoints
#' ep <- c(3, 35, 100)
#' spread(x, width = ep)
#' # equivalent to
#' edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35])
#' edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100])
#'
#' # use multiple estimators
#' spread(x, method = c("EDGE", "AR", "CS", "ROLL", "OHLC", "OHL.CHL"))
#'
#' @export
#'
spread <- function(x, width = nrow(x), method = "EDGE", sign = FALSE, na.rm = FALSE){

  s <- list()
  todo <- method <- toupper(method)

  # strip any prefix in front of Open/High/Low/Close, then lower-case the names
  colnames(x) <- tolower(gsub("^(.*\\b)(Open|High|Low|Close)$", "\\2", colnames(x)))

  # a column that is absent yields an empty vector here
  open <- as.numeric(x$open)
  high <- as.numeric(x$high)
  low <- as.numeric(x$low)
  close <- as.numeric(x$close)

  # each estimator family consumes its identifiers from `todo`
  m <- "EDGE"
  if(m %in% todo){
    s <- c(s, EDGE(open, high, low, close, width, sign, na.rm))
    todo <- setdiff(todo, m)
  }

  m <- c("AR", "AR2")
  if(any(m %in% todo)){
    m <- intersect(todo, m)
    s <- c(s, AR(high, low, close, width, m, sign, na.rm))
    todo <- setdiff(todo, m)
  }

  m <- c("CS", "CS2")
  if(any(m %in% todo)){
    m <- intersect(todo, m)
    s <- c(s, CS(high, low, close, width, m, sign, na.rm))
    todo <- setdiff(todo, m)
  }

  m <- "ROLL"
  if(m %in% todo){
    s <- c(s, ROLL(close, width, sign, na.rm))
    todo <- setdiff(todo, m)
  }

  # whatever is left is passed to the generalized OHLC estimators
  if(length(todo)){
    s <- c(s, OHLC(open, high, low, close, width, todo, sign, na.rm))
  }

  s <- as.data.frame(s, row.names = rownames(x))

  # xts and zoo are optional; `&&` short-circuits the scalar condition
  if(requireNamespace("xts", quietly = TRUE) &&
     requireNamespace("zoo", quietly = TRUE)
  ){
    if(xts::is.xts(x)){
      s <- xts::xts(s, order.by = zoo::index(x))
    }
  }

  # scalar width: drop the leading rows of the rolling output;
  # endpoints: keep only the rows at the endpoints after the first;
  # one width per observation: keep every row
  nw <- length(width)
  if(nw == 1) s <- s[-(1:pmax(1, width - 1)), , drop = FALSE]
  else if(nw != nrow(x)) s <- s[width[-1], , drop = FALSE]

  return(s[, method, drop = FALSE])

}
-------------------------------------------------------------------------------- /python/bidask/edge_rolling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def edge_rolling(df: pd.DataFrame, window: int, sign: bool = False, **kwargs) -> pd.Series: 6 | """ 7 | Rolling Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices 8 | 9 | Implements a rolling window calculation of the efficient estimator of bid-ask spreads 10 | from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): 11 | https://doi.org/10.1016/j.jfineco.2024.103916 12 | 13 | Parameters 14 | ---------- 15 | - `df` : pd.DataFrame 16 | DataFrame with columns 'open', 'high', 'low', 'close' (case-insensitive). 17 | - `window` : int, timedelta, str, offset, or BaseIndexer subclass 18 | Size of the moving window.
For more information about this parameter, see 19 | https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html 20 | - `sign` : bool, default False 21 | Whether to return signed estimates. 22 | - `kwargs` : dict, optional 23 | Additional keyword arguments to pass to the pandas rolling function. 24 | For more information about the rolling parameters, see 25 | https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html 26 | 27 | Returns 28 | ------- 29 | pd.Series 30 | A pandas Series of rolling spread estimates. A value of 0.01 corresponds to a spread of 1%. 31 | """ 32 | # compute log-prices 33 | df = df.rename(columns=str.lower, inplace=False) 34 | o = np.log(df['open']) 35 | h = np.log(df['high']) 36 | l = np.log(df['low']) 37 | c = np.log(df['close']) 38 | m = (h + l) / 2. 39 | 40 | # shift log-prices by one period 41 | h1 = h.shift(1) 42 | l1 = l.shift(1) 43 | c1 = c.shift(1) 44 | m1 = m.shift(1) 45 | 46 | # compute log-returns 47 | r1 = m - o 48 | r2 = o - m1 49 | r3 = m - c1 50 | r4 = c1 - m1 51 | r5 = o - c1 52 | 53 | # compute indicator variables 54 | tau = np.where(np.isnan(h) | np.isnan(l) | np.isnan(c1), np.nan, (h != l) | (l != c1)) 55 | po1 = tau * np.where(np.isnan(o) | np.isnan(h), np.nan, o != h) 56 | po2 = tau * np.where(np.isnan(o) | np.isnan(l), np.nan, o != l) 57 | pc1 = tau * np.where(np.isnan(c1) | np.isnan(h1), np.nan, c1 != h1) 58 | pc2 = tau * np.where(np.isnan(c1) | np.isnan(l1), np.nan, c1 != l1) 59 | 60 | # compute base products for rolling means 61 | r12 = r1 * r2 62 | r15 = r1 * r5 63 | r34 = r3 * r4 64 | r45 = r4 * r5 65 | tr1 = tau * r1 66 | tr2 = tau * r2 67 | tr4 = tau * r4 68 | tr5 = tau * r5 69 | 70 | # set up data frame for rolling means 71 | x = pd.DataFrame({ 72 | 1: r12, 73 | 2: r34, 74 | 3: r15, 75 | 4: r45, 76 | 5: tau, 77 | 6: r1, 78 | 7: tr2, 79 | 8: r3, 80 | 9: tr4, 81 | 10: r5, 82 | 11: r12 ** 2, 83 | 12: r34 ** 2, 84 | 13: r15 ** 2, 85 | 14: r45 ** 2, 86 | 15: r12 * r34, 87 | 16: r15 * 
r45, 88 | 17: tr2 * r2, 89 | 18: tr4 * r4, 90 | 19: tr5 * r5, 91 | 20: tr2 * r12, 92 | 21: tr4 * r34, 93 | 22: tr5 * r15, 94 | 23: tr4 * r45, 95 | 24: tr4 * r12, 96 | 25: tr2 * r34, 97 | 26: tr2 * r4, 98 | 27: tr1 * r45, 99 | 28: tr5 * r45, 100 | 29: tr4 * r5, 101 | 30: tr5, 102 | 31: po1, 103 | 32: po2, 104 | 33: pc1, 105 | 34: pc2 106 | }, index=df.index) 107 | 108 | # mask the first observation and decrement window and min_periods by 1 before 109 | # computing rolling means to account for lagged prices 110 | x.iloc[0] = np.nan 111 | if isinstance(window, (int, np.integer)): 112 | window = max(0, window - 1) 113 | if 'min_periods' in kwargs and isinstance(kwargs['min_periods'], (int, np.integer)): 114 | kwargs['min_periods'] = max(0, kwargs['min_periods'] - 1) 115 | 116 | # compute rolling means 117 | m = x.rolling(window=window, **kwargs).mean() 118 | 119 | # compute probabilities 120 | pt = m[5] 121 | po = m[31] + m[32] 122 | pc = m[33] + m[34] 123 | 124 | # set to missing if there are less than two periods with tau=1 125 | # or po or pc is zero 126 | nt = x[5].rolling(window=window, **kwargs).sum() 127 | m[(nt < 2) | (po == 0) | (pc == 0)] = np.nan 128 | 129 | # compute input vectors 130 | a1 = -4. / po 131 | a2 = -4. 
/ pc 132 | a3 = m[6] / pt 133 | a4 = m[9] / pt 134 | a5 = m[8] / pt 135 | a6 = m[10] / pt 136 | a12 = 2 * a1 * a2 137 | a11 = a1 ** 2 138 | a22 = a2 ** 2 139 | a33 = a3 ** 2 140 | a55 = a5 ** 2 141 | a66 = a6 ** 2 142 | 143 | # compute expectations 144 | e1 = a1 * (m[1] - a3*m[7]) + a2 * (m[2] - a4*m[8]) 145 | e2 = a1 * (m[3] - a3*m[30]) + a2 * (m[4] - a4*m[10]) 146 | 147 | # compute variances 148 | v1 = - e1**2 + ( 149 | a11 * (m[11] - 2*a3*m[20] + a33*m[17]) + 150 | a22 * (m[12] - 2*a5*m[21] + a55*m[18]) + 151 | a12 * (m[15] - a3*m[25] - a5*m[24] + a3*a5*m[26]) 152 | ) 153 | v2 = - e2**2 + ( 154 | a11 * (m[13] - 2*a3*m[22] + a33*m[19]) + 155 | a22 * (m[14] - 2*a6*m[23] + a66*m[18]) + 156 | a12 * (m[16] - a3*m[28] - a6*m[27] + a3*a6*m[29]) 157 | ) 158 | 159 | # compute square spread by using a (equally) weighted 160 | # average if the total variance is (not) positive 161 | vt = v1 + v2 162 | s2 = pd.Series.where( 163 | cond=vt > 0, 164 | self=(v2*e1 + v1*e2) / vt, 165 | other=(e1 + e2) / 2. 166 | ) 167 | 168 | # compute signed root 169 | s = np.sqrt(np.abs(s2)) 170 | if sign: 171 | s *= np.sign(s2) 172 | 173 | # return the spread 174 | return s 175 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices 2 | 3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 4 | 5 | 6 | ## Installation 7 | 8 | ```bash 9 | pip install bidask 10 | ``` 11 | 12 | ## Usage 13 | 14 | There are three functions in this package. The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. 
The functions `edge_rolling` and `edge_expanding` are optimized for fast calculations over rolling and expanding windows, respectively. 15 | 16 | ```python 17 | from bidask import edge, edge_rolling, edge_expanding 18 | ``` 19 | 20 | ### Function: `edge` 21 | 22 | The input prices must be sorted in ascending order of the timestamp. The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%. 23 | 24 | ```python 25 | edge(open, high, low, close, sign=False) 26 | ``` 27 | 28 | | field | description | 29 | | ------- | ----------------------------------- | 30 | | `open` | Array-like vector of open prices. | 31 | | `high` | Array-like vector of high prices. | 32 | | `low` | Array-like vector of low prices. | 33 | | `close` | Array-like vector of close prices. | 34 | | `sign` | Whether to return signed estimates. | 35 | 36 | ### Function: `edge_rolling` 37 | 38 | Implements a rolling window calculation of `edge`. The input is a pandas data frame. The output is a pandas series of rolling spread estimates. A value of 0.01 corresponds to a spread of 1%. 39 | 40 | ```python 41 | edge_rolling(df, window, sign=False, **kwargs) 42 | ``` 43 | 44 | | field | description | 45 | | ---------- | ------------------------------------------------------------ | 46 | | `df` | Data frame with columns 'open', 'high', 'low', 'close' (case-insensitive). | 47 | | `window` | Size of the moving window. For more information about this parameter, see [here](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html). | 48 | | `sign` | Whether to return signed estimates. | 49 | | `**kwargs` | Additional keyword arguments to pass to the pandas rolling function. For more information about the rolling parameters, see [here](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html). | 50 | 51 | ### Function: `edge_expanding` 52 | 53 | Implements an expanding window calculation of `edge`. The input is a pandas data frame. 
The output is a pandas series of expanding spread estimates. A value of 0.01 corresponds to a spread of 1%. 54 | 55 | ```python 56 | edge_expanding(df, min_periods=1, sign=False) 57 | ``` 58 | 59 | | field | description | 60 | | ------------- | ------------------------------------------------------------ | 61 | | `df` | Data frame with columns 'open', 'high', 'low', 'close' (case-insensitive). | 62 | | `min_periods` | Minimum number of observations in window required to have a value; otherwise, result is `np.nan`. | 63 | | `sign` | Whether to return signed estimates. | 64 | 65 | ## Examples 66 | 67 | Load the test data. 68 | 69 | ```python 70 | import pandas as pd 71 | df = pd.read_csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv") 72 | ``` 73 | 74 | Compute the spread estimate using all the observations. 75 | 76 | ```py 77 | from bidask import edge 78 | edge(df.Open, df.High, df.Low, df.Close) 79 | ``` 80 | 81 | Compute rolling estimates using a window of 21 observations. 82 | 83 | ```py 84 | from bidask import edge_rolling 85 | edge_rolling(df=df, window=21) 86 | ``` 87 | 88 | Compute expanding estimates starting with a minimum of 21 observations. 
89 | 90 | ```py 91 | from bidask import edge_expanding 92 | edge_expanding(df=df, min_periods=21) 93 | ``` 94 | 95 | ## Notes 96 | 97 | The rolling estimates: 98 | 99 | ```py 100 | rolling_estimates = edge_rolling(df=df, window=window, step=step, sign=sign) 101 | ``` 102 | 103 | are equivalent to, but much faster than: 104 | 105 | ```py 106 | expected_estimates = [] 107 | for t in range(0, len(df), step): 108 | t1 = t + 1 109 | t0 = t1 - window 110 | expected_estimates.append(edge( 111 | df.Open.values[t0:t1], 112 | df.High.values[t0:t1], 113 | df.Low.values[t0:t1], 114 | df.Close.values[t0:t1], 115 | sign=sign 116 | ) if t0 >= 0 else np.nan) 117 | ``` 118 | 119 | The expanding estimates: 120 | 121 | ```py 122 | expanding_estimates = edge_expanding(df=df, min_periods=min_periods, sign=sign) 123 | ``` 124 | 125 | are equivalent to, but much faster than: 126 | 127 | ```py 128 | expected_estimates = [] 129 | for t in range(0, len(df)): 130 | t1 = t + 1 131 | expected_estimates.append(edge( 132 | df.Open.values[0:t1], 133 | df.High.values[0:t1], 134 | df.Low.values[0:t1], 135 | df.Close.values[0:t1], 136 | sign=sign 137 | ) if t1 >= min_periods else np.nan) 138 | ``` 139 | 140 | ## Cite as 141 | 142 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 143 | 144 | A BibTex entry for LaTeX users is: 145 | 146 | ```bibtex 147 | @article{edge, 148 | title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices}, 149 | journal = {Journal of Financial Economics}, 150 | volume = {161}, 151 | pages = {103916}, 152 | year = {2024}, 153 | doi = {https://doi.org/10.1016/j.jfineco.2024.103916}, 154 | author = {David Ardia and Emanuele Guidotti and Tim A. 
Kroencke}, 155 | } 156 | ``` 157 | -------------------------------------------------------------------------------- /r/vignettes/bidask.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{bidask} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | data.table::setDTthreads(1) 12 | knitr::opts_chunk$set( 13 | collapse = TRUE, 14 | comment = "#>", 15 | fig.width = 6, 16 | out.width="100%", 17 | dpi = 300, 18 | warning = FALSE, 19 | message = FALSE 20 | ) 21 | ``` 22 | 23 | This vignette illustrates how to estimate bid-ask spreads from open, high, low, and close prices using the efficient estimator described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916). 24 | 25 | ```{r setup} 26 | library(bidask) 27 | ``` 28 | 29 | The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. The functions `edge_rolling` and `edge_expanding` are optimized for fast calculations over rolling and expanding windows, respectively. The function `spread` provides additional functionalities for `xts` objects and implements additional estimators. For all functions, an output value of 0.01 corresponds to a spread estimate of 1%. 30 | 31 | ## Functions `edge`, `edge_rolling`, `edge_expanding` 32 | 33 | These functions can be easily used with tidy data. 
For instance, download daily prices for Bitcoin and Ethereum using the [crypto2](https://cran.r-project.org/package=crypto2) package: 34 | 35 | ```{r, results='hide'} 36 | library(dplyr) 37 | library(crypto2) 38 | df <- crypto_list(only_active=TRUE) %>% 39 | filter(symbol %in% c("BTC", "ETH")) %>% 40 | crypto_history(start_date = "20200101", end_date = "20221231") 41 | ``` 42 | 43 | ```{r} 44 | head(df) 45 | ``` 46 | 47 | Estimate the spread for each coin in each year: 48 | 49 | ```{r} 50 | df %>% 51 | mutate(yyyy = format(timestamp, "%Y")) %>% 52 | group_by(symbol, yyyy) %>% 53 | arrange(timestamp) %>% 54 | summarise("EDGE" = edge(open, high, low, close)) 55 | ``` 56 | 57 | Estimate the spread using a rolling window of 30 days for each coin and plot the results: 58 | 59 | ```{r} 60 | library(ggplot2) 61 | df %>% 62 | group_by(symbol) %>% 63 | arrange(timestamp) %>% 64 | mutate("EDGE (rolling)" = edge_rolling(open, high, low, close, width = 30)) %>% 65 | ggplot(aes(x = timestamp, y = `EDGE (rolling)`, color = symbol)) + 66 | geom_line() + 67 | theme_minimal() 68 | ``` 69 | 70 | Estimate the spread using an expanding window for each coin and plot the results: 71 | ```{r} 72 | df %>% 73 | group_by(symbol) %>% 74 | arrange(timestamp) %>% 75 | mutate("EDGE (expanding)" = edge_expanding(open, high, low, close)) %>% 76 | ggplot(aes(x = timestamp, y = `EDGE (expanding)`, color = symbol)) + 77 | geom_line() + 78 | theme_minimal() 79 | ``` 80 | 81 | Notice that, generally, using intraday data (instead of daily) improves the estimation accuracy, especially when the spread is expected to be small (see example below). 82 | 83 | ## Function `spread` 84 | 85 | The function `spread()` provides additional functionalities for [xts](https://cran.r-project.org/package=xts) objects and implements additional estimators. 
For instance, download daily data for Microsoft (MSFT) using the [quantmod](https://cran.r-project.org/package=quantmod) package which returns an `xts` object: 86 | 87 | ```{r} 88 | library(quantmod) 89 | x <- try(getSymbols("MSFT", auto.assign = FALSE, start = "2019-01-01", end = "2022-12-31"), silent = TRUE) 90 | if("try-error" %in% class(x)){ 91 | print("Error in getSymbols; using syntentic data instead") 92 | x <- sim(5000, units = "day") 93 | } 94 | ``` 95 | ```{r} 96 | class(x) 97 | ``` 98 | 99 | ```{r} 100 | head(x) 101 | ``` 102 | 103 | Estimate the spread with: 104 | 105 | ```{r} 106 | spread(x) 107 | ``` 108 | 109 | or, equivalently: 110 | 111 | ```{r} 112 | edge(open = x[,1], high = x[,2], low = x[,3], close = x[,4]) 113 | ``` 114 | 115 | Estimate the spread for each month and plot the estimates: 116 | 117 | ```{r} 118 | sp <- spread(x, width = endpoints(x, on = "months")) 119 | plot(sp) 120 | ``` 121 | 122 | Estimate the spread using a rolling window of 21 observations: 123 | 124 | ```{r} 125 | sp <- spread(x, width = 21) 126 | plot(sp) 127 | ``` 128 | 129 | To illustrate higher-frequency estimates, download intraday data from Alpha Vantage. You must register with Alpha Vantage in order to download their data, but the one-time registration is fast and free. Register at https://www.alphavantage.co/ to receive your key.
You can set the API key globally as follows: 130 | 131 | ```{r} 132 | setDefaults(getSymbols.av, api.key = "") 133 | ``` 134 | 135 | Download minute data for Microsoft: 136 | 137 | ```r 138 | x <- getSymbols( 139 | Symbols = "MSFT", 140 | auto.assign = FALSE, 141 | src = "av", 142 | periodicity = "intraday", 143 | interval = "1min", 144 | output.size = "full") 145 | ``` 146 | 147 | ```{r, include=FALSE} 148 | x <- read.csv(system.file("extdata", "msft.csv", package = "bidask")) 149 | x <- xts(x[,-1], order.by = as.POSIXct(x[,1])) 150 | ``` 151 | 152 | Keep only prices during regular market hours: 153 | 154 | ```{r} 155 | x <- x["T09:30/T16:00"] 156 | head(x) 157 | ``` 158 | 159 | Estimate the spread for each day and plot the estimates: 160 | 161 | ```{r} 162 | sp <- spread(x, width = endpoints(x, on = "day")) 163 | plot(sp, type = "b") 164 | ``` 165 | 166 | Use multiple estimators and plot the estimates: 167 | 168 | ```{r} 169 | sp <- spread(x, width = endpoints(x, on = "day"), method = c("EDGE", "AR", "CS", "ROLL")) 170 | plot(sp, type = "b", legend.loc = "topright") 171 | ``` 172 | 173 | ## GitHub 174 | 175 | If you find this package useful, please [star the repo](https://github.com/eguidotti/bidask)! The repository also contains implementations for Python, C++, MATLAB, and more; as well as open data containing bid-ask spread estimates for crypto pairs in Binance and for U.S. stocks in CRSP. 176 | 177 | ## Cite as 178 | 179 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. 
[doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 180 | 181 | A BibTex entry for LaTeX users is: 182 | 183 | ```bibtex 184 | @article{edge, 185 | title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices}, 186 | journal = {Journal of Financial Economics}, 187 | volume = {161}, 188 | pages = {103916}, 189 | year = {2024}, 190 | doi = {https://doi.org/10.1016/j.jfineco.2024.103916}, 191 | author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke}, 192 | } 193 | ``` 194 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices 2 | 3 | This [repository](https://github.com/eguidotti/bidask/) implements the efficient estimator of the effective bid-ask spread from open, high, low, and close prices described in: 4 | 5 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 6 | 7 | The estimator is available in: 8 | 9 | [C++](https://github.com/eguidotti/bidask/tree/main/c++) | [Julia](https://github.com/eguidotti/bidask/tree/main/julia) | [MATLAB](https://github.com/eguidotti/bidask/tree/main/matlab) | [Python](https://github.com/eguidotti/bidask/tree/main/python) | [R](https://github.com/eguidotti/bidask/tree/main/r) | [SAS](https://github.com/eguidotti/bidask/tree/main/sas) 10 | 11 | You can also check the [pseudocode](https://github.com/eguidotti/bidask/tree/main/pseudocode) to implement the estimator in any programming language. 
If you implement the estimator in a new programming language and want your implementation included in the repository, please open a [pull request](https://github.com/eguidotti/bidask/pulls). 12 | 13 | ## Open data 14 | 15 | The following datasets are available to download: 16 | 17 | | Download | Dataset | Description | 18 | | ---------------------------------------------- | ---------------------------------------------------- | ------------------------------------------------------------ | 19 | | [download](https://doi.org/10.7910/DVN/YAY4H6) | Bid-Ask Spread Estimates for U.S. Stocks in CRSP | Contains monthly estimates of the effective bid-ask spread for each stock in the CRSP U.S. Stock database | 20 | | [download](https://doi.org/10.7910/DVN/9AVA2B) | Bid-Ask Spread Estimates for Crypto Pairs in Binance | Contains monthly estimates of the effective bid-ask spread for crypto pairs listed in Binance | 21 | 22 | ## FAQ 23 | 24 | > Each transaction price may generally include a different bid-ask spread, but the estimator only returns a single estimate given a sample of open, high, low, and close prices. What is the estimator computing exactly? 25 | 26 | - The estimator estimates the root mean square effective spread within the sample period. 27 | 28 | > What is the minimum number of observations required by the estimator? 29 | 30 | - The estimator requires at least 3 observations. 31 | 32 | > What is the recommended number of observations to use? 33 | 34 | - There is no one-size-fits-all solution. For instance, using a few daily prices would provide estimates closer to the spread in those days but with potentially large estimation uncertainty. Using one year of daily prices would provide more precise estimates, but for the average (more precisely, root mean square) spread in the whole year. For more information, see https://github.com/eguidotti/bidask/issues/2 35 | 36 | > Does the estimator work with intraday data? 
37 | 38 | - Yes, the estimator can be used with intraday data. 39 | 40 | > What is the recommended frequency to use? 41 | 42 | - Generally, the higher the frequency, the better (e.g., minute prices are preferable to hourly and daily prices). However, this depends on the underlying asset's trading frequency. For instance, weekly prices should be considered for assets that trade, on average, less than once per day. More generally, the frequency should be chosen so that the average number of trades per period is at least two. The estimation variance may increase significantly below this limit. 43 | 44 | > Does the estimator work with tick data? 45 | 46 | - The estimator does not natively support tick data. However, it is possible to aggregate tick data into open, high, low, and close prices and apply the estimator. 47 | 48 | > How to handle non-positive estimates? 49 | 50 | - By default, the estimator returns the absolute value of the estimates. This is generally a good option if you are interested in point estimates, but may create a small-sample bias if the estimates are used for averaging or regression studies. To reduce this source of bias, you can compute signed estimates with the argument `sign=True` and reset negative values to zero. Keeping negative values is not recommended because more negative estimates are typically associated with larger spreads empirically. For more information, see https://github.com/eguidotti/bidask/issues/3 51 | 52 | > Does the estimator work with missing values? 53 | 54 | - Yes, the estimator works with missing values out-of-the-box. It is recommended to keep missing values and use a regular time grid instead of dropping missing values and using an irregular time grid. For more information, see https://github.com/eguidotti/bidask/issues/16 55 | 56 | > Do the functions `edge` and `edge_rolling` produce the same results? 57 | 58 | - The function `edge_rolling` is a version of `edge` optimized for fast calculations over rolling windows. 
The two functions produce the same estimates when there are no missing values. If missing values are present, the two functions may provide slightly different estimates due to how missing values are handled, but both estimates are consistent. 59 | 60 | ## Replication code 61 | 62 | All code to replicate the paper is available [here](https://doi.org/10.7910/DVN/G8DPBM). The code meets the requirements of the [cascad](https://www.cascad.tech/certification/145-efficient-estimation-of-bid-ask-spreads-from-open-high-low-and-close-prices/) reproducibility policy for a rating of RRR. 63 | 64 | ## Related works 65 | 66 | You can browse publications related to the paper [here](https://scholar.google.com/scholar?cites=2115798896240699437). 67 | 68 | ## Terms of use 69 | 70 | All code is released under the [MIT](https://github.com/eguidotti/bidask?tab=MIT-1-ov-file#readme) license. All data are released under the [CC BY 4.0](http://creativecommons.org/licenses/by/4.0) license. When using any data or code from this repository, please cite the reference indicated below. 71 | 72 | ## Cite as 73 | 74 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) 75 | 76 | A BibTex entry for LaTeX users is: 77 | 78 | ```bibtex 79 | @article{edge, 80 | title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices}, 81 | journal = {Journal of Financial Economics}, 82 | volume = {161}, 83 | pages = {103916}, 84 | year = {2024}, 85 | doi = {https://doi.org/10.1016/j.jfineco.2024.103916}, 86 | author = {David Ardia and Emanuele Guidotti and Tim A. 
#' EDGE Estimator
#'
#' Internal rolling-window implementation of the EDGE spread estimator.
#' Called by \code{edge_rolling} and \code{edge_expanding} (with
#' \code{aslist = FALSE}).
#'
#' @param open,high,low,close numeric vectors of prices; logs are taken internally.
#' @param width window specification, forwarded unchanged to \code{rmean} and \code{rsum}.
#' @param sign if \code{TRUE}, the estimate is multiplied by the sign of the squared spread.
#' @param na.rm forwarded to \code{rmean}.
#' @param aslist if \code{TRUE} (default) the estimates are returned as
#' \code{list("EDGE" = s)}; otherwise the bare vector \code{s} is returned.
#'
#' @keywords internal
#'
EDGE <- function(open, high, low, close, width, sign, na.rm, aslist = TRUE){

  # compute log-prices
  o <- log(open)
  h <- log(high)
  l <- log(low)
  c <- log(close)
  m <- (h + l) / 2

  # shift log-prices by one period
  # (`shift` is not defined in this file; the package NAMESPACE imports
  # data.table, so this is presumably data.table::shift, i.e. a lag -- confirm)
  h1 <- shift(h, 1)
  l1 <- shift(l, 1)
  c1 <- shift(c, 1)
  m1 <- shift(m, 1)

  # compute log-returns
  r1 <- m - o
  r2 <- o - m1
  r3 <- m - c1
  r4 <- c1 - m1
  r5 <- o - c1

  # compute indicator variables
  # tau is NA when high, low, or the lagged close is missing; the po*/pc*
  # indicators are multiplied by tau and therefore inherit its NA/0 values
  tau <- ifelse(is.na(h) | is.na(l) | is.na(c1), NA, h != l | l != c1)
  po1 <- tau * (o != h)
  po2 <- tau * (o != l)
  pc1 <- tau * (c1 != h1)
  pc2 <- tau * (c1 != l1)

  # compute base products for rolling means
  r12 <- r1 * r2
  r15 <- r1 * r5
  r34 <- r3 * r4
  r45 <- r4 * r5
  tr1 <- tau * r1
  tr2 <- tau * r2
  tr4 <- tau * r4
  tr5 <- tau * r5

  # set up data frame for rolling means
  # NOTE: the columns are addressed by POSITION below (m[,k]), so the order
  # of the arguments must not change; the position is noted on each line
  x <- data.frame(
    r12,        # 1
    r34,        # 2
    r15,        # 3
    r45,        # 4
    tau,        # 5
    r1,         # 6
    tr2,        # 7
    r3,         # 8
    tr4,        # 9
    r5,         # 10
    r12^2,      # 11
    r34^2,      # 12
    r15^2,      # 13
    r45^2,      # 14
    r12 * r34,  # 15
    r15 * r45,  # 16
    tr2 * r2,   # 17
    tr4 * r4,   # 18
    tr5 * r5,   # 19
    tr2 * r12,  # 20
    tr4 * r34,  # 21
    tr5 * r15,  # 22
    tr4 * r45,  # 23
    tr4 * r12,  # 24
    tr2 * r34,  # 25
    tr2 * r4,   # 26
    tr1 * r45,  # 27
    tr5 * r45,  # 28
    tr4 * r5,   # 29
    tr5,        # 30
    po1,        # 31
    po2,        # 32
    pc1,        # 33
    pc2         # 34
  )

  # mask the first observation and decrement width by 1 before
  # computing rolling means to account for lagged prices
  # (from here on the local numeric `shift` shadows the shift() function used
  # above; it is only passed on as the `shift` argument of rmean/rsum, whose
  # exact semantics are defined outside this file -- TODO confirm)
  x[1,] <- NA
  shift <- 1

  # compute rolling means
  m <- rmean(x, width = width, shift = shift, na.rm = na.rm)

  # compute probabilities
  pt <- m[,5]
  po <- m[,31] + m[,32]
  pc <- m[,33] + m[,34]

  # set to missing if there are fewer than two periods with tau=1
  # or po or pc is zero
  # (the count of tau=1 periods always uses na.rm = TRUE, independently of
  # the `na.rm` argument used for the means)
  nt <- rsum(x[5], width = width, shift = shift, na.rm = TRUE)
  m[which(nt < 2 | po == 0 | pc == 0),] <- NA

  # compute input vectors
  a1 <- -4. / po
  a2 <- -4. / pc
  a3 <- m[,6] / pt
  a4 <- m[,9] / pt
  a5 <- m[,8] / pt
  a6 <- m[,10] / pt
  a12 <- 2 * a1 * a2
  a11 <- a1^2
  a22 <- a2^2
  a33 <- a3^2
  a55 <- a5^2
  a66 <- a6^2

  # compute expectations
  e1 <- a1 * (m[,1] - a3*m[,7]) + a2 * (m[,2] - a4*m[,8])
  e2 <- a1 * (m[,3] - a3*m[,30]) + a2 * (m[,4] - a4*m[,10])

  # compute variances
  v1 <- - e1^2 + (
    a11 * (m[,11] - 2*a3*m[,20] + a33*m[,17]) +
    a22 * (m[,12] - 2*a5*m[,21] + a55*m[,18]) +
    a12 * (m[,15] - a3*m[,25] - a5*m[,24] + a3*a5*m[,26])
  )
  v2 <- - e2^2 + (
    a11 * (m[,13] - 2*a3*m[,22] + a33*m[,19]) +
    a22 * (m[,14] - 2*a6*m[,23] + a66*m[,18]) +
    a12 * (m[,16] - a3*m[,28] - a6*m[,27] + a3*a6*m[,29])
  )

  # compute square spread by using a (equally) weighted
  # average if the total variance is (not) positive
  # (a missing total variance also falls back to the equally weighted average)
  vt <- v1 + v2
  s2 <- ifelse(!is.na(vt) & vt > 0, (v2*e1 + v1*e2) / vt, (e1 + e2) / 2)

  # compute signed root
  # (base::sign is spelled out because the argument `sign` shadows the function name)
  s <- sqrt(abs(s2))
  if(sign)
    s <- s * base::sign(s2)

  # return the spread
  if(!aslist) return(s)
  return(list("EDGE" = s))

}

#' Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
#'
#' Implements the efficient estimator of bid-ask spreads from open, high, low,
#' and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
#' \doi{10.1016/j.jfineco.2024.103916}
#'
#' @details
#' Prices must be sorted in ascending order of the timestamp.
#'
#' @param open numeric vector of open prices.
#' @param high numeric vector of high prices.
#' @param low numeric vector of low prices.
#' @param close numeric vector of close prices.
#' @param sign whether to return signed estimates.
#'
#' @return The spread estimate. A value of 0.01 corresponds to a spread of 1\%.
#' \code{NA} is returned if there are fewer than 3 observations, if fewer than
#' two periods have tau=1, or if one of the probabilities used by the estimator
#' is zero. An error is raised if the inputs do not have the same length.
#'
#' @references
#' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916.
#' \doi{10.1016/j.jfineco.2024.103916}
#'
#' @examples
#' # reduce number of threads to pass CRAN checks (you can ignore this)
#' data.table::setDTthreads(1)
#'
#' # simulate open, high, low, and close prices with spread 1%
#' x <- sim(n = 1000, spread = 0.01)
#'
#' # estimate the spread
#' edge(x$Open, x$High, x$Low, x$Close)
#'
#' @export
#'
edge <- function(open, high, low, close, sign = FALSE){

  # check that the open, high, low, and close prices have the same length
  # (scalar conditions: use the short-circuit operators)
  n <- length(open)
  if(length(high) != n || length(low) != n || length(close) != n)
    stop("open, high, low, close must have the same length")

  # return missing if there are fewer than 3 observations
  if(n < 3)
    return(NA)

  # compute log-prices
  o <- log(as.numeric(open))
  h <- log(as.numeric(high))
  l <- log(as.numeric(low))
  c <- log(as.numeric(close))
  m <- (h + l) / 2

  # shift log-prices by one period: the *1 vectors hold observations 1..n-1
  # (lagged), the others observations 2..n (current)
  h1 <- h[-n]; l1 <- l[-n]; c1 <- c[-n]; m1 <- m[-n]
  o <- o[-1]; h <- h[-1]; l <- l[-1]; c <- c[-1]; m <- m[-1]

  # compute log-returns
  r1 <- m - o
  r2 <- o - m1
  r3 <- m - c1
  r4 <- c1 - m1
  r5 <- o - c1

  # compute indicator variables (NA where high, low, or lagged close is missing)
  tau <- ifelse(is.na(h) | is.na(l) | is.na(c1), NA, h != l | l != c1)
  po1 <- tau * (o != h)
  po2 <- tau * (o != l)
  pc1 <- tau * (c1 != h1)
  pc2 <- tau * (c1 != l1)

  # compute probabilities
  pt <- mean(tau, na.rm = TRUE)
  po <- mean(po1, na.rm = TRUE) + mean(po2, na.rm = TRUE)
  pc <- mean(pc1, na.rm = TRUE) + mean(pc2, na.rm = TRUE)

  # return missing if there are fewer than two periods with tau=1
  # or po or pc is zero (a NaN probability is not treated as zero)
  nt <- sum(tau, na.rm = TRUE)
  if(nt < 2 || (!is.nan(po) && po == 0) || (!is.nan(pc) && pc == 0))
    return(NA)

  # compute de-meaned log-returns
  d1 <- r1 - mean(r1, na.rm = TRUE)/pt*tau
  d3 <- r3 - mean(r3, na.rm = TRUE)/pt*tau
  d5 <- r5 - mean(r5, na.rm = TRUE)/pt*tau

  # compute input vectors
  x1 <- -4./po*d1*r2 + -4./pc*d3*r4
  x2 <- -4./po*d1*r5 + -4./pc*d5*r4

  # compute expectations
  e1 <- mean(x1, na.rm = TRUE)
  e2 <- mean(x2, na.rm = TRUE)

  # compute variances
  v1 <- mean(x1^2, na.rm = TRUE) - e1^2
  v2 <- mean(x2^2, na.rm = TRUE) - e2^2

  # compute square spread by using a (equally) weighted
  # average if the total variance is (not) positive
  vt <- v1 + v2
  if(!is.na(vt) && vt > 0)
    s2 <- (v2*e1 + v1*e2) / vt
  else
    s2 <- (e1 + e2) / 2.

  # compute signed root
  # (base::sign is spelled out because the argument `sign` shadows the function name)
  s <- sqrt(abs(s2))
  if(sign)
    s <- s * base::sign(s2)

  # return the spread
  return(s)

}

#' Rolling Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices
#'
#' Implements a rolling window calculation of the efficient estimator of bid-ask spreads
#' from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
#' \doi{10.1016/j.jfineco.2024.103916}.
#'
#' @details
#' Prices must be sorted in ascending order of the timestamp.
#'
#' @param open numeric vector of open prices.
271 | #' @param high numeric vector of high prices. 272 | #' @param low numeric vector of low prices. 273 | #' @param close numeric vector of close prices. 274 | #' @param width if an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. See examples. 275 | #' @param sign whether to return signed estimates. 276 | #' @param na.rm whether to ignore missing values. 277 | #' 278 | #' @return Vector of spread estimates. 279 | #' A value of 0.01 corresponds to a spread of 1\%. 280 | #' This function always returns a result of the same length as the input prices. 281 | #' 282 | #' @references 283 | #' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 284 | #' \doi{10.1016/j.jfineco.2024.103916} 285 | #' 286 | #' @examples 287 | #' # reduce number of threads to pass CRAN checks (you can ignore this) 288 | #' data.table::setDTthreads(1) 289 | #' 290 | #' # simulate open, high, low, and close prices with spread 1% 291 | #' x <- sim(n = 1000, spread = 0.01) 292 | #' 293 | #' # estimate the spread using a rolling window 294 | #' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21) 295 | #' tail(s) 296 | #' 297 | #' # estimate the spread using custom endpoints 298 | #' ep <- c(3, 35, 100) 299 | #' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = ep) 300 | #' s[c(35, 100)] 301 | #' # equivalent to 302 | #' edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35]) 303 | #' edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100]) 304 | #' 305 | #' # estimate the spread using an expanding window 306 | #' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x)) 307 | #' tail(s) 308 | #' # equivalent to 309 | #' s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE) 310 | #' 
#' tail(s)
#'
#' @export
#'
edge_rolling <- function(open, high, low, close, width,
                         sign = FALSE, na.rm = FALSE) {
  # The four price series are matched position by position, so they must
  # share one length. Each length() comparison is a single TRUE/FALSE, hence
  # the scalar, short-circuiting `||` rather than the elementwise `|`.
  n <- length(open)
  if (length(high) != n || length(low) != n || length(close) != n) {
    stop("open, high, low, close must have the same length")
  }

  # Coerce every series with as.numeric() and forward to the EDGE() routine
  # defined elsewhere in the package. `width`, `sign` and `na.rm` are passed
  # through untouched.
  # NOTE(review): aslist = FALSE presumably selects the plain vector result
  # described under @return rather than a list -- confirm against EDGE().
  EDGE(
    open = as.numeric(open),
    high = as.numeric(high),
    low = as.numeric(low),
    close = as.numeric(close),
    width = width,
    sign = sign,
    na.rm = na.rm,
    aslist = FALSE
  )
}

#' Expanding Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices
#'
#' Implements an expanding window calculation of the efficient estimator of bid-ask spreads
#' from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
#' \doi{10.1016/j.jfineco.2024.103916}.
#'
#' @details
#' Prices must be sorted in ascending order of the timestamp.
#'
#' @param open numeric vector of open prices.
#' @param high numeric vector of high prices.
#' @param low numeric vector of low prices.
#' @param close numeric vector of close prices.
#' @param sign whether to return signed estimates.
#' @param na.rm whether to ignore missing values.
#'
#' @return Vector of spread estimates.
#' A value of 0.01 corresponds to a spread of 1\%.
#' This function always returns a result of the same length as the input prices.
#'
#' @references
#' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916.
#' \doi{10.1016/j.jfineco.2024.103916}
#'
#' @examples
#' # reduce number of threads to pass CRAN checks (you can ignore this)
#' data.table::setDTthreads(1)
#'
#' # simulate open, high, low, and close prices with spread 1%
#' x <- sim(n = 1000, spread = 0.01)
#'
#' # estimate the spread using an expanding window
#' s <- edge_expanding(x$Open, x$High, x$Low, x$Close)
#' tail(s)
#' # equivalent to
#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x), na.rm = TRUE)
#' tail(s)
#'
#' @export
#'
edge_expanding <- function(open, high, low, close, sign = FALSE, na.rm = TRUE) {
  # The four price series are matched position by position, so they must
  # share one length. Each length() comparison is a single TRUE/FALSE, hence
  # the scalar, short-circuiting `||` rather than the elementwise `|`.
  n <- length(open)
  if (length(high) != n || length(low) != n || length(close) != n) {
    stop("open, high, low, close must have the same length")
  }

  # Coerce every series with as.numeric() and forward to the EDGE() routine
  # defined elsewhere in the package.
  # NOTE(review): aslist = FALSE presumably selects the plain vector result
  # described under @return rather than a list -- confirm against EDGE().
  EDGE(
    open = as.numeric(open),
    high = as.numeric(high),
    low = as.numeric(low),
    close = as.numeric(close),
    # One width per observation: observation i gets width i, which is what
    # makes the window expanding. seq_len() is used instead of 1:n because
    # 1:0 evaluates to c(1, 0), whereas seq_len(0) is empty and therefore
    # still has the same length as the (empty) price vectors.
    width = seq_len(n),
    sign = sign,
    na.rm = na.rm,
    aslist = FALSE
  )
}