├── CNAME
├── .gitignore
├── r
    ├── vignettes
    │   ├── .gitignore
    │   └── bidask.Rmd
    ├── LICENSE
    ├── .Rbuildignore
    ├── NAMESPACE
    ├── man
    │   ├── rmean.Rd
    │   ├── rsum.Rd
    │   ├── ROLL.Rd
    │   ├── rfun.Rd
    │   ├── AR.Rd
    │   ├── CS.Rd
    │   ├── OHLC.Rd
    │   ├── bidask-package.Rd
    │   ├── EDGE.Rd
    │   ├── edge_expanding.Rd
    │   ├── edge_rolling.Rd
    │   ├── sim.Rd
    │   └── spread.Rd
    ├── tests
    │   ├── testthat.R
    │   └── testthat
    │   │   └── test-edge.R
    ├── R
    │   ├── roll.R
    │   ├── ar.R
    │   ├── cs.R
    │   ├── utils.R
    │   ├── ohlc.R
    │   ├── sim.R
    │   ├── spread.R
    │   └── edge.R
    ├── inst
    │   └── CITATION
    ├── .gitignore
    ├── DESCRIPTION
    └── README.md
├── sas
    ├── ohlc.sas7bdat
    ├── README.md
    └── edge.sas
├── _config.yml
├── python
    ├── bidask
    │   ├── __init__.py
    │   ├── edge_expanding.py
    │   ├── edge.py
    │   └── edge_rolling.py
    ├── pyproject.toml
    ├── LICENSE
    ├── .gitignore
    ├── tests
    │   └── test_edge.py
    └── README.md
├── c++
    ├── edge.h
    ├── README.md
    └── edge.cpp
├── julia
    ├── Project.toml
    ├── test
    │   └── runtests.jl
    ├── README.md
    └── src
    │   └── BidAsk.jl
├── LICENSE
├── matlab
    ├── README.md
    └── edge.m
├── pseudocode
    └── README.md
└── README.md


/CNAME:
--------------------------------------------------------------------------------
1 | bidask.eguidotti.com


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .Rproj.user
3 | 


--------------------------------------------------------------------------------
/r/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/r/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2024
2 | COPYRIGHT HOLDER: Emanuele Guidotti


--------------------------------------------------------------------------------
/sas/ohlc.sas7bdat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eguidotti/bidask/HEAD/sas/ohlc.sas7bdat


--------------------------------------------------------------------------------
/r/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^renv$
2 | ^renv\.lock$
3 | ^.*\.Rproj$
4 | ^\.Rproj\.user$
5 | ^doc$
6 | ^Meta$
7 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | title: Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
2 | description: David Ardia, Emanuele Guidotti, Tim A. Kroencke
3 | theme: jekyll-theme-cayman
4 | 


--------------------------------------------------------------------------------
/python/bidask/__init__.py:
--------------------------------------------------------------------------------
1 | from .edge import edge
2 | from .edge_rolling import edge_rolling
3 | from .edge_expanding import edge_expanding
4 | __all__ = ['edge', 'edge_rolling', 'edge_expanding']
5 | 


--------------------------------------------------------------------------------
/r/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(edge)
 4 | export(edge_expanding)
 5 | export(edge_rolling)
 6 | export(sim)
 7 | export(spread)
 8 | import(data.table)
 9 | importFrom(stats,rbinom)
10 | importFrom(stats,rnorm)
11 | 


--------------------------------------------------------------------------------
/c++/edge.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <vector>
 3 | // Spread estimate from OHLC prices sorted by ascending timestamp (0.01 means 1%).
 4 | double edge(
 5 |     const std::vector<double> &open,   // open prices
 6 |     const std::vector<double> &high,   // high prices
 7 |     const std::vector<double> &low,    // low prices
 8 |     const std::vector<double> &close,  // close prices
 9 |     const bool sign = false);          // whether signed estimates should be returned
10 | 


--------------------------------------------------------------------------------
/r/man/rmean.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{rmean}
 4 | \alias{rmean}
 5 | \title{Rolling mean}
 6 | \usage{
 7 | rmean(x, width, shift, na.rm)
 8 | }
 9 | \description{
10 | Rolling mean
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/r/man/rsum.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{rsum}
 4 | \alias{rsum}
 5 | \title{Rolling sum}
 6 | \usage{
 7 | rsum(x, width, shift, na.rm)
 8 | }
 9 | \description{
10 | Rolling sum
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/r/man/ROLL.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/roll.R
 3 | \name{ROLL}
 4 | \alias{ROLL}
 5 | \title{Roll Estimator}
 6 | \usage{
 7 | ROLL(close, width, sign, na.rm)
 8 | }
 9 | \description{
10 | Roll Estimator
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/r/man/rfun.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{rfun}
 4 | \alias{rfun}
 5 | \title{Rolling function}
 6 | \usage{
 7 | rfun(froll, x, width, shift, na.rm)
 8 | }
 9 | \description{
10 | Rolling function
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/r/man/AR.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ar.R
 3 | \name{AR}
 4 | \alias{AR}
 5 | \title{Abdi-Ranaldo Estimator}
 6 | \usage{
 7 | AR(high, low, close, width, method, sign, na.rm)
 8 | }
 9 | \description{
10 | Abdi-Ranaldo Estimator
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/r/man/CS.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cs.R
 3 | \name{CS}
 4 | \alias{CS}
 5 | \title{Corwin-Schultz Estimator}
 6 | \usage{
 7 | CS(high, low, close, width, method, sign, na.rm)
 8 | }
 9 | \description{
10 | Corwin-Schultz Estimator
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/r/man/OHLC.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ohlc.R
 3 | \name{OHLC}
 4 | \alias{OHLC}
 5 | \title{OHLC Estimators}
 6 | \usage{
 7 | OHLC(open, high, low, close, width, method, sign, na.rm)
 8 | }
 9 | \description{
10 | OHLC Estimators
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/julia/Project.toml:
--------------------------------------------------------------------------------
 1 | name = "BidAsk"
 2 | uuid = "3db38d6f-c11d-46b7-88ad-5bdaea376200"
 3 | version = "2.1.0"
 4 | 
 5 | [deps]
 6 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 7 | 
 8 | [extras]
 9 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
10 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
11 | 
12 | [targets]
13 | test = ["CSV", "Test"]


--------------------------------------------------------------------------------
/r/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
 7 | # * https://testthat.r-lib.org/articles/special-files.html
 8 | 
 9 | library(testthat)
10 | library(bidask)
11 | data.table::setDTthreads(1)
12 | 
13 | test_check("bidask")
14 | 


--------------------------------------------------------------------------------
/python/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "bidask"
 3 | version = "2.1.0"
 4 | license = "MIT"
 5 | description = "Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices"
 6 | authors = ["Emanuele Guidotti <emanuele.guidotti@usi.ch>"]
 7 | readme = "README.md"
 8 | repository = "https://github.com/eguidotti/bidask"
 9 | 
10 | [tool.poetry.dependencies]
11 | python = "^3.6"
12 | pandas = "*"
13 | numpy = "*"
14 | 
15 | [build-system]
16 | requires = ["poetry-core>=1.0.0"]
17 | build-backend = "poetry.core.masonry.api"
18 | 


--------------------------------------------------------------------------------
/r/R/roll.R:
--------------------------------------------------------------------------------
 1 | #' Roll Estimator
 2 | #'
 3 | #' @keywords internal
 4 | #'
 5 | ROLL <- function(close, width, sign, na.rm){
 6 |   # Log close price together with its first and second lags.
 7 |   p0 <- log(close)
 8 |   p1 <- shift(p0, 1)
 9 |   p2 <- shift(p0, 2)
10 |   # Current and lagged one-period log-returns.
11 |   ret <- p0 - p1
12 |   ret_lag <- p1 - p2
13 |   # Rolling means of both returns and of their product, plus the rolling
14 |   # count of non-missing lagged returns; shift = 2 is the largest lag above.
15 |   terms <- data.frame(ret, ret_lag, ret*ret_lag)
16 |   avg <- rmean(terms, width = width, shift = 2, na.rm = na.rm)
17 |   nobs <- rsum(!is.na(ret_lag), width = width, shift = 2, na.rm = na.rm)
18 |   # Autocovariance of returns with the n/(n - 1) correction, scaled by -4.
19 |   s2 <- -4 * nobs/(nobs - 1) * (avg[,3] - avg[,1]*avg[,2])
20 |   # Signed square root; the sign is dropped when `sign` is FALSE.
21 |   est <- base::sign(s2) * sqrt(abs(s2))
22 |   if(!sign) est <- abs(est)
23 |   # The estimate is returned as a one-element named list.
24 |   return(list("ROLL" = est))
25 | }
26 | 


--------------------------------------------------------------------------------
/r/inst/CITATION:
--------------------------------------------------------------------------------
 1 | bibentry(
 2 | 	bibtype     = "article",
 3 | 	title       = "Efficient estimation of bid–ask spreads from open, high, low, and close prices", 
 4 | 	journal     = "Journal of Financial Economics",
 5 | 	year        = 2024,
 6 | 	author      = as.person("David Ardia [aut], Emanuele Guidotti [aut], Tim A. Kroencke [aut]"),
 7 | 	volume      = "161",
 8 | 	pages       = "103916",
 9 |     doi         = "10.1016/j.jfineco.2024.103916",
10 | 	textVersion = 'Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. doi: 10.1016/j.jfineco.2024.103916'
11 | )
12 | 


--------------------------------------------------------------------------------
/r/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | 
 8 | # User-specific files
 9 | .Ruserdata
10 | 
11 | # Example code in package build process
12 | *-Ex.R
13 | 
14 | # Output files from R CMD build
15 | /*.tar.gz
16 | 
17 | # Output files from R CMD check
18 | /*.Rcheck/
19 | 
20 | # RStudio files
21 | .Rproj.user/
22 | *.Rproj
23 | 
24 | # produced vignettes
25 | vignettes/*.html
26 | vignettes/*.pdf
27 | 
28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
29 | .httr-oauth
30 | 
31 | # knitr and R markdown default cache directories
32 | *_cache/
33 | /cache/
34 | 
35 | # Temporary files created by R markdown
36 | *.utf8.md
37 | *.knit.md
38 | 
39 | # R Environment Variables
40 | .Renviron
41 | 
42 | # renv
43 | .Rprofile
44 | renv
45 | renv.lock
46 | 
47 | 
48 | inst/doc
49 | /doc/
50 | /Meta/
51 | 


--------------------------------------------------------------------------------
/julia/test/runtests.jl:
--------------------------------------------------------------------------------
 1 | using BidAsk
 2 | using CSV
 3 | using Test
 4 | 
 5 | 
 6 | @testset "edge" begin
 7 |     # Reference OHLC prices are downloaded from the repository.
 8 |     url = "https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv"
 9 |     prices = CSV.File(download(url))
10 |     o, h, l, c = prices.Open, prices.High, prices.Low, prices.Close
11 | 
12 |     # Full sample, default (unsigned) estimate.
13 |     @test isapprox(0.0101849034905478, edge(o, h, l, c))
14 | 
15 |     # First ten observations, signed estimate.
16 |     idx = 1:10
17 |     @test isapprox(-0.016889917516422, edge(o[idx], h[idx], l[idx], c[idx], true))
18 | 
19 |     # All-missing input is expected to give NaN.
20 |     @test isnan(edge(fill(missing, 3), fill(missing, 3), fill(missing, 3), fill(missing, 3)))
21 | 
22 |     # This short price path is also expected to give NaN.
23 |     @test isnan(edge(
24 |         [18.21, 17.61, 17.61],
25 |         [18.21, 17.61, 17.61],
26 |         fill(17.61, 3),
27 |         fill(17.61, 3)
28 |     ))
29 | 
30 | end
31 | 


--------------------------------------------------------------------------------
/r/R/ar.R:
--------------------------------------------------------------------------------
 1 | #' Abdi-Ranaldo Estimator
 2 | #'
 3 | #' @keywords internal
 4 | #'
 5 | AR <- function(high, low, close, width, method, sign, na.rm){
 6 |   # Only the "AR" and "AR2" variants are supported; stop on anything else.
 7 |   ok <- c("AR","AR2")
 8 |   if(length(ko <- setdiff(method, ok)))
 9 |     stop(sprintf("Method(s) '%s' not available. The available methods are '%s'.",
10 |                  paste(ko, collapse = "', '"), paste(ok, collapse = "', '")))
11 |   # Log prices.
12 |   h <- log(high)
13 |   l <- log(low)
14 |   c <- log(close)
15 |   # Mid-range of the current (m2) and previous (m1) period, and previous close (c1).
16 |   m2 <- (h + l) / 2
17 |   m1 <- shift(m2, 1)
18 |   c1 <- shift(c, 1)
19 |   # Per-period term; its rolling mean is square-rooted below for "AR".
20 |   s2 <- 4 * (c1 - m1) * (c1 - m2)
21 |   # Passed on to the rolling helper; one lag is taken above.
22 |   shift <- 1
23 |   ar <- ar2 <- NULL
24 |   # "AR": average first, then take the signed square root.
25 |   if("AR" %in% method) {
26 |     ar <- rmean(s2, width = width, shift = shift, na.rm = na.rm)
27 |     ar <- base::sign(ar) * sqrt(abs(ar))  # explicit base::, the argument `sign` shadows it
28 |     if(!sign) ar <- abs(ar)
29 |     ar <- list("AR" = ar)
30 |   }
31 |   # "AR2": set negative terms to zero, take square roots, then average.
32 |   if("AR2" %in% method){
33 |     s2[s2 < 0] <- 0
34 |     s <- sqrt(s2)
35 |     ar2 <- rmean(s, width = width, shift = shift, na.rm = na.rm)
36 |     ar2 <- list("AR2" = ar2)
37 |   }
38 |   # Named list with one element per requested method (NULL when none was requested).
39 |   return(c(ar, ar2))
40 | 
41 | }
42 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Emanuele Guidotti
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/python/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Emanuele Guidotti
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/python/bidask/edge_expanding.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from .edge_rolling import edge_rolling
 3 | 
 4 | 
 5 | def edge_expanding(df: pd.DataFrame, min_periods: int = 1, sign: bool = False) -> pd.Series:
 6 |     """
 7 |     Expanding Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices
 8 | 
 9 |     Implements an expanding window calculation of the efficient estimator of bid-ask spreads 
10 |     from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
11 |     https://doi.org/10.1016/j.jfineco.2024.103916
12 |         
13 |     Parameters
14 |     ----------
15 |     - `df` : pd.DataFrame
16 |         DataFrame with columns 'open', 'high', 'low', 'close' (case-insensitive).
17 |     - `min_periods` : int
18 |         Minimum number of observations in window required to have a value; otherwise, result is `np.nan`.
19 |     - `sign` : bool, default False
20 |         Whether to return signed estimates.
21 | 
22 |     Returns
23 |     -------
24 |     pd.Series
25 |         A pandas Series of expanding spread estimates. A value of 0.01 corresponds to a spread of 1%.
26 |     """    
27 |     return edge_rolling(df=df, window=len(df), min_periods=min_periods, sign=sign)
28 | 


--------------------------------------------------------------------------------
/r/R/cs.R:
--------------------------------------------------------------------------------
 1 | #' Corwin-Schultz Estimator
 2 | #'
 3 | #' @keywords internal
 4 | #'
 5 | CS <- function(high, low, close, width, method, sign, na.rm){
 6 |   # Validate the requested variants against the two supported ones.
 7 |   ok <- c("CS","CS2")
 8 |   ko <- setdiff(method, ok)
 9 |   if(length(ko))
10 |     stop(sprintf("Method(s) '%s' not available. The available methods are '%s'.",
11 |                  paste(ko, collapse = "', '"), paste(ok, collapse = "', '")))
12 |   # Log prices of the current period ...
13 |   hi <- log(high)
14 |   lo <- log(low)
15 |   cl <- log(close)
16 |   # ... and of the previous period.
17 |   cl_prev <- shift(cl, 1)
18 |   hi_prev <- shift(hi, 1)
19 |   lo_prev <- shift(lo, 1)
20 |   # Move the current range by the gap between the previous close and the
21 |   # current range (the gap is zero when the previous close lies inside it).
22 |   gap <- pmax(0, cl_prev - hi) + pmin(0, cl_prev - lo)
23 |   hi_adj <- hi + gap
24 |   lo_adj <- lo + gap
25 |   # Sum of the two squared one-period ranges, and the squared two-period range.
26 |   sum_sq <- (hi - lo)^2 + (hi_prev - lo_prev)^2
27 |   span_sq <- (pmax(hi_adj, hi_prev) - pmin(lo_adj, lo_prev))^2
28 |   # Per-period spread implied by the two range measures.
29 |   k <- 3 - 2*sqrt(2)
30 |   a <- (sqrt(2*sum_sq) - sqrt(sum_sq)) / k - sqrt(span_sq / k)
31 |   ea <- exp(a)
32 |   s <- 2*(ea - 1) / (1 + ea)
33 |   cs <- cs2 <- NULL
34 |   # "CS": rolling mean of the raw per-period spreads.
35 |   if("CS" %in% method) {
36 |     cs <- rmean(s, width = width, shift = 1, na.rm = na.rm)
37 |     if(!sign) cs <- abs(cs)
38 |     cs <- list("CS" = cs)
39 |   }
40 |   # "CS2": negative per-period spreads are set to zero before averaging.
41 |   if("CS2" %in% method){
42 |     s[s < 0] <- 0
43 |     cs2 <- rmean(s, width = width, shift = 1, na.rm = na.rm)
44 |     cs2 <- list("CS2" = cs2)
45 |   }
46 |   return(c(cs, cs2))
47 | }
48 | 


--------------------------------------------------------------------------------
/r/R/utils.R:
--------------------------------------------------------------------------------
 1 | #' @keywords internal
 2 | "_PACKAGE"
 3 | 
 4 | #' @import data.table
 5 | #' @importFrom stats rbinom rnorm
 6 | NULL
 7 | 
 8 | #' Rolling function
 9 | #' 
10 | #' @keywords internal
11 | #' 
12 | rfun <- function(froll, x, width, shift, na.rm){
13 |   # Apply the rolling function `froll` to `x`; `x` without rows is treated as a vector.
14 |   nw <- length(width)
15 |   nc <- ncol(x); nr <- nrow(x)
16 |   if(is.null(nr)) nr <- length(x)
17 |   # Default window length: `width` less `shift`.
18 |   n <- width - shift
19 |   if(nw != 1 && nw != nr){  # vector `width` whose length differs from the size of `x`
20 |     n <- rep(0, nr)
21 |     n[width[-1]] <- diff(pmax(1, width))  # NOTE(review): presumably `width` holds window endpoints here - confirm
22 |   }
23 |   # Scalar window shorter than one observation: all-NA output shaped like `x`.
24 |   if(nw == 1 && n < 1){
25 |     if(is.null(nc)) return(rep(NA, nr))
26 |     return(as.data.frame(matrix(data = NA, nrow = nr, ncol = nc)))
27 |   }
28 |   # `adaptive` is switched on whenever `width` has more than one element.
29 |   y <- froll(x, n = n, na.rm = na.rm, adaptive = nw > 1, fill = NA)
30 |   if(is.list(y)) setDF(y)
31 |   # Scalar `width` above one: overwrite the first `width - 1` results with NA.
32 |   if(nw == 1 && width > 1){
33 |     if(is.data.frame(y)) y[1:(width-1),] <- NA
34 |     else y[1:(width-1)] <- NA
35 |   }
36 | 
37 |   return(y)
38 |   
39 | }
40 | 
41 | #' Rolling sum
42 | #' 
43 | #' @keywords internal
44 | #' 
45 | rsum <- function(x, width, shift, na.rm){
46 |   # Delegate to the generic rolling helper, using frollsum as the roller.
47 |   out <- rfun(froll = frollsum, x = x, width = width, shift = shift, na.rm = na.rm)
48 |   return(out)
49 | }
50 | 
51 | #' Rolling mean
52 | #' 
53 | #' @keywords internal
54 | #' 
55 | rmean <- function(x, width, shift, na.rm){
56 |   # Delegate to the generic rolling helper, using frollmean as the roller.
57 |   out <- rfun(froll = frollmean, x = x, width = width, shift = shift, na.rm = na.rm)
58 |   return(out)
59 | }
60 | 


--------------------------------------------------------------------------------
/r/man/bidask-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \docType{package}
 4 | \name{bidask-package}
 5 | \alias{bidask}
 6 | \alias{bidask-package}
 7 | \title{bidask: Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices}
 8 | \description{
 9 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024) \doi{10.1016/j.jfineco.2024.103916}. It also provides an implementation of the estimators described in Roll (JF, 1984) \doi{10.1111/j.1540-6261.1984.tb03897.x}, Corwin & Schultz (JF, 2012) \doi{10.1111/j.1540-6261.2012.01729.x}, and Abdi & Ranaldo (RFS, 2017) \doi{10.1093/rfs/hhx084}.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://github.com/eguidotti/bidask}
15 |   \item Report bugs at \url{https://github.com/eguidotti/bidask/issues}
16 | }
17 | 
18 | }
19 | \author{
20 | \strong{Maintainer}: Emanuele Guidotti \email{emanuele.guidotti@usi.ch} (\href{https://orcid.org/0000-0002-8961-6623}{ORCID})
21 | 
22 | Other contributors:
23 | \itemize{
24 |   \item David Ardia (\href{https://orcid.org/0000-0003-2823-782X}{ORCID}) [contributor]
25 |   \item Tim Kroencke (\href{https://orcid.org/0000-0001-8700-356X}{ORCID}) [contributor]
26 | }
27 | 
28 | }
29 | \keyword{internal}
30 | 


--------------------------------------------------------------------------------
/r/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: bidask
 2 | Type: Package
 3 | Title: Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
 4 | Version: 2.1.5
 5 | Authors@R: c(
 6 |     person(given = "Emanuele", family = "Guidotti", email = "emanuele.guidotti@usi.ch", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8961-6623")),
 7 |     person(given = "David", family = "Ardia", role = c("ctb"), comment = c(ORCID = "0000-0003-2823-782X")),
 8 |     person(given = "Tim", family = "Kroencke", role = c("ctb"), comment = c(ORCID = "0000-0001-8700-356X"))
 9 |   )
10 | Description: Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices
11 |   described in Ardia, Guidotti, & Kroencke (JFE, 2024) <doi:10.1016/j.jfineco.2024.103916>. 
12 |   It also provides an implementation of the estimators described in 
13 |   Roll (JF, 1984) <doi:10.1111/j.1540-6261.1984.tb03897.x>, 
14 |   Corwin & Schultz (JF, 2012) <doi:10.1111/j.1540-6261.2012.01729.x>,
15 |   and Abdi & Ranaldo (RFS, 2017) <doi:10.1093/rfs/hhx084>.
16 | License: MIT + file LICENSE
17 | URL: https://github.com/eguidotti/bidask
18 | BugReports: https://github.com/eguidotti/bidask/issues
19 | Encoding: UTF-8
20 | Imports: data.table
21 | RoxygenNote: 7.2.3
22 | Suggests:
23 |     xts,
24 |     zoo,
25 |     dplyr,
26 |     crypto2,
27 |     quantmod,
28 |     ggplot2,
29 |     knitr,
30 |     rmarkdown,
31 |     testthat (>= 3.0.0)
32 | Config/testthat/edition: 3
33 | VignetteBuilder: knitr
34 | 


--------------------------------------------------------------------------------
/r/man/EDGE.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/edge.R
 3 | \name{edge}
 4 | \alias{edge}
 5 | \title{Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices}
 6 | \usage{
 7 | edge(open, high, low, close, sign = FALSE)
 8 | }
 9 | \arguments{
10 | \item{open}{numeric vector of open prices.}
11 | 
12 | \item{high}{numeric vector of high prices.}
13 | 
14 | \item{low}{numeric vector of low prices.}
15 | 
16 | \item{close}{numeric vector of close prices.}
17 | 
18 | \item{sign}{whether to return signed estimates.}
19 | }
20 | \value{
21 | The spread estimate. A value of 0.01 corresponds to a spread of 1\%.
22 | }
23 | \description{
24 | Implements the efficient estimator of bid-ask spreads from open, high, low, 
25 | and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
26 | \doi{10.1016/j.jfineco.2024.103916}
27 | }
28 | \details{
29 | Prices must be sorted in ascending order of the timestamp.
30 | }
31 | \examples{
32 | # reduce number of threads to pass CRAN checks (you can ignore this)
33 | data.table::setDTthreads(1)
34 | 
35 | # simulate open, high, low, and close prices with spread 1\%
36 | x <- sim(n = 1000, spread = 0.01)
37 | 
38 | # estimate the spread
39 | edge(x$Open, x$High, x$Low, x$Close)
40 | 
41 | }
42 | \references{
43 | Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
44 | \doi{10.1016/j.jfineco.2024.103916}
45 | }
46 | 


--------------------------------------------------------------------------------
/r/man/edge_expanding.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/edge.R
 3 | \name{edge_expanding}
 4 | \alias{edge_expanding}
 5 | \title{Expanding Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices}
 6 | \usage{
 7 | edge_expanding(open, high, low, close, sign = FALSE, na.rm = TRUE)
 8 | }
 9 | \arguments{
10 | \item{open}{numeric vector of open prices.}
11 | 
12 | \item{high}{numeric vector of high prices.}
13 | 
14 | \item{low}{numeric vector of low prices.}
15 | 
16 | \item{close}{numeric vector of close prices.}
17 | 
18 | \item{sign}{whether to return signed estimates.}
19 | 
20 | \item{na.rm}{whether to ignore missing values.}
21 | }
22 | \value{
23 | Vector of spread estimates. 
24 | A value of 0.01 corresponds to a spread of 1\%.
25 | This function always returns a result of the same length as the input prices.
26 | }
27 | \description{
28 | Implements an expanding window calculation of the efficient estimator of bid-ask spreads 
29 | from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
30 | \doi{10.1016/j.jfineco.2024.103916}.
31 | }
32 | \details{
33 | Prices must be sorted in ascending order of the timestamp.
34 | }
35 | \examples{
36 | # reduce number of threads to pass CRAN checks (you can ignore this)
37 | data.table::setDTthreads(1)
38 | 
39 | # simulate open, high, low, and close prices with spread 1\%
40 | x <- sim(n = 1000, spread = 0.01)
41 | 
42 | # estimate the spread using an expanding window
43 | s <- edge_expanding(x$Open, x$High, x$Low, x$Close)
44 | tail(s)
45 | # equivalent to
46 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x), na.rm = TRUE)
47 | tail(s)
48 | 
49 | }
50 | \references{
51 | Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
52 | \doi{10.1016/j.jfineco.2024.103916}
53 | }
54 | 


--------------------------------------------------------------------------------
/c++/README.md:
--------------------------------------------------------------------------------
 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
 2 | 
 3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
 4 | 
 5 | ## Installation
 6 | 
 7 | Download the C++ source file [`edge.cpp`](https://github.com/eguidotti/bidask/tree/main/c++/edge.cpp) and the corresponding header [`edge.h`](https://github.com/eguidotti/bidask/tree/main/c++/edge.h)
 8 | 
 9 | ## Usage
10 | 
11 | Arguments:
12 | 
13 | ```c++
14 | edge(open, high, low, close, sign=false)
15 | ```
16 | 
17 | | field   | description                                  |
18 | | ------- | -------------------------------------------- |
19 | | `open`  | std::vector\<double\> of open prices.        |
20 | | `high`  | std::vector\<double\> of high prices.        |
21 | | `low`   | std::vector\<double\> of low prices.         |
22 | | `close` | std::vector\<double\> of close prices.       |
23 | | `sign`  | Whether signed estimates should be returned. |
24 | 
25 | The input prices must be sorted in ascending order of the timestamp.
26 | 
27 | The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%.
28 | 
29 | ## Cite as
30 | 
31 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
32 | 
33 | A BibTeX entry for LaTeX users is:
34 | 
35 | ```bibtex
36 | @article{edge,
37 |   title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices},
38 |   journal = {Journal of Financial Economics},
39 |   volume = {161},
40 |   pages = {103916},
41 |   year = {2024},
42 |   doi = {https://doi.org/10.1016/j.jfineco.2024.103916},
43 |   author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke},
44 | }
45 | ```
46 | 
47 | 


--------------------------------------------------------------------------------
/matlab/README.md:
--------------------------------------------------------------------------------
 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
 2 | 
 3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
 4 | 
 5 | ## Installation
 6 | 
 7 | Download the file [`edge.m`](https://github.com/eguidotti/bidask/tree/main/matlab/edge.m) into your working directory.
 8 | 
 9 | ## Usage
10 | 
11 | Import the estimator:
12 | 
13 | ```matlab
14 | import edge.*
15 | ```
16 | 
17 | Arguments:
18 | 
19 | ```matlab
20 | edge(open, high, low, close, sign=false)
21 | ```
22 | 
23 | | field   | description                                 |
24 | | ------- | ------------------------------------------- |
25 | | `open`  | Vector of open prices with size `T` x `1`.  |
26 | | `high`  | Vector of high prices with size `T` x `1`.  |
27 | | `low`   | Vector of low prices with size `T` x `1`.   |
28 | | `close` | Vector of close prices with size `T` x `1`. |
29 | | `sign`  | Whether to return signed estimates.         |
30 | 
31 | The input prices must be sorted in ascending order of the timestamp. 
32 | 
33 | The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%.
34 | 
35 | ## Example
36 | 
37 | ```matlab
38 | import edge.*
39 | 
40 | df = readmatrix('https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv');
41 | edge(df(:,1), df(:,2), df(:,3), df(:,4))
42 | ```
43 | 
44 | ## Cite as
45 | 
46 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
47 | 
48 | A BibTeX entry for LaTeX users is:
49 | 
50 | ```bibtex
51 | @article{edge,
52 |   title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices},
53 |   journal = {Journal of Financial Economics},
54 |   volume = {161},
55 |   pages = {103916},
56 |   year = {2024},
57 |   doi = {https://doi.org/10.1016/j.jfineco.2024.103916},
58 |   author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke},
59 | }
60 | ```
61 | 
62 | 


--------------------------------------------------------------------------------
/julia/README.md:
--------------------------------------------------------------------------------
 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
 2 | 
 3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
 4 | 
 5 | ## Installation
 6 | 
 7 | Install this package with:
 8 | 
 9 | ```julia
10 | using Pkg
11 | Pkg.add(PackageSpec(url="https://github.com/eguidotti/bidask.git", subdir="julia/"))
12 | ```
13 | 
14 | ## Usage
15 | 
16 | Import the package:
17 | 
18 | ```julia
19 | using BidAsk
20 | ```
21 | 
22 | Arguments:
23 | 
24 | ```julia
25 | edge(open, high, low, close, sign=false)
26 | ```
27 | 
28 | | field   | description                         |
29 | | ------- | ----------------------------------- |
30 | | `open`  | AbstractVector of open prices.      |
31 | | `high`  | AbstractVector of high prices.      |
32 | | `low`   | AbstractVector of low prices.       |
33 | | `close` | AbstractVector of close prices.     |
34 | | `sign`  | Whether to return signed estimates. |
35 | 
36 | The input prices must be sorted in ascending order of the timestamp.
37 | 
38 | The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%.
39 | 
40 | ## Example
41 | 
42 | ```julia
43 | using BidAsk
44 | using CSV
45 | 
46 | df = CSV.File(download("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv"))
47 | edge(df.:Open, df.:High, df.:Low, df.:Close)    
48 | ```
49 | 
50 | ## Cite as
51 | 
52 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
53 | 
54 | A BibTeX entry for LaTeX users is:
55 | 
56 | ```bibtex
57 | @article{edge,
58 |   title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices},
59 |   journal = {Journal of Financial Economics},
60 |   volume = {161},
61 |   pages = {103916},
62 |   year = {2024},
63 |   doi = {https://doi.org/10.1016/j.jfineco.2024.103916},
64 |   author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke},
65 | }
66 | ```
67 | 
68 | 


--------------------------------------------------------------------------------
/r/R/ohlc.R:
--------------------------------------------------------------------------------
 1 | #' OHLC Estimators
 2 | #'
 3 | #' @keywords internal
 4 | #'
 5 | OHLC <- function(open, high, low, close, width, method, sign, na.rm){
 6 | 
 7 |   splitmethods <- strsplit(method, split = ".", fixed = TRUE)
 8 |   uniquemethods <- unique(unlist(splitmethods))
 9 |   ok <- c("OHL","OHLC","CHL","CHLO")
10 |   if(length(ko <- setdiff(uniquemethods, ok)))
11 |     stop(sprintf(
12 |       "Method(s) '%s' not available. The available methods include '%s', or any combination of them, e.g. 'OHLC.CHLO'.",
13 |        paste(ko, collapse = "', '"), paste(ok, collapse = "', '")
14 |     ))
15 | 
16 |   o <- log(open)
17 |   h <- log(high)
18 |   l <- log(low)
19 |   c <- log(close)
20 |   m <- (h + l) / 2
21 |   
22 |   c1 <- shift(c, 1)
23 |   h1 <- shift(h, 1)
24 |   l1 <- shift(l, 1)
25 |   m1 <- shift(m, 1)
26 | 
27 |   if(length(c1) == 0) c1 <- rep(NA, length(h))
28 |   tau <- ifelse(is.na(h) | is.na(l), NA, h != l | l != c1)
29 |   tau[1] <- NA
30 |   
31 |   shift <- 1
32 |   pt <- rmean(tau, width = width, shift = shift, na.rm = na.rm)
33 |   nt <- rsum(tau, width = width, shift = shift, na.rm = TRUE)
34 |   
35 |   if("OHL" %in% uniquemethods | "OHLC" %in% uniquemethods){
36 |     po1 <- rmean(tau * (o != h), width = width, shift = shift, na.rm = na.rm)
37 |     po2 <- rmean(tau * (o != l), width = width, shift = shift, na.rm = na.rm)
38 |     po <- po1 + po2
39 |   }
40 |   
41 |   if("CHL" %in% uniquemethods | "CHLO" %in% uniquemethods){
42 |     pc1 <- rmean(tau * (c1 != h1), width = width, shift = shift, na.rm = na.rm)
43 |     pc2 <- rmean(tau * (c1 != l1), width = width, shift = shift, na.rm = na.rm)
44 |     pc <- pc1 + pc2
45 |   }
46 |   
47 |   s2 <- function(r1, r2, pi){
48 |     x <- data.frame(r1*r2, r1, tau*r2); x[1,] <- NA
49 |     m <- rmean(x, width = width, shift = shift, na.rm = na.rm)
50 |     m[which(nt < 2 | pi == 0),] <- NA
51 |     -8 / pi * (m[,1] - (m[,2] * m[,3]) / pt)
52 |   }
53 |   
54 |   if("OHL" %in% uniquemethods)
55 |     s2.OHL <- s2(m - o, o - m1, po)
56 |   if("OHLC" %in% uniquemethods)
57 |     s2.OHLC <- s2(m - o, o - c1, po)
58 |   if("CHL" %in% uniquemethods)
59 |     s2.CHL <- s2(m - c1, c1 - m1, pc)
60 |   if("CHLO" %in% uniquemethods)
61 |     s2.CHLO <- s2(o - c1, c1 - m1, pc)
62 | 
63 |   s <- lapply(splitmethods, function(m){
64 |     expr <- sprintf("(%s)/%s", paste0("s2.", m, collapse = "+"), length(m))
65 |     s2 <- eval(parse(text = expr))
66 |     s <- sqrt(abs(s2))
67 |     if(sign) s <- s * base::sign(s2)
68 |     return(s)
69 |   })
70 |   
71 |   names(s) <- method
72 |   return(s)
73 |   
74 | }
75 | 


--------------------------------------------------------------------------------
/python/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # Extra
132 | .DS_Store
133 | .idea
134 | poetry.lock
135 | 


--------------------------------------------------------------------------------
/r/man/edge_rolling.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/edge.R
 3 | \name{edge_rolling}
 4 | \alias{edge_rolling}
 5 | \title{Rolling Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices}
 6 | \usage{
 7 | edge_rolling(open, high, low, close, width, sign = FALSE, na.rm = FALSE)
 8 | }
 9 | \arguments{
10 | \item{open}{numeric vector of open prices.}
11 | 
12 | \item{high}{numeric vector of high prices.}
13 | 
14 | \item{low}{numeric vector of low prices.}
15 | 
16 | \item{close}{numeric vector of close prices.}
17 | 
18 | \item{width}{if an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. See examples.}
19 | 
20 | \item{sign}{whether to return signed estimates.}
21 | 
22 | \item{na.rm}{whether to ignore missing values.}
23 | }
24 | \value{
25 | Vector of spread estimates. 
26 | A value of 0.01 corresponds to a spread of 1\%.
27 | This function always returns a result of the same length as the input prices.
28 | }
29 | \description{
30 | Implements a rolling window calculation of the efficient estimator of bid-ask spreads 
31 | from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
32 | \doi{10.1016/j.jfineco.2024.103916}.
33 | }
34 | \details{
35 | Prices must be sorted in ascending order of the timestamp.
36 | }
37 | \examples{
38 | # reduce number of threads to pass CRAN checks (you can ignore this)
39 | data.table::setDTthreads(1)
40 | 
41 | # simulate open, high, low, and close prices with spread 1\%
42 | x <- sim(n = 1000, spread = 0.01)
43 | 
44 | # estimate the spread using a rolling window
45 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21)
46 | tail(s)
47 | 
48 | # estimate the spread using custom endpoints
49 | ep <- c(3, 35, 100)
50 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = ep)
51 | s[c(35, 100)]
52 | # equivalent to
53 | edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35])
54 | edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100])
55 | 
56 | # estimate the spread using an expanding window
57 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x))
58 | tail(s)
59 | # equivalent to
60 | s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE)
61 | tail(s)
62 | 
63 | }
64 | \references{
65 | Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
66 | \doi{10.1016/j.jfineco.2024.103916}
67 | }
68 | 


--------------------------------------------------------------------------------
/r/man/sim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sim.R
 3 | \name{sim}
 4 | \alias{sim}
 5 | \title{Simulation of Open, High, Low, and Close Prices}
 6 | \usage{
 7 | sim(
 8 |   n = 10000,
 9 |   trades = 390,
10 |   prob = 1,
11 |   spread = 0.01,
12 |   volatility = 0.03,
13 |   overnight = 0,
14 |   drift = 0,
15 |   units = 1,
16 |   sign = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{n}{the number of periods to simulate.}
21 | 
22 | \item{trades}{the number of trades per period.}
23 | 
24 | \item{prob}{the probability to observe a trade.}
25 | 
26 | \item{spread}{the bid-ask spread.}
27 | 
28 | \item{volatility}{the open-to-close volatility.}
29 | 
30 | \item{overnight}{the close-to-open volatility.}
31 | 
32 | \item{drift}{the expected return per period.}
33 | 
34 | \item{units}{the units of the time period. One of: \code{1}, \code{sec}, \code{min}, \code{hour}, \code{day}, \code{week}, \code{month}, \code{year}.}
35 | 
36 | \item{sign}{whether to return positive prices for buys and negative prices for sells.}
37 | }
38 | \value{
39 | A data.frame of open, high, low, and close prices if \code{units=1} (default). 
40 | Otherwise, an \code{xts} object is returned (requires the \code{xts} package to be installed).
41 | }
42 | \description{
43 | This function performs simulations consisting of \code{n} periods and where each period consists of a given number of \code{trades}.
44 | For each trade, the actual price \eqn{P_t} is simulated as \eqn{P_t = P_{t-1}e^{\sigma x}}, where \eqn{\sigma} is the standard deviation per trade and \eqn{x} is a random draw from a unit normal distribution.
45 | The standard deviation per trade equals the \code{volatility} divided by the square root of the number of \code{trades}.
46 | Trades are assumed to be observed with a given \code{probability}.
47 | The bid (ask) for each trade is defined as \eqn{P_t} multiplied by one minus (plus) half the \code{spread} and we assume a 50\% chance that a bid (ask) is observed.
48 | High and low prices equal the highest and lowest prices observed during the period.
49 | Open and Close prices equal the first and the last price observed in the period.
50 | If no trade is observed for a period, then the previous Close is used as the Open, High, Low, and Close prices for that period.
51 | }
52 | \examples{
53 | # reduce number of threads to pass CRAN checks (you can ignore this)
54 | data.table::setDTthreads(1)
55 | 
56 | # simulate 10 open, high, low, and close prices with spread 1\%
57 | sim(n = 10, spread = 0.01)
58 | 
59 | }
60 | \references{
61 | Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
62 | \doi{10.1016/j.jfineco.2024.103916}
63 | }
64 | 


--------------------------------------------------------------------------------
/julia/src/BidAsk.jl:
--------------------------------------------------------------------------------
  1 | module BidAsk
  2 | 
  3 | using Statistics
  4 | 
  5 | """
  6 | Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
  7 | 
  8 | Implements the efficient estimator of bid-ask spreads from open, high, low, 
  9 | and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
 10 | https://doi.org/10.1016/j.jfineco.2024.103916
 11 | 
 12 | Parameters
 13 | ----------
 14 | - `open`: AbstractVector of open prices
 15 | - `high`: AbstractVector of high prices
 16 | - `low`: AbstractVector of low prices
 17 | - `close`: AbstractVector of close prices
 18 | - `sign`: Whether to return signed estimates
 19 | 
 20 | Notes
 21 | -----
 22 | Prices must be sorted in ascending order of the timestamp.
 23 | 
 24 | Returns
 25 | -------
 26 | The spread estimate. A value of 0.01 corresponds to a spread of 1%.
 27 | 
 28 | """
 29 | function edge(open::AbstractVector, high::AbstractVector, low::AbstractVector, close::AbstractVector, sign::Bool = false)
 30 |     
 31 |     o = log.(open)
 32 |     h = log.(high)
 33 |     l = log.(low)
 34 |     c = log.(close)
 35 |     m = (h .+ l) ./ 2.0
 36 | 
 37 |     h1 = h[1:end-1]
 38 |     l1 = l[1:end-1]
 39 |     c1 = c[1:end-1]
 40 |     m1 = m[1:end-1]
 41 | 
 42 |     o = o[2:end]
 43 |     h = h[2:end]
 44 |     l = l[2:end]
 45 |     c = c[2:end]
 46 |     m = m[2:end]
 47 | 
 48 |     tau = ifelse.(ismissing.(h) .| ismissing.(l) .| ismissing.(c1),  missing,  (h .!= l) .| (l .!= c1))
 49 |     phi1 = collect(skipmissing(tau .* (o .!= h)))
 50 |     phi2 = collect(skipmissing(tau .* (o .!= l)))
 51 |     phi3 = collect(skipmissing(tau .* (c1 .!= h1)))
 52 |     phi4 = collect(skipmissing(tau .* (c1 .!= l1)))
 53 | 
 54 |     nt = sum(skipmissing(tau), init=0)
 55 |     if nt < 2 || length(phi1) == 0 || length(phi2) == 0 || length(phi3) == 0 || length(phi4) == 0
 56 |         return NaN
 57 |     end
 58 | 
 59 |     pt = nt / count(!ismissing, tau)
 60 |     po = mean(phi1) + mean(phi2)
 61 |     pc = mean(phi3) + mean(phi4)
 62 |     if po == 0 || pc == 0
 63 |         return NaN
 64 |     end
 65 | 
 66 |     r1 = m .- o
 67 |     r2 = o .- m1
 68 |     r3 = m .- c1
 69 |     r4 = c1 .- m1
 70 |     r5 = o .- c1
 71 | 
 72 |     d1 = r1 .- tau .* mean(skipmissing(r1)) ./ pt
 73 |     d3 = r3 .- tau .* mean(skipmissing(r3)) ./ pt
 74 |     d5 = r5 .- tau .* mean(skipmissing(r5)) ./ pt
 75 | 
 76 |     x1 = - 4.0 ./ po .* d1 .* r2 .- 4.0 ./ pc .* d3 .* r4 
 77 |     x2 = - 4.0 ./ po .* d1 .* r5 .- 4.0 ./ pc .* d5 .* r4 
 78 |     
 79 |     e1 = mean(skipmissing(x1))
 80 |     e2 = mean(skipmissing(x2))
 81 | 
 82 |     v1 = mean(skipmissing(x1 .* x1)) - e1 * e1
 83 |     v2 = mean(skipmissing(x2 .* x2)) - e2 * e2
 84 | 
 85 |     vt = v1 + v2
 86 |     s2 = ifelse(vt > 0, (v2*e1 + v1*e2) / vt, (e1 + e2) / 2)
 87 | 
 88 |     s = sqrt(abs(s2))
 89 |     if sign && s2 < 0
 90 |         s = -s
 91 |     end
 92 | 
 93 |     return s
 94 | end
 95 | 
 96 | export 
 97 | edge
 98 | 
 99 | end # module
100 | 


--------------------------------------------------------------------------------
/matlab/edge.m:
--------------------------------------------------------------------------------
  1 | function s = edge(open, high, low, close, sign)
  2 |     % Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
  3 |     %
  4 |     % Implements the efficient estimator of bid-ask spreads from open, high, low, 
  5 |     % and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
  6 |     % https://doi.org/10.1016/j.jfineco.2024.103916
  7 |     %
  8 |     % Parameters
  9 |     % ----------
 10 |     % - `open`: vector of open prices with size Tx1
 11 |     % - `high`: vector of high prices with size Tx1
 12 |     % - `low`: vector of low prices with size Tx1
 13 |     % - `close`: vector of close prices with size Tx1
 14 |     % - `sign`: boolean value indicating whether to return signed estimates
 15 |     %
 16 |     % Notes
 17 |     % -----
 18 |     % Prices must be sorted in ascending order of the timestamp.
 19 |     %
 20 |     % Returns
 21 |     % -------
 22 |     % The spread estimate. A value of 0.01 corresponds to a spread of 1%.
 23 |     %
 24 |     if nargin < 5
 25 |         sign = false;
 26 |     end
 27 | 
 28 |     p = log([open, high, low, close]);
 29 | 
 30 |     o = p(:,1); 
 31 |     h = p(:,2); 
 32 |     l = p(:,3);
 33 |     c = p(:,4);
 34 |     m = (h + l) / 2;
 35 | 
 36 |     h1 = h(1:end-1,:);
 37 |     l1 = l(1:end-1,:);
 38 |     c1 = c(1:end-1,:);
 39 |     m1 = m(1:end-1,:);
 40 | 
 41 |     o = o(2:end,:);
 42 |     h = h(2:end,:);
 43 |     l = l(2:end,:);
 44 |     c = c(2:end,:);
 45 |     m = m(2:end,:);
 46 | 
 47 |     tau = NaN(size(c));
 48 |     idx = ~(isnan(h) | isnan(l) | isnan(c1));
 49 |     tau(idx) = (h(idx) ~= l(idx)) | (l(idx) ~= c1(idx));
 50 | 
 51 |     phi1 = NaN(size(c));
 52 |     idx = ~(isnan(o) | isnan(h));
 53 |     phi1(idx) = tau(idx) .* (o(idx) ~= h(idx));
 54 | 
 55 |     phi2 = NaN(size(c));
 56 |     idx = ~(isnan(o) | isnan(l));
 57 |     phi2(idx) = tau(idx) .* (o(idx) ~= l(idx));
 58 |     
 59 |     phi3 = NaN(size(c));
 60 |     idx = ~(isnan(c1) | isnan(h1));
 61 |     phi3(idx) = tau(idx) .* (c1(idx) ~= h1(idx));
 62 |     
 63 |     phi4 = NaN(size(c));
 64 |     idx = ~(isnan(c1) | isnan(l1));
 65 |     phi4(idx) = tau(idx) .* (c1(idx) ~= l1(idx));
 66 |   
 67 |     pt = mean(tau, "omitnan");
 68 |     po = mean(phi1, "omitnan") + mean(phi2, "omitnan");
 69 |     pc = mean(phi3, "omitnan") + mean(phi4, "omitnan");
 70 |     
 71 |     if sum(tau, "omitnan") < 2 || po == 0 || pc == 0
 72 |         s = NaN;
 73 |         return;
 74 |     end
 75 | 
 76 |     r1 = m-o;
 77 |     r2 = o-m1;
 78 |     r3 = m-c1;
 79 |     r4 = c1-m1;
 80 |     r5 = o-c1;
 81 |   
 82 |     d1 = r1 - tau .* mean(r1, "omitnan") / pt;
 83 |     d3 = r3 - tau .* mean(r3, "omitnan") / pt;
 84 |     d5 = r5 - tau .* mean(r5, "omitnan") / pt;
 85 |   
 86 |     x1 = -4. / po .* d1 .* r2 -4. / pc .* d3 .* r4;
 87 |     x2 = -4. / po .* d1 .* r5 -4. / pc .* d5 .* r4;
 88 |   
 89 |     e1 = mean(x1, "omitnan");
 90 |     e2 = mean(x2, "omitnan");
 91 | 
 92 |     v1 = mean(x1.^2, "omitnan") - e1^2;
 93 |     v2 = mean(x2.^2, "omitnan") - e2^2;
 94 |   
 95 |     vt = v1 + v2;
 96 |     if vt > 0
 97 |         s2 = (v2*e1 + v1*e2) / vt;
 98 |     else
 99 |         s2 = (e1 + e2) / 2;
100 |     end
101 |   
102 |     s = sqrt(abs(s2));
103 |     if sign && s2 < 0
104 |         s = -s;
105 |     end
106 | 
107 | end
108 | 
109 | 


--------------------------------------------------------------------------------
/c++/edge.cpp:
--------------------------------------------------------------------------------
  1 | #include "edge.h"
  2 | #include <cmath>
  3 | #include <vector>
  4 | #include <stdexcept>
  5 | 
  6 | template <typename T>
  7 | double mean(const std::vector<T> &x){
  8 |   unsigned int n = x.size(); double sum = 0.0;
  9 |   for(unsigned int i=0; i<n; i++) sum += x[i];
 10 |   return sum / n;
 11 | }
 12 | 
 13 | /*
 14 | Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
 15 | 
 16 | Implements the efficient estimator of bid-ask spreads from open, high, low, 
 17 | and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
 18 | https://doi.org/10.1016/j.jfineco.2024.103916
 19 | 
 20 | Parameters
 21 | ----------
 22 | - `open`: std::vector of open prices
 23 | - `high`: std::vector of high prices
 24 | - `low`: std::vector of low prices
 25 | - `close`: std::vector of close prices
 26 | - `sign`: whether to return signed estimates
 27 | 
 28 | Notes
 29 | -----
 30 | Prices must be sorted in ascending order of the timestamp.
 31 | 
 32 | Returns
 33 | -------
 34 | The spread estimate. A value of 0.01 corresponds to a spread of 1%.
 35 | 
 36 | */
 37 | double edge(
 38 |     const std::vector<double> &open,
 39 |     const std::vector<double> &high,
 40 |     const std::vector<double> &low,
 41 |     const std::vector<double> &close,
 42 |     const bool sign){
 43 | 
 44 |   unsigned int n = open.size();
 45 |   if(high.size() != n or low.size() != n or close.size() != n){
 46 |     throw std::invalid_argument(
 47 |         "open, high, low, close must have the same length"
 48 |     );
 49 |   }
 50 |   
 51 |   std::vector<double> o(n), h(n), l(n), c(n), m(n);
 52 |   for(unsigned int i=0; i<n; i++){
 53 |     o[i] = std::log(open[i]);
 54 |     h[i] = std::log(high[i]);
 55 |     l[i] = std::log(low[i]);
 56 |     c[i] = std::log(close[i]);
 57 |     m[i] = (h[i] + l[i]) / 2.;
 58 |   }
 59 |   
 60 |   std::vector<unsigned int> tau(n-1), phi1(n-1), phi2(n-1), phi3(n-1), phi4(n-1);
 61 |   std::vector<double> r1(n-1), r2(n-1), r3(n-1), r4(n-1), r5(n-1);
 62 |   for(unsigned int i=0; i<n-1; i++){
 63 |     tau[i] = (h[i+1] != l[i+1]) | (l[i+1] != c[i]);
 64 |     phi1[i] = (o[i+1] != h[i+1]) & tau[i];
 65 |     phi2[i] = (o[i+1] != l[i+1]) & tau[i];
 66 |     phi3[i] = (c[i] != h[i]) & tau[i];
 67 |     phi4[i] = (c[i] != l[i]) & tau[i];
 68 |     r1[i] = m[i+1] - o[i+1];
 69 |     r2[i] = o[i+1] - m[i];
 70 |     r3[i] = m[i+1] - c[i];
 71 |     r4[i] = c[i] - m[i];
 72 |     r5[i] = o[i+1] - c[i];
 73 |   }
 74 |   
 75 |   double pt = mean(tau);
 76 |   double m1 = mean(r1), m3 = mean(r3), m5 = mean(r5); 
 77 |   std::vector<double> d1(n-1), d3(n-1), d5(n-1);
 78 |   for(unsigned int i=0; i<n-1; i++){
 79 |     d1[i] = r1[i] - tau[i] * m1 / pt;
 80 |     d3[i] = r3[i] - tau[i] * m3 / pt;
 81 |     d5[i] = r5[i] - tau[i] * m5 / pt;
 82 |   }
 83 |   
 84 |   double po = mean(phi1) + mean(phi2), pc = mean(phi3) + mean(phi4);
 85 |   std::vector<double> x1(n-1), x2(n-1), x11(n-1), x22(n-1);
 86 |   for(unsigned int i=0; i<n-1; i++){
 87 |     x1[i] = -4./po*d1[i]*r2[i] -4./pc*d3[i]*r4[i];
 88 |     x2[i] = -4./po*d1[i]*r5[i] -4./pc*d5[i]*r4[i];
 89 |     x11[i] = x1[i] * x1[i];
 90 |     x22[i] = x2[i] * x2[i];
 91 |   }
 92 |   
 93 |   double e1 = mean(x1), e2 = mean(x2);
 94 |   double v1 = mean(x11) - e1*e1, v2 = mean(x22) - e2*e2;
 95 |   double s2 = (v2*e1 + v1*e2) / (v1 + v2);
 96 |   double s = std::sqrt(std::abs(s2));
 97 |   
 98 |   if(sign & (s2 < 0))
 99 |     s = -s;
100 |   
101 |   return s;
102 |   
103 | }
104 | 


--------------------------------------------------------------------------------
/sas/README.md:
--------------------------------------------------------------------------------
 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
 2 | 
 3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
 4 | 
 5 | ## Installation
 6 | 
 7 | Download the SAS file [`edge.sas`](https://github.com/eguidotti/bidask/tree/main/sas/edge.sas) into your working directory. For instance:
 8 | 
 9 | ```shell
10 | wget https://github.com/eguidotti/bidask/raw/main/sas/edge.sas
11 | ```
12 | 
13 | ## Usage
14 | 
15 | The code reads a SAS dataset containing open, high, low, close prices for multiple groups, and saves the spread estimates to an output file. Run the file [`edge.sas`](https://github.com/eguidotti/bidask/tree/main/sas/edge.sas) from the command line as follows:
16 | 
17 | ```SAS
18 | sas edge.sas \
19 |   -set in <...> \
20 |   -set out <...> \
21 |   -set by <...> \
22 |   -set open <...> \
23 |   -set high <...> \
24 |   -set low <...> \
25 |   -set close <...> \
26 |   -set sign <...>
27 | ```
28 | 
29 | | field   | description                                                  |
30 | | ------- | ------------------------------------------------------------ |
31 | | `in`    | The path to a SAS dataset containing open, high, low, and close prices for multiple groups. |
32 | | `out`   | The name of the file to output spread estimates. See [here](https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/acpcref/p1d0tocg3njhmfn1d4ld2covlwm0.htm) for supported file extensions. |
33 | | `by`    | Comma-separated list of column(s) to group by; e.g., `symbol` or `date,symbol`. |
34 | | `open`  | The name of the column containing open prices.               |
35 | | `high`  | The name of the column containing high prices.               |
36 | | `low`   | The name of the column containing low prices.                |
37 | | `close` | The name of the column containing close prices.              |
38 | | `sign`  | Boolean value (0/1) indicating whether to return signed estimates. |
39 | 
40 | The input prices must be sorted in ascending order of the timestamp within each group. 
41 | 
42 | The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%.
43 | 
44 | ## Example
45 | 
46 | The file [`ohlc.sas7bdat`](ohlc.sas7bdat) contains simulated open, high, low, and close prices as described [here](https://github.com/eguidotti/bidask/tree/main/pseudocode) for two symbols. Download the file into your working directory. For instance:
47 | 
48 | ```bash
49 | wget https://github.com/eguidotti/bidask/raw/main/sas/ohlc.sas7bdat
50 | ```
51 | 
52 | Estimate the spread for each symbol:
53 | 
54 | ```SAS
55 | sas edge.sas \
56 |   -set in ohlc.sas7bdat \
57 |   -set out edge.csv \
58 |   -set by Symbol \
59 |   -set open Open \
60 |   -set high High \
61 |   -set low Low \
62 |   -set close Close \
63 |   -set sign 0
64 | ```
65 | 
66 | The output file `edge.csv` contains the following estimates:
67 | 
68 | | Symbol | EDGE         |
69 | | ------ | ------------ |
70 | | A      | 0.0101849035 |
71 | | B      | 0.0101849035 |
72 | 
73 | ## Cite as
74 | 
75 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
76 | 
77 | A BibTeX entry for LaTeX users is:
78 | 
79 | ```bibtex
80 | @article{edge,
81 |   title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices},
82 |   journal = {Journal of Financial Economics},
83 |   volume = {161},
84 |   pages = {103916},
85 |   year = {2024},
86 |   doi = {https://doi.org/10.1016/j.jfineco.2024.103916},
87 |   author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke},
88 | }
89 | ```
90 | 
91 | 


--------------------------------------------------------------------------------
/python/tests/test_edge.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | import numpy as np
  3 | import pandas as pd
  4 | from bidask import edge, edge_rolling, edge_expanding
  5 | 
  6 | 
  7 | df = pd.read_csv(
  8 |     "https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv"
  9 | )
 10 | 
 11 | df_miss = pd.read_csv(
 12 |     "https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc-miss.csv"
 13 | )
 14 | 
 15 | 
 16 | def test_edge():
 17 |     """
 18 |     Compares the `edge` function to the known test case
 19 |     """
 20 |     estimate = edge(df.Open, df.High, df.Low, df.Close)
 21 |     assert estimate == pytest.approx(0.0101849034905478)
 22 |   
 23 |     estimate = edge(df.Open[0:10], df.High[0:10], df.Low[0:10], df.Close[0:10], True)
 24 |     assert estimate == pytest.approx(-0.016889917516422)
 25 | 
 26 |     estimate = edge(df_miss.Open, df_miss.High, df_miss.Low, df_miss.Close)
 27 |     assert estimate == pytest.approx(0.01013284969780197)
 28 | 
 29 |     assert np.isnan(edge(
 30 |         [18.21, 17.61, 17.61],
 31 |         [18.21, 17.61, 17.61],
 32 |         [17.61, 17.61, 17.61],
 33 |         [17.61, 17.61, 17.61]
 34 |     ))
 35 | 
 36 | 
 37 | @pytest.mark.parametrize("window", [1, 2, 3, 4, 42, 1000])
 38 | @pytest.mark.parametrize("sign", [True, False])
 39 | @pytest.mark.parametrize("step", [1, 2, 5, 10])
 40 | def test_edge_rolling(window: int, step: int, sign: bool):
 41 |     """
 42 |     Compares the rolling vectorized implementation to the original function.
 43 | 
 44 |     Parameters
 45 |     ----------
 46 |     - `window` : int
 47 |         The rolling window size.
 48 |     - `step`: int
 49 |         Evaluate the window at every step result.
 50 |     - `sign`: bool
 51 |         Whether to use signed estimates.
 52 |     """
 53 |     rolling_estimates = edge_rolling(df=df, window=window, step=step, sign=sign)
 54 |     assert isinstance(rolling_estimates, pd.Series)
 55 | 
 56 |     expected_estimates = []
 57 |     for t in range(0, len(df), step):
 58 |         t1 = t + 1
 59 |         t0 = t1 - window
 60 |         expected_estimates.append(edge(
 61 |             df.Open.values[t0:t1],
 62 |             df.High.values[t0:t1],
 63 |             df.Low.values[t0:t1],
 64 |             df.Close.values[t0:t1],
 65 |             sign=sign
 66 |         ) if t0 >= 0 else np.nan)
 67 |         
 68 |     np.testing.assert_allclose(
 69 |         actual = rolling_estimates,
 70 |         desired = expected_estimates,
 71 |         rtol=1e-8,
 72 |         atol=1e-8,
 73 |         err_msg='Rolling estimates do not match expected estimates'
 74 |     )
 75 | 
 76 | 
 77 | @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 42, 1000])
 78 | @pytest.mark.parametrize("sign", [True, False])
 79 | def test_edge_expanding(min_periods: int, sign: bool):
 80 |     """
 81 |     Compares the expanding vectorized implementation to the original function.
 82 | 
 83 |     Parameters
 84 |     ----------
 85 |     - `min_periods` : int
 86 |         Minimum number of observations in window required to have a value; otherwise, result is np.nan.
 87 |     - `sign`: bool
 88 |         Whether to use signed estimates.
 89 |     """
 90 |     expanding_estimates = edge_expanding(df=df, min_periods=min_periods, sign=sign)
 91 |     assert isinstance(expanding_estimates, pd.Series)
 92 | 
 93 |     expected_estimates = []
 94 |     for t in range(0, len(df)):
 95 |         t1 = t + 1
 96 |         expected_estimates.append(edge(
 97 |             df.Open.values[0:t1],
 98 |             df.High.values[0:t1],
 99 |             df.Low.values[0:t1],
100 |             df.Close.values[0:t1],
101 |             sign=sign
102 |         ) if t1 >= min_periods else np.nan)
103 |         
104 |     np.testing.assert_allclose(
105 |         actual = expanding_estimates,
106 |         desired = expected_estimates,
107 |         rtol=1e-8,
108 |         atol=1e-8,
109 |         err_msg='Expanding estimates do not match expected estimates'
110 |     )
111 | 


--------------------------------------------------------------------------------
/python/bidask/edge.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | import numpy as np
  3 | 
  4 | 
  5 | def edge(open: np.array, high: np.array, low: np.array, close: np.array, sign: bool = False) -> float:
  6 |     """
  7 |     Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
  8 | 
  9 |     Implements the efficient estimator of bid-ask spreads from open, high, low, 
 10 |     and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
 11 |     https://doi.org/10.1016/j.jfineco.2024.103916
 12 | 
 13 |     Parameters
 14 |     ----------
 15 |     - `open`: array-like 
 16 |         Vector of open prices sorted in ascending order of the timestamp.
 17 |     - `high`: array-like 
 18 |         Vector of high prices sorted in ascending order of the timestamp.
 19 |     - `low`: array-like 
 20 |         Vector of low prices sorted in ascending order of the timestamp.
 21 |     - `close`: array-like 
 22 |         Vector of close prices sorted in ascending order of the timestamp.
 23 |     - `sign`: bool
 24 |         Whether to return signed estimates (default: False).
 25 | 
 26 |     Returns
 27 |     -------
 28 |     float
 29 |         The spread estimate (NaN if it cannot be computed). A value of 0.01 corresponds to a spread of 1%.
 30 |     """
 31 |     # check that the open, high, low, and close prices have the same length
 32 |     nobs = len(open)
 33 |     if len(high) != nobs or len(low) != nobs or len(close) != nobs:
 34 |         raise ValueError("Open, high, low, and close prices must have the same length")
 35 | 
 36 |     # return missing if there are fewer than 3 observations
 37 |     if nobs < 3:
 38 |         return np.nan
 39 | 
 40 |     # compute log-prices
 41 |     o = np.log(np.asarray(open))
 42 |     h = np.log(np.asarray(high))
 43 |     l = np.log(np.asarray(low))
 44 |     c = np.log(np.asarray(close))
 45 |     m = (h + l) / 2.
 46 | 
 47 |     # shift log-prices by one period
 48 |     h1, l1, c1, m1 = h[:-1], l[:-1], c[:-1], m[:-1]
 49 |     o, h, l, c, m = o[1:], h[1:], l[1:], c[1:], m[1:]
 50 | 
 51 |     # compute log-returns
 52 |     r1 = m - o
 53 |     r2 = o - m1
 54 |     r3 = m - c1
 55 |     r4 = c1 - m1
 56 |     r5 = o - c1
 57 | 
 58 |     # compute indicator variables (NaN wherever one of the compared prices is NaN)
 59 |     tau = np.where(np.isnan(h) | np.isnan(l) | np.isnan(c1), np.nan, (h != l) | (l != c1))
 60 |     po1 = tau * np.where(np.isnan(o) | np.isnan(h), np.nan, o != h)
 61 |     po2 = tau * np.where(np.isnan(o) | np.isnan(l), np.nan, o != l)
 62 |     pc1 = tau * np.where(np.isnan(c1) | np.isnan(h1), np.nan, c1 != h1)
 63 |     pc2 = tau * np.where(np.isnan(c1) | np.isnan(l1), np.nan, c1 != l1)
 64 |     
 65 |     # ignore warnings raised by nanmean for all-NaN slices
 66 |     with warnings.catch_warnings():
 67 |         warnings.simplefilter('ignore', RuntimeWarning)
 68 | 
 69 |         # compute probabilities
 70 |         pt = np.nanmean(tau)
 71 |         po = np.nanmean(po1) + np.nanmean(po2)
 72 |         pc = np.nanmean(pc1) + np.nanmean(pc2)
 73 | 
 74 |         # return missing if there are fewer than two periods with tau=1 
 75 |         # or po or pc is zero
 76 |         if np.nansum(tau) < 2 or po == 0 or pc == 0:
 77 |             return np.nan
 78 |     
 79 |         # compute de-meaned log-returns
 80 |         d1 = r1 - np.nanmean(r1)/pt*tau
 81 |         d3 = r3 - np.nanmean(r3)/pt*tau
 82 |         d5 = r5 - np.nanmean(r5)/pt*tau
 83 |     
 84 |         # compute input vectors
 85 |         x1 = -4./po*d1*r2 + -4./pc*d3*r4 
 86 |         x2 = -4./po*d1*r5 + -4./pc*d5*r4 
 87 |     
 88 |         # compute expectations
 89 |         e1 = np.nanmean(x1)
 90 |         e2 = np.nanmean(x2)
 91 |     
 92 |         # compute variances
 93 |         v1 = np.nanmean(x1**2) - e1**2
 94 |         v2 = np.nanmean(x2**2) - e2**2
 95 |     
 96 |     # compute square spread by using a (equally) weighted 
 97 |     # average if the total variance is (not) positive
 98 |     vt = v1 + v2
 99 |     s2 = (v2*e1 + v1*e2) / vt if vt > 0 else (e1 + e2) / 2.
100 |     
101 |     # compute signed root: the sign of the squared spread is kept only if `sign` is True
102 |     s = np.sqrt(np.abs(s2))
103 |     if sign: 
104 |         s *= np.sign(s2)
105 |   
106 |     # return the spread
107 |     return float(s)
108 | 


--------------------------------------------------------------------------------
/r/man/spread.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/spread.R
 3 | \name{spread}
 4 | \alias{spread}
 5 | \title{Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices}
 6 | \usage{
 7 | spread(x, width = nrow(x), method = "EDGE", sign = FALSE, na.rm = FALSE)
 8 | }
 9 | \arguments{
10 | \item{x}{tabular data with columns named \code{open}, \code{high}, \code{low}, \code{close} (case-insensitive).}
11 | 
12 | \item{width}{if an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. By default, the full sample is used to compute a single spread estimate. See examples.}
13 | 
14 | \item{method}{the estimators to use. See details.}
15 | 
16 | \item{sign}{whether to return signed estimates.}
17 | 
18 | \item{na.rm}{whether to ignore missing values.}
19 | }
20 | \value{
21 | A data.frame of spread estimates, or an \code{xts} object if \code{x} is of class \code{xts}. 
22 | A value of 0.01 corresponds to a spread of 1\%.
23 | }
24 | \description{
25 | This function implements several methods to estimate bid-ask spreads
26 | from open, high, low, and close prices and it is optimized for fast 
27 | calculations over rolling and expanding windows.
28 | }
29 | \details{
30 | The method \code{EDGE} implements the Efficient Discrete Generalized Estimator described in Ardia, Guidotti, & Kroencke (JFE, 2024).
31 | 
32 | The methods \code{OHL}, \code{OHLC}, \code{CHL}, \code{CHLO} implement the generalized estimators described in Ardia, Guidotti, & Kroencke (JFE, 2024).
33 | They can be combined by concatenating their identifiers, e.g., \code{OHLC.CHLO} uses an average of the \code{OHLC} and \code{CHLO} estimators.
34 | 
35 | The method \code{AR} implements the estimator described in Abdi & Ranaldo (RFS, 2017). \code{AR2} implements their 2-period version.
36 | 
37 | The method \code{CS} implements the estimator described in Corwin & Schultz (JF, 2012). \code{CS2} implements their 2-period version. Both versions are adjusted for overnight (close-to-open) returns as described in the paper.
38 | 
39 | The method \code{ROLL} implements the estimator described in Roll (JF, 1984).
40 | }
41 | \examples{
42 | # reduce number of threads to pass CRAN checks (you can ignore this)
43 | data.table::setDTthreads(1)
44 | 
45 | # simulate open, high, low, and close prices with spread 1\%
46 | x <- sim(n = 1000, spread = 0.01)
47 | 
48 | # estimate the spread
49 | spread(x)
50 | # equivalent to
51 | edge(x$Open, x$High, x$Low, x$Close)
52 | 
53 | # estimate the spread using a rolling window of 21 periods
54 | s <- spread(x, width = 21)
55 | tail(s)
56 | # equivalent to
57 | s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21)
58 | tail(s)
59 | 
60 | # estimate the spread using an expanding window
61 | s <- spread(x, width = 1:nrow(x))
62 | tail(s)
63 | # equivalent to
64 | s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE)
65 | tail(s)
66 | 
67 | # estimate the spread using custom endpoints
68 | ep <- c(3, 35, 100)
69 | spread(x, width = ep)
70 | # equivalent to
71 | edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35])
72 | edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100])
73 | 
74 | # use multiple estimators
75 | spread(x, method = c("EDGE", "AR", "CS", "ROLL", "OHLC", "OHL.CHL"))
76 | 
77 | }
78 | \references{
79 | Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
80 | \doi{10.1016/j.jfineco.2024.103916}
81 | 
82 | Abdi, F., & Ranaldo, A. (2017). A simple estimation of bid-ask spreads from daily close, high, and low prices. Review of Financial Studies, 30 (12), 4437-4480.
83 | \doi{10.1093/rfs/hhx084}
84 | 
85 | Corwin, S. A., & Schultz, P. (2012). A simple way to estimate bid-ask spreads from daily high and low prices. Journal of Finance, 67 (2), 719-760.
86 | \doi{10.1111/j.1540-6261.2012.01729.x}
87 | 
88 | Roll, R. (1984). A simple implicit measure of the effective bid-ask spread in an efficient market. Journal of Finance, 39 (4), 1127-1139.
89 | \doi{10.1111/j.1540-6261.1984.tb03897.x}
90 | }
91 | 


--------------------------------------------------------------------------------
/r/R/sim.R:
--------------------------------------------------------------------------------
  1 | #' Simulation of Open, High, Low, and Close Prices
  2 | #'
  3 | #' This function performs simulations consisting of \code{n} periods and where each period consists of a given number of \code{trades}.
  4 | #' For each trade, the actual price \eqn{P_t} is simulated as \eqn{P_t = P_{t-1}e^{\sigma x}}, where \eqn{\sigma} is the standard deviation per trade and \eqn{x} is a random draw from a unit normal distribution.
  5 | #' The standard deviation per trade equals the \code{volatility} divided by the square root of the number of \code{trades}.
  6 | #' Trades are assumed to be observed with a given \code{probability}.
  7 | #' The bid (ask) for each trade is defined as \eqn{P_t} multiplied by one minus (plus) half the \code{spread} and we assume a 50\% chance that a bid (ask) is observed.
  8 | #' High and low prices equal the highest and lowest prices observed during the period.
  9 | #' Open and Close prices equal the first and the last price observed in the period.
 10 | #' If no trade is observed for a period, then the previous Close is used as the Open, High, Low, and Close prices for that period.
 11 | #'
 12 | #' @param n the number of periods to simulate.
 13 | #' @param trades the number of trades per period.
 14 | #' @param prob the probability to observe a trade.
 15 | #' @param spread the bid-ask spread.
 16 | #' @param volatility the open-to-close volatility.
 17 | #' @param overnight the close-to-open volatility.
 18 | #' @param drift the expected return per period.
 19 | #' @param units the units of the time period. One of: \code{1}, \code{sec}, \code{min} (or \code{minute}), \code{hour}, \code{day}, \code{week}, \code{month}, \code{year}.
 20 | #' @param sign whether to return positive prices for buys (trades at the ask) and negative prices for sells (trades at the bid).
 21 | #'
 22 | #' @return A data.frame of open, high, low, and close prices if \code{units=1} (default). 
 23 | #' Otherwise, an \code{xts} object is returned (requires the \code{xts} package to be installed).
 24 | #'
 25 | #' @references
 26 | #' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
 27 | #' \doi{10.1016/j.jfineco.2024.103916}
 28 | #'
 29 | #' @examples
 30 | #' # reduce number of threads to pass CRAN checks (you can ignore this)
 31 | #' data.table::setDTthreads(1)
 32 | #' 
 33 | #' # simulate 10 open, high, low, and close prices with spread 1%
 34 | #' sim(n = 10, spread = 0.01)
 35 | #' 
 36 | #' @export
 37 | #'
 38 | sim <- function(
 39 |     n = 10000, 
 40 |     trades = 390, 
 41 |     prob = 1, 
 42 |     spread = 0.01, 
 43 |     volatility = 0.03, 
 44 |     overnight = 0, 
 45 |     drift = 0, 
 46 |     units = 1,
 47 |     sign = FALSE){
 48 | 
 49 |   # sanitize units
 50 |   if(units == "minute") units <- "min"
 51 | 
 52 |   # check units
 53 |   valid <- c(1, "sec", "min", "hour", "day", "week", "month", "year")
 54 |   if(!(units %in% valid))
 55 |     stop(sprintf("units must be one of '%s'", paste(valid, collapse = "','")))
 56 | 
 57 |   # total number of observations
 58 |   m <- n*trades
 59 | 
 60 |   # close-to-close returns
 61 |   r <- rnorm(m, mean = drift/trades, sd = volatility/sqrt(trades))
 62 | 
 63 |   # close-to-open returns (seq_len keeps the index empty when n = 0)
 64 |   idx <- (seq_len(n) - 1) * trades + 1
 65 |   r[idx] <-  r[idx] + rnorm(n, sd = overnight)
 66 | 
 67 |   # compute prices: d is -0.5 for a trade at the bid and +0.5 for a trade at the ask
 68 |   d <- rbinom(m, size = 1, prob = 0.5) - 0.5
 69 |   p <- exp(cumsum(r)) * (1 + spread * d)
 70 |   
 71 |   # signed prices: use the trade direction so that a zero spread does not zero out all prices
 72 |   if(sign)
 73 |     p <- p * base::sign(d)
 74 | 
 75 |   # subset observations
 76 |   keep <- as.logical(rbinom(m, size = 1, prob = prob))
 77 | 
 78 |   # convert to OHLC
 79 |   ohlc <- matrix(nrow = n, ncol = 4)
 80 |   prev <- p[1]
 81 |   for(i in seq_len(n)){
 82 |     # indices of the i-th period
 83 |     idx <- (i-1)*trades + 1:trades
 84 |     # observed prices
 85 |     obs <- p[idx][keep[idx]]
 86 |     # if empty keep previous close
 87 |     if(!length(obs)) obs <- prev
 88 |     # index of last observation
 89 |     last <- length(obs)
 90 |     # unsigned prices
 91 |     uobs <- abs(obs)
 92 |     # fill matrix
 93 |     ohlc[i,] <- obs[c(1, which.max(uobs), which.min(uobs), last)]
 94 |     # store previous close
 95 |     prev <- obs[last]
 96 |   }
 97 | 
 98 |   if(units == 1){
 99 |     ohlc <- as.data.frame(ohlc)
100 |   }
101 |   else {
102 |     now <- Sys.time()
103 |     if(!(units %in% c("sec", "min", "hour")))
104 |       now <- as.Date(now)
105 |     time <- seq(now, length.out = n, by = units)
106 |     ohlc <- xts::xts(ohlc, order.by = time)
107 |   }
108 | 
109 |   colnames(ohlc) <- c("Open", "High", "Low", "Close")
110 |   return(ohlc)
111 | 
112 | }
113 | 


--------------------------------------------------------------------------------
/pseudocode/README.md:
--------------------------------------------------------------------------------
  1 | # Pseudocode
  2 | 
  3 | This file provides the pseudocode to simplify implementations of the estimator in any programming language. 
  4 | 
  5 | ### Input
  6 | 
  7 | Vectors of `open`, `high`, `low`, and `close` prices. The vectors must be sorted in ascending order of the timestamp. The function should also accept the argument `sign` specifying whether to return signed estimates.
  8 | 
  9 | ### Output
 10 | 
 11 | Numeric spread estimate. A value of 0.01 corresponds to a spread of 1%.
 12 | 
 13 | ### Algorithm
 14 | 
 15 | ```python
 16 | # check that the open, high, low, and close prices have the same length
 17 | nobs = len(open)
 18 | if len(high) != nobs or len(low) != nobs or len(close) != nobs:
 19 | 	raise error
 20 | 
 21 | # return missing if there are less than 3 observations
 22 | if nobs < 3:
 23 | 	return missing
 24 | 
 25 | # compute log-prices
 26 | o = log(open)
 27 | h = log(high)
 28 | l = log(low)
 29 | c = log(close)
 30 | m = (h + l) / 2.
 31 | 
 32 | # shift log-prices by one period
 33 | h1 = lag(h)
 34 | l1 = lag(l)
 35 | c1 = lag(c)
 36 | m1 = lag(m)
 37 | 
 38 | # compute log-returns
 39 | r1 = m - o
 40 | r2 = o - m1
 41 | r3 = m - c1
 42 | r4 = c1 - m1
 43 | r5 = o - c1
 44 | 
 45 | # compute indicator variables
 46 | tau = (h != l or l != c1) if h, l, c1 are non-missing else missing
 47 | po1 = (tau and o != h) if tau, o, h are non-missing else missing
 48 | po2 = (tau and o != l) if tau, o, l are non-missing else missing
 49 | pc1 = (tau and c1 != h1) if tau, c1, h1 are non-missing else missing
 50 | pc2 = (tau and c1 != l1) if tau, c1, l1 are non-missing else missing
 51 | 
 52 | # compute probabilities
 53 | pt = mean(tau)
 54 | po = mean(po1) + mean(po2)
 55 | pc = mean(pc1) + mean(pc2)
 56 | 
 57 | # return missing if there are less than two periods with tau=1 
 58 | # or po or pc is zero
 59 | if sum(tau) < 2 or po == 0 or pc == 0:
 60 |   return missing
 61 | 
 62 | # compute de-meaned log-returns
 63 | d1 = r1 - mean(r1)/pt*tau
 64 | d3 = r3 - mean(r3)/pt*tau
 65 | d5 = r5 - mean(r5)/pt*tau
 66 | 
 67 | # compute input vectors
 68 | x1 = -4./po*d1*r2 + -4./pc*d3*r4 
 69 | x2 = -4./po*d1*r5 + -4./pc*d5*r4 
 70 | 
 71 | # compute expectations
 72 | e1 = mean(x1)
 73 | e2 = mean(x2)
 74 | 
 75 | # compute variances
 76 | v1 = mean(x1*x1) - e1*e1
 77 | v2 = mean(x2*x2) - e2*e2
 78 | 
 79 | # compute square spread by using a (equally) weighted 
 80 | # average if the total variance is (not) positive
 81 | vt = v1 + v2
 82 | s2 = (v2*e1 + v1*e2) / vt if vt > 0 else (e1 + e2) / 2.
 83 | 
 84 | # compute signed root
 85 | s = sqrt(abs(s2))
 86 | if sign and s2 < 0: 
 87 |     s = -s
 88 | 
 89 | # return the spread
 90 | return s
 91 | ```
 92 | 
 93 | ### Testing
 94 | 
 95 | To test your implementation, import the data available [here](https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv). The file contains sample OHLC simulated price data to simplify testing. The data have been generated by simulating a price process as described in [Ardia, Guidotti, & Kroencke (2024)](https://doi.org/10.1016/j.jfineco.2024.103916) with 390 trades per day and a 1% probability to observe a trade. The simulation uses a constant spread of 1%. By running the estimation, you should obtain a spread estimate of **0.0101849034905478**. If you obtain a different result, you may use the following table to check and debug the intermediate steps.
 96 | 
 97 | | variable | value                  |
 98 | | -------- | ---------------------- |
 99 | | `pt`     | 0.9820982098209821     |
100 | | `po`     | 1.227922792279228      |
101 | | `pc`     | 1.2052205220522052     |
102 | | `e1`     | 0.00010702425689560482 |
103 | | `e2`     | 0.000101595812797079   |
104 | | `v1`     | 2.074215642985551e-06  |
105 | | `v2`     | 1.3461279919743572e-06 |
106 | | `s2`     | 0.00010373225911177194 |
107 | 
108 | To check that your implementation correctly handles missing values, import the data available [here](https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc-miss.csv). The data have been generated by setting a random subset of the values in the previous data file to missing. By running the estimation, you should obtain a spread estimate of **0.01013284969780197**. If you obtain a different result, you may use the following table to check and debug the intermediate steps.
109 | 
110 | | variable | value                  |
111 | | -------- | ---------------------- |
112 | | `pt`     | 0.9822078447230085     |
113 | | `po`     | 1.2272254421162134     |
114 | | `pc`     | 1.205827632480371      |
115 | | `e1`     | 0.00010337780767834583 |
116 | | `e2`     | 0.00010219271972776808 |
117 | | `v1`     | 2.0045420261850617e-06 |
118 | | `v2`     | 1.373839551967266e-06  |
119 | | `s2`     | 0.00010267464299824543 |
120 | 
121 | ### Contribute
122 | 
123 | Have you implemented the estimator in a new programming language? If you want your implementation to be included in this repository, please open a [pull request](https://github.com/eguidotti/bidask/pulls).


--------------------------------------------------------------------------------
/sas/edge.sas:
--------------------------------------------------------------------------------
  1 | /*
  2 | Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
  3 | 
  4 | Implements the efficient estimator of bid-ask spreads from open, high, low, 
  5 | and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
  6 | https://doi.org/10.1016/j.jfineco.2024.103916
  7 | 
  8 | Parameters
  9 | ----------
 10 | - `in`: the path to a SAS dataset containing open, high, low, and close prices for multiple groups
 11 | - `out`: the name of the file to output spread estimates
 12 | - `by`: comma separated list of column(s) to group by; e.g., `symbol` or `date,symbol`
 13 | - `open`: the name of the column containing open prices
 14 | - `high`: the name of the column containing high prices
 15 | - `low`: the name of the column containing low prices
 16 | - `close`: the name of the column containing close prices
 17 | - `sign`: boolean value (0/1) indicating whether to return signed estimates
 18 | 
 19 | Notes
 20 | -----
 21 | Prices must be sorted in ascending order of the timestamp within each group.
 22 | 
 23 | Returns
 24 | -------
 25 | The spread estimate. A value of 0.01 corresponds to a spread of 1%.
 26 | 
 27 | */
 28 | 
 29 | %let in = %sysget(in);
 30 | %let out = %sysget(out);
 31 | 
 32 | %let by_csv = %sysget(by);
 33 | %let by_lst = %sysfunc(tranwrd(%quote(&by_csv), %str(,), %str( )));
 34 | %let by_grp = %scan(%quote(&by_csv), -1, %str(,));
 35 | 
 36 | %let open = %sysget(open);
 37 | %let high = %sysget(high);
 38 | %let low = %sysget(low);
 39 | %let close = %sysget(close);
 40 | %let sign = %sysget(sign);
 41 | 
 42 | 
 43 | data prices;
 44 | 
 45 |     set "&in";
 46 |     by &by_lst;
 47 | 
 48 |     o = log(&open);
 49 |     h = log(&high);
 50 |     l = log(&low);
 51 |     c = log(&close);
 52 |     m = (h + l) / 2;
 53 | 
 54 |     h1 = lag1(h);
 55 |     l1 = lag1(l);    
 56 |     m1 = lag1(m);
 57 |     c1 = lag1(c);
 58 | 
 59 |     r1 = m-o;
 60 |     r2 = o-m1;
 61 |     r3 = m-c1;
 62 |     r4 = c1-m1;
 63 |     r5 = o-c1;
 64 | 
 65 |     if cmiss(h, l, c1) eq 0 then tau = (h ne l) | (l ne c1); else tau = .;
 66 |     if cmiss(o, h, tau) eq 0 then phi1 = (o ne h) & tau; else phi1 = .;
 67 |     if cmiss(o, l, tau) eq 0 then phi2 = (o ne l) & tau; else phi2 = .;
 68 |     if cmiss(c1, h1, tau) eq 0 then phi3 = (c1 ne h1) & tau; else phi3 = .;
 69 |     if cmiss(c1, l1, tau) eq 0 then phi4 = (c1 ne l1) & tau; else phi4 = .;
 70 |     
 71 |     if first.&by_grp = 0;
 72 | 
 73 | run;
 74 | 
 75 | 
 76 | proc sql;
 77 | 
 78 |     CREATE TABLE agg AS
 79 |     
 80 |     SELECT 
 81 |         &by_csv,
 82 |         AVG(r1*r2)        AS m1,
 83 |         AVG(r3*r4)        AS m2,
 84 |         AVG(r1*r5)        AS m3,
 85 |         AVG(r5*r4)        AS m4,
 86 |         AVG(tau)          AS m5,
 87 |         AVG(r1)           AS m6,
 88 |         AVG(tau*r2)       AS m7,
 89 |         AVG(r3)           AS m8,
 90 |         AVG(tau*r4)       AS m9,
 91 |         AVG(r5)           AS m10,
 92 |         AVG(r1**2*r2**2)  AS m11,
 93 |         AVG(r3**2*r4**2)  AS m12,
 94 |         AVG(r1**2*r5**2)  AS m13,
 95 |         AVG(r4**2*r5**2)  AS m14,
 96 |         AVG(r1*r2*r3*r4)  AS m15,
 97 |         AVG(r1*r4*r5**2)  AS m16,
 98 |         AVG(tau*r2**2)    AS m17,
 99 |         AVG(tau*r4**2)    AS m18,
100 |         AVG(tau*r5**2)    AS m19,
101 |         AVG(tau*r1*r2**2) AS m20,
102 |         AVG(tau*r3*r4**2) AS m21,
103 |         AVG(tau*r1*r5**2) AS m22,
104 |         AVG(tau*r5*r4**2) AS m23,
105 |         AVG(tau*r1*r2*r4) AS m24,
106 |         AVG(tau*r2*r3*r4) AS m25,
107 |         AVG(tau*r2*r4)    AS m26,
108 |         AVG(tau*r1*r4*r5) AS m27,
109 |         AVG(tau*r4*r5**2) AS m28,
110 |         AVG(tau*r4*r5)    AS m29,
111 |         AVG(tau*r5)       AS m30,
112 |         AVG(phi1)         AS m31,
113 |         AVG(phi2)         AS m32,
114 |         AVG(phi3)         AS m33,
115 |         AVG(phi4)         AS m34,
116 |         SUM(tau)          AS m35
117 |     
118 |     FROM
119 |         prices
120 |         
121 |     GROUP BY
122 |         &by_csv;   
123 | 
124 | quit;
125 | 
126 | 
127 | data edge;
128 | 
129 |     set agg;
130 | 
131 |     /* test the raw probabilities: po and pc are scaled reciprocals (-8/p), so they can never equal zero */
132 |     if (m35 lt 2) | (m31 + m32 eq 0) | (m33 + m34 eq 0) then do;
133 |         s = .;
134 |     end;
135 | 
136 |     else do;
137 |         po = -8 / (m31 + m32);
138 |         pc = -8 / (m33 + m34);
139 | 
140 |         e1 = po/2 * (m1 - m6*m7/m5) + 
141 |             pc/2 * (m2 - m8*m9/m5);
142 |         
143 |         e2 = po/2 * (m3 - m6*m30/m5) + 
144 |             pc/2 * (m4 - m10*m9/m5);
145 |         
146 |         v1 = po**2/4 * (m11 + m6**2*m17/m5**2 - 2*m20*m6/m5) +
147 |             pc**2/4 * (m12 + m8**2*m18/m5**2 - 2*m21*m8/m5) +
148 |             po*pc/2 * (m15 - m24*m8/m5 - m6*m25/m5 + m6*m8*m26/m5**2) - 
149 |             e1**2;
150 |         
151 |         v2 = po**2/4 * (m13 + m6**2*m19/m5**2 - 2*m22*m6/m5) +
152 |             pc**2/4 * (m14 + m10**2*m18/m5**2 - 2*m23*m10/m5) +
153 |             po*pc/2 * (m16 - m27*m10/m5 - m6*m28/m5 + m6*m10*m29/m5**2) -
154 |             e2**2;
155 |         
156 |         vt = v1 + v2;
157 |         if vt gt 0 then s2 = (v2*e1 + v1*e2) / vt; else s2 = (e1 + e2) / 2;
158 | 
159 |         s = SQRT(ABS(s2));
160 |         if &sign & (s2 < 0) then s = -s;
161 | 
162 |     end;
163 | 
164 |     keep &by_lst s;
165 |     rename s=EDGE;
166 | 
167 | run;
168 | 
169 | 
170 | proc export data=edge 
171 |     outfile="&out" 
172 |     replace; 
173 | run;
174 | 


--------------------------------------------------------------------------------
/r/tests/testthat/test-edge.R:
--------------------------------------------------------------------------------
  1 | test_that("edge", {
  2 |   
  3 |   x <- read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")
  4 |   s <- edge(x$Open, x$High, x$Low, x$Close)
  5 |   
  6 |   expect_equal(s, 0.0101849034905478)
  7 |   
  8 | })
  9 | 
 10 | test_that("edge-miss", {
 11 |   
 12 |   x <- read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc-miss.csv")
 13 |   s <- edge(x$Open, x$High, x$Low, x$Close)
 14 |   
 15 |   expect_equal(s, 0.01013284969780197)
 16 |   
 17 | })
 18 | 
 19 | test_that("edge-na", {
 20 |   
 21 |   expect_true(is.na(edge(
 22 |     c(18.21, 17.61, 17.61),
 23 |     c(18.21, 17.61, 17.61),
 24 |     c(17.61, 17.61, 17.61),
 25 |     c(17.61, 17.61, 17.61)
 26 |   )))
 27 |   
 28 | })
 29 | 
 30 | test_that("edge-spread", {
 31 |   
 32 |   set.seed(123)
 33 |   x <- sim(prob = 0.01, units = "day")
 34 |   
 35 |   s1 <- as.numeric(spread(x, method = "EDGE"))
 36 |   s2 <- edge(x$Open, x$High, x$Low, x$Close)
 37 | 
 38 |   expect_equal(s1, s2)
 39 |   
 40 | })
 41 | 
 42 | test_that("edge-spread-monthly", {
 43 |   
 44 |   set.seed(123)
 45 |   x <- sim(prob = 0.01, units = "day")
 46 |   
 47 |   zoo::index(x) <- zoo::index(x) - as.integer(start(x))
 48 |   width <- xts::endpoints(x, on = "months")
 49 |   
 50 |   s1 <- as.numeric(spread(x, width = width, method = "EDGE"))
 51 |   s2 <- sapply(2:length(width), function(i){
 52 |     m <- x[width[i-1]:width[i]]
 53 |     edge(m$Open, m$High, m$Low, m$Close)
 54 |   })
 55 |   
 56 |   expect_equal(s1, s2)
 57 |   
 58 | })
 59 | 
 60 | test_that("edge-spread-rolling", {
 61 |   
 62 |   set.seed(123)
 63 |   x <- sim(prob = 0.01, units = "day")
 64 |   
 65 |   for(width in c(1, 2, 3, 4, 21, 100)){
 66 |     
 67 |     s1 <- spread(x, width = width, method = "EDGE")
 68 |     s2 <- zoo::rollapplyr(x, width = width, by.column = FALSE, FUN = function(x){
 69 |       edge(x$Open, x$High, x$Low, x$Close)
 70 |     })[-(1:max(1, width-1))]
 71 |     
 72 |     expect_equal(as.numeric(s1), as.numeric(s2), label=paste("width = ", width))
 73 |     
 74 |   }
 75 |   
 76 | })
 77 | 
 78 | test_that("edge-spread-sign", {
 79 |   # signed rolling estimates from spread() must match edge() applied window by window
 80 |   set.seed(123)
 81 |   x <- sim(prob = 0.01, units = "day")
 82 |   
 83 |   width <- 21
 84 |   s1 <- spread(x, width = width, method = "EDGE", sign = TRUE)
 85 |   s2 <- zoo::rollapplyr(x, width = width, by.column = FALSE, FUN = function(x){
 86 |     edge(x$Open, x$High, x$Low, x$Close, sign = TRUE)
 87 |   })[-(1:(width-1))]  # parenthesized: `1:width-1` parses as `(1:width)-1`
 88 |   
 89 |   expect_equal(as.numeric(s1), as.numeric(s2))
 90 |   
 91 | })
 92 | 
 93 | test_that("edge-rolling", {
 94 |   
 95 |   set.seed(123)
 96 |   for(units in c(1, "day")) for(sign in c(TRUE, FALSE)) for(width in c(2, 3, 21)){
 97 |     
 98 |     x <- sim(prob = 0.01, units = units)
 99 |     
100 |     s1 <- spread(x, width = width, method = "EDGE", sign = sign)
101 |     s2 <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = width, sign = sign)
102 |     
103 |     if(is.data.frame(x))
104 |       idx <- as.integer(rownames(s1))
105 |     else
106 |       idx <- which(zoo::index(x) %in% zoo::index(s1))
107 |     
108 |     expect_equal(length(s2), nrow(x))
109 |     expect_equal(as.numeric(s1[,1]), s2[idx])
110 |     
111 |   }
112 |   
113 | })
114 | 
115 | test_that("edge-rolling-na", {
116 |   
117 |   set.seed(123)
118 |   x <- sim(n = 100)
119 |   
120 |   s1 <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = nrow(x), na.rm = TRUE)
121 |   expect_equal(sum(!is.na(s1)), 1)
122 |   
123 |   s2 <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = c(1, nrow(x)), na.rm = TRUE)
124 |   expect_equal(s1[!is.na(s1)], s2[!is.na(s2)])
125 |   
126 | })
127 | 
128 | test_that("edge-expanding", {
129 |   # spread() with widths 1:nrow(x) and edge_expanding() must agree
130 |   set.seed(123)
131 |   for(units in c(1, "day")) for(sign in c(TRUE, FALSE)) {
132 |     
133 |     prices <- sim(prob = 0.01, units = units)
134 |     
135 |     expected <- spread(prices, width = 1:nrow(prices), method = "EDGE", sign = sign)
136 |     actual <- edge_expanding(prices$Open, prices$High, prices$Low, prices$Close, sign = sign)
137 |     # locate the rows returned by spread() within the input
138 |     keep <- if(is.data.frame(prices)) {
139 |       as.integer(rownames(expected))
140 |     } else {
141 |       which(zoo::index(prices) %in% zoo::index(expected))
142 |     }
143 |     expect_equal(length(actual), nrow(prices))
144 |     expect_equal(as.numeric(expected[,1]), actual[keep])
145 |     
146 |   }
147 |   
148 | })
149 | 
150 | test_that("spread", {
151 |   # regression check: each estimator must reproduce its stored value
152 |   set.seed(123)
153 |   prices <- sim(prob = 0.01)
154 |   ohlc <- prices[, c("Open", "High", "Low", "Close")]
155 |   ohl <- prices[, c("Open", "High", "Low")]
156 |   hlc <- prices[, c("High", "Low", "Close")]
157 |   close_only <- prices[, "Close", drop = FALSE]
158 |   
159 |   est <- spread(ohlc, method = "EDGE")
160 |   expect_equal(as.numeric(est), 0.011211623772355)
161 |   est <- spread(ohlc, method = "OHLC")
162 |   expect_equal(as.numeric(est), 0.0111885179011119)
163 |   est <- spread(ohlc, method = "CHLO")
164 |   expect_equal(as.numeric(est), 0.0109352942009762)
165 |   
166 |   est <- spread(ohl, method = "OHL", na.rm = TRUE)
167 |   expect_equal(as.numeric(est), 0.0109503006263557)
168 |   
169 |   est <- spread(hlc, method = "CHL")
170 |   expect_equal(as.numeric(est), 0.0113136390567206)
171 |   est <- spread(hlc, method = "AR")
172 |   expect_equal(as.numeric(est), 0.00874585212811397)
173 |   est <- spread(hlc, method = "CS")
174 |   expect_equal(as.numeric(est), 0.00273953769016127)
175 |   
176 |   est <- spread(close_only, method = "ROLL")
177 |   expect_equal(as.numeric(est), 0.0125430188215437)
178 |   
179 | })
180 | 


--------------------------------------------------------------------------------
/r/README.md:
--------------------------------------------------------------------------------
  1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
  2 | 
  3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
  4 | 
  5 | ## Installation
  6 | 
  7 | ```R
  8 | install.packages("bidask")
  9 | ```
 10 | 
 11 | ## Usage
 12 | 
 13 | This package implements the following functions. The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. The functions `edge_rolling` and `edge_expanding` are optimized for fast calculations over rolling and expanding windows, respectively. The function `spread` provides additional functionalities for `xts` objects and implements additional estimators. The function `sim` simulates a time series of open, high, low, and close prices. The main functions are presented below. The full [documentation](https://CRAN.R-project.org/package=bidask/bidask.pdf) is available on [CRAN](https://cran.r-project.org/package=bidask) and a [vignette](https://cran.r-project.org/package=bidask/vignettes/bidask.html) is also available.
 14 | 
 15 | ```R
 16 | library("bidask")
 17 | ```
 18 | 
 19 | ### Function: `edge`
 20 | 
 21 | The input prices must be sorted in ascending order of the timestamp. The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%.
 22 | 
 23 | ```R
 24 | edge(open, high, low, close, sign=FALSE)
 25 | ```
 26 | 
 27 | | field   | description                         |
 28 | | ------- | ----------------------------------- |
 29 | | `open`  | Numeric vector of open prices.      |
 30 | | `high`  | Numeric vector of high prices.      |
 31 | | `low`   | Numeric vector of low prices.       |
 32 | | `close` | Numeric vector of close prices.     |
 33 | | `sign`  | Whether to return signed estimates. |
 34 | 
 35 | ### Function: `edge_rolling`
 36 | 
 37 | Implements a rolling window calculation of `edge`. The output is a vector of rolling spread estimates. A value of 0.01 corresponds to a spread of 1%. This function always returns a result of the same length as the input prices. 
 38 | 
 39 | ```R
 40 | edge_rolling(open, high, low, close, width, sign=FALSE, na.rm=FALSE)
 41 | ```
 42 | 
 43 | | field   | description                                                  |
 44 | | ------- | ------------------------------------------------------------ |
 45 | | `open`  | Numeric vector of open prices.                               |
 46 | | `high`  | Numeric vector of high prices.                               |
 47 | | `low`   | Numeric vector of low prices.                                |
 48 | | `close` | Numeric vector of close prices.                              |
 49 | | `width` | If an integer, the width of the rolling window. If a vector with the same length as the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. See examples. |
 50 | | `sign`  | Whether to return signed estimates.                          |
 51 | | `na.rm` | Whether to ignore missing values.                            |
 52 | 
 53 | ### Function: `edge_expanding`
 54 | 
 55 | Implements an expanding window calculation of `edge`. The output is a vector of expanding spread estimates. A value of 0.01 corresponds to a spread of 1%. This function always returns a result of the same length as the input prices. 
 56 | 
 57 | ```R
 58 | edge_expanding(open, high, low, close, sign=FALSE, na.rm=TRUE)
 59 | ```
 60 | 
 61 | | field   | description                         |
 62 | | ------- | ----------------------------------- |
 63 | | `open`  | Numeric vector of open prices.      |
 64 | | `high`  | Numeric vector of high prices.      |
 65 | | `low`   | Numeric vector of low prices.       |
 66 | | `close` | Numeric vector of close prices.     |
 67 | | `sign`  | Whether to return signed estimates. |
 68 | | `na.rm` | Whether to ignore missing values.   |
 69 | 
 70 | ## Examples
 71 | 
 72 | Load the test data.
 73 | 
 74 | ```R
 75 | library("bidask")
 76 | x = read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")
 77 | ```
 78 | 
 79 | Compute the spread estimate using all the observations.
 80 | 
 81 | ```R
 82 | edge(x$Open, x$High, x$Low, x$Close)
 83 | ```
 84 | 
 85 | Compute rolling estimates using a window of 21 observations.
 86 | 
 87 | ```R
 88 | edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21)
 89 | ```
 90 | 
 91 | Estimate the spread using custom endpoints.
 92 | 
 93 | ```R
 94 | edge_rolling(x$Open, x$High, x$Low, x$Close, width = c(3, 35, 100))
 95 | ```
 96 | 
 97 | Estimate the spread using an expanding window.
 98 | 
 99 | ```R
100 | edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE)
101 | ```
102 | 
103 | ## Cite as
104 | 
105 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
106 | 
107 | A BibTeX entry for LaTeX users is:
108 | 
109 | ```bibtex
110 | @article{edge,
111 |   title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices},
112 |   journal = {Journal of Financial Economics},
113 |   volume = {161},
114 |   pages = {103916},
115 |   year = {2024},
116 |   doi = {https://doi.org/10.1016/j.jfineco.2024.103916},
117 |   author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke},
118 | }
119 | ```
120 | 
121 | 


--------------------------------------------------------------------------------
/r/R/spread.R:
--------------------------------------------------------------------------------
  1 | #' Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
  2 | #'
  3 | #' This function implements several methods to estimate bid-ask spreads
  4 | #' from open, high, low, and close prices and it is optimized for fast 
  5 | #' calculations over rolling and expanding windows.
  6 | #'
  7 | #' @details
  8 | #' The method \code{EDGE} implements the Efficient Discrete Generalized Estimator described in Ardia, Guidotti, & Kroencke (JFE, 2024).
  9 | #' 
 10 | #' The methods \code{OHL}, \code{OHLC}, \code{CHL}, \code{CHLO} implement the generalized estimators described in Ardia, Guidotti, & Kroencke (JFE, 2024).
 11 | #' They can be combined by concatenating their identifiers, e.g., \code{OHLC.CHLO} uses an average of the \code{OHLC} and \code{CHLO} estimators.
 12 | #'
 13 | #' The method \code{AR} implements the estimator described in Abdi & Ranaldo (RFS, 2017). \code{AR2} implements their 2-period version.
 14 | #'
 15 | #' The method \code{CS} implements the estimator described in Corwin & Schultz (JF, 2012). \code{CS2} implements their 2-period version. Both versions are adjusted for overnight (close-to-open) returns as described in the paper.
 16 | #'
 17 | #' The method \code{ROLL} implements the estimator described in Roll (JF, 1984).
 18 | #'
 19 | #' @param x tabular data with columns named \code{open}, \code{high}, \code{low}, \code{close} (case-insensitive).
 20 | #' @param width if an integer, the width of the rolling window. If a vector with the same length as the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. By default, the full sample is used to compute a single spread estimate. See examples.
 21 | #' @param method the estimators to use. See details.
 22 | #' @param sign whether to return signed estimates.
 23 | #' @param na.rm whether to ignore missing values.
 24 | #'
 25 | #' @return A data.frame of spread estimates, or an \code{xts} object if \code{x} is of class \code{xts}. 
 26 | #' A value of 0.01 corresponds to a spread of 1\%.
 27 | #'
 28 | #' @references
 29 | #' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
 30 | #' \doi{10.1016/j.jfineco.2024.103916}
 31 | #'
 32 | #' Abdi, F., & Ranaldo, A. (2017). A simple estimation of bid-ask spreads from daily close, high, and low prices. Review of Financial Studies, 30 (12), 4437-4480.
 33 | #' \doi{10.1093/rfs/hhx084}
 34 | #' 
 35 | #' Corwin, S. A., & Schultz, P. (2012). A simple way to estimate bid-ask spreads from daily high and low prices. Journal of Finance, 67 (2), 719-760.
 36 | #' \doi{10.1111/j.1540-6261.2012.01729.x}
 37 | #' 
 38 | #' Roll, R. (1984). A simple implicit measure of the effective bid-ask spread in an efficient market. Journal of Finance, 39 (4), 1127-1139.
 39 | #' \doi{10.1111/j.1540-6261.1984.tb03897.x}
 40 | #'
 41 | #' @examples
 42 | #' # reduce number of threads to pass CRAN checks (you can ignore this)
 43 | #' data.table::setDTthreads(1)
 44 | #' 
 45 | #' # simulate open, high, low, and close prices with spread 1%
 46 | #' x <- sim(n = 1000, spread = 0.01)
 47 | #'
 48 | #' # estimate the spread
 49 | #' spread(x)
 50 | #' # equivalent to
 51 | #' edge(x$Open, x$High, x$Low, x$Close)
 52 | #'
 53 | #' # estimate the spread using a rolling window of 21 periods
 54 | #' s <- spread(x, width = 21)
 55 | #' tail(s)
 56 | #' # equivalent to
 57 | #' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21)
 58 | #' tail(s)
 59 | #' 
 60 | #' # estimate the spread using an expanding window
 61 | #' s <- spread(x, width = 1:nrow(x))
 62 | #' tail(s)
 63 | #' # equivalent to
 64 | #' s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE)
 65 | #' tail(s)
 66 | #' 
 67 | #' # estimate the spread using custom endpoints
 68 | #' ep <- c(3, 35, 100)
 69 | #' spread(x, width = ep)
 70 | #' # equivalent to
 71 | #' edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35])
 72 | #' edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100])
 73 | #'
 74 | #' # use multiple estimators
 75 | #' spread(x, method = c("EDGE", "AR", "CS", "ROLL", "OHLC", "OHL.CHL"))
 76 | #'
 77 | #' @export
 78 | #'
 79 | spread <- function(x, width = nrow(x), method = "EDGE", sign = FALSE, na.rm = FALSE){
 80 |   # method identifiers are matched case-insensitively; `todo` holds those still to compute
 81 |   s <- list()
 82 |   todo <- method <- toupper(method)
 83 |   # reduce names such as "MSFT.Open" to "open": drop the prefix, then lowercase
 84 |   colnames(x) <- tolower(gsub("^(.*\\b)(Open|High|Low|Close)$", "\\2", colnames(x)))
 85 |   open <- as.numeric(x$open)
 86 |   high <- as.numeric(x$high)
 87 |   low <- as.numeric(x$low)
 88 |   close <- as.numeric(x$close)
 89 | 
 90 |   m <- "EDGE"
 91 |   if(m %in% todo){
 92 |     s <- c(s, EDGE(open, high, low, close, width, sign, na.rm))
 93 |     todo <- setdiff(todo, m)
 94 |   }
 95 |   
 96 |   m <- c("AR", "AR2")
 97 |   if(any(m %in% todo)){
 98 |     m <- intersect(todo, m)
 99 |     s <- c(s, AR(high, low, close, width, m, sign, na.rm))
100 |     todo <- setdiff(todo, m)
101 |   }
102 | 
103 |   m <- c("CS", "CS2")
104 |   if(any(m %in% todo)){
105 |     m <- intersect(todo, m)
106 |     s <- c(s, CS(high, low, close, width, m, sign, na.rm))
107 |     todo <- setdiff(todo, m)
108 |   }
109 | 
110 |   m <- "ROLL"
111 |   if(m %in% todo){
112 |     s <- c(s, ROLL(close, width, sign, na.rm))
113 |     todo <- setdiff(todo, m)
114 |   }
115 |   
116 |   # every identifier still left in `todo` is handed to OHLC()
117 |   if(length(todo)){
118 |     s <- c(s, OHLC(open, high, low, close, width, todo, sign, na.rm))
119 |   }
120 | 
121 |   s <- as.data.frame(s, row.names = rownames(x))
122 |   # scalar condition: use short-circuit `&&`; xts output needs both optional packages
123 |   if(requireNamespace("xts", quietly = TRUE) &&
124 |      requireNamespace("zoo", quietly = TRUE)
125 |   ){
126 |     if(xts::is.xts(x)){
127 |       s <- xts::xts(s, order.by = zoo::index(x))
128 |     }
129 |   }
130 |   # a single width drops the first max(1, width - 1) rows; a vector whose length differs from nrow(x) keeps rows width[-1]
131 |   nw <- length(width)
132 |   if(nw == 1) s <- s[-(1:pmax(1, width - 1)), , drop = FALSE]
133 |   else if(nw != nrow(x)) s <- s[width[-1], , drop = FALSE]
134 |   return(s[, method, drop = FALSE])
135 | }
136 | 


--------------------------------------------------------------------------------
/python/bidask/edge_rolling.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | 
  4 | 
  5 | def edge_rolling(df: pd.DataFrame, window: int, sign: bool = False, **kwargs) -> pd.Series:
  6 |     """
  7 |     Rolling Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices
  8 | 
  9 |     Implements a rolling window calculation of the efficient estimator of bid-ask spreads 
 10 |     from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
 11 |     https://doi.org/10.1016/j.jfineco.2024.103916
 12 |         
 13 |     Parameters
 14 |     ----------
 15 |     - `df` : pd.DataFrame
 16 |         DataFrame with columns 'open', 'high', 'low', 'close' (case-insensitive).
 17 |     - `window` : int, timedelta, str, offset, or BaseIndexer subclass
 18 |         Size of the moving window. For more information about this parameter, see
 19 |         https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html
 20 |     - `sign` : bool, default False
 21 |         Whether to return signed estimates.
 22 |     - `kwargs` : dict, optional
 23 |         Additional keyword arguments to pass to the pandas rolling function.
 24 |         For more information about the rolling parameters, see 
 25 |         https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html
 26 | 
 27 |     Returns
 28 |     -------
 29 |     pd.Series
 30 |         A pandas Series of rolling spread estimates. A value of 0.01 corresponds to a spread of 1%.
 31 |     """    
 32 |     # lowercase the column names and compute log-prices
 33 |     df = df.rename(columns=str.lower, inplace=False)
 34 |     o = np.log(df['open'])
 35 |     h = np.log(df['high'])
 36 |     l = np.log(df['low'])
 37 |     c = np.log(df['close'])
 38 |     m = (h + l) / 2.
 39 | 
 40 |     # shift log-prices by one period
 41 |     h1 = h.shift(1)
 42 |     l1 = l.shift(1)
 43 |     c1 = c.shift(1)
 44 |     m1 = m.shift(1)
 45 |     
 46 |     # compute log-returns
 47 |     r1 = m - o
 48 |     r2 = o - m1
 49 |     r3 = m - c1
 50 |     r4 = c1 - m1
 51 |     r5 = o - c1
 52 | 
 53 |     # compute indicator variables (NaN where a required price is missing)
 54 |     tau = np.where(np.isnan(h) | np.isnan(l) | np.isnan(c1), np.nan, (h != l) | (l != c1))
 55 |     po1 = tau * np.where(np.isnan(o) | np.isnan(h), np.nan, o != h)
 56 |     po2 = tau * np.where(np.isnan(o) | np.isnan(l), np.nan, o != l)
 57 |     pc1 = tau * np.where(np.isnan(c1) | np.isnan(h1), np.nan, c1 != h1)
 58 |     pc2 = tau * np.where(np.isnan(c1) | np.isnan(l1), np.nan, c1 != l1)
 59 |     
 60 |     # compute base products for rolling means
 61 |     r12 = r1 * r2
 62 |     r15 = r1 * r5
 63 |     r34 = r3 * r4
 64 |     r45 = r4 * r5
 65 |     tr1 = tau * r1
 66 |     tr2 = tau * r2
 67 |     tr4 = tau * r4
 68 |     tr5 = tau * r5    
 69 | 
 70 |     # set up data frame for rolling means; the integer keys are used as m[k] below
 71 |     x = pd.DataFrame({
 72 |         1:  r12,
 73 |         2:  r34,
 74 |         3:  r15,
 75 |         4:  r45,
 76 |         5:  tau,
 77 |         6:  r1,
 78 |         7:  tr2,
 79 |         8:  r3,
 80 |         9:  tr4,
 81 |         10: r5,
 82 |         11: r12 ** 2,
 83 |         12: r34 ** 2,
 84 |         13: r15 ** 2,
 85 |         14: r45 ** 2,
 86 |         15: r12 * r34,
 87 |         16: r15 * r45,
 88 |         17: tr2 * r2,
 89 |         18: tr4 * r4,
 90 |         19: tr5 * r5,
 91 |         20: tr2 * r12,
 92 |         21: tr4 * r34,
 93 |         22: tr5 * r15,
 94 |         23: tr4 * r45,
 95 |         24: tr4 * r12,
 96 |         25: tr2 * r34,
 97 |         26: tr2 * r4,
 98 |         27: tr1 * r45,
 99 |         28: tr5 * r45,
100 |         29: tr4 * r5,
101 |         30: tr5,
102 |         31: po1,
103 |         32: po2,
104 |         33: pc1,
105 |         34: pc2
106 |     }, index=df.index)
107 |     
108 |     # mask the first observation and decrement window and min_periods by 1 before
109 |     # computing rolling means to account for lagged prices
110 |     x.iloc[0] = np.nan
111 |     if isinstance(window, (int, np.integer)):
112 |         window = max(0, window - 1)
113 |     if 'min_periods' in kwargs and isinstance(kwargs['min_periods'], (int, np.integer)):
114 |         kwargs['min_periods'] = max(0, kwargs['min_periods'] - 1)
115 | 
116 |     # compute rolling means
117 |     m = x.rolling(window=window, **kwargs).mean()
118 | 
119 |     # compute probabilities from the rolling means of the indicators
120 |     pt = m[5]
121 |     po = m[31] + m[32]
122 |     pc = m[33] + m[34]
123 | 
124 |     # set to missing if there are less than two periods with tau=1
125 |     # or po or pc is zero
126 |     nt = x[5].rolling(window=window, **kwargs).sum()
127 |     m[(nt < 2) | (po == 0) | (pc == 0)] = np.nan
128 | 
129 |     # compute input vectors
130 |     a1 = -4. / po
131 |     a2 = -4. / pc
132 |     a3 = m[6] / pt
133 |     a4 = m[9] / pt
134 |     a5 = m[8] / pt
135 |     a6 = m[10] / pt
136 |     a12 = 2 * a1 * a2
137 |     a11 = a1 ** 2
138 |     a22 = a2 ** 2
139 |     a33 = a3 ** 2
140 |     a55 = a5 ** 2
141 |     a66 = a6 ** 2
142 | 
143 |     # compute expectations
144 |     e1 = a1 * (m[1] - a3*m[7]) + a2 * (m[2] - a4*m[8])
145 |     e2 = a1 * (m[3] - a3*m[30]) + a2 * (m[4] - a4*m[10])
146 |     
147 |     # compute variances
148 |     v1 = - e1**2 + (
149 |         a11 * (m[11] - 2*a3*m[20] + a33*m[17]) +
150 |         a22 * (m[12] - 2*a5*m[21] + a55*m[18]) +
151 |         a12 * (m[15] - a3*m[25] - a5*m[24] + a3*a5*m[26])
152 |     )
153 |     v2 = - e2**2 + (
154 |         a11 * (m[13] - 2*a3*m[22] + a33*m[19]) + 
155 |         a22 * (m[14] - 2*a6*m[23] + a66*m[18]) +
156 |         a12 * (m[16] - a3*m[28] - a6*m[27] + a3*a6*m[29]) 
157 |     )
158 | 
159 |     # compute the squared spread: average e1 and e2 weighted by v2 and v1 when the
160 |     # total variance is positive, otherwise take their equally weighted average
161 |     vt = v1 + v2
162 |     s2 = pd.Series.where(  # unbound call: the Series is passed as `self`
163 |         cond=vt > 0, 
164 |         self=(v2*e1 + v1*e2) / vt, 
165 |         other=(e1 + e2) / 2.
166 |     )
167 | 
168 |     # take the square root of |s2| and, if requested, restore the sign of s2
169 |     s = np.sqrt(np.abs(s2))
170 |     if sign:
171 |         s *= np.sign(s2)
172 | 
173 |     # return the spread
174 |     return s
175 | 


--------------------------------------------------------------------------------
/python/README.md:
--------------------------------------------------------------------------------
  1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
  2 | 
  3 | Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
  4 | 
  5 | 
  6 | ## Installation
  7 | 
  8 | ```bash
  9 | pip install bidask
 10 | ```
 11 | 
 12 | ## Usage
 13 | 
 14 | There are three functions in this package. The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. The functions `edge_rolling` and `edge_expanding` are optimized for fast calculations over rolling and expanding windows, respectively.
 15 | 
 16 | ```python
 17 | from bidask import edge, edge_rolling, edge_expanding
 18 | ```
 19 | 
 20 | ### Function: `edge`
 21 | 
 22 | The input prices must be sorted in ascending order of the timestamp. The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%.
 23 | 
 24 | ```python
 25 | edge(open, high, low, close, sign=False)
 26 | ```
 27 | 
 28 | | field   | description                         |
 29 | | ------- | ----------------------------------- |
 30 | | `open`  | Array-like vector of open prices.   |
 31 | | `high`  | Array-like vector of high prices.   |
 32 | | `low`   | Array-like vector of low prices.    |
 33 | | `close` | Array-like vector of close prices.  |
 34 | | `sign`  | Whether to return signed estimates. |
 35 | 
 36 | ### Function: `edge_rolling`
 37 | 
 38 | Implements a rolling window calculation of `edge`. The input is a pandas data frame. The output is a pandas series of rolling spread estimates. A value of 0.01 corresponds to a spread of 1%.
 39 | 
 40 | ```python
 41 | edge_rolling(df, window, sign=False, **kwargs)
 42 | ```
 43 | 
 44 | | field      | description                                                  |
 45 | | ---------- | ------------------------------------------------------------ |
 46 | | `df`       | Data frame with columns 'open', 'high', 'low', 'close' (case-insensitive). |
 47 | | `window`   | Size of the moving window. For more information about this parameter, see [here](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html). |
 48 | | `sign`     | Whether to return signed estimates.                          |
 49 | | `**kwargs` | Additional keyword arguments to pass to the pandas rolling function. For more information about the rolling parameters, see [here](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html). |
 50 | 
 51 | ### Function: `edge_expanding`
 52 | 
 53 | Implements an expanding window calculation of `edge`. The input is a pandas data frame. The output is a pandas series of expanding spread estimates. A value of 0.01 corresponds to a spread of 1%. 
 54 | 
 55 | ```python
 56 | edge_expanding(df, min_periods=1, sign=False)
 57 | ```
 58 | 
 59 | | field         | description                                                  |
 60 | | ------------- | ------------------------------------------------------------ |
 61 | | `df`          | Data frame with columns 'open', 'high', 'low', 'close' (case-insensitive). |
 62 | | `min_periods` | Minimum number of observations in window required to have a value; otherwise, result is `np.nan`. |
 63 | | `sign`        | Whether to return signed estimates.                          |
 64 | 
 65 | ## Examples
 66 | 
 67 | Load the test data.
 68 | 
 69 | ```python
 70 | import pandas as pd
 71 | df = pd.read_csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")
 72 | ```
 73 | 
 74 | Compute the spread estimate using all the observations.
 75 | 
 76 | ```py
 77 | from bidask import edge
 78 | edge(df.Open, df.High, df.Low, df.Close)
 79 | ```
 80 | 
 81 | Compute rolling estimates using a window of 21 observations.
 82 | 
 83 | ```py
 84 | from bidask import edge_rolling
 85 | edge_rolling(df=df, window=21)
 86 | ```
 87 | 
 88 | Compute expanding estimates starting with a minimum of 21 observations.
 89 | 
 90 | ```py
 91 | from bidask import edge_expanding
 92 | edge_expanding(df=df, min_periods=21)
 93 | ```
 94 | 
 95 | ## Notes
 96 | 
 97 | The rolling estimates:
 98 | 
 99 | ```py
100 | rolling_estimates = edge_rolling(df=df, window=window, step=step, sign=sign)
101 | ```
102 | 
103 | are equivalent to, but much faster than:
104 | 
105 | ```py
106 | expected_estimates = []
107 | for t in range(0, len(df), step):
108 |     t1 = t + 1
109 |     t0 = t1 - window
110 |     expected_estimates.append(edge(
111 |         df.Open.values[t0:t1],
112 |         df.High.values[t0:t1],
113 |         df.Low.values[t0:t1],
114 |         df.Close.values[t0:t1],
115 |         sign=sign
116 |     ) if t0 >= 0 else np.nan)
117 | ```
118 | 
119 | The expanding estimates:
120 | 
121 | ```py
122 | expanding_estimates = edge_expanding(df=df, min_periods=min_periods, sign=sign)
123 | ```
124 | 
125 | are equivalent to, but much faster than:
126 | 
127 | ```py
128 | expected_estimates = []
129 | for t in range(0, len(df)):
130 |     t1 = t + 1
131 |     expected_estimates.append(edge(
132 |         df.Open.values[0:t1],
133 |         df.High.values[0:t1],
134 |         df.Low.values[0:t1],
135 |         df.Close.values[0:t1],
136 |         sign=sign
137 |     ) if t1 >= min_periods else np.nan)
138 | ```
139 | 
140 | ## Cite as
141 | 
142 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
143 | 
144 | A BibTeX entry for LaTeX users is:
145 | 
146 | ```bibtex
147 | @article{edge,
148 |   title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices},
149 |   journal = {Journal of Financial Economics},
150 |   volume = {161},
151 |   pages = {103916},
152 |   year = {2024},
153 |   doi = {https://doi.org/10.1016/j.jfineco.2024.103916},
154 |   author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke},
155 | }
156 | ```
157 | 


--------------------------------------------------------------------------------
/r/vignettes/bidask.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{bidask}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | data.table::setDTthreads(1)
 12 | knitr::opts_chunk$set(
 13 |   collapse = TRUE,
 14 |   comment = "#>",
 15 |   fig.width = 6,
 16 |   out.width="100%",
 17 |   dpi = 300,
 18 |   warning = FALSE,
 19 |   message = FALSE
 20 | )
 21 | ```
 22 | 
 23 | This vignette illustrates how to estimate bid-ask spreads from open, high, low, and close prices using the efficient estimator described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916). 
 24 | 
 25 | ```{r setup}
 26 | library(bidask)
 27 | ```
 28 | 
 29 | The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. The functions `edge_rolling` and `edge_expanding` are optimized for fast calculations over rolling and expanding windows, respectively. The function `spread` provides additional functionalities for `xts` objects and implements additional estimators. For all functions, an output value of 0.01 corresponds to a spread estimate of 1%.
 30 | 
 31 | ## Functions `edge`, `edge_rolling`, `edge_expanding`
 32 | 
 33 | These functions can be easily used with tidy data. For instance, download daily prices for Bitcoin and Ethereum using the [crypto2](https://cran.r-project.org/package=crypto2) package:
 34 | 
 35 | ```{r, results='hide'}
 36 | library(dplyr)
 37 | library(crypto2)
 38 | df <- crypto_list(only_active=TRUE) %>%
 39 |   filter(symbol %in% c("BTC", "ETH")) %>%
 40 |   crypto_history(start_date = "20200101", end_date = "20221231")
 41 | ```
 42 | 
 43 | ```{r}
 44 | head(df)
 45 | ```
 46 | 
 47 | Estimate the spread for each coin in each year:
 48 | 
 49 | ```{r}
 50 | df %>%
 51 |   mutate(yyyy = format(timestamp, "%Y")) %>%
 52 |   group_by(symbol, yyyy) %>%
 53 |   arrange(timestamp) %>%
 54 |   summarise("EDGE" = edge(open, high, low, close))
 55 | ```
 56 | 
 57 | Estimate the spread using a rolling window of 30 days for each coin and plot the results:
 58 | 
 59 | ```{r}
 60 | library(ggplot2)
 61 | df %>%
 62 |   group_by(symbol) %>%
 63 |   arrange(timestamp) %>%
 64 |   mutate("EDGE (rolling)" = edge_rolling(open, high, low, close, width = 30)) %>%
 65 |   ggplot(aes(x = timestamp, y = `EDGE (rolling)`, color = symbol)) +
 66 |   geom_line() +
 67 |   theme_minimal()
 68 | ```
 69 | 
 70 | Estimate the spread using an expanding window for each coin and plot the results:
 71 | ```{r}
 72 | df %>%
 73 |   group_by(symbol) %>%
 74 |   arrange(timestamp) %>%
 75 |   mutate("EDGE (expanding)" = edge_expanding(open, high, low, close)) %>%
 76 |   ggplot(aes(x = timestamp, y = `EDGE (expanding)`, color = symbol)) +
 77 |   geom_line() +
 78 |   theme_minimal()
 79 | ```
 80 | 
 81 | Notice that, generally, using intraday data (instead of daily) improves the estimation accuracy, especially when the spread is expected to be small (see example below).
 82 | 
 83 | ## Function `spread`
 84 | 
 85 | The function `spread()` provides additional functionalities for [xts](https://cran.r-project.org/package=xts) objects and implements additional estimators. For instance, download daily data for Microsoft (MSFT) using the [quantmod](https://cran.r-project.org/package=quantmod) package which returns an `xts` object:
 86 | 
 87 | ```{r}
 88 | library(quantmod)
 89 | x <- try(getSymbols("MSFT", auto.assign = FALSE, start = "2019-01-01", end = "2022-12-31"), silent = TRUE)
 90 | if(inherits(x, "try-error")){  # the download failed: fall back to simulated prices
 91 |   print("Error in getSymbols; using synthetic data instead")
 92 |   x <- sim(5000, units = "day")
 93 | }
 94 | ```
 95 | ```{r}
 96 | class(x)
 97 | ```
 98 | 
 99 | ```{r}
100 | head(x)
101 | ```
102 | 
103 | Estimate the spread with:
104 | 
105 | ```{r}
106 | spread(x)
107 | ```
108 | 
109 | or, equivalently:
110 | 
111 | ```{r}
112 | edge(open = x[,1], high = x[,2], low = x[,3], close = x[,4])
113 | ```
114 | 
115 | Estimate the spread for each month and plot the estimates:
116 | 
117 | ```{r}
118 | sp <- spread(x, width = endpoints(x, on = "months"))
119 | plot(sp)
120 | ```
121 | 
122 | Estimate the spread using a rolling window of 21 observations:
123 | 
124 | ```{r}
125 | sp <- spread(x, width = 21)
126 | plot(sp)
127 | ```
128 | 
129 | To illustrate higher-frequency estimates, download intraday data from Alpha Vantage. You must register with Alpha Vantage in order to download their data, but the one-time registration is fast and free. Register at https://www.alphavantage.co/ to receive your key. You can set the API key globally as follows:
130 | 
131 | ```{r}
132 | setDefaults(getSymbols.av, api.key = "<API-KEY>")
133 | ```
134 | 
135 | Download minute data for Microsoft:
136 | 
137 | ```r
138 | x <- getSymbols(
139 |   Symbols = "MSFT", 
140 |   auto.assign = FALSE, 
141 |   src = "av", 
142 |   periodicity = "intraday", 
143 |   interval = "1min", 
144 |   output.size = "full")
145 | ```
146 | 
147 | ```{r, include=FALSE}
148 | x <- read.csv(system.file("extdata", "msft.csv", package = "bidask"))
149 | x <- xts(x[,-1], order.by = as.POSIXct(x[,1]))
150 | ```
151 | 
152 | Keep only prices during regular market hours:
153 | 
154 | ```{r}
155 | x <- x["T09:30/T16:00"]
156 | head(x)
157 | ```
158 | 
159 | Estimate the spread for each day and plot the estimates:
160 | 
161 | ```{r}
162 | sp <- spread(x, width = endpoints(x, on = "day"))
163 | plot(sp, type = "b")
164 | ```
165 | 
166 | Use multiple estimators and plot the estimates:
167 | 
168 | ```{r}
169 | sp <- spread(x, width = endpoints(x, on = "day"), method = c("EDGE", "AR", "CS", "ROLL"))
170 | plot(sp, type = "b", legend.loc = "topright")
171 | ```
172 | 
173 | ## GitHub 
174 | 
175 | If you find this package useful, please [star the repo](https://github.com/eguidotti/bidask)! The repository also contains implementations for Python, C++, MATLAB, and more; as well as open data containing bid-ask spread estimates for crypto pairs in Binance and for U.S. stocks in CRSP.
176 | 
177 | ## Cite as
178 | 
179 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
180 | 
181 | A BibTeX entry for LaTeX users is:
182 | 
183 | ```bibtex
184 | @article{edge,
185 |   title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices},
186 |   journal = {Journal of Financial Economics},
187 |   volume = {161},
188 |   pages = {103916},
189 |   year = {2024},
190 |   doi = {https://doi.org/10.1016/j.jfineco.2024.103916},
191 |   author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke},
192 | }
193 | ```
194 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
 2 | 
 3 | This [repository](https://github.com/eguidotti/bidask/) implements the efficient estimator of the effective bid-ask spread from open, high, low, and close prices described in:
 4 | 
 5 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
 6 | 
 7 | The estimator is available in:
 8 | 
 9 | [C++](https://github.com/eguidotti/bidask/tree/main/c++) | [Julia](https://github.com/eguidotti/bidask/tree/main/julia) | [MATLAB](https://github.com/eguidotti/bidask/tree/main/matlab) | [Python](https://github.com/eguidotti/bidask/tree/main/python) | [R](https://github.com/eguidotti/bidask/tree/main/r) | [SAS](https://github.com/eguidotti/bidask/tree/main/sas)
10 | 
11 | You can also check the [pseudocode](https://github.com/eguidotti/bidask/tree/main/pseudocode) to implement the estimator in any programming language. If you implement the estimator in a new programming language and want your implementation included in the repository, please open a [pull request](https://github.com/eguidotti/bidask/pulls).
12 | 
13 | ## Open data
14 | 
15 | The following datasets are available to download:
16 | 
17 | | Download                                       | Dataset                                              | Description                                                  |
18 | | ---------------------------------------------- | ---------------------------------------------------- | ------------------------------------------------------------ |
19 | | [download](https://doi.org/10.7910/DVN/YAY4H6) | Bid-Ask Spread Estimates for U.S. Stocks in CRSP     | Contains monthly estimates of the effective bid-ask spread for each stock in the CRSP U.S. Stock database |
20 | | [download](https://doi.org/10.7910/DVN/9AVA2B) | Bid-Ask Spread Estimates for Crypto Pairs in Binance | Contains monthly estimates of the effective bid-ask spread for crypto pairs listed in Binance |
21 | 
22 | ## FAQ 
23 | 
24 | > Each transaction price may generally include a different bid-ask spread, but the estimator only returns a single estimate given a sample of open, high, low, and close prices. What is the estimator computing exactly?
25 | 
26 | - The estimator estimates the root mean square effective spread within the sample period.
27 | 
28 | > What is the minimum number of observations required by the estimator?
29 | 
30 | - The estimator requires at least 3 observations.
31 | 
32 | > What is the recommended number of observations to use? 
33 | 
34 | - There is no one-size-fits-all solution.  For instance, using a few daily prices would provide estimates closer to the spread in those days but with potentially large estimation uncertainty. Using one year of daily prices would provide more precise estimates, but for the average (more precisely, root mean square) spread in the whole year. For more information, see https://github.com/eguidotti/bidask/issues/2
35 | 
36 | > Does the estimator work with intraday data?
37 | 
38 | - Yes, the estimator can be used with intraday data. 
39 | 
40 | > What is the recommended frequency to use? 
41 | 
42 | - Generally, the higher the frequency, the better (e.g., minute prices are preferable to hourly and daily prices). However, this depends on the underlying asset's trading frequency. For instance, weekly prices should be considered for assets that trade, on average, less than once per day. More generally, the frequency should be chosen so that the average number of trades per period is at least two. The estimation variance may increase significantly below this limit. 
43 | 
44 | > Does the estimator work with tick data?
45 | 
46 | - The estimator does not natively support tick data. However, it is possible to aggregate tick data into open, high, low, and close prices and apply the estimator.
47 | 
48 | > How to handle non-positive estimates?
49 | 
50 | - By default, the estimator returns the absolute value of the estimates. This is generally a good option if you are interested in point estimates, but may create a small-sample bias if the estimates are used for averaging or regression studies. To reduce this source of bias, you can compute signed estimates with the argument `sign=True` and reset negative values to zero. Keeping negative values is not recommended because more negative estimates are typically associated with larger spreads empirically. For more information, see https://github.com/eguidotti/bidask/issues/3
51 | 
52 | > Does the estimator work with missing values?
53 | 
54 | - Yes, the estimator works with missing values out-of-the-box. It is recommended to keep missing values and use a regular time grid instead of dropping missing values and using an irregular time grid. For more information, see https://github.com/eguidotti/bidask/issues/16
55 | 
56 | > Do the functions `edge` and `edge_rolling` produce the same results?
57 | 
58 | - The function `edge_rolling` is a version of `edge` optimized for fast calculations over rolling windows. The two functions produce the same estimates when there are no missing values. If missing values are present, the two functions may provide slightly different estimates due to how missing values are handled, but both estimates are consistent.
59 | 
60 | ## Replication code
61 | 
62 | All code to replicate the paper is available [here](https://doi.org/10.7910/DVN/G8DPBM). The code meets the requirements of the [cascad](https://www.cascad.tech/certification/145-efficient-estimation-of-bid-ask-spreads-from-open-high-low-and-close-prices/) reproducibility policy for a rating of RRR.
63 | 
64 | ## Related works
65 | 
66 | You can browse publications related to the paper [here](https://scholar.google.com/scholar?cites=2115798896240699437).
67 | 
68 | ## Terms of use
69 | 
70 | All code is released under the [MIT](https://github.com/eguidotti/bidask?tab=MIT-1-ov-file#readme) license. All data are released under the [CC BY 4.0](http://creativecommons.org/licenses/by/4.0) license. When using any data or code from this repository, please cite the reference indicated below.
71 | 
72 | ## Cite as
73 | 
74 | > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)
75 | 
76 | A BibTeX entry for LaTeX users is:
77 | 
78 | ```bibtex
79 | @article{edge,
80 |   title = {Efficient estimation of bid–ask spreads from open, high, low, and close prices},
81 |   journal = {Journal of Financial Economics},
82 |   volume = {161},
83 |   pages = {103916},
84 |   year = {2024},
85 |   doi = {https://doi.org/10.1016/j.jfineco.2024.103916},
86 |   author = {David Ardia and Emanuele Guidotti and Tim A. Kroencke},
87 | }
88 | ```


--------------------------------------------------------------------------------
/r/R/edge.R:
--------------------------------------------------------------------------------
  1 | #' EDGE Estimator
  2 | #'
  3 | #' @keywords internal
  4 | #'
  5 | EDGE <- function(open, high, low, close, width, sign, na.rm, aslist = TRUE){
  6 |   
  7 |   # compute log-prices
  8 |   o <- log(open)
  9 |   h <- log(high)
 10 |   l <- log(low)
 11 |   c <- log(close)
 12 |   m <- (h + l) / 2
 13 |   
 14 |   # shift log-prices by one period
 15 |   h1 <- shift(h, 1)
 16 |   l1 <- shift(l, 1)
 17 |   c1 <- shift(c, 1)
 18 |   m1 <- shift(m, 1)
 19 | 
 20 |   # compute log-returns
 21 |   r1 <- m - o
 22 |   r2 <- o - m1
 23 |   r3 <- m - c1
 24 |   r4 <- c1 - m1
 25 |   r5 <- o - c1
 26 |   
 27 |   # compute indicator variables
 28 |   tau <- ifelse(is.na(h) | is.na(l) | is.na(c1), NA, h != l | l != c1)
 29 |   po1 <- tau * (o != h)
 30 |   po2 <- tau * (o != l)
 31 |   pc1 <- tau * (c1 != h1)
 32 |   pc2 <- tau * (c1 != l1)
 33 | 
 34 |   # compute base products for rolling means
 35 |   r12 <- r1 * r2
 36 |   r15 <- r1 * r5
 37 |   r34 <- r3 * r4
 38 |   r45 <- r4 * r5
 39 |   tr1 <- tau * r1
 40 |   tr2 <- tau * r2
 41 |   tr4 <- tau * r4
 42 |   tr5 <- tau * r5
 43 |   
 44 |   # set up data frame for rolling means
 45 |   x <- data.frame(
 46 |     r12,
 47 |     r34,
 48 |     r15,
 49 |     r45,
 50 |     tau,
 51 |     r1,
 52 |     tr2,
 53 |     r3,
 54 |     tr4,
 55 |     r5,
 56 |     r12^2,
 57 |     r34^2,
 58 |     r15^2,
 59 |     r45^2,
 60 |     r12 * r34,
 61 |     r15 * r45,
 62 |     tr2 * r2,
 63 |     tr4 * r4,
 64 |     tr5 * r5,
 65 |     tr2 * r12,
 66 |     tr4 * r34,
 67 |     tr5 * r15,
 68 |     tr4 * r45,
 69 |     tr4 * r12,
 70 |     tr2 * r34,
 71 |     tr2 * r4,
 72 |     tr1 * r45,
 73 |     tr5 * r45,
 74 |     tr4 * r5,
 75 |     tr5,
 76 |     po1,
 77 |     po2,
 78 |     pc1,
 79 |     pc2
 80 |   )
 81 |   
 82 |   # mask the first observation and decrement width by 1 before 
 83 |   # computing rolling means to account for lagged prices
 84 |   x[1,] <- NA
 85 |   shift <- 1
 86 | 
 87 |   # compute rolling means
 88 |   m <- rmean(x, width = width, shift = shift, na.rm = na.rm)
 89 |   
 90 |   # compute probabilities
 91 |   pt <- m[,5]
 92 |   po <- m[,31] + m[,32]
 93 |   pc <- m[,33] + m[,34]
 94 |   
 95 |   # set to missing if there are less than two periods with tau=1
 96 |   # or po or pc is zero
 97 |   nt <- rsum(x[5], width = width, shift = shift, na.rm = TRUE)
 98 |   m[which(nt < 2 | po == 0 | pc == 0),] <- NA
 99 |   
100 |   # compute input vectors
101 |   a1 <- -4. / po
102 |   a2 <- -4. / pc
103 |   a3 <- m[,6] / pt
104 |   a4 <- m[,9] / pt
105 |   a5 <- m[,8] / pt
106 |   a6 <- m[,10] / pt
107 |   a12 <- 2 * a1 * a2
108 |   a11 <- a1^2
109 |   a22 <- a2^2
110 |   a33 <- a3^2
111 |   a55 <- a5^2
112 |   a66 <- a6^2
113 |   
114 |   # compute expectations
115 |   e1 <- a1 * (m[,1] - a3*m[,7]) + a2 * (m[,2] - a4*m[,8])
116 |   e2 <- a1 * (m[,3] - a3*m[,30]) + a2 * (m[,4] - a4*m[,10])
117 |   
118 |   # compute variances
119 |   v1 <- - e1^2 + (
120 |     a11 * (m[,11] - 2*a3*m[,20] + a33*m[,17]) +
121 |     a22 * (m[,12] - 2*a5*m[,21] + a55*m[,18]) +
122 |     a12 * (m[,15] - a3*m[,25] - a5*m[,24] + a3*a5*m[,26])
123 |   )
124 |   v2 <- - e2^2 + (
125 |     a11 * (m[,13] - 2*a3*m[,22] + a33*m[,19]) + 
126 |     a22 * (m[,14] - 2*a6*m[,23] + a66*m[,18]) +
127 |     a12 * (m[,16] - a3*m[,28] - a6*m[,27] + a3*a6*m[,29]) 
128 |   )
129 |   
130 |   # compute square spread by using a (equally) weighted 
131 |   # average if the total variance is (not) positive
132 |   vt <- v1 + v2
133 |   s2 <- ifelse(!is.na(vt) & vt > 0, (v2*e1 + v1*e2) / vt, (e1 + e2) / 2)
134 | 
135 |   # compute signed root
136 |   s <- sqrt(abs(s2))
137 |   if(sign) 
138 |     s <- s * base::sign(s2)
139 |   
140 |   # return the spread
141 |   if(!aslist) return(s)
142 |   return(list("EDGE" = s))
143 |   
144 | }
145 | 
#' Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices
#' 
#' Computes a single estimate of the bid-ask spread from a sample of open, 
#' high, low, and close prices with the efficient estimator described in 
#' Ardia, Guidotti, & Kroencke (JFE, 2024):
#' \doi{10.1016/j.jfineco.2024.103916}
#' 
#' @details
#' Prices must be sorted in ascending order of the timestamp. 
#' The result is \code{NA} when fewer than 3 observations are supplied, when 
#' fewer than two periods have a non-zero \code{tau} indicator, or when the 
#' open or close probabilities are zero.
#'
#' @param open numeric vector of open prices.
#' @param high numeric vector of high prices.
#' @param low numeric vector of low prices.
#' @param close numeric vector of close prices.
#' @param sign whether to return signed estimates.
#'
#' @return The spread estimate. A value of 0.01 corresponds to a spread of 1\%.
#'
#' @references 
#' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
#' \doi{10.1016/j.jfineco.2024.103916}
#'
#' @examples
#' # reduce number of threads to pass CRAN checks (you can ignore this)
#' data.table::setDTthreads(1)
#' 
#' # simulate open, high, low, and close prices with spread 1%
#' x <- sim(n = 1000, spread = 0.01)
#'
#' # estimate the spread
#' edge(x$Open, x$High, x$Low, x$Close)
#'
#' @export
#' 
edge <- function(open, high, low, close, sign = FALSE){
  
  # the four price vectors must be aligned observation by observation
  n <- length(open)
  same_length <- length(high) == n && length(low) == n && length(close) == n
  if(!same_length)
    stop("open, high, low, close must have the same length")
  
  # fewer than 3 observations: no estimate
  if(n < 3)
    return(NA)
  
  # log-prices over the full sample
  log_o <- log(as.numeric(open))
  log_h <- log(as.numeric(high))
  log_l <- log(as.numeric(low))
  log_c <- log(as.numeric(close))
  log_m <- (log_h + log_l) / 2
  
  # pair every period 2..n with the period right before it
  prev <- seq_len(n - 1)
  curr <- 2:n
  h1 <- log_h[prev]
  l1 <- log_l[prev]
  c1 <- log_c[prev]
  m1 <- log_m[prev]
  o <- log_o[curr]
  h <- log_h[curr]
  l <- log_l[curr]
  m <- log_m[curr]
  
  # log-returns
  r1 <- m - o
  r2 <- o - m1
  r3 <- m - c1
  r4 <- c1 - m1
  r5 <- o - c1
  
  # indicator variables; tau is missing when any price it depends on is missing
  tau <- h != l | l != c1
  tau[is.na(h) | is.na(l) | is.na(c1)] <- NA
  po1 <- tau * (o != h)
  po2 <- tau * (o != l)
  pc1 <- tau * (c1 != h1)
  pc2 <- tau * (c1 != l1)
  
  # probabilities
  pt <- mean(tau, na.rm = TRUE)
  po <- mean(po1, na.rm = TRUE) + mean(po2, na.rm = TRUE)
  pc <- mean(pc1, na.rm = TRUE) + mean(pc2, na.rm = TRUE)
  
  # no estimate with fewer than two periods with tau=1, or when po or pc is 
  # zero (a NaN probability is not treated as zero)
  nt <- sum(tau, na.rm = TRUE)
  po_is_zero <- !is.nan(po) && po == 0
  pc_is_zero <- !is.nan(pc) && pc == 0
  if(nt < 2 || po_is_zero || pc_is_zero)
    return(NA)
  
  # de-meaned log-returns: the mean is rescaled by pt and removed only in 
  # periods where tau is non-zero
  demean <- function(r) r - mean(r, na.rm = TRUE) / pt * tau
  d1 <- demean(r1)
  d3 <- demean(r3)
  d5 <- demean(r5)
  
  # input vectors of the two moment conditions
  wo <- -4 / po
  wc <- -4 / pc
  x1 <- wo * d1 * r2 + wc * d3 * r4
  x2 <- wo * d1 * r5 + wc * d5 * r4
  
  # expectations
  e1 <- mean(x1, na.rm = TRUE)
  e2 <- mean(x2, na.rm = TRUE)
  
  # variances
  v1 <- mean(x1^2, na.rm = TRUE) - e1^2
  v2 <- mean(x2^2, na.rm = TRUE) - e2^2

  # square spread: variance-weighted average of the two expectations when the 
  # total variance is positive, plain average otherwise
  vt <- v1 + v2
  if(!is.na(vt) && vt > 0){
    s2 <- (v2*e1 + v1*e2) / vt
  } else {
    s2 <- (e1 + e2) / 2
  }
  
  # root of the absolute value, optionally carrying the sign of s2
  s <- sqrt(abs(s2))
  if(sign) 
    s <- s * base::sign(s2)
  
  s
  
}
260 | 
#' Rolling Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices
#' 
#' Applies the efficient estimator of bid-ask spreads from open, high, low, 
#' and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024) 
#' over rolling windows:
#' \doi{10.1016/j.jfineco.2024.103916}.
#' 
#' @details
#' Prices must be sorted in ascending order of the timestamp.
#' 
#' @param open numeric vector of open prices.
#' @param high numeric vector of high prices.
#' @param low numeric vector of low prices.
#' @param close numeric vector of close prices.
#' @param width if an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. See examples.
#' @param sign whether to return signed estimates.
#' @param na.rm whether to ignore missing values.
#'
#' @return Vector of spread estimates. 
#' A value of 0.01 corresponds to a spread of 1\%.
#' This function always returns a result of the same length as the input prices. 
#'
#' @references 
#' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
#' \doi{10.1016/j.jfineco.2024.103916}
#'
#' @examples
#' # reduce number of threads to pass CRAN checks (you can ignore this)
#' data.table::setDTthreads(1)
#' 
#' # simulate open, high, low, and close prices with spread 1%
#' x <- sim(n = 1000, spread = 0.01)
#'
#' # estimate the spread using a rolling window
#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21)
#' tail(s)
#' 
#' # estimate the spread using custom endpoints
#' ep <- c(3, 35, 100)
#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = ep)
#' s[c(35, 100)]
#' # equivalent to
#' edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35])
#' edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100])
#' 
#' # estimate the spread using an expanding window
#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x))
#' tail(s)
#' # equivalent to
#' s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE)
#' tail(s)
#' 
#' @export
#' 
edge_rolling <- function(open, high, low, close, width, sign = FALSE, na.rm = FALSE){
  
  # the four price vectors must be aligned observation by observation
  n <- length(open)
  same_length <- length(high) == n && length(low) == n && length(close) == n
  if(!same_length)
    stop("open, high, low, close must have the same length")
  
  # hand the prices over as plain numeric vectors and ask the internal 
  # estimator for a bare result instead of a named list
  EDGE(
    as.numeric(open), 
    as.numeric(high), 
    as.numeric(low), 
    as.numeric(close),
    width = width,
    sign = sign, 
    na.rm = na.rm, 
    aslist = FALSE
  )
  
}
331 | 
#' Expanding Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices
#' 
#' Implements an expanding window calculation of the efficient estimator of bid-ask spreads 
#' from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024):
#' \doi{10.1016/j.jfineco.2024.103916}.
#' 
#' @details
#' Prices must be sorted in ascending order of the timestamp.
#' 
#' @param open numeric vector of open prices.
#' @param high numeric vector of high prices.
#' @param low numeric vector of low prices.
#' @param close numeric vector of close prices.
#' @param sign whether to return signed estimates.
#' @param na.rm whether to ignore missing values.
#'
#' @return Vector of spread estimates. 
#' A value of 0.01 corresponds to a spread of 1\%.
#' This function always returns a result of the same length as the input prices;
#' in particular, empty inputs give an empty numeric vector.
#'
#' @references 
#' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. 
#' \doi{10.1016/j.jfineco.2024.103916}
#'
#' @examples
#' # reduce number of threads to pass CRAN checks (you can ignore this)
#' data.table::setDTthreads(1)
#' 
#' # simulate open, high, low, and close prices with spread 1%
#' x <- sim(n = 1000, spread = 0.01)
#'
#' # estimate the spread using an expanding window
#' s <- edge_expanding(x$Open, x$High, x$Low, x$Close)
#' tail(s)
#' # equivalent to
#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x), na.rm = TRUE)
#' tail(s)
#' 
#' @export
#' 
edge_expanding <- function(open, high, low, close, sign = FALSE, na.rm = TRUE){
  
  # the four price vectors must be aligned observation by observation
  n <- length(open)
  if(length(high) != n || length(low) != n || length(close) != n)
    stop("open, high, low, close must have the same length")
  
  # nothing to estimate on empty input; return early so that the result has 
  # the same (zero) length as the prices
  if(n == 0)
    return(numeric(0))
  
  # the i-th window spans observations 1..i; seq_len() is used instead of 1:n, 
  # which would expand to c(1, 0) for n = 0
  EDGE(
    open = as.numeric(open), 
    high = as.numeric(high), 
    low = as.numeric(low), 
    close = as.numeric(close),
    width = seq_len(n), 
    sign = sign, 
    na.rm = na.rm, 
    aslist = FALSE
  )
  
}
389 | 


--------------------------------------------------------------------------------