├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── pkgdown.yaml ├── .gitignore ├── CRAN-SUBMISSION ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── count_functions.R ├── data_samples.R ├── download_stock_directory.R ├── filter_itch.R ├── globals.R ├── gz_functions.R ├── helpers.R ├── read_functions.R ├── write_itch.R └── zzz.R ├── README.Rmd ├── README.md ├── RITCH.Rproj ├── _pkgdown.yml ├── cran-comments.md ├── debug ├── README.Rmd ├── README.md ├── debug_tools.cpp └── test_debug.R ├── inst ├── extdata │ ├── ex20101224.TEST_ITCH_50 │ └── ex20101224.TEST_ITCH_50.gz └── tinytest │ ├── test_filename_helpers.R │ ├── test_filter_itch.R │ ├── test_gz_functions.R │ ├── test_read_functions.R │ └── test_write_itch.R ├── man ├── add_meta_to_filename.Rd ├── count_functions.Rd ├── count_internal.Rd ├── download_sample_file.Rd ├── download_stock_directory.Rd ├── ex20101224.TEST_ITCH_50.Rd ├── figures │ └── README-ETF_plot-1.png ├── filter_itch.Rd ├── format_bytes.Rd ├── get_date_from_filename.Rd ├── get_exchange_from_filename.Rd ├── get_msg_classes.Rd ├── gz_functions.Rd ├── list_sample_files.Rd ├── open_itch_sample_server.Rd ├── open_itch_specification.Rd ├── read_functions.Rd └── write_itch.Rd ├── simulate_dataset.R ├── src ├── Makevars.win ├── RcppExports.cpp ├── count_messages.cpp ├── count_messages.h ├── filter_itch.cpp ├── filter_itch.h ├── gz_functionality.cpp ├── helper_functions.cpp ├── helper_functions.h ├── read_functions.cpp ├── read_functions.h ├── specifications.h ├── write_functions.cpp └── write_functions.h └── tests └── tinytests.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^[^/]+_ITCH_50$ 6 | ^[^/]+_ITCH_50\.gz$ 7 | ^[^/]+_ITCH50$ 8 | ^[^/]+_ITCH50\.gz$ 9 | ^README_cache$ 10 | ^NQTVITCHspecification.*\.pdf$ 11 | ^debug/*$ 12 | ^simulate_dataset\.R$ 
13 | ^\.github$ 14 | ^cran-comments\.md$ 15 | ^_pkgdown\.yml$ 16 | ^docs$ 17 | ^pkgdown$ 18 | ^CRAN-SUBMISSION$ 19 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: 
-------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::pkgdown, local::. 34 | needs: website 35 | 36 | - name: Build site 37 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 38 | shell: Rscript {0} 39 | 40 | - name: Deploy to GitHub pages 🚀 41 | if: github.event_name != 'pull_request' 42 | uses: JamesIves/github-pages-deploy-action@v4.4.1 43 | with: 44 | clean: false 45 | branch: gh-pages 46 | folder: docs 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/r 3 | 4 | ### R ### 5 | # History files 6 | .Rhistory 7 | .Rapp.history 8 | 9 | # Session Data files 10 | .RData 11 | 12 | # Example code in package build process 13 | *-Ex.R 14 | 15 | # Output files from R CMD build 16 | /*.tar.gz 17 | 18 | # Output files from R CMD check 19 | /*.Rcheck/ 20 | 21 | # RStudio files 22 | .Rproj.user/ 23 | 24 | # produced vignettes 25 | vignettes/*.html 26 | vignettes/*.pdf 27 | 28 | 
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 29 | .httr-oauth 30 | 31 | # knitr and R markdown default cache directories 32 | /*_cache/ 33 | /cache/ 34 | 35 | # Temporary files created by R markdown 36 | *.utf8.md 37 | *.knit.md 38 | 39 | # End of https://www.gitignore.io/api/r 40 | 41 | 42 | # Created by https://www.gitignore.io/api/c++ 43 | 44 | ### C++ ### 45 | # Prerequisites 46 | *.d 47 | 48 | # Compiled Object files 49 | *.slo 50 | *.lo 51 | *.o 52 | *.obj 53 | 54 | # Precompiled Headers 55 | *.gch 56 | *.pch 57 | 58 | # Compiled Dynamic libraries 59 | *.so 60 | *.dylib 61 | *.dll 62 | 63 | # Fortran module files 64 | *.mod 65 | *.smod 66 | 67 | # Compiled Static libraries 68 | *.lai 69 | *.la 70 | *.a 71 | *.lib 72 | 73 | # Executables 74 | *.exe 75 | *.out 76 | *.app 77 | 78 | # End of https://www.gitignore.io/api/c++ 79 | 80 | .Rproj.user 81 | 82 | # ITCH FILES 83 | *.*_ITCH_50 84 | *.*_ITCH_50.gz 85 | !/inst/extdata/*.*_ITCH_50 86 | !/inst/extdata/*.*_ITCH_50.gz 87 | 88 | # ITCH Documentation 89 | NQTVITCHspecification*.pdf 90 | 91 | docs 92 | -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 0.1.26 2 | Date: 2024-01-15 14:19:16 UTC 3 | SHA: 4a575a1b9627b51aa7567041a84d07dc5ca429ff 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: RITCH 2 | Type: Package 3 | Title: R Parser for the ITCH-Protocol 4 | Version: 0.1.27 5 | Authors@R: c( 6 | person("David", "Zimmermann-Kollenda", , "david_j_zimmermann@hotmail.com", role = c("aut", "cre")) 7 | ) 8 | Description: Allows to efficiently parse, filter, and write binary ITCH Files (Version 5.0) containing detailed financial transactions as distributed by NASDAQ to an R data.table. 
9 | License: MIT + file LICENSE 10 | URL: https://davzim.github.io/RITCH/, 11 | https://github.com/DavZim/RITCH 12 | BugReports: https://github.com/DavZim/RITCH/issues 13 | Depends: R (>= 3.5.0) 14 | Imports: data.table, 15 | Rcpp (>= 0.12.12), 16 | nanotime (>= 0.3.2), 17 | bit64 (>= 4.0.5) 18 | LinkingTo: Rcpp 19 | Encoding: UTF-8 20 | RoxygenNote: 7.2.3 21 | Suggests: 22 | tinytest 23 | Roxygen: list(markdown = TRUE) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2022 2 | COPYRIGHT HOLDER: David Zimmermann-Kollenda 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2022 David Zimmermann-Kollenda 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(add_meta_to_filename) 4 | export(count_ipo) 5 | export(count_luld) 6 | export(count_market_participant_states) 7 | export(count_messages) 8 | export(count_modifications) 9 | export(count_mwcb) 10 | export(count_noii) 11 | export(count_orders) 12 | export(count_reg_sho) 13 | export(count_rpii) 14 | export(count_stock_directory) 15 | export(count_system_events) 16 | export(count_trades) 17 | export(count_trading_status) 18 | export(download_sample_file) 19 | export(download_stock_directory) 20 | export(filter_itch) 21 | export(format_bytes) 22 | export(get_date_from_filename) 23 | export(get_exchange_from_filename) 24 | export(get_modifications) 25 | export(get_msg_classes) 26 | export(get_orders) 27 | export(get_trades) 28 | export(gunzip_file) 29 | export(gzip_file) 30 | export(list_sample_files) 31 | export(open_itch_sample_server) 32 | export(open_itch_specification) 33 | export(read_ipo) 34 | export(read_itch) 35 | export(read_luld) 36 | export(read_market_participant_states) 37 | export(read_modifications) 38 | export(read_mwcb) 39 | export(read_noii) 40 | export(read_orders) 41 | export(read_reg_sho) 42 | export(read_rpii) 43 | export(read_stock_directory) 44 | export(read_system_events) 45 | export(read_trades) 46 | export(read_trading_status) 47 | export(write_itch) 48 | import(data.table) 49 | importFrom(Rcpp,sourceCpp) 50 | importFrom(bit64,as.integer64) 51 | importFrom(nanotime,nanotime) 52 | importFrom(utils,browseURL) 53 | 
importFrom(utils,download.file) 54 | useDynLib(RITCH) 55 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # RITCH 0.1.27 2 | 3 | * fix bug where no messages would be reported for larger files 4 | 5 | # RITCH 0.1.26 6 | 7 | * fix bug where gz functionality would write to user library or current directory 8 | 9 | # RITCH 0.1.25 10 | 11 | * fix Debian segfault when writing to user library 12 | 13 | # RITCH 0.1.24 14 | 15 | * fix printf warnings about wrong argument type 16 | 17 | # RITCH 0.1.23 18 | 19 | * fix compilation warning and limit test cases to two cores (CRAN...) 20 | 21 | # RITCH 0.1.22 22 | 23 | * fix CRAN release by shorten example runtimes 24 | 25 | # RITCH 0.1.21 26 | 27 | * fix long running tasks in read functions 28 | 29 | # RITCH 0.1.20 30 | 31 | * fix bug where tests would fail on some platforms where files are written and not cleaned up 32 | * CRAN release 33 | 34 | # RITCH 0.1.19 35 | 36 | * fix bug in tests on some platforms 37 | * CRAN release 38 | 39 | # RITCH 0.1.18 40 | 41 | * CRAN release 42 | 43 | 44 | # RITCH 0.1.11 45 | 46 | * update internal C++ structure, reducing code complexity, increasing read speeds, reducing size of package 47 | * add `filter_itch(infile, outfile, ...)` to filter directly to files 48 | 49 | 50 | # RITCH 0.1.10 51 | 52 | * add `write_itch()` to write ITCH files 53 | * add filters to `read_*` functions 54 | * add read functions for all classes 55 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | count_messages_impl <- function(filename, max_buffer_size, quiet) { 5 | .Call('_RITCH_count_messages_impl', PACKAGE = 'RITCH', 
filename, max_buffer_size, quiet) 6 | } 7 | 8 | filter_itch_impl <- function(infile, outfile, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, append, max_buffer_size, quiet) { 9 | invisible(.Call('_RITCH_filter_itch_impl', PACKAGE = 'RITCH', infile, outfile, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, append, max_buffer_size, quiet)) 10 | } 11 | 12 | gunzip_file_impl <- function(infile, outfile, buffer_size = 1e9L) { 13 | invisible(.Call('_RITCH_gunzip_file_impl', PACKAGE = 'RITCH', infile, outfile, buffer_size)) 14 | } 15 | 16 | gzip_file_impl <- function(infile, outfile, buffer_size = 1e9L) { 17 | invisible(.Call('_RITCH_gzip_file_impl', PACKAGE = 'RITCH', infile, outfile, buffer_size)) 18 | } 19 | 20 | read_itch_impl <- function(classes, filename, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, max_buffer_size, quiet) { 21 | .Call('_RITCH_read_itch_impl', PACKAGE = 'RITCH', classes, filename, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, max_buffer_size, quiet) 22 | } 23 | 24 | write_itch_impl <- function(ll, filename, append, gz, max_buffer_size, quiet) { 25 | .Call('_RITCH_write_itch_impl', PACKAGE = 'RITCH', ll, filename, append, gz, max_buffer_size, quiet) 26 | } 27 | 28 | -------------------------------------------------------------------------------- /R/count_functions.R: -------------------------------------------------------------------------------- 1 | #' @name count_functions 2 | #' @rdname count_functions 3 | #' @title Counts the messages of an ITCH-file 4 | #' 5 | #' @param file the path to the input file, either a gz-file or a plain-text file 6 | #' @param x a file or a data.table containing the message types and the counts, 7 | #' as outputted by `count_messages` 8 | #' @param add_meta_data if the meta-data of the messages should be added, defaults to FALSE 9 | #' @param buffer_size the size of the buffer in bytes, 
#'   defaults to 1e8 (100 MB), if you have a large amount of RAM, 1e9 (1GB) might be faster
#' @param quiet if TRUE, the status messages are suppressed, defaults to FALSE
#' @param force_gunzip only applies if file is a gz-file and a file with the same (gunzipped) name already exists.
#'   if set to TRUE, the existing file is overwritten. Default value is FALSE
#' @param gz_dir a directory where the gz archive is extracted to.
#'   Only applies if file is a gz archive. Default is [tempdir()].
#' @param force_cleanup only applies if file is a gz-file. If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards.
#' @return a data.table containing the message-type and their counts for `count_messages`
#'   or an integer value for the other functions.
#' @export
#'
#' @examples
#' file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
#' count_messages(file)
#' count_messages(file, add_meta_data = TRUE, quiet = TRUE)
#'
#' # file can also be a .gz file
#' gz_file <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
#' count_messages(gz_file, quiet = TRUE)
#'
#' # count only a specific class
#' msg_count <- count_messages(file, quiet = TRUE)
#'
#' # either count based on a given data.table outputted by count_messages
#' count_orders(msg_count)
#'
#' # or count orders from a file and not from a msg_count
#' count_orders(file)
#'
#' ### Specific class count functions are:
count_messages <- function(file, add_meta_data = FALSE, buffer_size = -1,
                           quiet = FALSE, force_gunzip = FALSE,
                           gz_dir = tempdir(), force_cleanup = TRUE) {
  t0 <- Sys.time()
  if (!file.exists(file))
    stop(sprintf("File '%s' not found!", file))

  # Set the default value of the buffer size
  buffer_size <- check_buffer_size(buffer_size, file)

  orig_file <- file
  is_gz <- grepl("\\.gz$", orig_file)

  # Only needed for gz files: the raw file is not deleted afterwards when it
  # already existed before this call. The extraction target is only known once
  # check_and_gunzip() has returned, so remember up front which of the
  # plausible raw-file locations (next to the archive, the working directory,
  # gz_dir) already exist.
  raw_path <- sub("\\.gz$", "", orig_file)
  raw_candidates <- unique(c(raw_path, basename(raw_path),
                             file.path(gz_dir, basename(raw_path))))
  preexisting <- raw_candidates[file.exists(raw_candidates)]

  # NOTE(review): assumes check_and_gunzip() returns the path of the raw
  # (gunzipped) file that is to be read -- confirm against helpers.R
  file <- check_and_gunzip(file, gz_dir, buffer_size, force_gunzip, quiet)
  raw_file_existed <- normalizePath(file, mustWork = FALSE) %in%
    normalizePath(preexisting, mustWork = FALSE)

  df <- count_messages_impl(file, buffer_size, quiet)

  df <- data.table::setalloccol(df)

  if (add_meta_data) {
    dd <- RITCH::get_msg_classes()
    df <- df[dd, on = "msg_type"]
  }

  report_end(t0, quiet, orig_file)

  # Remove the file that was actually read. The previous code unlinked
  # basename(file), i.e. a path relative to the working directory, which is
  # not the file reported in the message below when the archive was extracted
  # to gz_dir. The identical() guard makes sure the input archive itself can
  # never be deleted.
  if (is_gz && force_cleanup && !raw_file_existed &&
      !identical(file, orig_file)) {
    unlink(file)
    if (!quiet) cat(sprintf("[Cleanup] Removing file '%s'\n", file))
  }

  return(df)
}

#' Returns the message class data for the message types
#'
#' All information is handled according to the official ITCH 5.0
#' documentation, see [open_itch_specification()].
#'
#' - `msg_type` the type of the message
#' - `msg_class` the group the message belongs to
#' - `msg_name` the official name of the message
#' - `doc_nr` the number of the message in the documentation
#'
#' @seealso `open_itch_specification()`
#'
#' @return a data.table with the information of the message-types
#' @export
#'
#' @examples
#' get_msg_classes()
get_msg_classes <- function() {
  data.table::data.table(
    msg_type = c("S", "R", "H", "Y", "L", "V", "W", "K", "J", "h", "A", "F", "E",
                 "C", "X", "D", "U", "P", "Q", "B", "I", "N"),
    msg_class = c("system_events", "stock_directory", "trading_status",
                  "reg_sho", "market_participant_states", "mwcb",
                  "mwcb", "ipo", "luld", "trading_status", "orders", "orders",
                  "modifications", "modifications", "modifications",
                  "modifications", "modifications", "trades", "trades", "trades",
                  "noii", "rpii"),
    msg_name = c("System Event Message", "Stock Directory",
                 "Stock Trading Action", "Reg SHO Restriction",
                 "Market Participant Position", "MWCB Decline Level Message",
                 "MWCB Status Message", "IPO Quoting Period Update",
                 "LULD Auction Collar", "Operational Halt", "Add Order Message",
                 "Add Order - MPID Attribution Message",
                 "Order Executed Message",
                 "Order Executed Message With Price Message",
                 "Order Cancel Message", "Order Delete Message",
                 "Order Replace Message", "Trade Message (Non-Cross)",
                 "Cross Trade Message", "Broken Trade Message",
                 "NOII Message",
                 "Retail Interest Message"),
    doc_nr = c("4.1", "4.2.1", "4.2.2", "4.2.3", "4.2.4", "4.2.5.1", "4.2.5.2",
               "4.2.6", "4.2.7", "4.2.8", "4.3.1", "4.3.2", "4.4.1", "4.4.2", "4.4.3",
               "4.4.4", "4.4.5", "4.5.1", "4.5.2", "4.5.3", "4.6", "4.7")
  )
}

#' Internal function to count the messages
#'
#' Sums the `count` column over all rows of `x` whose `msg_type` is one of
#' `types`.
#'
#' @param x a data.frame containing the message types and the counts
#'   (columns `msg_type` and `count`)
#' @param types a vector containing the types
#'
#' @keywords internal
#' @return an integer value, the summed count of the messages in x that have
#'   one of the given types
#'
#' @examples
#' # Only used internally
count_internal <- function(x, types) {
  if (!is.data.frame(x)) stop("x has to be a data.table")
  if (!all(c("msg_type", "count") %in% names(x)))
    stop("x has to have the variables 'msg_type' and 'count'")

  as.integer(x[msg_type %in% types][, sum(count)])
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_orders`: Counts order messages. Message type `A` and `F`
#'
#' @examples
#' count_orders(msg_count)
count_orders <- function(x) {
  # a character value is treated as a file and counted first
  if (is.character(x)) x <- count_messages(x, quiet = TRUE)
  types <- c("A", "F")
  count_internal(x, types)
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_trades`: Counts trade messages.
#'   Message type `P`, `Q` and `B`
#'
#' @examples
#' count_trades(msg_count)
count_trades <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, c("P", "Q", "B"))
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_modifications`: Counts order modification messages. Message
#'   type `E`, `C`, `X`, `D`, and `U`
#'
#' @examples
#' count_modifications(msg_count)
count_modifications <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, c("E", "C", "X", "D", "U"))
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_system_events`: Counts system event messages. Message type `S`
#'
#' @examples
#' count_system_events(msg_count)
count_system_events <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, "S")
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_stock_directory`: Counts stock directory messages. Message
#'   type `R`
#'
#' @examples
#' count_stock_directory(msg_count)
count_stock_directory <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, "R")
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_trading_status`: Counts trading status messages.
#'   Message type `H` and `h`
#'
#' @examples
#' count_trading_status(msg_count)
count_trading_status <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, c("H", "h"))
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_reg_sho`: Counts messages regarding reg SHO. Message type
#'   `Y`
#'
#' @examples
#' count_reg_sho(msg_count)
count_reg_sho <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, "Y")
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_market_participant_states`: Counts messages regarding the
#'   status of market participants. Message type `L`
#'
#' @examples
#' count_market_participant_states(msg_count)
count_market_participant_states <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, "L")
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_mwcb`: Counts messages regarding Market-Wide-Circuit-Breakers
#'   (MWCB). Message type `V` and `W`
#'
#' @examples
#' count_mwcb(msg_count)
count_mwcb <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, c("V", "W"))
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_ipo`: Counts messages regarding IPOs.
#'   Message type `K`
#'
#' @examples
#' count_ipo(msg_count)
count_ipo <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, "K")
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_luld`: Counts messages regarding LULDs (limit up-limit down)
#'   auction collars. Message type `J`
#'
#' @examples
#' count_luld(msg_count)
count_luld <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, "J")
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_noii`: Counts Net Order Imbalance Indicator (NOII) messages.
#'   Message type `I`
#'
#' @examples
#' count_noii(msg_count)
count_noii <- function(x) {
  # a character `x` is counted via count_messages() first
  msg_counts <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
  count_internal(msg_counts, "I")
}

#' @rdname count_functions
#' @export
#' @details
#' - `count_rpii`: Counts Retail Price Improvement Indicator (RPII)
#'   messages.
Message type `N` 306 | #' 307 | #' @examples 308 | #' count_rpii(msg_count) 309 | count_rpii <- function(x) { 310 | if (is.character(x)) x <- count_messages(x, quiet = TRUE) 311 | types <- c("N") 312 | count_internal(x, types) 313 | } 314 | -------------------------------------------------------------------------------- /R/data_samples.R: -------------------------------------------------------------------------------- 1 | 2 | #' Returns a data.table of the sample files on the server 3 | #' 4 | #' The Server can be found at 5 | #' 6 | #' @return a data.table of the files 7 | #' @export 8 | #' 9 | #' @examples 10 | #' \dontrun{ 11 | #' list_sample_files() 12 | #' } 13 | list_sample_files <- function() { 14 | 15 | url <- "https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/" 16 | raw <- suppressWarnings(readLines(url)) 17 | 18 | cont <- trimws(unlist(strsplit(raw, "
"))) 19 | cont <- cont[grepl("ITCH_?50\\.gz$", cont)] 20 | cont <- strsplit(cont, " +|HREF=\"|\">|") 21 | 22 | df <- data.table::data.table( 23 | file = sapply(cont, function(x) x[8]), 24 | size = sapply(cont, function(x) x[4]), 25 | date = sapply(cont, function(x) x[1]), 26 | time = sapply(cont, function(x) x[2]), 27 | tt = sapply(cont, function(x) x[3]) 28 | ) 29 | 30 | df[, ':=' ( 31 | file_size = as.numeric(size), 32 | last_modified = as.POSIXct(paste(date, time, tt), format = "%m/%d/%Y %H:%M %p", tz = "GMT"), 33 | exchange = get_exchange_from_filename(file), 34 | date = get_date_from_filename(file) 35 | )] 36 | 37 | return(df[, .(file, exchange, date, file_size, last_modified)]) 38 | } 39 | 40 | 41 | #' Downloads a sample ITCH File from NASDAQs Server 42 | #' 43 | #' The Server can be found at 44 | #' 45 | #' Warning: the smallest file is around 300 MB, with the largest exceeding 5 GB. 46 | #' There are about 17 files in total. Downloading all might take a considerable amount of time. 47 | #' 48 | #' @param choice which file should be chosen? One of: smallest (default), largest, 49 | #' earliest (date-wise), latest, random, or all. 50 | #' @param file the name of a specific file, overrules the choice and exchanges arguments 51 | #' @param exchanges A vector of exchanges, can be NASDAQ, BX, or PSX. 52 | #' The default value is to consider all exchanges. 53 | #' @param dir The directory where the files will be saved to, default is current working directory. 54 | #' @param force_download If the file should be downloaded even if it already exists locally. 55 | #' Default value is FALSE. 56 | #' @param check_md5sum If the md5-sum (hash-value) of the downloaded file should be checked, default value is TRUE. 
#' @param quiet if TRUE, the status messages are suppressed, defaults to FALSE
#'
#' @return an invisible vector of the files
#' @export
#'
#' @examples
#' \dontrun{
#' download_sample_file()
#' file <- download_sample_file()
#' file
#'
#' # download a specific sample file
#' file <- download_sample_file(file = "2019130.BX_ITCH_50.gz")
#' file
#' }
download_sample_file <- function(choice = c("smallest", "largest", "earliest", "latest", "random", "all"),
                                 file = NA,
                                 exchanges = NA,
                                 dir = ".",
                                 force_download = FALSE,
                                 check_md5sum = TRUE,
                                 quiet = FALSE) {
  choice <- match.arg(choice)

  url <- "https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/"
  df <- list_sample_files()

  file_given <- !all(is.na(file))

  # Restrict to the requested exchanges. The previous condition
  # `length(exchanges) != 1 && !is.na(exchanges)` never filtered for a single
  # exchange and handed a vector to `&&` for several. An explicitly named
  # file overrules the exchanges argument, so no filter is applied then.
  exchanges <- exchanges[!is.na(exchanges)]
  if (!file_given && length(exchanges) > 0)
    df <- df[exchange %in% toupper(exchanges), ]

  if (nrow(df) == 0)
    stop("No sample files found for the requested exchanges")

  if (!quiet) cat(paste0("Downloading '", choice, "' sample file(s)\n"))

  # sort descending so that the first row is the largest/latest and the last
  # row the smallest/earliest file
  if (choice %in% c("smallest", "largest"))
    df <- df[order(file_size, decreasing = TRUE)]
  if (choice %in% c("earliest", "latest"))
    df <- df[order(date, decreasing = TRUE)]

  idx <- switch(choice,
                smallest = nrow(df),
                random = sample.int(nrow(df), 1),
                largest = 1,
                earliest = nrow(df),
                latest = 1,
                all = seq_len(nrow(df)))

  if (file_given) idx <- df$file %in% file
  df_take <- df[idx, ]

  # fail with a clear message instead of an obscure subscript error raised
  # from inside apply() on an empty table
  if (nrow(df_take) == 0)
    stop(sprintf("File '%s' not found in the list of sample files",
                 paste(file, collapse = "', '")))

  files <- apply(df_take, 1, function(el) {
    fname <- el[["file"]]
    file_path <- file.path(dir, fname)

    download_file <- TRUE

    if (file.exists(file_path)) {
      txt <- paste0("File '", file_path, "' exists already, ")

      if (force_download) {
        if (!quiet) cat(paste0(txt, "downloading!\n"))
      } else {
        if (!quiet) cat(paste0(txt, "not downloading it again!\n"))
        download_file <- FALSE
      }
    }
    file_url <- paste0(url, fname)

    if (download_file) {
      if (!quiet) cat(paste0("Downloading File '", file_path, "'.\n"))
      download.file(file_url, destfile = file_path, mode = "wb", quiet = quiet)
    }

    if (check_md5sum) {
      if (!quiet) cat(paste0("Checking md5 sum of file '", file_path, "' ... "))
      # the expected hash is read from "<file url>.md5sum"
      md5_url <- paste0(file_url, ".md5sum")
      md5 <- try(readLines(md5_url), silent = TRUE)
      if (inherits(md5, "try-error")) {
        cat(sprintf("Could not find md5 file for file %s, skipping check\n",
                    file_url))
        return(fname)
      }
      # the hash is the first space-separated token of the first line
      expected <- strsplit(md5, " ")[[1]][1]
      got <- tools::md5sum(file_path)
      if (expected != got) {
        if (!quiet) cat("\n")
        warning(paste0("md5 hash for file '", file_path,
                       "' not matching.\nExpected '", expected, "' got '", got, "'!"))
      } else {
        if (!quiet) cat(paste0("matches '", expected, "' - success !\n"))
      }
    }

    return(fname)
  })

  return(invisible(files))
}
--------------------------------------------------------------------------------
/R/download_stock_directory.R:
--------------------------------------------------------------------------------
#' Downloads the stock directory (stock locate codes) for a given date and exchange
#'
#' The data is downloaded from NASDAQs server, which can be found here
#'
#'
#' @param exchange The exchange, either NASDAQ (equivalent to NDQ), BX, or PSX
#' @param date The date, should be of class Date. If not the value is converted
#' using `as.Date`.
#' @param cache If the stock directory should be cached, can be set to TRUE
#' to save the stock directories in the working directory or a character for a
#' target directory.
#' @param quiet If the download function should be quiet, default is FALSE.
#'
#' @return a data.table of the tickers, the respective stock locate codes, and
#' the exchange/date information
#' @export
#'
#' @examples
#' \dontrun{
#' download_stock_directory("BX", "2019-07-02")
#' download_stock_directory(c("BX", "NDQ"), c("2019-07-02", "2019-07-03"))
#' download_stock_directory("BX", "2019-07-02", cache = TRUE)
#'
#' download_stock_directory("BX", "2019-07-02", cache = "stock_directory")
#' dir.exists("stock_directory")
#' list.files("stock_directory")
#' }
download_stock_directory <- function(exchange, date, cache = FALSE,
                                     quiet = FALSE) {

  exchange <- ifelse(tolower(exchange) == "nasdaq", "ndq", tolower(exchange))
  if (!all(exchange %in% c("ndq", "bx", "psx")))
    stop("Exchange must be 'NASDAQ' ('NDQ'), 'BX', or 'PSX'")
  if (length(cache) != 1) stop("cache must be of size 1")

  if (is.character(date)) date <- as.Date(date)
  base_url <- "https://emi.nasdaq.com/ITCH/Stock_Locate_Codes/"

  # if multiple exchanges or dates were specified, take all possible combinations
  # and call the function recursively
  if (length(exchange) > 1 || length(date) > 1) {
    vals <- expand.grid(ex = exchange, d = date, stringsAsFactors = FALSE)

    # forward cache and quiet, otherwise the recursive calls would silently
    # fall back to the defaults (no caching, verbose output)
    res <- lapply(seq_len(nrow(vals)), function(i) {
      download_stock_directory(vals$ex[i], vals$d[i],
                               cache = cache, quiet = quiet)
    })

    d <- data.table::rbindlist(res)

  } else {
    filename <- paste0(exchange, "_stocklocate_", format(date, "%Y%m%d"), ".txt")
    url <- paste0(base_url, filename)
    file <- url

    # cache is either a target directory (character) or TRUE (working directory)
    if (is.character(cache) || isTRUE(cache)) {

      destfile <- filename
      if (is.character(cache)) {
        if (!dir.exists(cache)) dir.create(cache, recursive = TRUE)
        destfile <- file.path(cache, filename)
      }

      txt <- sprintf("for exchange '%s' and date '%s'",
                     exchange, format(date, "%Y-%m-%d"))
      # download or use cache
      if (!file.exists(destfile)) {
        if (!quiet) cat(sprintf("[Stock Locate] Downloading %s\n", txt))
        download.file(url, destfile, quiet = quiet)
      } else {
        if (!quiet)
          cat(sprintf("[Stock Locate] File %s already exists, using cache\n",
                      txt))
      }
      file <- destfile
    }

    # without a cache, fread reads directly from the URL
    d <- data.table::fread(file, showProgress = !quiet)

    data.table::setnames(d, c("ticker", "stock_locate"))
    d[, ':=' (exchange = toupper(exchange), date = date)]
  }

  return(d[])
}

--------------------------------------------------------------------------------
/R/filter_itch.R:
--------------------------------------------------------------------------------
#' Filters an ITCH file to another ITCH file
#'
#' This function allows to perform very fast filter operations on large ITCH
#' files. The messages are written to another ITCH file.
#'
#' Note that this can be especially useful on larger files or where memory
#' is not large enough to load the data and thus limits the analysis.
#'
#' As with the [read_itch()] functions, it allows to filter for
#' `msg_class`, `msg_type`, `stock_locate`/`stock`, and
#' `timestamp`.
#'
#' @inheritParams read_functions
#' @param infile the input file where the messages are taken from, can be a
#' gz-archive or a plain ITCH file.
#' @param outfile the output file where the filtered messages are written to.
#' Note that the date and exchange information from the `infile` are used,
#' see also [add_meta_to_filename()] for further information.
#' @param append if the messages should be appended to the outfile, default is
#' false. Note, this is helpful if `skip` and or `n_max` are used for
#' batch filtering.
#' @param gz if the output file should be gzip-compressed. Note that the name
#' of the output file will be appended with .gz if not already present. The
#' final output name is returned. Default value is false.
#' @param overwrite if an existing outfile with the same name should be
#' overwritten. Default value is false
#'
#' @return the name of the output file (maybe different from the inputted
#' outfile due to adding the date and exchange), silently
#' @export
#'
#' @examples
#' infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
#' outfile <- tempfile(fileext = "_20101224.TEST_ITCH_50")
#' filter_itch(
#'   infile, outfile,
#'   filter_msg_class = c("orders", "trades"),
#'   filter_msg_type = "R", # stock_directory
#'   skip = 0, n_max = 100
#' )
#'
#' # expecting 100 orders, 100 trades, and 3 stock_directory entries
#' count_messages(outfile)
#'
#' # check that the output file contains the same
#' res <- read_itch(outfile, c("orders", "trades", "stock_directory"))
#' sapply(res, nrow)
#'
#' res2 <- read_itch(infile, c("orders", "trades", "stock_directory"),
#'                   n_max = 100)
#'
#' all.equal(res, res2)
filter_itch <- function(infile, outfile,
                        filter_msg_class = NA_character_,
                        filter_msg_type = NA_character_,
                        filter_stock_locate = NA_integer_,
                        min_timestamp = bit64::as.integer64(NA),
                        max_timestamp = bit64::as.integer64(NA),
                        filter_stock = NA_character_, stock_directory = NA,
                        skip = 0, n_max = -1, append = FALSE, overwrite = FALSE,
                        gz = FALSE, buffer_size = -1, quiet = FALSE,
                        force_gunzip = FALSE, force_cleanup = TRUE) {
  t0 <- Sys.time()

  # mapping of message classes to the message types they consist of
  msg_classes <- list(
    "system_events" = "S",
    "stock_directory" = "R",
    "trading_status" = c("H", "h"),
    "reg_sho" = "Y",
    "market_participant_states" = "L",
    "mwcb" = c("V", "W"),
    "ipo" = "K",
    "luld" = "J",
    "orders" = c("A", "F"),
    "modifications" = c("E", "C", "X", "D", "U"),
    "trades" = c("P", "Q", "B"),
    "noii" = "I",
    "rpii" = "N"
  )

  # translate the message classes into message types and add them to the filter
  if (!any(is.na(filter_msg_class))) {
    filter_msg_type <- c(
      filter_msg_type,
      as.character(unlist(msg_classes[tolower(filter_msg_class)]))
    )
  }

  if (!file.exists(infile))
    stop(sprintf("File '%s' not found!", infile))

  date <- get_date_from_filename(infile)
  exch <- get_exchange_from_filename(infile)
  outfile <- add_meta_to_filename(outfile, date, exch)

  # check that the directory for outfile exists
  outfile_dir <- gsub("[^/]+$", "", outfile)
  if (outfile_dir != "" && !dir.exists(outfile_dir)) {
    if (overwrite) {
      dir.create(outfile_dir, recursive = TRUE)
    } else {
      stop(sprintf(
        "Directory '%s' not found, to create/overwrite use overwrite = TRUE",
        outfile_dir
      ))
    }
  }

  # first write to unzipped file, then gzip the file later...
  if (grepl("\\.gz$", outfile)) outfile <- gsub("\\.gz$", "", outfile)

  if (file.exists(outfile) && !append && !overwrite)
    stop(sprintf("File '%s' already found, to overwrite use overwrite = TRUE or use append = TRUE",
                 outfile))

  # sprintf() alone only builds the string, cat() is needed to print it
  if (!quiet) {
    cat(sprintf("[infile] '%s'\n", infile))
    cat(sprintf("[outfile] '%s'\n", outfile))
  }

  # treat n_max
  if (is.data.frame(n_max))
    stop("n_max cannot be a data.frame in filter_itch!")

  # +1 as we want to skip, -1 as cpp is zero indexed
  start <- max(skip, 0)
  end <- max(skip + n_max - 1, -1)
  if (end < start) end <- -1

  if (!quiet && (start != 0 || end != -1))
    cat(sprintf("[Filter] skip: %i n_max: %i (%i - %i)\n",
                skip, n_max, start + 1, end + 1))

  # Treat filters
  # Message types
  filter_msg_type <- check_msg_types(filter_msg_type, quiet)

  # locate code
  filter_stock_locate <- filter_stock_locate[!is.na(filter_stock_locate)]
  filter_stock_locate <- as.integer(filter_stock_locate)

  # Timestamp
  t <- check_timestamps(min_timestamp, max_timestamp, quiet)
  min_timestamp <- t$min
  max_timestamp <- t$max

  # Stock
  filter_stock_locate <- check_stock_filters(filter_stock, stock_directory,
                                             filter_stock_locate, infile)

  if (!quiet && length(filter_stock_locate) > 0)
    cat(paste0("[Filter] stock_locate: '",
               paste(filter_stock_locate, collapse = "', '"),
               "'\n"))

  # Set the default value of the buffer size
  buffer_size <- check_buffer_size(buffer_size, infile)

  # gz inputs are unzipped next to the outfile; remember whether that raw file
  # was already there, in which case it must not be removed during cleanup
  is_gz_input <- grepl("\\.gz$", infile)
  raw_file <- file.path(dirname(outfile), basename(sub("\\.gz$", "", infile)))
  raw_file_existed <- file.exists(raw_file)
  infile <- check_and_gunzip(infile, dirname(outfile), buffer_size, force_gunzip, quiet)

  filter_itch_impl(infile, outfile, start, end,
                   filter_msg_type, filter_stock_locate,
                   min_timestamp, max_timestamp,
                   append, buffer_size, quiet)

  if (gz) {
    if (!quiet) cat("[gzip] outfile\n")
    of <- outfile
    outfile <- gzip_file(infile = outfile,
                         outfile = paste0(outfile, ".gz"))
    unlink(of) # delete the temporary file
  }

  invisible(gc())

  report_end(t0, quiet, infile)

  # if the input was gzipped and force_cleanup = TRUE, delete the unzipped file
  # (infile now points to the unzipped file that was actually used)
  if (is_gz_input && force_cleanup && !raw_file_existed) {
    if (!quiet) cat(sprintf("[Cleanup] Removing file '%s'\n", infile))
    unlink(infile)
  }

  return(invisible(outfile))
}

--------------------------------------------------------------------------------
/R/globals.R:
--------------------------------------------------------------------------------

utils::globalVariables(
  c("count", "datetime", "msg_type", "timestamp", "exchange", "file_size",
"last_modified", ".", "size", "time", "stock", "stock_locate", "tt") 5 | ) 6 | -------------------------------------------------------------------------------- /R/gz_functions.R: -------------------------------------------------------------------------------- 1 | 2 | #' @name gz_functions 3 | #' @rdname gz_functions 4 | #' @title Compresses and uncompresses files to and from gz-archives 5 | #' 6 | #' @description 7 | #' 8 | #' Allows the compression and uncompression of files 9 | #' 10 | #' @param infile the file to be zipped or unzipped 11 | #' @param outfile the resulting zipped or unzipped file 12 | #' @param buffer_size the size of the buffer to read in at once, default is 4 times the file.size (max 2Gb). 13 | #' 14 | #' @details Functions are 15 | #' 16 | #' @return The filename of the unzipped file, invisibly 17 | #' 18 | #' @examples 19 | #' gzfile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH") 20 | #' file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 21 | #' 22 | NULL 23 | 24 | #' @rdname gz_functions 25 | #' @export 26 | #' @details 27 | #' - `gunzip_file`: uncompresses a gz-archive to raw binary data 28 | #' 29 | #' @examples 30 | #' # uncompress file 31 | #' (outfile <- gunzip_file(gzfile, "tmp")) 32 | #' file.info(outfile) 33 | #' unlink(outfile) 34 | #' 35 | gunzip_file <- function(infile, outfile = gsub("\\.gz$", "", infile), 36 | buffer_size = min(4 * file.size(infile), 2e9)) { 37 | 38 | if (!file.exists(infile)) stop(sprintf("File '%s' not found!", infile)) 39 | if (file.exists(outfile)) unlink(outfile) 40 | 41 | gunzip_file_impl(infile, outfile, buffer_size) 42 | return(invisible(outfile)) 43 | } 44 | 45 | #' @rdname gz_functions 46 | #' @export 47 | #' @details 48 | #' -`gzip_file`: compresses a raw binary data file to a gz-archive 49 | #' 50 | #' @examples 51 | #' # compress file 52 | #' (outfile <- gzip_file(file)) 53 | #' file.info(outfile) 54 | #' unlink(outfile) 55 | gzip_file <- 
function(infile, 56 | outfile = NA, 57 | buffer_size = min(4 * file.size(infile), 2e9)) { 58 | 59 | if (!file.exists(infile)) stop(sprintf("File '%s' not found!", infile)) 60 | 61 | if (is.na(outfile)) { 62 | outfile <- ifelse(grepl("\\.gz$", infile), 63 | infile, 64 | paste0(infile, ".gz")) 65 | # remove path 66 | xx <- strsplit(outfile, "\\\\|/")[[1]] 67 | outfile <- xx[length(xx)] 68 | } 69 | if (file.exists(outfile)) unlink(outfile) 70 | 71 | if (grepl("\\.gz$", infile)) { 72 | warning("Infile is already a gzipped-archive") 73 | return(invisible(infile)) 74 | } 75 | 76 | gzip_file_impl(infile, outfile, buffer_size) 77 | return(invisible(outfile)) 78 | } 79 | 80 | 81 | # Helper function 82 | # returns the (if needed gunzipped) file 83 | # note that it only operates in the dir directory 84 | check_and_gunzip <- function(file, dir = dirname(file), buffer_size, force_gunzip, quiet) { 85 | file <- path.expand(file) 86 | if (!grepl("\\.gz$", file)) return(file) 87 | 88 | outfile <- file.path(dir, basename(gsub("\\.gz$", "", file))) 89 | # check if the raw-file at target directory already exists, if so use this (unless force_gunzip = TRUE) 90 | if (file.exists(outfile) && !quiet && !force_gunzip) { 91 | cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip = TRUE)\n", 92 | outfile)) 93 | return(outfile) 94 | } 95 | 96 | # check if the raw-file at current directory already exists, if so use this (unless force_gunzip = TRUE) 97 | if (file.exists(outfile) && !force_gunzip) { 98 | if (!quiet) 99 | cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip = TRUE)\n", 100 | outfile)) 101 | return(outfile) 102 | } else { 103 | # if the unzipped file doesnt exist or the force_gunzip flag is set, unzip file 104 | unlink(outfile) 105 | if (!quiet) 106 | cat(sprintf("[Decompressing] '%s' to '%s'\n", file, outfile)) 107 | 108 | gunzip_file(file, outfile, buffer_size) 109 | } 110 | return(outfile) 111 | } 

--------------------------------------------------------------------------------
/R/helpers.R:
--------------------------------------------------------------------------------
#' Extracts the date that is encoded in an ITCH-filename
#'
#' @param file a filename
#'
#' @return the date as POSIXct (timezone GMT) or NA if the date could not be
#' converted
#' @export
#' @keywords internal
#'
#' @examples
#' get_date_from_filename("03302017.NASDAQ_ITCH50")
#' get_date_from_filename("20170130.BX_ITCH_50.gz")
#' get_date_from_filename("S030220-v50-bx.txt.gz")
#' get_date_from_filename("unknown_file_format")
get_date_from_filename <- function(file) {
  # pull out the raw digits: 6 digits for "S<digits>" names, 8 digits otherwise
  has_short_date <- grepl("S\\d{6}", file)
  digits <- data.table::fifelse(
    has_short_date,
    sub(".*(\\d{6}).*", "\\1", file),
    sub(".*(\\d{8}).*", "\\1", file)
  )

  # rewrite the digits to YYYY-MM-DD for each of the three known layouts
  from_mmddyyyy <- gsub("(\\d{2})(\\d{2})(\\d{4})", "\\3-\\1-\\2", digits)
  from_mmddyy <- gsub("(\\d{2})(\\d{2})(\\d{2})", "20\\3-\\1-\\2", digits)
  from_yyyymmdd <- gsub("(\\d{4})(\\d{2})(\\d{2})", "\\1-\\2-\\3", digits)

  # pick the layout based on the filename pattern
  iso_date <- data.table::fifelse(
    grepl("NASDAQ_ITCH50(\\.gz)?$", file),
    from_mmddyyyy,
    data.table::fifelse(grepl("S\\d{6}-", file), from_mmddyy, from_yyyymmdd)
  )

  # a failed conversion results in NA instead of an error
  parsed <- try(as.POSIXct(iso_date, tz = "GMT"), silent = TRUE)
  if (inherits(parsed, "try-error")) parsed <- NA
  return(parsed)
}

#' Extracts the exchange that is encoded in an ITCH-filename
#'
#' @param file a filename
#'
#' @return The exchange in upper case, NA if no exchange was found
#' @export
#'
#' @examples
#' get_exchange_from_filename("03302017.NASDAQ_ITCH50")
#' get_exchange_from_filename("20170130.BX_ITCH_50.gz")
#' get_exchange_from_filename("S030220-v50-bx.txt.gz")
#' get_exchange_from_filename("Unknown_file_format")
get_exchange_from_filename <- function(file) {
  first_match <- function(pattern) {
    regmatches(file, regexpr(pattern, file, perl = TRUE))
  }

  # "<date>.<EXCHANGE>_ITCH..." layout
  exch <- first_match("(?<=\\.)[A-Z]+(?=_)")
  # "S<date>-v50-<exchange>..." layout
  if (length(exch) == 0) exch <- first_match("(?<=-v50-)[a-z]+")

  if (length(exch) == 0) return(NA)
  return(toupper(exch))
}

#' Adds meta information (date and exchange) to an itch filename
#'
#' Note that if date and exchange information are already present,
#' they are overwritten
#'
#' @param file the filename
#' @param date the date as a date-class or as a string that is understood by
#' [base::as.Date()].
#' @param exchange the name of the exchange
#'
#' @return the filename with exchanged or added date and exchange information
#' @export
#'
#' @examples
#' add_meta_to_filename("03302017.NASDAQ_ITCH50", "2010-12-24", "TEST")
#' add_meta_to_filename("20170130.BX_ITCH_50.gz", "2010-12-24", "TEST")
#' add_meta_to_filename("S030220-v50-bx.txt.gz", "2010-12-24", "TEST")
#' add_meta_to_filename("unknown_file.ITCH_50", "2010-12-24", "TEST")
add_meta_to_filename <- function(file, date, exchange) {
  if (is.na(date) || is.na(exchange)) return(file)

  if (!"POSIXct" %in% class(date)) date <- as.Date(date)

  # First try to extract if the filename is in the standard formats.
  # if not use the "20101224.TEST_ITCH_50" format
  if (grepl("NASDAQ_ITCH", file)) { #03302017.NASDAQ_ITCH50

    file <- gsub("\\d{8}", format(date, "%m%d%Y"), file)
    file <- gsub("NASDAQ", exchange, file)

  } else if (grepl("S\\d{6}-", file)) { # S030220-v50-bx.txt.gz

    file <- gsub("\\d{6}", format(date, "%m%d%y"), file)
    file <- gsub("(?<=v50-)[^\\.]*(?=\\.)", exchange, file, perl = TRUE)

  } else if (grepl("(?.
#'
#' @return the URL (invisible)
#' @export
#'
#' @examples
#' \dontrun{
#' open_itch_specification()
#' }
open_itch_specification <- function() {
  url <- "https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf"
  browseURL(url)
  return(invisible(url))
}

#' Opens the ITCH sample page
#'
#' The server can be found at https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/
#'
#' @return the URL (invisible)
#' @export
#'
#' @examples
#' \dontrun{
#' open_itch_sample_server()
#' }
open_itch_sample_server <- function() {
  url <- "https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/"
  browseURL(url)
  return(invisible(url))
}

# Normalizes the message type filter to a vector of single characters
# and reports the filter unless quiet is set
check_msg_types <- function(filter_msg_type, quiet) {
  # allow msg_classes: 'AF' (multiple values are split),
  # c('A', 'F'), c(NA, 'A') (NAs are omitted)
  filter_msg_type <- unique(filter_msg_type)

  if (any(nchar(filter_msg_type) > 1, na.rm = TRUE))
    filter_msg_type <- as.character(unlist(strsplit(filter_msg_type, split = "")))

  filter_msg_type <- filter_msg_type[!is.na(filter_msg_type)]
  # splitting can introduce duplicates again, e.g., c("AF", "A")
  filter_msg_type <- unique(filter_msg_type)

  if (!quiet && length(filter_msg_type) > 0)
    cat(paste0("[Filter] msg_type: '",
               paste(filter_msg_type, collapse = "', '"),
               "'\n"))

  return(filter_msg_type)
}

# Validates the timestamp filters and returns them as a list of integer64
# vectors (min and max). If only one bound is given, the other is filled with
# 0 (min) or -1 (max).
check_timestamps <- function(min_timestamp, max_timestamp, quiet) {
  min_timestamp <- min_timestamp[!is.na(min_timestamp)]
  max_timestamp <- max_timestamp[!is.na(max_timestamp)]

  lmin <- length(min_timestamp)
  lmax <- length(max_timestamp)

  txt <- "[Filter] timestamp: "
  if (lmin != lmax) {
    # either vector has to have size 1 the other 0
    if ((lmin == 0 && lmax == 1) ||
        (lmin == 1 && lmax == 0)) {
      if (lmin == 0) {
        min_timestamp <- 0
        txt <- paste0(txt, "<= ", bit64::as.integer64(max_timestamp))
      } else { # lmax == 0
        max_timestamp <- -1
        txt <- paste0(txt, ">= ", bit64::as.integer64(min_timestamp))
      }
    } else {
      stop(paste("min_ and max_timestamp have to have the same length",
                 "or only one has to have size 1!"))
    }
  } else { # lmin == lmax
    txt <- paste0(txt,
                  paste(bit64::as.integer64(min_timestamp),
                        bit64::as.integer64(max_timestamp),
                        sep = " - ", collapse = ", "))
  }
  if (length(min_timestamp) != 0 && !quiet) cat(txt, "\n")

  min_timestamp <- bit64::as.integer64(min_timestamp)
  max_timestamp <- bit64::as.integer64(max_timestamp)

  return(list(min = min_timestamp, max = max_timestamp))
}

# Translates the stock filter (ticker symbols) to stock locate codes and
# appends them to filter_stock_locate. If no stock_directory is given, it is
# read from infile.
check_stock_filters <- function(filter_stock, stock_directory,
                                filter_stock_locate, infile) {

  if (!(length(filter_stock) == 1 && is.na(filter_stock))) {
    if (length(stock_directory) == 1 && is.na(stock_directory)) {
      warning("filter_stock is given, but no stock_directory is specified. Trying to extract stock directory from file\n")
      stock_directory <- read_stock_directory(infile, quiet = TRUE)
    }

    if (!all(filter_stock %chin% stock_directory$stock)) {
      stop(paste0("Not all stocks found in stock_directory, missing: '",
                  paste(filter_stock[!filter_stock %chin% stock_directory$stock],
                        collapse = "', '"),
                  "'"))
    }
    # extend locate code by the stocks:
    filter_stock_locate <- c(filter_stock_locate,
                             stock_directory[stock %chin% filter_stock, stock_locate])
  }
  return(filter_stock_locate)
}

# Validates the buffer size (in bytes) and fills in the default value:
# missing or negative values result in 1e8 for plain files and in three times
# the file size (max 1e9) for gz files; non-numeric values result in 1e8.
check_buffer_size <- function(buffer_size, file) {
  fallback <- 1e8

  if (length(buffer_size) != 1 || is.na(buffer_size) ||
      (is.numeric(buffer_size) && buffer_size < 0)) {
    buffer_size <- if (grepl("\\.gz$", file)) {
      min(3 * file.size(file), 1e9)
    } else {
      fallback
    }
  }

  # Only non-numeric values (or an unknown file size) are replaced. Valid
  # double values such as 1e6 have to be kept, not only integer values.
  if (!is.numeric(buffer_size) || is.na(buffer_size)) buffer_size <- fallback

  if (buffer_size < 50)
    stop(paste("buffer_size has to be at least 50 bytes, otherwise the",
               "messages won't fit"))

  if (buffer_size > 5e9)
    warning(paste("You are trying to allocate a large array on the heap, if",
                  "the function crashes, try to use a smaller buffer_size"))
  return(buffer_size)
}

#' Formats a number of bytes
#'
#' @param x the values
#' @param digits the number of digits to display, default value is 2
#' @param unit_suffix the unit suffix, default value is 'B' (for bytes),
#' useful is also 'B/s' if you have read/write speeds
#' @param base the base for kilo, mega, ...
#' definition, default is 1000
#'
#' @return the values as a character
#' @export
#'
#' @examples
#' format_bytes(1234)
#' format_bytes(1234567890)
#' format_bytes(123456789012, unit_suffix = "iB", base = 1024)
format_bytes <- function(x, digits = 2, unit_suffix = "B", base = 1000) {
  if (!all(is.finite(x))) return(rep(NA, length(x)))

  # future proof it :)
  mtch <- c("", "K", "M", "G", "T", "P", "E", "Z", "Y")

  # order of magnitude of each value; 0 has no magnitude, values below 1 stay
  # in the base unit and values above the last prefix use the last prefix
  nr <- floor(log(abs(x), base))
  nr[x == 0] <- 0
  nr <- pmin(pmax(nr, 0), length(mtch) - 1)

  units <- paste0(mtch[nr + 1], unit_suffix)
  val <- x / base^nr

  res <- sprintf(sprintf("%%.%if%%s", digits), val, units)
  names(res) <- names(x)
  res
}

# Reports the elapsed time since t0 and, if file is given, the speed.
# file is either the path to an existing file (its size is used) or the
# number of bytes that were processed.
report_end <- function(t0, quiet, file = NA) {
  diff_secs <- as.numeric(difftime(Sys.time(), t0, units = "secs"))

  txt <- ""
  if (length(file) == 1 && !is.na(file)) {
    if (is.numeric(file)) {
      size <- file
    } else if (file.exists(file)) {
      size <- file.size(file)
    } else {
      # unknown file: the speed cannot be determined
      size <- NA_real_
    }

    if (!is.na(size)) {
      speed_txt <- format_bytes(size / diff_secs, digits = 2,
                                unit_suffix = "B/s")
      txt <- sprintf(" at %s", speed_txt)
    }
  }
  if (!quiet) cat(sprintf("[Done] in %.2f secs%s\n", diff_secs, txt))
}

--------------------------------------------------------------------------------
/R/write_itch.R:
--------------------------------------------------------------------------------
#' Writes a data.frame or a list of data.frames of ITCH messages to file
#'
#' Note that additional information, e.g., columns that were added, will be
#' dropped in the process and only ITCH-compliant information is saved.
#'
#' Note that the ITCH filename contains the information for the date and exchange.
#' This can be specified explicitly in the file argument or it is added if not
#' turned off `add_meta = FALSE`.
9 | #' 10 | #' @param ll a data.frame or a list of data.frames of ITCH messages, in the format 11 | #' that the [read_functions()] return 12 | #' @param file the filename of the target file. If the folder to the file does 13 | #' not exist, it will be created recursively 14 | #' @param add_meta if date and file information should be added to the filename. 15 | #' Default value is TRUE. Note that adding meta information changes the filename. 16 | #' @param append if the information should be appended to the file. Default value 17 | #' is FALSE 18 | #' @param compress if the file should be gzipped. Default value is FALSE. 19 | #' Note that if you compress a file, buffer_size matters a lot, with larger 20 | #' buffers you are more likely to get smaller filesizes in the end. 21 | #' Alternatively, but slower, is to write the file without compression fully 22 | #' and then gzip the file using another program. 23 | #' @param buffer_size the maximum buffer size. Default value is 1e8 (100MB). 24 | #' Accepted values are > 52 and < 5e9 25 | #' @param quiet if TRUE, the status messages are suppressed, defaults to FALSE 26 | #' @param append_warning if append is set, a warning about timestamp ordering is 27 | #' given. Set `append_warning = FALSE` to silence the warning. 
#' Default value is TRUE
#'
#' @return the filename (invisibly)
#' @export
#'
#' @examples
#' infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
#' sys <- read_system_events(infile, quiet = TRUE)
#' outfile <- tempfile()
#' write_itch(sys, outfile)
#'
#' # create a list of events, stock directory, and orders and write to a file
#' sdir <- read_stock_directory(infile, quiet = TRUE)
#' od <- read_orders(infile, quiet = TRUE)
#'
#' ll <- list(sys, sdir, od)
#' write_itch(ll, outfile)
write_itch <- function(ll, file, add_meta = TRUE,
                       append = FALSE, compress = FALSE,
                       buffer_size = 1e8, quiet = FALSE,
                       append_warning = TRUE) {

  started_at <- Sys.time()
  # a single data.frame is treated as a list of one
  if (is.data.frame(ll)) ll <- list(ll)

  if (add_meta) {
    # which elements carry a given column?
    has_column <- function(column) {
      sapply(ll, function(x) column %in% names(x))
    }
    exchange_flags <- has_column("exchange")
    date_flags <- has_column("date")

    meta_exchange <- NA
    meta_date <- NA
    if (any(exchange_flags) && any(date_flags)) {
      # use the first value of the first element that has the column
      meta_exchange <- ll[[which(exchange_flags)[1]]]$exchange[1]
      meta_date <- ll[[which(date_flags)[1]]]$date[1]
    } else {
      warning("add_meta = TRUE but no exchange or date variable found in ll")
    }

    file <- add_meta_to_filename(file, meta_date, meta_exchange)
  }

  if (append && append_warning)
    warning(paste("ITCH files are sorted by timestamp, by appending to an",
                  "existing file, this is likely not guaranteed!"))

  # every element has to be a data.frame with the common ITCH columns
  required_cols <- c("msg_type", "stock_locate", "tracking_number", "timestamp")
  is_itch_data <- sapply(ll, function(x) {
    is.data.frame(x) && all(required_cols %in% names(x))
  })
  if (!all(is_itch_data))
    stop("All elements in ll need to be a data.frame of ITCH messages")

  # messages are written ordered by timestamp
  ll <- lapply(ll, data.table::setorder, timestamp)

  # compressed files always carry the .gz ending
  if (compress && !endsWith(file, ".gz"))
    file <- paste0(file, ".gz")

  # create the folder of the file if it is missing
  folder <- sub("[/\\][^/\\]+$", "", file)
  if (folder != file && !dir.exists(folder))
    dir.create(folder, recursive = TRUE)

  write_itch_impl(ll, file, append = append, gz = compress,
                  max_buffer_size = buffer_size, quiet = quiet)

  if (!quiet) cat(sprintf("[Outfile] '%s'\n", file))

  report_end(started_at, quiet, file)

  return(invisible(file))
}

--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
#' @useDynLib RITCH
#' @importFrom Rcpp sourceCpp
#' @import data.table
#' @importFrom nanotime nanotime
#' @importFrom bit64 as.integer64
#' @importFrom utils browseURL download.file
NULL

#' @title ITCH 50 Example Testing Dataset
#' @name ex20101224.TEST_ITCH_50
#'
#' @section ex20101224.TEST_ITCH_50:
#'
#' The test dataset contains artificial trading data for three made up stocks:
#' `ALC`, `BOB`, and `CHAR`.
#'
#' The dataset is used in the examples and unit tests of the package.
#'
#' The data contains the following count of messages:
#'
#' - 6 system event (message type `S`)
#' - 3 stock directory (message type `R`)
#' - 3 trading status (message type `H`)
#' - 5000 orders (4997 message type `A` and 3 `F`)
#' - 2000 modifications (198 `F`, 45 `X`, 1745 `D`, and 12 `U` message types)
#' - 5000 trades (message type `P`)
#'
#' The file is also available as `ex20101224.TEST_ITCH_50.gz`.
#'
#' To get real sample ITCH datasets, see the [download_sample_file()]
#' function.
32 | #' @examples 33 | #' file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 34 | #' 35 | #' sys <- read_system_events(file) 36 | NULL 37 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | options(width = 120) 9 | knitr::opts_chunk$set( 10 | collapse = TRUE, 11 | comment = "#>", 12 | fig.path = "man/figures/README-", 13 | out.width = "100%" 14 | ) 15 | ``` 16 | 17 | # RITCH - an R interface to the ITCH Protocol 18 | 19 | 20 | [![CRAN status](https://www.r-pkg.org/badges/version/RITCH)](https://CRAN.R-project.org/package=RITCH) [![CRAN RStudio mirror downloads](https://cranlogs.r-pkg.org/badges/RITCH)](https://www.r-pkg.org/pkg/RITCH) [![R-CMD-check](https://github.com/DavZim/RITCH/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/DavZim/RITCH/actions/workflows/R-CMD-check.yaml) 21 | 22 | 23 | The `RITCH` library provides an `R` interface to NASDAQs ITCH protocol, which is used to distribute financial messages to participants. 24 | Messages include orders, trades, market status, and much more financial information. 25 | A full list of messages is shown later. 26 | The main purpose of this package is to parse the binary ITCH files to a [`data.table`](https://CRAN.R-project.org/package=data.table) in `R`. 27 | 28 | The package leverages [`Rcpp`](https://CRAN.R-project.org/package=Rcpp) and `C++` for efficient message parsing. 29 | 30 | Note that the package provides a small simulated sample dataset in the `ITCH_50` format for testing and example purposes. 31 | Helper functions are provided to list and download sample files from NASDAQs official server. 
32 | 33 | ## Install 34 | 35 | To install `RITCH` you can use the following 36 | 37 | ```R 38 | # stable version: 39 | install.packages("RITCH") 40 | 41 | # development version: 42 | # install.packages("remotes") 43 | remotes::install_github("DavZim/RITCH") 44 | ``` 45 | 46 | ## Quick Overview 47 | 48 | The main functions of `RITCH` are read-related and are easily identified by their `read_` prefix. 49 | 50 | Due to the inherent structural differences between message classes, each class has its own read function. 51 | A list of message types and the respective classes are provided later in this Readme. 52 | 53 | Example message classes used in this example are *orders* and *trades*. 54 | First we define the file to load and count the messages, then we read in the orders and the first 100 trades 55 | 56 | ```{r} 57 | library(RITCH) 58 | # use built in example dataset 59 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 60 | 61 | # count the number of messages in the file 62 | msg_count <- count_messages(file) 63 | dim(msg_count) 64 | names(msg_count) 65 | 66 | # read the orders into a data.table 67 | orders <- read_orders(file) 68 | dim(orders) 69 | names(orders) 70 | 71 | # read the first 100 trades 72 | trades <- read_trades(file, n_max = 100) 73 | dim(trades) 74 | names(trades) 75 | ``` 76 | Note that the file can be a plain `ITCH_50` file or a gzipped `ITCH_50.gz` file, which will be decompressed to the current directory. 77 | You may also note that the output reports quite a low read speed in the `MB/s`. 78 | This lowish number is due to including the parsing process, furthermore, due to overhead of setup code, this number gets higher on larger files. 79 | 80 | If you want to know more about the functions of the package, read on. 
81 | 82 | ## Main Functions 83 | 84 | `RITCH` provides the following main functions: 85 | 86 | - `read_itch(file, ...)` to read an ITCH file 87 | Convenient wrappers for different message classes such as `orders`, `trades`, etc are also provided as `read_orders()`, `read_trades()`, ... 88 | - `filter_itch(infile, outfile, ...)` to filter an ITCH file and write directly to another file without loading the data into R 89 | - `write_itch(data, file, ...)` to write a dataset to an ITCH file 90 | 91 | There are also some helper functions provided, a selection is: 92 | 93 | - `download_sample_file(choice)` to download a sample file from the NASDAQ server and `list_sample_files()` to get a list of all available sample files 94 | - `download_stock_directory(exchange, date)` to download the stock locate information for a given exchange and date 95 | - `open_itch_sample_server()` to open the official NASDAQ server in your browser, which hosts among other things example data files 96 | - `gzip_file(infile, outfile)` and `gunzip_file(infile, outfile)` for gzip functionality 97 | - `open_itch_specification()` to open the official NASDAQ ITCH specification PDF in your browser 98 | 99 | ## Writing ITCH Files 100 | 101 | `RITCH` also provides functionality for writing ITCH files. 102 | Although it could be stored in other file formats (for example a database or a [`qs`](https://CRAN.R-project.org/package=qs) file), ITCH files are quite optimized regarding size as well as write/read speeds. 103 | Thus the `write_itch()` function allows you to write a single or multiple types of message to an `ITCH_50` file. 104 | Note however, that only the standard columns are supported. 105 | Additional columns will not be written to file! 106 | 107 | Additional information can be saved in the filename. 108 | By default the date, exchange, and fileformat information is added to the filename unless you specify `add_meta = FALSE`, in which case the given name is used. 
109 | 110 | As a last note: if you write your data to an ITCH file and want to filter for stocks later on, make sure to save the stock directory of that day/exchange, either externally or in the ITCH file directly (see example below). 111 | 112 | ### Simple Write Example 113 | 114 | A simple write example would be to read all modifications from an ITCH file and save it to a separate file to save space, reduce read times later on, etc. 115 | 116 | ```{r} 117 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 118 | md <- read_modifications(file, quiet = TRUE) 119 | dim(md) 120 | names(md) 121 | 122 | outfile <- write_itch(md, "modifications", compress = TRUE) 123 | 124 | # compare file sizes 125 | files <- c(full_file = file, subset_file = outfile) 126 | format_bytes(sapply(files, file.size)) 127 | ``` 128 | ```{r, include = FALSE} 129 | unlink(outfile) 130 | ``` 131 | 132 | 133 | ### Comprehensive Write Example 134 | 135 | A typical work flow would look like this: 136 | 137 | - read in some message classes from file and filter for certain stocks 138 | - save the results for later analysis, also compress to save disk space 139 | 140 | ```{r} 141 | ## Read in the different message classes 142 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 143 | 144 | # read in the different message types 145 | data <- read_itch(file, 146 | c("system_events", "stock_directory", "orders"), 147 | filter_stock_locate = c(1, 3), 148 | quiet = TRUE) 149 | 150 | str(data, max.level = 1) 151 | 152 | 153 | ## Write the different message classes 154 | outfile <- write_itch(data, 155 | "alc_char_subset", 156 | compress = TRUE) 157 | outfile 158 | 159 | # compare file sizes 160 | format_bytes( 161 | sapply(c(full_file = file, subset_file = outfile), 162 | file.size) 163 | ) 164 | 165 | 166 | ## Lastly, compare the two datasets to see if they are identical 167 | data2 <- read_itch(outfile, quiet = TRUE) 168 | all.equal(data, data2) 169 | 
``` 170 | ```{r, include=FALSE} 171 | # remove files from write_itch again... 172 | unlink(outfile) 173 | outfile_unz <- gsub("\\.gz$", "", outfile) 174 | unlink(outfile_unz) 175 | ``` 176 | 177 | For comparison, the same format in the [`qs`](https://CRAN.R-project.org/package=qs) format results in `44788` bytes. 178 | 179 | 180 | ## ITCH Messages 181 | 182 | There are a total of 22 different message types which are grouped into 13 classes by `RITCH`. 183 | 184 | The messages and their respective classes are: 185 | ```{r, echo=FALSE} 186 | d <- get_msg_classes() 187 | d$msg_type <- paste0("", d$msg_type, "") 188 | d$read_function <- paste0("", "read_", d$msg_class, "()", "") 189 | 190 | data.table::setcolorder(d, c("msg_type", "msg_class", "read_function", 191 | "msg_name", "doc_nr")) 192 | data.table::setnames(d, c("Type", "RITCH Class", 193 | "RITCH Read Function", "ITCH Name", 194 | "ITCH Spec Section")) 195 | 196 | knitr::kable(d, escape = FALSE) 197 | ``` 198 | 199 | Note that if you are interested in the exact definition of the messages and its components, you should look into the [official ITCH specification](https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf), which can also be opened by calling `open_itch_specification()`. 200 | 201 | 202 | ## Data 203 | 204 | The `RITCH` package provides a small, artificial dataset in the ITCH format for example and test purposes. 205 | To learn more about the dataset check `?ex20101224.TEST_ITCH_50`. 206 | 207 | To access the dataset use: 208 | ```{r} 209 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 210 | count_messages(file, add_meta_data = TRUE, quiet = TRUE) 211 | ``` 212 | Note that the example dataset does not contain messages from all classes but is limited to 6 system messages, 3 stock directory, 3 stock trading action, 5000 trade, 5000 order, and 2000 order modification messages. 
213 | As seen by the 3 stock directory messages, the file contains data about 3 made-up stocks (see also the plot later in the Readme). 214 | 215 | NASDAQ provides sample ITCH files on their official server (in R, use `open_itch_sample_server()` to open it in your browser), which can be used to test code on larger datasets. 216 | Note that the sample files are up to 5GB compressed and inflate to about 13GB. 217 | To interact with the sample files, use `list_sample_files()` and `download_sample_file()`. 218 | 219 | 220 | ## Notes on Memory and Speed 221 | 222 | There are some tweaks available to deal with memory and speed issues. 223 | For faster reading speeds, you can increase the buffer size of the `read_` functions to something around 1 GB or more (`buffer_size = 1e9`). 224 | 225 | ### Provide Message Counts 226 | 227 | If you have to read from a single file multiple times, for example because you want to extract orders and trades, you can count the messages beforehand and provide the counts to each read's `n_max` argument, which avoids an extra pass over the file just to count the messages.
228 | ```{r} 229 | # count messages once 230 | n_msgs <- count_messages(file, quiet = TRUE) 231 | 232 | # use counted messages multiple times, saving file passes 233 | orders <- read_orders(file, quiet = TRUE, n_max = n_msgs) 234 | trades <- read_trades(file, quiet = TRUE, n_max = n_msgs) 235 | ``` 236 | 237 | ### Batch Read 238 | 239 | If the dataset does not fit entirely into RAM, you can do a partial read specifying `skip` and `n_max`, similar to this: 240 | 241 | ```{r} 242 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 243 | 244 | n_messages <- count_orders(count_messages(file, quiet = TRUE)) 245 | n_messages 246 | 247 | # read 1000 messages at a time 248 | n_batch <- 1000 249 | n_parsed <- 0 250 | 251 | while (n_parsed < n_messages) { 252 | cat(sprintf("Parsing Batch %04i - %04i", n_parsed, n_parsed + n_batch)) 253 | # read in a batch 254 | df <- read_orders(file, quiet = TRUE, skip = n_parsed, n_max = n_batch) 255 | cat(sprintf(": with %04i orders\n", nrow(df))) 256 | # use the data 257 | # ... 258 | n_parsed <- n_parsed + n_batch 259 | } 260 | ``` 261 | 262 | ### Filter when Reading Data 263 | 264 | You can also filter a dataset directly while reading messages for `msg_type`, `stock_locate`, `timestamp` range, as well as `stock`. 265 | Note that filtering for a specific stock is just a shorthand lookup for the stock's `stock_locate` code. Therefore, a `stock_directory` needs to be supplied (either the output of `read_stock_directory()` or of `download_stock_directory()`); otherwise the function will try to extract the stock directory from the file (which might take some time depending on the size of the file).
266 | 267 | ```{r} 268 | # read in the stock directory as we filter for stock names later on 269 | sdir <- read_stock_directory(file, quiet = TRUE) 270 | 271 | od <- read_orders( 272 | file, 273 | filter_msg_type = "A", # take only 'No MPID add orders' 274 | min_timestamp = 43200000000000, # start at 12:00:00.000000 275 | max_timestamp = 55800000000000, # end at 15:30:00.000000 276 | filter_stock_locate = 1, # take only stock with code 1 277 | filter_stock = "CHAR", # but also take stock CHAR 278 | stock_directory = sdir # provide the stock_directory to match stock names to stock_locates 279 | ) 280 | 281 | # count the different message types 282 | od[, .(n = .N), by = msg_type] 283 | # see if the timestamp is in the specified range 284 | range(od$timestamp) 285 | # count the stock/stock-locate codes 286 | od[, .(n = .N), by = .(stock_locate, stock)] 287 | ``` 288 | 289 | ### Filter Data to File 290 | 291 | On larger files, reading the data into memory might not be the best idea, especially if only a small subset is actually needed. 292 | In this case, the `filter_itch` function will come in handy. 293 | 294 | The basic design is identical to the `read_itch` function but instead of reading the messages into memory, they are immediately written to a file. 
295 | 296 | Taking the filter data example from above, we can do the following 297 | 298 | ```{r} 299 | # the function returns the final name of the output file 300 | outfile <- filter_itch( 301 | infile = file, 302 | outfile = "filtered", 303 | filter_msg_type = "A", # take only 'No MPID add orders' 304 | min_timestamp = 43200000000000, # start at 12:00:00.000000 305 | max_timestamp = 55800000000000, # end at 15:30:00.000000 306 | filter_stock_locate = 1, # take only stock with code 1 307 | filter_stock = "CHAR", # but also take stock CHAR 308 | stock_directory = sdir # provide the stock_directory to match stock names to stock_locates 309 | ) 310 | 311 | format_bytes(file.size(outfile)) 312 | 313 | # read in the orders from the filtered file 314 | od2 <- read_orders(outfile) 315 | 316 | # check that the filtered dataset contains the same information as in the example above 317 | all.equal(od, od2) 318 | ``` 319 | ```{r, include=FALSE} 320 | # remove files from filter_itch again... 321 | unlink(outfile) 322 | ``` 323 | 324 | 325 | ## Create a Plot with Trades and Orders of the largest ETFs 326 | 327 | As a last step, a quick visualization of the example dataset 328 | 329 | ```{r ETF_plot} 330 | library(ggplot2) 331 | 332 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 333 | 334 | # load the data 335 | orders <- read_orders(file, quiet = TRUE) 336 | trades <- read_trades(file, quiet = TRUE) 337 | 338 | # replace the buy-factor with something more useful 339 | orders[, buy := ifelse(buy, "Bid", "Ask")] 340 | 341 | ggplot() + 342 | geom_point(data = orders, 343 | aes(x = as.POSIXct(datetime), y = price, color = buy), alpha = 0.2) + 344 | geom_step(data = trades, aes(x = as.POSIXct(datetime), y = price), size = 0.2) + 345 | facet_grid(stock~., scales = "free_y") + 346 | theme_light() + 347 | labs(title = "Orders and Trades of Three Simulated Stocks", 348 | subtitle = "Date: 2010-12-24 | Exchange: TEST", 349 | caption = "Source: RITCH 
package", x = "Time", y = "Price", color = "Side") + 350 | scale_y_continuous(labels = scales::dollar) + 351 | scale_color_brewer(palette = "Set1") 352 | ``` 353 | 354 | 355 | ## Other Notes 356 | 357 | If you find this package useful or have any other kind of feedback, I'd be happy if you let me know. Otherwise, if you need more functionality, please feel free to create an issue or a pull request. 358 | 359 | Citation and CRAN release are WIP. 360 | 361 | If you are interested in gaining a better understanding of the internal data structures, converting data to and from binary, have a look at the `debug` folder and its contents (only available on the [RITCH's Github page](https://github.com/DavZim/RITCH/)). 362 | -------------------------------------------------------------------------------- /RITCH.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://davzim.github.io/RITCH/ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | Fix bug where the gz functionality would write to the current directory or to the user library. 
-------------------------------------------------------------------------------- /debug/README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | # Debug Tools for `RITCH` 6 | 7 | This document quickly outlines the debugging tools of the `RITCH` library. 8 | 9 | ## Building 10 | 11 | These tools are used for debugging and understanding the data format. They are not shipped with the package itself but need to be sourced independently. 12 | 13 | If you want to play around with the tools, clone the git repository and source the `debug/debug_tools.cpp` script: 14 | 15 | ```{r, include=FALSE} 16 | Sys.setenv("PKG_LIBS" = "-lz") 17 | Rcpp::sourceCpp("debug_tools.cpp") 18 | ``` 19 | ```{r, eval=FALSE} 20 | Sys.setenv("PKG_LIBS" = "-lz") 21 | Rcpp::sourceCpp("debug/debug_tools.cpp") 22 | ``` 23 | 24 | Note that `debug_tools.cpp` includes `../src/specifications.h`, `../src/helper_functions.h`, `../src/read_functions.h`, and `../src/write_functions.h` (relative to the `debug_tools.cpp` script). If you have cloned the repository as is, it should work out of the box; otherwise, make sure that these header files are found. 25 | 26 | ## Debug Tools 27 | 28 | - `dbg_get_message_length(msgs)` returns the size of Messages in bytes. Note that each message adds 2 bytes that are not used. 29 | ```{r} 30 | dbg_get_message_length(c("A", "F")) 31 | ``` 32 | - `dbg_itch_file(filename)` allows you to interactively list messages in a file. 33 | You are asked for input inside the function, which can be: 34 | 35 | - msg type, e.g., `A`, `H`, `h` to see the next instance of that message type 36 | - numeric value, e.g., `3` to see the next N values 37 | 38 | For example: 39 | 40 | ```r 41 | file <- "20191230.BX_ITCH_50" 42 | dbg_itch_file(file) 43 | ## Debugging File '20191230.BX_ITCH_50' (.gz-file?
no) 44 | ## Usage: 45 | ## - Empty: next message 46 | ## - Number: for next N messages 47 | ## - Character: if valid message type, print the next message, e.g., 'A' for add order 48 | ## - non valid Character: exits the debugging tool 49 | ## Note: Bytes in parenthesis show the first two bytes, which are not used! 50 | ## Number of Messages: 51 | ## - 'S': 6 52 | ## - 'R': 8906 53 | ## - 'H': 8961 54 | ## - 'Y': 9013 55 | ## - 'L': 6171 56 | ## - 'V': 1 57 | ## - 'W': 0 58 | ## - 'K': 0 59 | ## - 'J': 0 60 | ## - 'h': 0 61 | ## - 'A': 12210139 62 | ## - 'F': 45058 63 | ## - 'E': 578839 64 | ## - 'C': 2686 65 | ## - 'X': 348198 66 | ## - 'D': 11821540 67 | ## - 'U': 1741672 68 | ## - 'P': 134385 69 | ## - 'Q': 0 70 | ## - 'B': 0 71 | ## - 'I': 0 72 | ## - 'N': 2241182 73 | ## ============================= 74 | ## 'S' (len 2 + 12) idx 0 at offset 0 (0x0000) | (00 0c) 53 00 00 00 00 0a 2d f4 92 1d 67 4f 75 | #RITCH> 3 76 | ## Showing next 3 messages 77 | ## 'R' (len 2 + 39) idx 1 at offset 14 (0x000e) | (00 27) 52 00 01 00 00 0a 66 a0 e0 dc 44 41 20 20 20 20 20 20 20 4e 20 00 00 00 64 4e 43 5a 20 50 4e 20 31 4e 00 00 00 00 4e 78 | ## 'R' (len 2 + 39) idx 2 at offset 55 (0x0037) | (00 27) 52 00 02 00 00 0a 66 a0 e2 c8 6c 41 41 20 20 20 20 20 20 4e 20 00 00 00 64 4e 43 5a 20 50 4e 20 31 4e 00 00 00 01 4e 79 | ## 'H' (len 2 + 25) idx 3 at offset 96 (0x0060) | (00 19) 48 00 01 00 00 0a 66 a0 e4 ff bd 41 20 20 20 20 20 20 20 54 20 20 20 20 20 80 | #RITCH> A 81 | ## Applied filter to message type 'A' 82 | ## 'A' (len 2 + 36) idx 32873 at offset 973915 (0xedc5b) | (00 24) 41 20 2c 00 00 16 eb 55 2c 88 24 00 00 00 00 00 00 00 04 42 00 00 2e 7c 55 53 4f 20 20 20 20 20 00 01 fa 40 83 | #RITCH> q 84 | ## Stopping Printing Messages 85 | ``` 86 | 87 | - `dbg_hex_to_char(hex_string)` converts a hex value to character 88 | - `dbg_hex_to_int(hex_string)` converts a hex value to integer 89 | - `dbg_hex_to_dbl(hex_string)` converts a hex value to dbl 90 | ```{r} 91 | 
dbg_hex_to_char("52 49 54 43 48 20 20 20") # 'RITCH ' 92 | dbg_hex_to_int("01 23 45 67") # 19088743 93 | dbg_hex_to_dbl("00 01 fa 40") # 12.96 94 | ``` 95 | - `dbg_hex_compare(x, y)` to get a quick comparison of two hex strings 96 | ```{r} 97 | x <- "00 01 02 03 04" 98 | y <- "00 01 00 03 0a" 99 | dbg_hex_compare(x, y) 100 | ``` 101 | 102 | - `dbg_hex_count_messages(hex_string)` counts the number of messages by type in a hex string 103 | ```{r} 104 | incomplete_hex_string <- "00 00 53" # . . S 105 | dbg_hex_count_messages(incomplete_hex_string) 106 | ``` 107 | - `dbg_hex_to_*()` to convert hexadecimal strings to message `data.table`s (* can be `orders`, `trades`, `modifications`, `system_events`, `stock_directory`, `trading_status`, `reg_sho`, `market_participant_states`, `mwcb`, `ipo`, `luld`, `noii`, or `rpii`) 108 | ```{r} 109 | hex_string <- paste( 110 | "00 00", # first 2 unused bytes 111 | "46", # message type 'F' 112 | "20 2c", # stock locate 8236 113 | "00 00", # tracking number 0 114 | "16 eb 55 2c 88 24", # timestamp 25200002107428 115 | "00 00 00 00 00 00 00 04", # order ref 4 116 | "42", # buy == TRUE -> 'B' 117 | "00 00 2e 7c", # shares 11900 118 | "55 53 4f 20 20 20 20 20", # stock 'USO ' (length 8) 119 | "00 01 fa 40", # price 129600 (12.96) 120 | "56 49 52 54" # mpid/attribution 'VIRT' 121 | ) 122 | 123 | dbg_hex_to_orders(hex_string) 124 | ``` 125 | 126 | - `dbg_messages_to_hex()` to convert the message `data.table`s to a hexadecimal string 127 | ```{r} 128 | od <- data.table::data.table( 129 | msg_type = "F", 130 | stock_locate = 8236L, 131 | tracking_number = 0L, 132 | timestamp = bit64::as.integer64(25200002107428), 133 | order_ref = bit64::as.integer64(4), 134 | buy = TRUE, 135 | shares = 11900L, 136 | stock = "USO", 137 | price = 12.96, 138 | mpid = "VIRT" 139 | ) 140 | hex_order <- dbg_messages_to_hex(od) 141 | hex_order 142 | 143 | # convert back to a data.table and see if they are identical 144 | od2 <- dbg_hex_to_orders(hex_order) 145 |
all.equal(od, od2) 146 | ``` 147 | -------------------------------------------------------------------------------- /debug/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Debug Tools for `RITCH` 3 | 4 | This document quickly outlines the debugging tools of the `RITCH` 5 | library. 6 | 7 | ## Building 8 | 9 | These tools are used for debugging and understanding the data format. 10 | They are not shipped with the package itself but need to be sourced 11 | independently. 12 | 13 | If you want to play around with the tools, clone the git repository and 14 | source the `debug/debug_tools.cpp` script: 15 | 16 | ``` r 17 | Sys.setenv("PKG_LIBS" = "-lz") 18 | Rcpp::sourceCpp("debug/debug_tools.cpp") 19 | ``` 20 | 21 | Note that `debug_tools.cpp` includes `../src/RITCH.h` as well as 22 | `../src/MessageTypes.h` (relative from the `debug_tools.cpp` script), if 23 | you have cloned the repository as is, it should work out of the box, 24 | otherwise, make sure that the two header files are found. 25 | 26 | ## Debug Tools 27 | 28 | - `dbg_get_message_length(msgs)` returns the size of Messages in bytes. 29 | Note that each message adds 2 bytes that are not used 30 | 31 | ``` r 32 | dbg_get_message_length(c("A", "F")) 33 | ``` 34 | 35 | ## A F 36 | ## 38 42 37 | 38 | - `dbg_itch_file(filename)` allows you to interactively list messages in 39 | a file You are asked for input inside the function, which can be: 40 | 41 | - msg type, e.g., `A`, `H`, `h` to see the next instance of that 42 | message type 43 | - numeric value, e.g., `3` to see the next N values 44 | 45 | For example: 46 | 47 | ``` r 48 | file <- "20191230.BX_ITCH_50" 49 | dbg_itch_file(file) 50 | ## Debugging File '20191230.BX_ITCH_50' (.gz-file? 
no) 51 | ## Usage: 52 | ## - Empty: next message 53 | ## - Number: for next N messages 54 | ## - Character: if valid message type, print the next message, e.g., 'A' for add order 55 | ## - non valid Character: exits the debugging tool 56 | ## Note: Bytes in parenthesis show the first two bytes, which are not used! 57 | ## Number of Messages: 58 | ## - 'S': 6 59 | ## - 'R': 8906 60 | ## - 'H': 8961 61 | ## - 'Y': 9013 62 | ## - 'L': 6171 63 | ## - 'V': 1 64 | ## - 'W': 0 65 | ## - 'K': 0 66 | ## - 'J': 0 67 | ## - 'h': 0 68 | ## - 'A': 12210139 69 | ## - 'F': 45058 70 | ## - 'E': 578839 71 | ## - 'C': 2686 72 | ## - 'X': 348198 73 | ## - 'D': 11821540 74 | ## - 'U': 1741672 75 | ## - 'P': 134385 76 | ## - 'Q': 0 77 | ## - 'B': 0 78 | ## - 'I': 0 79 | ## - 'N': 2241182 80 | ## ============================= 81 | ## 'S' (len 2 + 12) idx 0 at offset 0 (0x0000) | (00 0c) 53 00 00 00 00 0a 2d f4 92 1d 67 4f 82 | #RITCH> 3 83 | ## Showing next 3 messages 84 | ## 'R' (len 2 + 39) idx 1 at offset 14 (0x000e) | (00 27) 52 00 01 00 00 0a 66 a0 e0 dc 44 41 20 20 20 20 20 20 20 4e 20 00 00 00 64 4e 43 5a 20 50 4e 20 31 4e 00 00 00 00 4e 85 | ## 'R' (len 2 + 39) idx 2 at offset 55 (0x0037) | (00 27) 52 00 02 00 00 0a 66 a0 e2 c8 6c 41 41 20 20 20 20 20 20 4e 20 00 00 00 64 4e 43 5a 20 50 4e 20 31 4e 00 00 00 01 4e 86 | ## 'H' (len 2 + 25) idx 3 at offset 96 (0x0060) | (00 19) 48 00 01 00 00 0a 66 a0 e4 ff bd 41 20 20 20 20 20 20 20 54 20 20 20 20 20 87 | #RITCH> A 88 | ## Applied filter to message type 'A' 89 | ## 'A' (len 2 + 36) idx 32873 at offset 973915 (0xedc5b) | (00 24) 41 20 2c 00 00 16 eb 55 2c 88 24 00 00 00 00 00 00 00 04 42 00 00 2e 7c 55 53 4f 20 20 20 20 20 00 01 fa 40 90 | #RITCH> q 91 | ## Stopping Printing Messages 92 | ``` 93 | 94 | - `dbg_hex_to_char(hex_string)` converts a hex value to character 95 | - `dbg_hex_to_int(hex_string)` converts a hex value to integer 96 | - `dbg_hex_to_dbl(hex_string)` converts a hex value to dbl 97 | 98 | ``` r 99 | 
dbg_hex_to_char("52 49 54 43 48 20 20 20") # 'RITCH ' 100 | ``` 101 | 102 | ## [1] "RITCH " 103 | 104 | ``` r 105 | dbg_hex_to_int("01 23 45 67") # 19088743 106 | ``` 107 | 108 | ## integer64 109 | ## [1] 19088743 110 | 111 | ``` r 112 | dbg_hex_to_dbl("00 01 fa 40") # 12.96 113 | ``` 114 | 115 | ## [1] 12.96 116 | 117 | - `dbg_hex_compare(x, y)` to get a quick comparison of two hex strings 118 | 119 | ``` r 120 | x <- "00 01 02 03 04" 121 | y <- "00 01 00 03 0a" 122 | dbg_hex_compare(x, y) 123 | ``` 124 | 125 | ## idx | x | y | diff 126 | ## ------------------------- 127 | ## 1 | 0x00 | 0x00 | 128 | ## 2 | 0x01 | 0x01 | 129 | ## 3 | 0x02 | 0x00 | XXX 130 | ## 4 | 0x03 | 0x03 | 131 | ## 5 | 0x04 | 0x0a | XXX 132 | 133 | - `dbg_hex_count_messages(hex_string)` counts the number of messages by 134 | type in a hex string 135 | 136 | ``` r 137 | incomplete_hex_string <- "00 00 53" # . . S 138 | dbg_hex_count_messages(incomplete_hex_string) 139 | ``` 140 | 141 | ## msg_type count 142 | ## 1: S 1 143 | ## 2: R 0 144 | ## 3: H 0 145 | ## 4: Y 0 146 | ## 5: L 0 147 | ## 6: V 0 148 | ## 7: W 0 149 | ## 8: K 0 150 | ## 9: J 0 151 | ## 10: h 0 152 | ## 11: A 0 153 | ## 12: F 0 154 | ## 13: E 0 155 | ## 14: C 0 156 | ## 15: X 0 157 | ## 16: D 0 158 | ## 17: U 0 159 | ## 18: P 0 160 | ## 19: Q 0 161 | ## 20: B 0 162 | ## 21: I 0 163 | ## 22: N 0 164 | ## msg_type count 165 | 166 | - `dbg_hex_to_*()` to convert hexadecimal strings to message 167 | `data.table`s (\* can be `orders`, `trades`, `modifications`, 168 | `system_events`, `stock_directory`, `trading_status`, `reg_sho`, 169 | `market_participant_states`, `mwcb`, `ipo`, `luld`, `noii`, or `rpii`) 170 | 171 | ``` r 172 | hex_string <- paste( 173 | "00 00", # first 2 empty nibbles 174 | "46", # message type 'F' 175 | "20 2c", # stock locate 8236 176 | "00 00", # tracking number 0 177 | "16 eb 55 2c 88 24", # timestamp 25200002107428 178 | "00 00 00 00 00 00 00 04", # order ref 4 179 | "42", # buy == TRUE -> 'B' 180 | "00 00 
2e 7c", # shares 11900 181 | "55 53 4f 20 20 20 20 20", # stock 'USO ' (length 8) 182 | "00 01 fa 40", # price 129600 (12.96) 183 | "56 49 52 54" # mpid/attribution 'VIRT' 184 | ) 185 | 186 | dbg_hex_to_orders(hex_string) 187 | ``` 188 | 189 | ## msg_type stock_locate tracking_number timestamp order_ref buy shares 190 | ## 1: F 8236 0 25200002107428 4 TRUE 11900 191 | ## stock price mpid 192 | ## 1: USO 12.96 VIRT 193 | 194 | - `dbg_messages_to_hex()` to convert the message `data.table`s to a 195 | hexadecimal string 196 | 197 | ``` r 198 | od <- data.table::data.table( 199 | msg_type = "F", 200 | stock_locate = 8236L, 201 | tracking_number = 0L, 202 | timestamp = bit64::as.integer64(25200002107428), 203 | order_ref = bit64::as.integer64(4), 204 | buy = TRUE, 205 | shares = 11900L, 206 | stock = "USO", 207 | price = 12.96, 208 | mpid = "VIRT" 209 | ) 210 | hex_order <- dbg_messages_to_hex(od) 211 | hex_order 212 | ``` 213 | 214 | ## [1] "00 00 46 20 2c 00 00 16 eb 55 2c 88 24 00 00 00 00 00 00 00 04 42 00 00 2e 7c 55 53 4f 20 20 20 20 20 00 01 fa 40 56 49 52 54" 215 | 216 | ``` r 217 | # convert back to a data.table and see if they are identical 218 | od2 <- dbg_hex_to_orders(hex_order) 219 | all.equal(od, od2) 220 | ``` 221 | 222 | ## [1] TRUE 223 | -------------------------------------------------------------------------------- /debug/debug_tools.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ####################################################### 3 | * This file holds debug functions to look at and write 4 | * ITCH hex buffers 5 | * 6 | * Functions include: 7 | * - dbg_get_message_length to get the length of a message 8 | * - dbg_itch_file to open an interactive mode in which 9 | * the file is shown as hex code, one message at a time, also 10 | * includes modes to quickly see a certain message type 11 | * Hex related functions to convert hex codes into R types 12 | * - dbg_hex_to_char 13 | * -
dbg_hex_to_int 14 | * - dbg_hex_to_dbl 15 | * - dbg_hex_count_messages to count the orders in a hex string 16 | * - dbg_hex_compare to compare two hex strings 17 | * 18 | * Convert Hex Strings into data.tables and vice versa 19 | * - orders: dbg_hex_to_orders and dbg_messages_to_hex 20 | * - trades: 21 | * - modifications: 22 | * 23 | * 24 | * ####################################################### 25 | */ 26 | 27 | // TODO: messages_to_bin: a function that takes in a list of dataframes 28 | // for each list, have a running index (which messages have already been parsed in this element?) 29 | // then find the next smallest timestamp, write one message, find next timestamp... until all are fully written 30 | // write stream to file 31 | 32 | #include 33 | #include 34 | #include "../src/specifications.h" 35 | #include "../src/helper_functions.h" 36 | #include "../src/read_functions.h" 37 | #include "../src/write_functions.h" 38 | 39 | // get_message_length(c("A", "B")) 40 | // [[Rcpp::export]] 41 | int dbg_get_message_length_impl(std::string m) { 42 | unsigned char msg = m[0]; 43 | return get_message_size(msg); 44 | } 45 | 46 | /*** R 47 | dbg_get_message_length <- function(x) { 48 | sapply(x, dbg_get_message_length_impl) 49 | } 50 | */ 51 | 52 | // counts message types in a buffer 53 | std::vector count_messages_buffer(unsigned char* buf, 54 | const uint64_t n_bytes) { 55 | std::vector count(N_TYPES, 0); 56 | uint64_t i = 0; 57 | while (i < n_bytes) { 58 | const unsigned char mt = buf[i + 2]; 59 | 60 | count[mt - 'A']++; 61 | i += get_message_size(mt); 62 | } 63 | 64 | return take_needed_messages(count); 65 | } 66 | int64_t sum_messages(std::vector& count, unsigned char msg) { 67 | return count[msg - 'A']; 68 | } 69 | 70 | /* 71 | * Prints the bytes of each message of an ITCH file 72 | * Inputs are either 73 | * - numeric which result in printing the next N values 74 | * - a single character which corresponds to the message types and prints the next instance of the 
message 75 | */ 76 | // [[Rcpp::export]] 77 | void dbg_itch_file(std::string filename = "inst/extdata/ex20101224.TEST_ITCH_50", 78 | int64_t buffer_size = 1e9) { 79 | 80 | // to allow readline / user feedbakc 81 | Rcpp::Environment base = Rcpp::Environment("package:base"); 82 | Rcpp::Function readline = base["readline"]; 83 | Rcpp::Function as_character = base["as.character"]; 84 | 85 | const bool is_gz = filename.substr(filename.size() - 3, filename.size()) == ".gz"; 86 | 87 | // only one buffer is used... 88 | unsigned char* bufferPtr; 89 | int64_t bufferCharSize = sizeof(unsigned char) * buffer_size; 90 | bufferPtr = (unsigned char*) malloc(bufferCharSize); 91 | 92 | FILE* rawfile; 93 | gzFile gzfile; 94 | 95 | if (is_gz) { 96 | gzfile = gzopen(filename.c_str(), "rb"); 97 | } else { 98 | rawfile = fopen(filename.c_str(), "rb"); 99 | } 100 | 101 | int64_t buf_size; 102 | if (is_gz) { 103 | buf_size = gzread(gzfile, bufferPtr, bufferCharSize); 104 | } else { 105 | buf_size = fread(bufferPtr, 1, bufferCharSize, rawfile); 106 | } 107 | 108 | std::vector counts_all = count_messages_buffer(bufferPtr, buf_size); 109 | std::vector counts = take_needed_messages(counts_all); 110 | 111 | Rprintf("Debugging File '%s' (.gz-file? %s)\n", filename.c_str(), is_gz ? 
"yes" : "no"); 112 | Rprintf("Usage:\n"); 113 | Rprintf("- Empty: next message\n"); 114 | Rprintf("- Number: for next N messages\n"); 115 | Rprintf("- Character: if valid message type, print the next message, e.g., 'A' for add order\n"); 116 | Rprintf("- non valid Character: exits the debugging tool\n"); 117 | Rprintf("Note: Bytes in parenthesis show the first two bytes, which are not used!\n"); 118 | 119 | Rprintf("Number of Messages:\n"); 120 | for (int j = 0; j < N_ACT_MSGS; j++) { 121 | Rprintf("- '%c': %ld\n", ACT_MSG_NAMES[j], counts[j]); 122 | } 123 | Rprintf("=============================\n"); 124 | // Use the Buffer 125 | int64_t idx; 126 | 127 | int i = 0; 128 | idx = 0; 129 | std::string exit_code = ""; 130 | int skip_end = 0; 131 | bool skip_print = false; 132 | unsigned char msg_filter = ' '; 133 | 134 | // to enable multiple buffers: use this logic... 135 | // while ((thisBufferSize = fread(bufferPtr, 1, bufferCharSize, infile)) > 0) { 136 | // while (true) { 137 | while (true) { 138 | if (idx > buf_size) { 139 | Rprintf("Reached end of buffer, increase buffer size to read more\n"); 140 | return; 141 | } 142 | unsigned char num = bufferPtr[idx + 2]; 143 | const int l = get_message_size(num); 144 | // Rprintf("At offset '0x%04x' msg '%c' msg len %i (0x%04x)\n", idx, num, l, l); 145 | 146 | if (skip_print) { 147 | if (num != msg_filter) { 148 | // if the current message is not equal to the message filter, skip printing and advance 149 | idx += l; 150 | i++; 151 | continue; 152 | } else { 153 | skip_print = false; 154 | } 155 | } 156 | 157 | Rprintf("'%c' (len 2 + %i) idx %4i at offset %5ld (0x%04lx) | ", num, l - 2, i, idx, idx); 158 | Rprintf("(%02x %02x) ", bufferPtr[idx], bufferPtr[idx + 1]); 159 | for (int x = 2; x < l; x++) Rprintf("%02x ", bufferPtr[idx + x]); 160 | Rprintf("\n"); 161 | 162 | // interactive element, allow numeric input (for N messages), 163 | // Message Types for the next message type, or other non empty for quit 164 | if (i >= 
skip_end) { 165 | exit_code = Rcpp::as(as_character(readline("#RITCH> "))); 166 | 167 | if (exit_code != "") { 168 | // check if all numeric, than skip N 169 | const bool only_numeric = exit_code.find_first_not_of("0123456789") == std::string::npos; 170 | if (only_numeric) { 171 | const int n = std::stoi(exit_code); 172 | skip_end = i + n; 173 | Rprintf("Showing next %i messages\n", n); 174 | } else { 175 | // check messages 176 | unsigned char exit_msg = exit_code.at(0); 177 | 178 | // check if the input is an itch message 179 | 180 | bool is_itch_message = false; 181 | for (const unsigned char c : ACT_MSG_NAMES) if (c == exit_msg) { 182 | is_itch_message = true; 183 | break; 184 | } 185 | 186 | if (is_itch_message) { 187 | const bool has_message = sum_messages(counts, exit_msg) > 0; 188 | if (!has_message) { 189 | Rprintf("No messages found for type '%c' increase buffer size or use different message type.\n", exit_msg); 190 | continue; 191 | } 192 | skip_print = true; 193 | msg_filter = exit_code[0]; 194 | 195 | Rcpp::Rcout << "Applied filter to message type '" << msg_filter << "'\n"; 196 | } else { 197 | // else break 198 | Rprintf("Stopping Printing Messages\n"); 199 | break; 200 | } 201 | } 202 | } // else: continue with next message 203 | } 204 | 205 | idx += l; 206 | i++; 207 | } 208 | 209 | free(bufferPtr); 210 | if (is_gz) { 211 | gzclose(gzfile); 212 | } else { 213 | fclose(rawfile); 214 | } 215 | } 216 | 217 | /*** R 218 | # Converts a hex string into char 219 | # i.e., dbg_hex_to_char("4f") == "O" 220 | dbg_hex_to_char <- function(h) { 221 | h <- gsub(" +", "", h) 222 | xx <- sapply(seq(1, nchar(h), by=2), function(x) substr(h, x, x+1)) 223 | rawToChar(as.raw(strtoi(xx, 16L))) 224 | } 225 | # dbg_hex_to_int("01 23 45 67") == 19088743 226 | # dbg_hex_to_int("0a 2d f4 92 1d 67") == 11192493022567 227 | dbg_hex_to_int <- function(h) { 228 | h <- gsub(" +", "", h) 229 | l <- nchar(h) %/% 2 230 | bit64::as.integer64(as.numeric(paste0("0x", h))) 231 | } 232 | 
# dbg_hex_to_dbl("00 01 fa 40") == 12.96 233 | # dbg_hex_to_dbl("00 00 00 46 28 21 94 40", prec = 8) == 3013.21 234 | dbg_hex_to_dbl <- function(h, prec = 4) { 235 | dbg_hex_to_int(h) / 10^prec 236 | } 237 | */ 238 | 239 | // converts a std::string of hex values to a buffer 240 | unsigned char * to_buffer(std::string x) { 241 | x.erase(remove_if(x.begin(), x.end(), isspace), x.end()); 242 | const uint64_t n_bytes = x.size() / 2; 243 | unsigned char * buf; 244 | // Rprintf("Found %u bytes\n", x.size() / 2); 245 | buf = (unsigned char*) calloc(x.size() / 2, sizeof(unsigned char)); 246 | 247 | for (uint64_t j = 0; j < n_bytes; j++) 248 | buf[j] = std::stoul(x.substr(j * 2, 2), nullptr, 16); 249 | return buf; 250 | } 251 | 252 | // ############################## 253 | // User Functions... 254 | // ############################## 255 | 256 | //[[Rcpp::export]] 257 | Rcpp::DataFrame hex_count_messages_impl(std::string x) { 258 | // remove whitespaces 259 | x.erase(remove_if(x.begin(), x.end(), isspace), x.end()); 260 | const uint64_t n_bytes = x.size() / 2; 261 | unsigned char * buf = to_buffer(x); 262 | 263 | std::vector count = count_messages_buffer(buf, n_bytes); 264 | 265 | Rcpp::StringVector types; 266 | for (unsigned char c : ACT_MSG_NAMES) types.push_back(std::string(1, c)); 267 | 268 | Rcpp::List df(2); 269 | df.names() = Rcpp::CharacterVector::create("msg_type", "count"); 270 | df["msg_type"] = types; 271 | const int len = types.size(); 272 | Rcpp::NumericVector ct(len); 273 | std::memcpy(&(ct[0]), &(count[0]), len * sizeof(double)); 274 | ct.attr("class") = "integer64"; 275 | df["count"] = ct; 276 | 277 | df.attr("class") = Rcpp::CharacterVector::create("data.table", "data.frame"); 278 | 279 | return df; 280 | } 281 | /***R 282 | dbg_hex_compare <- function(x, y) { 283 | reset_whitespaces <- function(x) { 284 | xx <- strsplit(gsub(" ", "", x), split = "")[[1]] 285 | paste(paste0(xx[c(T, F)], xx[c(F, T)]), collapse = " ") 286 | } 287 | x <- reset_whitespaces(x) 
288 | y <- reset_whitespaces(y) 289 | xx <- strsplit(x, " ")[[1]] 290 | yy <- strsplit(y, " ")[[1]] 291 | 292 | min_x <- min(length(xx), length(yy)) 293 | cat(sprintf(" %3s | %4s | %4s | %4s\n%s\n", "idx", "x", "y", "diff", 294 | paste(rep("-", 25), collapse = ""))) 295 | for (i in seq_len(min_x)) { 296 | cat(sprintf(" %3s | 0x%2s | 0x%2s | %4s\n", i, xx[i], yy[i], 297 | ifelse(xx[i] == yy[i], "", "XXX"))) 298 | } 299 | } 300 | # count orders for a hex string 301 | # dbg_hex_count_messages("00 00 41") 302 | dbg_hex_count_messages <- function(x) { 303 | d <- hex_count_messages_impl(x) 304 | data.table::setalloccol(d) 305 | } 306 | */ 307 | 308 | /* 309 | * HEX to Ordertypes 310 | */ 311 | 312 | Rcpp::DataFrame dbg_hex_to_df(std::string x, std::string msg_class) { 313 | // create buffer 314 | x.erase(remove_if(x.begin(), x.end(), isspace), x.end()); 315 | const uint64_t n_bytes = x.size() / 2; 316 | unsigned char * buf = to_buffer(x); 317 | std::vector count = count_messages_buffer(buf, n_bytes); 318 | 319 | int64_t n_messages = 0; 320 | for (const int64_t p : count) n_messages += p; 321 | 322 | MessageParser mp(msg_class, 0, 100); // take max 100 messages... 
323 | mp.activate(); 324 | mp.init_vectors(n_messages + 100); 325 | uint64_t i = 2; 326 | 327 | while (i < n_bytes) { 328 | mp.parse_message(&buf[i]); 329 | i += get_message_size(buf[i]); 330 | } 331 | 332 | return mp.get_data_frame(); 333 | } 334 | //[[Rcpp::export]] 335 | Rcpp::DataFrame dbg_hex_to_orders(std::string x) { 336 | return dbg_hex_to_df(x, "orders"); 337 | } 338 | //[[Rcpp::export]] 339 | Rcpp::DataFrame dbg_hex_to_trades(std::string x) { 340 | return dbg_hex_to_df(x, "trades"); 341 | } 342 | //[[Rcpp::export]] 343 | Rcpp::DataFrame dbg_hex_to_modifications(std::string x) { 344 | return dbg_hex_to_df(x, "modifications"); 345 | } 346 | //[[Rcpp::export]] 347 | Rcpp::DataFrame dbg_hex_to_system_events(std::string x) { 348 | return dbg_hex_to_df(x, "system_events"); 349 | } 350 | //[[Rcpp::export]] 351 | Rcpp::DataFrame dbg_hex_to_stock_directory(std::string x) { 352 | return dbg_hex_to_df(x, "stock_directory"); 353 | } 354 | //[[Rcpp::export]] 355 | Rcpp::DataFrame dbg_hex_to_trading_status(std::string x) { 356 | return dbg_hex_to_df(x, "trading_status"); 357 | } 358 | //[[Rcpp::export]] 359 | Rcpp::DataFrame dbg_hex_to_reg_sho(std::string x) { 360 | return dbg_hex_to_df(x, "reg_sho"); 361 | } 362 | //[[Rcpp::export]] 363 | Rcpp::DataFrame dbg_hex_to_market_participant_states(std::string x) { 364 | return dbg_hex_to_df(x, "market_participant_states"); 365 | } 366 | //[[Rcpp::export]] 367 | Rcpp::DataFrame dbg_hex_to_mwcb(std::string x) { 368 | return dbg_hex_to_df(x, "mwcb"); 369 | } 370 | //[[Rcpp::export]] 371 | Rcpp::DataFrame dbg_hex_to_ipo(std::string x) { 372 | return dbg_hex_to_df(x, "ipo"); 373 | } 374 | //[[Rcpp::export]] 375 | Rcpp::DataFrame dbg_hex_to_luld(std::string x) { 376 | return dbg_hex_to_df(x, "luld"); 377 | } 378 | //[[Rcpp::export]] 379 | Rcpp::DataFrame dbg_hex_to_noii(std::string x) { 380 | return dbg_hex_to_df(x, "noii"); 381 | } 382 | //[[Rcpp::export]] 383 | Rcpp::DataFrame dbg_hex_to_rpii(std::string x) { 384 | return 
dbg_hex_to_df(x, "rpii"); 385 | } 386 | 387 | 388 | /* 389 | * ############################################################################ 390 | * Messages to hex 391 | * The function takes one data.frame, deduces the type based on the message types 392 | * and converts it into binary (hex) data 393 | * ############################################################################ 394 | */ 395 | //[[Rcpp::export]] 396 | std::string dbg_messages_to_hex(Rcpp::DataFrame df, 397 | size_t max_buffer_size = 1e8) { 398 | Rcpp::CharacterVector msgs = df["msg_type"]; 399 | const int total_messages = msgs.length(); 400 | // Rprintf("Found %i order messages\n", total_messages); 401 | unsigned char * buf; 402 | 403 | size_t req_size = 0; 404 | for (int i = 0; i < total_messages; i++) { 405 | const unsigned char msg = Rcpp::as(msgs[i]); 406 | req_size += get_message_size(msg); 407 | } 408 | 409 | req_size = req_size > max_buffer_size ? max_buffer_size : req_size; 410 | // Rprintf("Need %u bytes for the messages\n", req_size); 411 | // allocate memory to the buffer and initialise it to 0 412 | buf = (unsigned char*) calloc(req_size, sizeof(unsigned char)); 413 | 414 | int64_t i = 0; 415 | int64_t msg_ct = 0; 416 | while (msg_ct < total_messages) { 417 | // Rprintf("Parsing Message %i\n", msg_ct); 418 | i += load_message_to_buffer(&(buf[i]), msg_ct, df); 419 | } 420 | 421 | std::stringstream ss; 422 | for(int j = 0; j < i; ++j) 423 | ss << 424 | std::setfill('0') << 425 | std::setw(2) << 426 | std::hex << 427 | (int) (((int) buf[j] >> (8*0)) & 0xff) << // (int) buf[j] 428 | " "; 429 | std::string res = ss.str(); 430 | 431 | return res.substr(0, res.size() - 1); 432 | } 433 | -------------------------------------------------------------------------------- /inst/extdata/ex20101224.TEST_ITCH_50: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DavZim/RITCH/9bd51af48d26703bd95ab4f0db6532a497c104c1/inst/extdata/ex20101224.TEST_ITCH_50
--------------------------------------------------------------------------------
/inst/extdata/ex20101224.TEST_ITCH_50.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavZim/RITCH/9bd51af48d26703bd95ab4f0db6532a497c104c1/inst/extdata/ex20101224.TEST_ITCH_50.gz
--------------------------------------------------------------------------------
/inst/tinytest/test_filename_helpers.R:
--------------------------------------------------------------------------------
# Tests for the filename helpers: get_date_from_filename(),
# get_exchange_from_filename() and add_meta_to_filename().
# The inputs cover three filename layouts (MMDDYYYY.EXCHANGE_ITCH50,
# YYYYMMDD.EXCHANGE_ITCH_50.gz, SMMDDYY-v50-exchange.txt.gz) and an unknown one.
library(RITCH)
library(tinytest)

# Get date from filename
# the date is expected as POSIXct in GMT, NA for an unknown layout
expect_equal(
  get_date_from_filename("03302017.NASDAQ_ITCH50"),
  as.POSIXct("2017-03-30", "GMT")
)
expect_equal(
  get_date_from_filename("20170130.BX_ITCH_50.gz"),
  as.POSIXct("2017-01-30", "GMT")
)
expect_equal(
  get_date_from_filename("S030220-v50-bx.txt.gz"),
  as.POSIXct("2020-03-02", "GMT")
)
expect_equal(
  get_date_from_filename("unknown_file_format"),
  NA
)

## Get exchange from filename
# the exchange is expected in upper case ("bx" -> "BX"), NA for an unknown layout
expect_equal(
  get_exchange_from_filename("03302017.NASDAQ_ITCH50"),
  "NASDAQ"
)
expect_equal(
  get_exchange_from_filename("20170130.BX_ITCH_50.gz"),
  "BX"
)
expect_equal(
  get_exchange_from_filename("S030220-v50-bx.txt.gz"),
  "BX"
)
expect_equal(
  get_exchange_from_filename("unknown_file_format"),
  NA
)

## Add meta to filename
# for known layouts date and exchange are replaced in place, keeping the layout
expect_equal(
  add_meta_to_filename("03302017.NASDAQ_ITCH50", "2010-12-24", "TEST"),
  "12242010.TEST_ITCH50"
)

expect_equal(
  add_meta_to_filename("20170130.BX_ITCH_50.gz", "2010-12-24", "TEST"),
  "20101224.TEST_ITCH_50.gz"
)
expect_equal(
  add_meta_to_filename("S030220-v50-bx.txt.gz", "2010-12-24", "TEST"),
  "S122410-v50-TEST.txt.gz"
)
# for unknown layouts "_<date>.<exchange>_ITCH_50" is appended to the base name,
# a leading folder is kept
expect_equal(
  add_meta_to_filename("unknown_file.ITCH_50", "2010-12-24", "TEST"),
  "unknown_file_20101224.TEST_ITCH_50"
)
expect_equal(
  add_meta_to_filename("some_folder/unknown_file.ITCH_50", "2010-12-24", "TEST"),
  "some_folder/unknown_file_20101224.TEST_ITCH_50"
)
--------------------------------------------------------------------------------
/inst/tinytest/test_filter_itch.R:
--------------------------------------------------------------------------------
# Tests for filter_itch(): the filtered file written to disk is read back and
# compared to the result of reading the bundled example file with the same filters.
library(RITCH)
library(tinytest)
library(data.table)
suppressPackageStartupMessages(library(bit64))
setDTthreads(2)

# infile: example file shipped with the package; outfile: scratch file in
# tempdir() that already carries date and exchange in its name
infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
outfile <- file.path(tempdir(), "testfile_20101224.TEST_ITCH_50")


################################################################################
# Test that filtering for all trades returns all data entries
orig <- read_itch(infile, quiet = TRUE)
trades <- read_trades(infile, quiet = TRUE)
expect_equal(orig$trades, trades)

# filters that do not exclude any trade give the same result as read_trades()
res <- read_itch(infile, quiet = TRUE, filter_msg_class = "trades",
                 filter_stock_locate = c(1, 2, 3), min_timestamp = 1,
                 max_timestamp = 6e14, filter_msg_type = "P")
expect_equal(res, trades)

filter_itch(infile, outfile, filter_msg_class = "trades", quiet = TRUE)
res <- read_itch(outfile, quiet = TRUE)
expect_equal(res$trades, trades)
unlink(outfile)


################################################################################
# Test that the first and last messages are parsed
of <- filter_itch(infile, outfile, filter_msg_class = "system_events", quiet = TRUE)

# the filename is not changed
expect_equal(of, outfile)

unlink(of)
# an output name without date/exchange information
tmpfile <- tempfile("testfile")
of <- filter_itch(infile, tmpfile, filter_msg_class = "system_events", quiet = TRUE)

# the outfile name is correctly constructed!
# (date and exchange of the input file are appended to the given name)
expect_equal(of, paste0(tmpfile, "_20101224.TEST_ITCH_50"))

# test file contents
expect_equal(file.size(of), 84)
df <- read_system_events(of, quiet = TRUE)
expect_equal(nrow(df), 6)
unlink(of)

################################################################################
################################################################################
# Test Message Class
filter_itch(infile, outfile, filter_msg_class = "orders", quiet = TRUE)

# calling it on the file again causes error unless overwrite = TRUE
expect_error(
  filter_itch(infile, outfile, filter_msg_class = "orders", quiet = TRUE)
)

# test overwrite = TRUE
filter_itch(infile, outfile, filter_msg_class = "orders", overwrite = TRUE,
            quiet = TRUE)

expect_equal(file.size(outfile), 190012)

# check that the output file contains only orders
df <- read_orders(outfile, quiet = TRUE)
expect_equal(nrow(df), 5000)

df2 <- read_orders(infile, quiet = TRUE)
expect_equal(df, df2)

# writing again to the same file results in an error
expect_error(
  filter_itch(infile, outfile, filter_msg_class = "orders", quiet = TRUE)
)
unlink(outfile)

################################################################################
# Test Append
# writing the orders twice with append = TRUE doubles the data
filter_itch(infile, outfile, filter_msg_class = "orders", quiet = TRUE)
filter_itch(infile, outfile, filter_msg_class = "orders", append = TRUE,
            quiet = TRUE)

df <- read_orders(outfile, quiet = TRUE)
dforig <- read_orders(infile, quiet = TRUE)

expect_equal(
  df,
  rbindlist(list(dforig, dforig))
)
unlink(outfile)

################################################################################
# Test smaller buffer_size
# the result must be identical to the default buffer size (same file size, same data)

filter_itch(infile, outfile, filter_msg_class = "orders",
            buffer_size = 50,
            quiet = TRUE)

expect_equal(file.size(outfile), 190012)

# check that the output file contains only orders
df <- read_orders(outfile, quiet = TRUE)
expect_equal(nrow(df), 5000)

df2 <- read_orders(infile, quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)

################################################################################
################################################################################
# Test Msg Type
filter_itch(infile, outfile, filter_msg_type = "S", quiet = TRUE)

expect_equal(file.size(outfile), 84)
# check that the output file contains the system event messages
df <- read_system_events(outfile, quiet = TRUE)
expect_equal(nrow(df), 6)

df2 <- read_system_events(infile, quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)


################################################################################
################################################################################
# Test Stock Locate
filter_itch(infile, outfile, filter_stock_locate = c(2, 3), quiet = TRUE)

expect_equal(file.size(outfile), 333876)
# check the number of messages per class in the output file
df <- read_itch(outfile, quiet = TRUE)
exp_count <- c(
  stock_directory = 2L, trading_status = 2L,
  orders = 4050L, modifications = 1626L, trades = 3115L
)
expect_equal(sapply(df, nrow), exp_count)

df2 <- read_itch(infile, filter_stock_locate = c(2, 3), quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)


################################################################################
################################################################################
# Test filter_stock
# filtering by stock name with a stock directory that maps the names to the
# locate codes 2 and 3 is expected to give the same result as the
# stock locate filter above
stock_sel <- c("BOB", "CHAR")
sdir <- data.table(stock = stock_sel,
                   stock_locate = c(2, 3))
filter_itch(infile, outfile, filter_stock = stock_sel, stock_directory = sdir,
            quiet = TRUE)

expect_equal(file.size(outfile), 333876)
# check the number of messages per class in the output file
df <- read_itch(outfile, quiet = TRUE)
exp_count <- c(
  stock_directory = 2L, trading_status = 2L,
  orders = 4050L, modifications = 1626L, trades = 3115L
)
expect_equal(sapply(df, nrow), exp_count)

df2 <- read_itch(infile, filter_stock = stock_sel, stock_directory = sdir,
                 quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)


################################################################################
################################################################################
# Test Timestamps

# Applies func (e.g. min or max) to the timestamp column of every non-empty
# table in the list ll and then to the collected per-table results.
# The class is set to integer64 again after unlist(), presumably because
# unlist() drops it.
get_func_of_ts <- function(ll, func = min) {
  ll <- ll[sapply(ll, nrow) != 0]
  mm <- lapply(ll, function(d) list(func(d$timestamp)))
  x <- unlist(mm)
  class(x) <- "integer64"
  func(x)
}

# check errors
# either min & max timestamp have the same size or 0 and 1
# the calls below use the lengths (2, not given), (2, 3) and (1, 3) for
# min_timestamp and max_timestamp, each of them has to raise an error
expect_error(
  filter_itch(infile, outfile, min_timestamp = 1:2, quiet = TRUE)
)
expect_error(
  filter_itch(infile, outfile, min_timestamp = 1:2, max_timestamp = 1:3,
              quiet = TRUE)
)
expect_error(
  filter_itch(infile, outfile, min_timestamp = 1, max_timestamp = 1:3,
              quiet = TRUE)
)


################################################################################
## Min only
ms <- as.integer64(45463537089764)
filter_itch(infile, outfile, min_timestamp = ms, quiet = TRUE)

expect_equal(file.size(outfile), 236547)
# check the number of messages per class in the output file
df <- read_itch(outfile, quiet = TRUE)
exp_count <- c(
  system_events = 3L, orders = 2501L, modifications = 979L, trades = 2598L
)
expect_equal(sapply(df, nrow), exp_count)

# read-in all data and filter the data manually
# NOTE(review): the filter is applied to df (the already filtered data) and not
# to df_all, so this comparison cannot detect missing messages - confirm intent
df_all <- read_itch(infile, quiet = TRUE)
df_all_f <- lapply(df, function(d) d[timestamp >= ms, ])
expect_equal(df_all_f, df)

# check that for all classes the min timestamp is not smaller than the expected value
expect_true(get_func_of_ts(df, min) >= ms)

# filtering while reading gives the same result as reading the filtered file
df2 <- read_itch(infile, min_timestamp = ms, quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)


################################################################################
## Max only
ms <- as.integer64(45463537089764)
filter_itch(infile, outfile, max_timestamp = ms, quiet = TRUE)

expect_equal(file.size(outfile), 228539)
# check the number of messages per class in the output file
df <- read_itch(outfile, quiet = TRUE)
exp_count <- c(
  system_events = 3L, stock_directory = 3L, trading_status = 3L,
  orders = 2500L, modifications = 1021L, trades = 2402L
)
expect_equal(sapply(df, nrow), exp_count)

# read-in all data and filter the data manually
# NOTE(review): as above, df is filtered here instead of df_all
df_all <- read_itch(infile, quiet = TRUE)
df_all_f <- lapply(df, function(d) d[timestamp <= ms, ])
expect_equal(df_all_f, df)

# check that for all classes the max timestamp is not larger than the expected value
expect_true(get_func_of_ts(df, max) <= ms)

# filtering while reading gives the same result as reading the filtered file
df2 <- read_itch(infile, max_timestamp = ms, quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)


################################################################################
## min and max
min_ts <- as.integer64(45463537089764)
max_ts <- as.integer64(51233773867238)
filter_itch(infile, outfile, min_timestamp = min_ts, max_timestamp = max_ts,
            quiet = TRUE)

expect_equal(file.size(outfile), 138558)

# check the number of messages per class in the output file
df <- read_itch(outfile, quiet = TRUE)
exp_count <- c(orders = 1501L, modifications = 598L, trades = 1477L)
expect_equal(sapply(df, nrow), exp_count)

# read-in all data and filter the data manually
# NOTE(review): the filter is applied to df (the already filtered data) and not
# to df_all, so this comparison cannot detect missing messages - confirm intent
df_all <- read_itch(infile, quiet = TRUE)
df_all_f <- lapply(df, function(d) d[timestamp >= min_ts & timestamp <= max_ts, ])
expect_equal(df_all_f, df)


# check that for all classes the timestamps lie within [min_ts, max_ts]
# NOTE(review): dd is not used below
dd <- df[sapply(df, nrow) != 0]
expect_true(get_func_of_ts(df, min) >= min_ts)
expect_true(get_func_of_ts(df, max) <= max_ts)

# filtering while reading gives the same result as reading the filtered file
df2 <- read_itch(infile, min_timestamp = min_ts, max_timestamp = max_ts,
                 quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)


################################################################################
################################################################################
# Test n_max

# max number of messages is 5000, taking all messages results in the same file
filter_itch(infile, outfile, n_max = 5000, quiet = TRUE)
expect_equal(file.size(infile), file.size(outfile))
unlink(outfile)

# take the first 100 messages for each message class
# (classes with fewer than 100 messages are kept completely)
filter_itch(infile, outfile, n_max = 100, quiet = TRUE)
df <- read_itch(outfile, quiet = TRUE)
exp_count <- c(system_events = 6, stock_directory = 3, trading_status = 3,
               orders = 100, modifications = 100, trades = 100)
expect_equal(sapply(df, nrow), exp_count)

df2 <- read_itch(infile, n_max = 100, quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)

################################################################################
# Test skip

# skipping 0 messages results in the same file
filter_itch(infile, outfile, skip = 0, quiet = TRUE)
expect_equal(file.size(infile), file.size(outfile))
unlink(outfile)


# skip the first 1000 messages for each message class
filter_itch(infile, outfile, skip = 1000, quiet = TRUE)
df <- read_itch(outfile, quiet = TRUE)
exp_count <- c(orders = 4000, modifications = 1000, trades = 4000)
expect_equal(sapply(df, nrow), exp_count)

df2 <- read_itch(infile, skip = 1000, quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)


# skip the first 4000 messages for each message class
# expect to see 5000-4000 trades and 5000-4000 orders
filter_itch(
  infile, outfile,
  skip = 4000,
  quiet = TRUE
)
df <- read_itch(outfile, quiet = TRUE)
exp_count <- c(orders = 1000, trades = 1000)
expect_equal(sapply(df, nrow), exp_count)

df2 <- read_itch(infile, skip = 4000, quiet = TRUE)
expect_equal(df, df2)
unlink(outfile)


################################################################################
################################################################################
# Test more complex filter
min_ts <- 40505246803501 # Q1 of all orders
max_ts <- 49358420393946 # Q3 of all orders

filter_itch(
  infile, outfile,
  filter_msg_class = c("orders", "trades"),
  filter_stock_locate = c(1, 3),
  filter_msg_type = "D",
  skip = 0, n_max = 100,
  min_timestamp = min_ts,
  max_timestamp = max_ts,
  quiet = TRUE
)
expect_equal(file.size(outfile), 10500)

# check that the output file contains 100 messages of each of the three classes
filtered_res <- read_itch(outfile, c("orders", "trades", "modifications"),
                          quiet = TRUE)
expect_equal(sapply(filtered_res, nrow),
             c(orders = 100, trades = 100, modifications = 100))

# read in the original file, and apply the same filters to each class
df_orig <- read_itch(infile, c("orders", "trades", "modifications"),
                     quiet = TRUE)
# apply the filters
msg_types <- c('D', 'A', 'F', 'P', 'Q', 'B')
df_orig_res <- lapply(df_orig, function(d)
  d[msg_type %in% msg_types &
      stock_locate %in% c(1, 3) &
      timestamp > min_ts & timestamp < max_ts][1:100,]
)

expect_equal(filtered_res, df_orig_res)
unlink(outfile)


################################################################################
# filter_itch works on gz input files
gzinfile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
tmpoutfile <- file.path(tempdir(), "gz_testfile_20101224.TEST_ITCH_50")

rawoutfile <- filter_itch(gzinfile, tmpoutfile, filter_msg_class = "orders",
                          quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
expect_equal(rawoutfile, tmpoutfile)
expect_equal(file.size(rawoutfile), 190012)

odf <- read_orders(rawoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
idf <- read_orders(gzinfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
expect_equal(odf, idf)
unlink(rawoutfile)


################################################################################
# works also on gz-output files
# with gz = TRUE the returned file name gets the ".gz" extension
rawoutfile <- filter_itch(gzinfile, tmpoutfile, filter_msg_class = "orders", gz = TRUE,
                          quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)

expect_equal(rawoutfile, paste0(tmpoutfile, ".gz"))
expect_true(file.exists(rawoutfile))
expect_equal(file.size(rawoutfile), 72619)

odf <- read_orders(rawoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
idf <- read_orders(gzinfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)

expect_equal(odf, idf)
unlink(rawoutfile)
unlink(tmpoutfile)
--------------------------------------------------------------------------------
/inst/tinytest/test_gz_functions.R:
--------------------------------------------------------------------------------
# Tests for gunzip_file() and gzip_file() using the bundled example files.
library(RITCH)
library(tinytest)
# data.table is not attached in this file, so the function is called with its
# namespace; the unqualified call only worked when another test file had
# attached data.table earlier in the same session
data.table::setDTthreads(2)

# check that using gunzip_file and gzip_file return the same files as the originals!
raw_file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
gz_file <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")

tmpfile <- file.path(tempdir(), "raw_20101224.TEST_ITCH_50")
tmpfile2 <- file.path(tempdir(), "gz_20101224.TEST_ITCH_50.gz")

expect_true(file.exists(raw_file))
expect_true(file.exists(gz_file))

# the gunzipped file has to be byte-identical to the raw example file
gunzip_file(gz_file, tmpfile)
expect_equal(
  tools::md5sum(raw_file)[[1]],
  tools::md5sum(tmpfile)[[1]]
)

gzip_file(raw_file, tmpfile2)

# check that the file contents are identical
expect_equal(
  read_itch(raw_file, quiet = TRUE),
  read_itch(tmpfile2, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
)
# NOTE(review): this expectation repeats the previous one verbatim
expect_equal(
  read_itch(raw_file, quiet = TRUE),
  read_itch(tmpfile2, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
)

unlink(c(tmpfile, tmpfile2))
--------------------------------------------------------------------------------
/inst/tinytest/test_write_itch.R:
--------------------------------------------------------------------------------
# Tests for write_itch(): the example file is read, written back to disk and
# the written file is compared to the original.
library(RITCH)
library(tinytest)
library(data.table)
setDTthreads(2)

infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")

ll <- read_itch(infile, quiet = TRUE)

################################################################################
################################################################################
#### Testing base write functionality
outfile_base <- file.path(tempdir(), "testfile")
outfile <- write_itch(ll, outfile_base, quiet = TRUE)

expect_equal(file.size(infile)[[1]], file.size(outfile)[[1]])

################################################################################
# expect identical files
expect_equal(tools::md5sum(infile)[[1]],
             tools::md5sum(outfile)[[1]])

# read in the written file again and compare it to the original data
ll2 <- read_itch(outfile, quiet = TRUE)
expect_equal(ll, ll2)


################################################################################
################################################################################
# Appending doubles file size
# appending throws warning
# add_meta = FALSE: outfile is used as the file name as is
outfile <- write_itch(ll, outfile, quiet = TRUE, add_meta = FALSE)
expect_warning(
  outfile <- write_itch(ll, outfile, quiet = TRUE, add_meta = FALSE,
                        append = TRUE)
)
expect_equal(file.size(outfile), 465048 * 2)

################################################################################
# read in again and compare to original doubled data
ll3 <- lapply(ll, function(x) rbindlist(list(x, x)))
ll4 <- read_itch(outfile, quiet = TRUE)
expect_equal(ll3, ll4)


################################################################################
################################################################################
#### Testing buffer_size
# buffer too large
expect_warning(
  outfile <- write_itch(ll, outfile, buffer_size = 5e9 + 1,
                        quiet = TRUE, add_meta = FALSE)
)
################################################################################
# buffer too small
expect_warning(
  outfile <- write_itch(ll, outfile, buffer_size = 51,
                        quiet = TRUE, add_meta = FALSE)
)
################################################################################
# small but ok buffer
outfile <- write_itch(ll, outfile, buffer_size = 52,
                      quiet = TRUE, add_meta = FALSE)

expect_equal(file.size(outfile), 465048)
# read in the written file again and compare it to the original data
ll2 <- read_itch(outfile, quiet = TRUE)
expect_equal(ll, ll2)

unlink(outfile)


################################################################################
################################################################################
#### Test gz compression file
outfile <- write_itch(ll, outfile_base, compress = TRUE, quiet = TRUE)

expect_equal(file.size(outfile), 159965)

# read in the written file again and compare it to the original data
ll2 <- read_itch(outfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
expect_equal(ll, ll2)

################################################################################
# test gz with smaller buffer size
outfile <- write_itch(ll, outfile_base, compress = TRUE, buffer_size = 100,
                      quiet = TRUE)

# with smaller buffer sizes when using compress = TRUE, the filesize will increase!
expect_equal(file.size(outfile), 419608)
# read in the written file again and compare it to the original data
ll2 <- read_itch(outfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
expect_equal(ll, ll2)

unlink(outfile)


################################################################################
################################################################################
#### check append and compress
# outfile still holds the name of the compressed file removed above
write_itch(ll, outfile, compress = TRUE, buffer_size = 100, add_meta = FALSE,
           quiet = TRUE)
expect_equal(file.size(outfile), 419608)

expect_warning(
  outfile <- write_itch(ll, outfile, compress = TRUE, append = TRUE,
                        buffer_size = 100, add_meta = FALSE, quiet = TRUE)
)

# note that appending to a gzipped file will linearly increase file size...
# only the buffers are compressed!
112 | expect_equal(file.size(outfile), 419608 * 2) 113 | 114 | expect_equal(lapply(ll, function(x) rbindlist(list(x, x))), 115 | read_itch(outfile, quiet = TRUE, force_gunzip = TRUE, 116 | force_cleanup = TRUE)) 117 | 118 | unlink(outfile) 119 | -------------------------------------------------------------------------------- /man/add_meta_to_filename.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{add_meta_to_filename} 4 | \alias{add_meta_to_filename} 5 | \title{Adds meta information (date and exchange) to an itch filename} 6 | \usage{ 7 | add_meta_to_filename(file, date, exchange) 8 | } 9 | \arguments{ 10 | \item{file}{the filename} 11 | 12 | \item{date}{the date as a date-class or as a string that is understood by 13 | \code{\link[base:as.Date]{base::as.Date()}}.} 14 | 15 | \item{exchange}{the name of the exchange} 16 | } 17 | \value{ 18 | the filename with exchanged or added date and exchange information 19 | } 20 | \description{ 21 | Note that if date and exchange information are already present, 22 | they are overwritten 23 | } 24 | \examples{ 25 | add_meta_to_filename("03302017.NASDAQ_ITCH50", "2010-12-24", "TEST") 26 | add_meta_to_filename("20170130.BX_ITCH_50.gz", "2010-12-24", "TEST") 27 | add_meta_to_filename("S030220-v50-bx.txt.gz", "2010-12-24", "TEST") 28 | add_meta_to_filename("unknown_file.ITCH_50", "2010-12-24", "TEST") 29 | } 30 | -------------------------------------------------------------------------------- /man/count_functions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count_functions.R 3 | \name{count_functions} 4 | \alias{count_functions} 5 | \alias{count_messages} 6 | \alias{count_orders} 7 | \alias{count_trades} 8 | \alias{count_modifications} 9 | \alias{count_system_events} 
10 | \alias{count_stock_directory} 11 | \alias{count_trading_status} 12 | \alias{count_reg_sho} 13 | \alias{count_market_participant_states} 14 | \alias{count_mwcb} 15 | \alias{count_ipo} 16 | \alias{count_luld} 17 | \alias{count_noii} 18 | \alias{count_rpii} 19 | \title{Counts the messages of an ITCH-file} 20 | \usage{ 21 | count_messages( 22 | file, 23 | add_meta_data = FALSE, 24 | buffer_size = -1, 25 | quiet = FALSE, 26 | force_gunzip = FALSE, 27 | gz_dir = tempdir(), 28 | force_cleanup = TRUE 29 | ) 30 | 31 | count_orders(x) 32 | 33 | count_trades(x) 34 | 35 | count_modifications(x) 36 | 37 | count_system_events(x) 38 | 39 | count_stock_directory(x) 40 | 41 | count_trading_status(x) 42 | 43 | count_reg_sho(x) 44 | 45 | count_market_participant_states(x) 46 | 47 | count_mwcb(x) 48 | 49 | count_ipo(x) 50 | 51 | count_luld(x) 52 | 53 | count_noii(x) 54 | 55 | count_rpii(x) 56 | } 57 | \arguments{ 58 | \item{file}{the path to the input file, either a gz-file or a plain-text file} 59 | 60 | \item{add_meta_data}{if the meta-data of the messages should be added, defaults to FALSE} 61 | 62 | \item{buffer_size}{the size of the buffer in bytes, defaults to 1e8 (100 MB), if you have a large amount of RAM, 1e9 (1GB) might be faster} 63 | 64 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE} 65 | 66 | \item{force_gunzip}{only applies if file is a gz-file and a file with the same (gunzipped) name already exists. 67 | if set to TRUE, the existing file is overwritten. Default value is FALSE} 68 | 69 | \item{gz_dir}{a directory where the gz archive is extracted to. 70 | Only applies if file is a gz archive. Default is \code{\link[=tempdir]{tempdir()}}.} 71 | 72 | \item{force_cleanup}{only applies if file is a gz-file. 
If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards.} 73 | 74 | \item{x}{a file or a data.table containing the message types and the counts, 75 | as outputted by \code{count_messages}} 76 | } 77 | \value{ 78 | a data.table containing the message-type and their counts for \code{count_messages} 79 | or an integer value for the other functions. 80 | } 81 | \description{ 82 | Counts the messages of an ITCH-file 83 | } 84 | \details{ 85 | \itemize{ 86 | \item \code{count_orders}: Counts order messages. Message type \code{A} and \code{F} 87 | } 88 | 89 | \itemize{ 90 | \item \code{count_trades}: Counts trade messages. Message type \code{P}, \code{Q} and \code{B} 91 | } 92 | 93 | \itemize{ 94 | \item \code{count_modifications}: Counts order modification messages. Message 95 | type \code{E}, \code{C}, \code{X}, \code{D}, and \code{U} 96 | } 97 | 98 | \itemize{ 99 | \item \code{count_system_events}: Counts system event messages. Message type \code{S} 100 | } 101 | 102 | \itemize{ 103 | \item \code{count_stock_directory}: Counts stock trading messages. Message 104 | type \code{R} 105 | } 106 | 107 | \itemize{ 108 | \item \code{count_trading_status}: Counts trading status messages. Message 109 | type \code{H} and \code{h} 110 | } 111 | 112 | \itemize{ 113 | \item \code{count_reg_sho}: Counts messages regarding reg SHO. Message type 114 | \code{Y} 115 | } 116 | 117 | \itemize{ 118 | \item \code{count_market_participant_states}: Counts messages regarding the 119 | status of market participants. Message type \code{L} 120 | } 121 | 122 | \itemize{ 123 | \item \code{count_mwcb}: Counts messages regarding Market-Wide-Circuit-Breakers 124 | (MWCB). Message type \code{V} and \code{W} 125 | } 126 | 127 | \itemize{ 128 | \item \code{count_ipo}: Counts messages regarding IPOs. Message type \code{K} 129 | } 130 | 131 | \itemize{ 132 | \item \code{count_luld}: Counts messages regarding LULDs (limit up-limit down) 133 | auction collars. 
Message type \code{J} 134 | } 135 | 136 | \itemize{ 137 | \item \code{count_noii}: Counts Net Order Imbalance Indicator (NOII) messages. 138 | Message type \code{I} 139 | } 140 | 141 | \itemize{ 142 | \item \code{count_rpii}: Counts Retail Price Improvement Indicator (RPII) 143 | messages. Message type \code{N} 144 | } 145 | } 146 | \examples{ 147 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 148 | count_messages(file) 149 | count_messages(file, add_meta_data = TRUE, quiet = TRUE) 150 | 151 | # file can also be a .gz file 152 | gz_file <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH") 153 | count_messages(gz_file, quiet = TRUE) 154 | 155 | # count only a specific class 156 | msg_count <- count_messages(file, quiet = TRUE) 157 | 158 | # either count based on a given data.table outputted by count_messages 159 | count_orders(msg_count) 160 | 161 | # or count orders from a file and not from a msg_count 162 | count_orders(file) 163 | 164 | ### Specific class count functions are: 165 | count_orders(msg_count) 166 | count_trades(msg_count) 167 | count_modifications(msg_count) 168 | count_system_events(msg_count) 169 | count_stock_directory(msg_count) 170 | count_trading_status(msg_count) 171 | count_reg_sho(msg_count) 172 | count_market_participant_states(msg_count) 173 | count_mwcb(msg_count) 174 | count_ipo(msg_count) 175 | count_luld(msg_count) 176 | count_noii(msg_count) 177 | count_rpii(msg_count) 178 | } 179 | -------------------------------------------------------------------------------- /man/count_internal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count_functions.R 3 | \name{count_internal} 4 | \alias{count_internal} 5 | \title{Internal function to count the messages} 6 | \usage{ 7 | count_internal(x, types) 8 | } 9 | \arguments{ 10 | \item{x}{a data.frame containing the message 
types and the counts} 11 | 12 | \item{types}{a vector containing the types} 13 | } 14 | \value{ 15 | a numeric value of number of orders in x 16 | } 17 | \description{ 18 | Internal function to count the messages 19 | } 20 | \examples{ 21 | # Only used internally 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/download_sample_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_samples.R 3 | \name{download_sample_file} 4 | \alias{download_sample_file} 5 | \title{Downloads a sample ITCH File from NASDAQs Server} 6 | \usage{ 7 | download_sample_file( 8 | choice = c("smallest", "largest", "earliest", "latest", "random", "all"), 9 | file = NA, 10 | exchanges = NA, 11 | dir = ".", 12 | force_download = FALSE, 13 | check_md5sum = TRUE, 14 | quiet = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{choice}{which file should be chosen? One of: smallest (default), largest, 19 | earliest (date-wise), latest, random, or all.} 20 | 21 | \item{file}{the name of a specific file, overrules the choice and exchanges arguments} 22 | 23 | \item{exchanges}{A vector of exchanges, can be NASDAQ, BX, or PSX. 24 | The default value is to consider all exchanges.} 25 | 26 | \item{dir}{The directory where the files will be saved to, default is current working directory.} 27 | 28 | \item{force_download}{If the file should be downloaded even if it already exists locally. 
29 | Default value is FALSE.} 30 | 31 | \item{check_md5sum}{If the md5-sum (hash-value) of the downloaded file should be checked, default value is TRUE.} 32 | 33 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE} 34 | } 35 | \value{ 36 | an invisible vector of the files 37 | } 38 | \description{ 39 | The Server can be found at \url{https://emi.nasdaq.com/ITCH/Nasdaq\%20ITCH/} 40 | } 41 | \details{ 42 | Warning: the smallest file is around 300 MB, with the largest exceeding 5 GB. 43 | There are about 17 files in total. Downloading all might take a considerable amount of time. 44 | } 45 | \examples{ 46 | \dontrun{ 47 | download_sample_file() 48 | file <- download_sample_file() 49 | file 50 | 51 | # download a specific sample file 52 | file <- download_sample_file(file = "2019130.BX_ITCH_50.gz") 53 | file 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /man/download_stock_directory.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/download_stock_directory.R 3 | \name{download_stock_directory} 4 | \alias{download_stock_directory} 5 | \title{Downloads the stock directory (stock locate codes) for a given date and exchange} 6 | \usage{ 7 | download_stock_directory(exchange, date, cache = FALSE, quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{exchange}{The exchange, either NASDAQ (equivalent to NDQ), BX, or PSX} 11 | 12 | \item{date}{The date, should be of class Date. 
If not the value is converted 13 | using \code{as.Date}.} 14 | 15 | \item{cache}{If the stock directory should be cached, can be set to TRUE 16 | to save the stock directories in the working directory or a character for a 17 | target directory.} 18 | 19 | \item{quiet}{If the download function should be quiet, default is FALSE.} 20 | } 21 | \value{ 22 | a data.table of the tickers, the respective stock locate codes, and 23 | the exchange/date information 24 | } 25 | \description{ 26 | The data is downloaded from NASDAQs server, which can be found here 27 | \url{https://emi.nasdaq.com/ITCH/Stock_Locate_Codes/} 28 | } 29 | \examples{ 30 | \dontrun{ 31 | download_stock_directory("BX", "2019-07-02") 32 | download_stock_directory(c("BX", "NDQ"), c("2019-07-02", "2019-07-03")) 33 | download_stock_directory("BX", "2019-07-02", cache = TRUE) 34 | 35 | download_stock_directory("BX", "2019-07-02", cache = "stock_directory") 36 | dir.exists("stock_directory") 37 | list.files("stock_directory") 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /man/ex20101224.TEST_ITCH_50.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zzz.R 3 | \name{ex20101224.TEST_ITCH_50} 4 | \alias{ex20101224.TEST_ITCH_50} 5 | \title{ITCH 50 Example Testing Dataset} 6 | \description{ 7 | ITCH 50 Example Testing Dataset 8 | } 9 | \section{ex20101224.TEST_ITCH_50}{ 10 | 11 | 12 | The test dataset contains artificial trading data for three made up stocks: 13 | \code{ALC}, \code{BOB}, and \code{CHAR}. 14 | 15 | The dataset is used in the examples and unit tests of the package. 
16 | 17 | The data contains the following count of messages: 18 | \itemize{ 19 | \item 6 system events (message type \code{S}) 20 | \item 3 stock directory (message type \code{R}) 21 | \item 3 trading status (message type \code{H}) 22 | \item 5000 orders (4997 message type \code{A} and 3 \code{F}) 23 | \item 2000 modifications (198 \code{E}, 45 \code{X}, 1745 \code{D}, and 12 \code{U} message types) 24 | \item 5000 trades (message type \code{P}) 25 | } 26 | 27 | The file is also available as \code{ex20101224.TEST_ITCH_50.gz}. 28 | 29 | To get real sample ITCH datasets, see the \code{\link[=download_sample_file]{download_sample_file()}} 30 | function. 31 | } 32 | 33 | \examples{ 34 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 35 | 36 | sys <- read_system_events(file) 37 | } 38 | -------------------------------------------------------------------------------- /man/figures/README-ETF_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DavZim/RITCH/9bd51af48d26703bd95ab4f0db6532a497c104c1/man/figures/README-ETF_plot-1.png -------------------------------------------------------------------------------- /man/filter_itch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/filter_itch.R 3 | \name{filter_itch} 4 | \alias{filter_itch} 5 | \title{Filters an ITCH file to another ITCH file} 6 | \usage{ 7 | filter_itch( 8 | infile, 9 | outfile, 10 | filter_msg_class = NA_character_, 11 | filter_msg_type = NA_character_, 12 | filter_stock_locate = NA_integer_, 13 | min_timestamp = bit64::as.integer64(NA), 14 | max_timestamp = bit64::as.integer64(NA), 15 | filter_stock = NA_character_, 16 | stock_directory = NA, 17 | skip = 0, 18 | n_max = -1, 19 | append = FALSE, 20 | overwrite = FALSE, 21 | gz = FALSE, 22 | buffer_size = -1, 23 | quiet = FALSE, 24 | 
force_gunzip = FALSE, 25 | force_cleanup = TRUE 26 | ) 27 | } 28 | \arguments{ 29 | \item{infile}{the input file where the messages are taken from, can be a 30 | gz-archive or a plain ITCH file.} 31 | 32 | \item{outfile}{the output file where the filtered messages are written to. 33 | Note that the date and exchange information from the \code{infile} are used, 34 | see also \code{\link[=add_meta_to_filename]{add_meta_to_filename()}} for further information.} 35 | 36 | \item{filter_msg_class}{a vector of classes to load, can be "orders", "trades", 37 | "modifications", ... see also \code{\link[=get_msg_classes]{get_msg_classes()}}. 38 | Default value is to take all message classes.} 39 | 40 | \item{filter_msg_type}{a character vector, specifying a filter for message types. 41 | Note that this can be used to only return 'A' orders for instance.} 42 | 43 | \item{filter_stock_locate}{an integer vector, specifying a filter for locate codes. 44 | The locate codes can be looked up by calling \code{\link[=read_stock_directory]{read_stock_directory()}} 45 | or by downloading from NASDAQ by using \code{\link[=download_stock_directory]{download_stock_directory()}}. 46 | Note that some message types (e.g., system events, MWCB, and IPO) do not use 47 | a locate code.} 48 | 49 | \item{min_timestamp}{a 64-bit integer vector (see also \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}}) 50 | of minimum timestamp (inclusive). 51 | Note: min and max timestamp must be supplied with the same length or left empty.} 52 | 53 | \item{max_timestamp}{a 64-bit integer vector (see also \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}}) 54 | of maximum timestamp (inclusive). 55 | Note: min and max timestamp must be supplied with the same length or left empty.} 56 | 57 | \item{filter_stock}{a character vector, specifying a filter for stocks. 
58 | Note that this is a shorthand for the \code{filter_stock_locate} argument, as it 59 | tries to find the stock_locate based on the \code{stock_directory} argument, 60 | if this is not found, it will try to extract the stock directory from the file, 61 | else an error is thrown.} 62 | 63 | \item{stock_directory}{A data.frame containing the stock-locate code relationship. 64 | As outputted by \code{\link[=read_stock_directory]{read_stock_directory()}}. 65 | Only used if \code{filter_stock} is set. To download the stock directory from 66 | NASDAQs server, use \code{\link[=download_stock_directory]{download_stock_directory()}}.} 67 | 68 | \item{skip}{Number of messages to skip before starting parsing messages, 69 | note the skip parameter applies to the specific message class, i.e., it would 70 | skip the messages for each type (e.g., skip the first 10 messages for each class).} 71 | 72 | \item{n_max}{Maximum number of messages to parse, default is to read all values. 73 | Can also be a data.frame of msg_types and counts, as returned by 74 | \code{\link[=count_messages]{count_messages()}}. 75 | Note the n_max parameter applies to the specific message class not the whole 76 | file.} 77 | 78 | \item{append}{if the messages should be appended to the outfile, default is 79 | false. Note, this is helpful if \code{skip} and/or \code{n_max} are used for 80 | batch filtering.} 81 | 82 | \item{overwrite}{if an existing outfile with the same name should be 83 | overwritten. Default value is false} 84 | 85 | \item{gz}{if the output file should be gzip-compressed. Note that the name 86 | of the output file will be appended with .gz if not already present. The 87 | final output name is returned. 
Default value is false.} 88 | 89 | \item{buffer_size}{the size of the buffer in bytes, defaults to 1e8 (100 MB), 90 | if you have a large amount of RAM, 1e9 (1GB) might be faster} 91 | 92 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE} 93 | 94 | \item{force_gunzip}{only applies if the input file is a gz-archive and a file with the same (gunzipped) name already exists. 95 | if set to TRUE, the existing file is overwritten. Default value is FALSE} 96 | 97 | \item{force_cleanup}{only applies if the input file is a gz-archive. 98 | If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards. 99 | Only applies when the gunzipped raw file did not exist before.} 100 | } 101 | \value{ 102 | the name of the output file (maybe different from the inputted 103 | outfile due to adding the date and exchange), silently 104 | } 105 | \description{ 106 | This function allows to perform very fast filter operations on large ITCH 107 | files. The messages are written to another ITCH file. 108 | } 109 | \details{ 110 | Note that this can be especially useful on larger files or where memory 111 | is not large enough to load and filter the data, which would otherwise limit the analysis. 112 | 113 | As with the \code{\link[=read_itch]{read_itch()}} functions, it allows to filter for 114 | \code{msg_class}, \code{msg_type}, \code{stock_locate}/\code{stock}, and 115 | \code{timestamp}. 
116 | } 117 | \examples{ 118 | infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 119 | outfile <- tempfile(fileext = "_20101224.TEST_ITCH_50") 120 | filter_itch( 121 | infile, outfile, 122 | filter_msg_class = c("orders", "trades"), 123 | filter_msg_type = "R", # stock_directory 124 | skip = 0, n_max = 100 125 | ) 126 | 127 | # expecting 100 orders, 100 trades, and 3 stock_directory entries 128 | count_messages(outfile) 129 | 130 | # check that the output file contains the same 131 | res <- read_itch(outfile, c("orders", "trades", "stock_directory")) 132 | sapply(res, nrow) 133 | 134 | res2 <- read_itch(infile, c("orders", "trades", "stock_directory"), 135 | n_max = 100) 136 | 137 | all.equal(res, res2) 138 | } 139 | -------------------------------------------------------------------------------- /man/format_bytes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{format_bytes} 4 | \alias{format_bytes} 5 | \title{Formats a number of bytes} 6 | \usage{ 7 | format_bytes(x, digits = 2, unit_suffix = "B", base = 1000) 8 | } 9 | \arguments{ 10 | \item{x}{the values} 11 | 12 | \item{digits}{the number of digits to display, default value is 2} 13 | 14 | \item{unit_suffix}{the unit suffix, default value is 'B' (for bytes), 15 | useful is also 'B/s' if you have read/write speeds} 16 | 17 | \item{base}{the base for kilo, mega, ... 
definition, default is 1000} 18 | } 19 | \value{ 20 | the values as a character 21 | } 22 | \description{ 23 | Formats a number of bytes 24 | } 25 | \examples{ 26 | format_bytes(1234) 27 | format_bytes(1234567890) 28 | format_bytes(123456789012, unit_suffix = "iB", base = 1024) 29 | } 30 | -------------------------------------------------------------------------------- /man/get_date_from_filename.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{get_date_from_filename} 4 | \alias{get_date_from_filename} 5 | \title{Returns the date from an ITCH-filename} 6 | \usage{ 7 | get_date_from_filename(file) 8 | } 9 | \arguments{ 10 | \item{file}{a filename} 11 | } 12 | \value{ 13 | the date as fastPOSIXct 14 | } 15 | \description{ 16 | Returns the date from an ITCH-filename 17 | } 18 | \examples{ 19 | get_date_from_filename("03302017.NASDAQ_ITCH50") 20 | get_date_from_filename("20170130.BX_ITCH_50.gz") 21 | get_date_from_filename("S030220-v50-bx.txt.gz") 22 | get_date_from_filename("unknown_file_format") 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/get_exchange_from_filename.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{get_exchange_from_filename} 4 | \alias{get_exchange_from_filename} 5 | \title{Returns the exchange from an ITCH-filename} 6 | \usage{ 7 | get_exchange_from_filename(file) 8 | } 9 | \arguments{ 10 | \item{file}{a filename} 11 | } 12 | \value{ 13 | The exchange 14 | } 15 | \description{ 16 | Returns the exchange from an ITCH-filename 17 | } 18 | \examples{ 19 | get_exchange_from_filename("03302017.NASDAQ_ITCH50") 20 | get_exchange_from_filename("20170130.BX_ITCH_50.gz") 21 | 
get_exchange_from_filename("S030220-v50-bx.txt.gz") 22 | get_exchange_from_filename("Unknown_file_format") 23 | } 24 | -------------------------------------------------------------------------------- /man/get_msg_classes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count_functions.R 3 | \name{get_msg_classes} 4 | \alias{get_msg_classes} 5 | \title{Returns the message class data for the message types} 6 | \usage{ 7 | get_msg_classes() 8 | } 9 | \value{ 10 | a data.table with the information of the message-types 11 | } 12 | \description{ 13 | All information is handled according to the official ITCH 5.0 14 | documentation as found here: 15 | \url{http://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHSpecification.pdf} 16 | } 17 | \details{ 18 | \itemize{ 19 | \item \code{msg_type} the type of the message 20 | \item \code{msg_class} the group the message belongs to 21 | \item \code{msg_name} the official name of the message 22 | \item \code{doc_nr} the number of the message in the documentation 23 | } 24 | } 25 | \examples{ 26 | get_msg_classes() 27 | } 28 | \seealso{ 29 | \code{open_itch_specification()} 30 | } 31 | -------------------------------------------------------------------------------- /man/gz_functions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gz_functions.R 3 | \name{gz_functions} 4 | \alias{gz_functions} 5 | \alias{gunzip_file} 6 | \alias{gzip_file} 7 | \title{Compresses and uncompresses files to and from gz-archives} 8 | \usage{ 9 | gunzip_file( 10 | infile, 11 | outfile = gsub("\\\\.gz$", "", infile), 12 | buffer_size = min(4 * file.size(infile), 2e+09) 13 | ) 14 | 15 | gzip_file( 16 | infile, 17 | outfile = NA, 18 | buffer_size = min(4 * file.size(infile), 2e+09) 19 
| ) 20 | } 21 | \arguments{ 22 | \item{infile}{the file to be zipped or unzipped} 23 | 24 | \item{outfile}{the resulting zipped or unzipped file} 25 | 26 | \item{buffer_size}{the size of the buffer to read in at once, default is 4 times the file.size (max 2 GB).} 27 | } 28 | \value{ 29 | The filename of the resulting (zipped or unzipped) file, invisibly 30 | } 31 | \description{ 32 | Allows the compression and uncompression of files 33 | } 34 | \details{ 35 | Functions are 36 | 37 | \itemize{ 38 | \item \code{gunzip_file}: uncompresses a gz-archive to raw binary data 39 | } 40 | 41 | \itemize{\item \code{gzip_file}: compresses a raw binary data file to a gz-archive} 42 | } 43 | \examples{ 44 | gzfile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH") 45 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 46 | 47 | # uncompress file 48 | (outfile <- gunzip_file(gzfile, "tmp")) 49 | file.info(outfile) 50 | unlink(outfile) 51 | 52 | # compress file 53 | (outfile <- gzip_file(file)) 54 | file.info(outfile) 55 | unlink(outfile) 56 | } 57 | -------------------------------------------------------------------------------- /man/list_sample_files.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_samples.R 3 | \name{list_sample_files} 4 | \alias{list_sample_files} 5 | \title{Returns a data.table of the sample files on the server} 6 | \usage{ 7 | list_sample_files() 8 | } 9 | \value{ 10 | a data.table of the files 11 | } 12 | \description{ 13 | The Server can be found at \url{https://emi.nasdaq.com/ITCH/Nasdaq\%20ITCH/} 14 | } 15 | \examples{ 16 | \dontrun{ 17 | list_sample_files() 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /man/open_itch_sample_server.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 
2 | % Please edit documentation in R/helpers.R 3 | \name{open_itch_sample_server} 4 | \alias{open_itch_sample_server} 5 | \title{Opens the ITCH sample page} 6 | \usage{ 7 | open_itch_sample_server() 8 | } 9 | \value{ 10 | the URL (invisible) 11 | } 12 | \description{ 13 | The server can be found at \url{https://emi.nasdaq.com/ITCH/Nasdaq\%20ITCH/}. 14 | } 15 | \examples{ 16 | \dontrun{ 17 | open_itch_sample_server() 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /man/open_itch_specification.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{open_itch_specification} 4 | \alias{open_itch_specification} 5 | \title{Opens the ITCH Specification PDF} 6 | \usage{ 7 | open_itch_specification() 8 | } 9 | \value{ 10 | the URL (invisible) 11 | } 12 | \description{ 13 | The specifications can be found as a PDF \url{https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf}. 
14 | } 15 | \examples{ 16 | \dontrun{ 17 | open_itch_specification() 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /man/read_functions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_functions.R 3 | \name{read_functions} 4 | \alias{read_functions} 5 | \alias{read_itch} 6 | \alias{read_system_events} 7 | \alias{read_stock_directory} 8 | \alias{read_trading_status} 9 | \alias{read_reg_sho} 10 | \alias{read_market_participant_states} 11 | \alias{read_mwcb} 12 | \alias{read_ipo} 13 | \alias{read_luld} 14 | \alias{read_orders} 15 | \alias{read_modifications} 16 | \alias{read_trades} 17 | \alias{read_noii} 18 | \alias{read_rpii} 19 | \alias{get_orders} 20 | \alias{get_trades} 21 | \alias{get_modifications} 22 | \title{Reads certain messages of an ITCH-file into a data.table} 23 | \usage{ 24 | read_itch( 25 | file, 26 | filter_msg_class = NA, 27 | skip = 0, 28 | n_max = -1, 29 | filter_msg_type = NA_character_, 30 | filter_stock_locate = NA_integer_, 31 | min_timestamp = bit64::as.integer64(NA), 32 | max_timestamp = bit64::as.integer64(NA), 33 | filter_stock = NA_character_, 34 | stock_directory = NA, 35 | buffer_size = -1, 36 | quiet = FALSE, 37 | add_meta = TRUE, 38 | force_gunzip = FALSE, 39 | gz_dir = tempdir(), 40 | force_cleanup = TRUE 41 | ) 42 | 43 | read_system_events(file, ..., add_descriptions = FALSE) 44 | 45 | read_stock_directory(file, ..., add_descriptions = FALSE) 46 | 47 | read_trading_status(file, ..., add_descriptions = FALSE) 48 | 49 | read_reg_sho(file, ..., add_descriptions = FALSE) 50 | 51 | read_market_participant_states(file, ..., add_descriptions = FALSE) 52 | 53 | read_mwcb(file, ...) 54 | 55 | read_ipo(file, ..., add_descriptions = FALSE) 56 | 57 | read_luld(file, ...) 58 | 59 | read_orders(file, ...) 60 | 61 | read_modifications(file, ...) 
62 | 63 | read_trades(file, ...) 64 | 65 | read_noii(file, ..., add_descriptions = FALSE) 66 | 67 | read_rpii(file, ..., add_descriptions = FALSE) 68 | 69 | get_orders(file, ...) 70 | 71 | get_trades(file, ...) 72 | 73 | get_modifications(file, ...) 74 | } 75 | \arguments{ 76 | \item{file}{the path to the input file, either a gz-archive or a plain ITCH file} 77 | 78 | \item{filter_msg_class}{a vector of classes to load, can be "orders", "trades", 79 | "modifications", ... see also \code{\link[=get_msg_classes]{get_msg_classes()}}. 80 | Default value is to take all message classes.} 81 | 82 | \item{skip}{Number of messages to skip before starting parsing messages, 83 | note the skip parameter applies to the specific message class, i.e., it would 84 | skip the messages for each type (e.g., skip the first 10 messages for each class).} 85 | 86 | \item{n_max}{Maximum number of messages to parse, default is to read all values. 87 | Can also be a data.frame of msg_types and counts, as returned by 88 | \code{\link[=count_messages]{count_messages()}}. 89 | Note the n_max parameter applies to the specific message class not the whole 90 | file.} 91 | 92 | \item{filter_msg_type}{a character vector, specifying a filter for message types. 93 | Note that this can be used to only return 'A' orders for instance.} 94 | 95 | \item{filter_stock_locate}{an integer vector, specifying a filter for locate codes. 96 | The locate codes can be looked up by calling \code{\link[=read_stock_directory]{read_stock_directory()}} 97 | or by downloading from NASDAQ by using \code{\link[=download_stock_directory]{download_stock_directory()}}. 98 | Note that some message types (e.g., system events, MWCB, and IPO) do not use 99 | a locate code.} 100 | 101 | \item{min_timestamp}{a 64-bit integer vector (see also \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}}) 102 | of minimum timestamp (inclusive). 
103 | Note: min and max timestamp must be supplied with the same length or left empty.} 104 | 105 | \item{max_timestamp}{a 64-bit integer vector (see also \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}}) 106 | of maximum timestamp (inclusive). 107 | Note: min and max timestamp must be supplied with the same length or left empty.} 108 | 109 | \item{filter_stock}{a character vector, specifying a filter for stocks. 110 | Note that this is a shorthand for the \code{filter_stock_locate} argument, as it 111 | tries to find the stock_locate based on the \code{stock_directory} argument, 112 | if this is not found, it will try to extract the stock directory from the file, 113 | else an error is thrown.} 114 | 115 | \item{stock_directory}{A data.frame containing the stock-locate code relationship. 116 | As outputted by \code{\link[=read_stock_directory]{read_stock_directory()}}. 117 | Only used if \code{filter_stock} is set. To download the stock directory from 118 | NASDAQ's server, use \code{\link[=download_stock_directory]{download_stock_directory()}}.} 119 | 120 | \item{buffer_size}{the size of the buffer in bytes, defaults to 1e8 (100 MB), 121 | if you have a large amount of RAM, 1e9 (1GB) might be faster} 122 | 123 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE} 124 | 125 | \item{add_meta}{if TRUE, the date and exchange information of the file are added, defaults to TRUE} 126 | 127 | \item{force_gunzip}{only applies if the input file is a gz-archive and a file with the same (gunzipped) name already exists. 128 | If set to TRUE, the existing file is overwritten. Default value is FALSE} 129 | 130 | \item{gz_dir}{a directory where the gz archive is extracted to. 131 | Only applies if file is a gz archive. Default is \code{\link[=tempdir]{tempdir()}}.} 132 | 133 | \item{force_cleanup}{only applies if the input file is a gz-archive. 134 | If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards. 
135 | Only applies when the gunzipped raw file did not exist before.} 136 | 137 | \item{...}{Additional arguments passed to \code{read_itch}} 138 | 139 | \item{add_descriptions}{add longer descriptions to shortened variables. 140 | The added information is taken from the official ITCH documentation 141 | see also \code{\link[=open_itch_specification]{open_itch_specification()}}} 142 | } 143 | \value{ 144 | a data.table containing the messages 145 | } 146 | \description{ 147 | For faster file-reads (at the tradeoff of increased memory usages), you can 148 | increase the \code{buffer_size} to 1GB (1e9) or more. 149 | 150 | If you access the same file multiple times, you can provide the message 151 | counts as outputted from \code{\link[=count_messages]{count_messages()}} to the \code{n_max} 152 | argument, this allows skipping one pass over the file per read instruction. 153 | 154 | If you need to read in multiple message classes, you can specify multiple 155 | message classes to \code{read_itch}, which results in only a single file pass. 156 | 157 | If the file is too large to be loaded into the workspace at once, you can 158 | specify different \code{skip} and \code{n_max} to load only 159 | a specific range of messages. 160 | Alternatively, you can filter certain messages to another file using 161 | \code{\link[=filter_itch]{filter_itch()}}, which is substantially faster than parsing a file 162 | and filtering it. 163 | 164 | Note that all read functions allow both plain ITCH files as well as gzipped 165 | files. 166 | If a gzipped file is found, it will look for a plain ITCH file with 167 | the same name and use that instead. 168 | If this file is not found, it will be created by unzipping the archive. 169 | Note that the unzipped file is NOT deleted by default (the file will be 170 | created in the current working directory). 171 | It might result in increased disk usage but reduces future read times for 172 | that specific file. 
173 | To force RITCH to delete "temporary" files after uncompressing, use 174 | \code{force_cleanup = TRUE} (only deletes the files if they were extracted 175 | before, does not remove the archive itself). 176 | } 177 | \details{ 178 | The details of the different messages types can be found in the official 179 | ITCH specification (see also \code{\link[=open_itch_specification]{open_itch_specification()}}) 180 | 181 | \itemize{ 182 | \item \code{read_itch}: Reads a message class message, can also read multiple 183 | classes in one file-pass. 184 | } 185 | 186 | \itemize{ 187 | \item \code{read_system_events}: Reads system event messages. Message type \code{S} 188 | } 189 | 190 | \itemize{ 191 | \item \code{read_stock_directory}: Reads stock trading messages. Message type \code{R} 192 | } 193 | 194 | \itemize{ 195 | \item \code{read_trading_status}: Reads trading status messages. Message type \code{H} 196 | and \code{h} 197 | } 198 | 199 | \itemize{ 200 | \item \code{read_reg_sho}: Reads messages regarding reg SHO. Message type \code{Y} 201 | } 202 | 203 | \itemize{ 204 | \item \code{read_market_participant_states}: Reads messages regarding the 205 | status of market participants. Message type \code{L} 206 | } 207 | 208 | \itemize{ 209 | \item \code{read_mwcb}: Reads messages regarding Market-Wide-Circuit-Breakers 210 | (MWCB). Message type \code{V} and \code{W} 211 | } 212 | 213 | \itemize{ 214 | \item \code{read_ipo}: Reads messages regarding IPOs. Message type \code{K} 215 | } 216 | 217 | \itemize{ 218 | \item \code{read_luld}: Reads messages regarding LULDs (limit up-limit down) 219 | auction collars. Message type \code{J} 220 | } 221 | 222 | \itemize{ 223 | \item \code{read_orders}: Reads order messages. Message type \code{A} and \code{F} 224 | } 225 | 226 | \itemize{ 227 | \item \code{read_modifications}: Reads order modification messages. 
Message 228 | type \code{E}, \code{C}, \code{X}, \code{D}, and \code{U} 229 | } 230 | 231 | \itemize{ 232 | \item \code{read_trades}: Reads trade messages. Message type \code{P}, \code{Q} and \code{B} 233 | } 234 | 235 | \itemize{ 236 | \item \code{read_noii}: Reads Net Order Imbalance Indication (NOII) messages. 237 | Message type \code{I} 238 | } 239 | 240 | \itemize{ 241 | \item \code{read_rpii}: Reads Retail Price Improvement Indicator (RPII) 242 | messages. Message type \code{N} 243 | } 244 | 245 | For backwards compatibility reasons, the following functions are provided as 246 | well: 247 | \itemize{ 248 | \item \code{get_orders}: Redirects to \code{read_orders} 249 | } 250 | 251 | \itemize{ 252 | \item \code{get_trades}: Redirects to \code{read_trades} 253 | } 254 | 255 | \itemize{ 256 | \item \code{get_modifications}: Redirects to \code{read_modifications} 257 | } 258 | } 259 | \examples{ 260 | \dontshow{ 261 | data.table::setDTthreads(2) 262 | } 263 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 264 | od <- read_orders(file, quiet = FALSE) # note quiet = FALSE is the default 265 | tr <- read_trades(file, quiet = TRUE) 266 | 267 | ## Alternatively 268 | od <- read_itch(file, "orders", quiet = TRUE) 269 | 270 | ll <- read_itch(file, c("orders", "trades"), quiet = TRUE) 271 | 272 | od 273 | tr 274 | str(ll, max.level = 1) 275 | 276 | ## additional options: 277 | 278 | # take only subset of messages 279 | od <- read_orders(file, skip = 3, n_max = 10) 280 | 281 | # a message count can be provided for slightly faster reads 282 | msg_count <- count_messages(file, quiet = TRUE) 283 | od <- read_orders(file, n_max = msg_count) 284 | 285 | ## .gz archive functionality 286 | # .gz archives will be automatically unzipped 287 | gz_file <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH") 288 | od <- read_orders(gz_file) 289 | # force a decompress and delete the decompressed file afterwards 290 | od <- 
read_orders(gz_file, force_gunzip = TRUE, force_cleanup = TRUE) 291 | 292 | ## read_itch() 293 | otm <- read_itch(file, c("orders", "trades"), quiet = TRUE) 294 | str(otm, max.level = 1) 295 | 296 | ## read_system_events() 297 | se <- read_system_events(file, add_descriptions = TRUE, quiet = TRUE) 298 | se 299 | 300 | ## read_stock_directory() 301 | sd <- read_stock_directory(file, add_descriptions = TRUE, quiet = TRUE) 302 | sd 303 | 304 | ## read_trading_status() 305 | ts <- read_trading_status(file, add_descriptions = TRUE, quiet = TRUE) 306 | ts 307 | 308 | ## read_reg_sho() 309 | \dontrun{ 310 | # note the example file has no reg SHO messages 311 | rs <- read_reg_sho(file, add_descriptions = TRUE, quiet = TRUE) 312 | rs 313 | } 314 | 315 | ## read_market_participant_states() 316 | \dontrun{ 317 | # note the example file has no market participant states 318 | mps <- read_market_participant_states(file, add_descriptions = TRUE, 319 | quiet = TRUE) 320 | mps 321 | } 322 | 323 | ## read_mwcb() 324 | \dontrun{ 325 | # note the example file has no circuit breakers messages 326 | mwcb <- read_mwcb(file, quiet = TRUE) 327 | mwcb 328 | } 329 | 330 | ## read_ipo() 331 | \dontrun{ 332 | # note the example file has no IPOs 333 | ipo <- read_ipo(file, add_descriptions = TRUE, quiet = TRUE) 334 | ipo 335 | } 336 | 337 | ## read_luld() 338 | \dontrun{ 339 | # note the example file has no LULD messages 340 | luld <- read_luld(file, quiet = TRUE) 341 | luld 342 | } 343 | 344 | ## read_orders() 345 | od <- read_orders(file, quiet = TRUE) 346 | od 347 | 348 | ## read_modifications() 349 | mod <- read_modifications(file, quiet = TRUE) 350 | mod 351 | 352 | ## read_trades() 353 | tr <- read_trades(file, quiet = TRUE) 354 | tr 355 | 356 | ## read_noii() 357 | \dontrun{ 358 | # note the example file has no NOII messages 359 | noii <- read_noii(file, add_descriptions = TRUE, quiet = TRUE) 360 | noii 361 | } 362 | 363 | ## read_rpii() 364 | \dontrun{ 365 | # note the example file has 
no RPII messages 366 | rpii <- read_rpii(file, add_descriptions = TRUE, quiet = TRUE) 367 | rpii 368 | } 369 | } 370 | \references{ 371 | \url{https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf} 372 | } 373 | -------------------------------------------------------------------------------- /man/write_itch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write_itch.R 3 | \name{write_itch} 4 | \alias{write_itch} 5 | \title{Writes a data.frame or a list of data.frames of ITCH messages to file} 6 | \usage{ 7 | write_itch( 8 | ll, 9 | file, 10 | add_meta = TRUE, 11 | append = FALSE, 12 | compress = FALSE, 13 | buffer_size = 1e+08, 14 | quiet = FALSE, 15 | append_warning = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{ll}{a data.frame or a list of data.frames of ITCH messages, in the format 20 | that the \code{\link[=read_functions]{read_functions()}} return} 21 | 22 | \item{file}{the filename of the target file. If the folder to the file does 23 | not exist, it will be created recursively} 24 | 25 | \item{add_meta}{if date and file information should be added to the filename. 26 | Default value is TRUE. Note that adding meta information changes the filename.} 27 | 28 | \item{append}{if the information should be appended to the file. Default value 29 | is FALSE} 30 | 31 | \item{compress}{if the file should be gzipped. Default value is FALSE. 32 | Note that if you compress a file, buffer_size matters a lot, with larger 33 | buffers you are more likely to get smaller filesizes in the end. 34 | Alternatively, but slower, is to write the file without compression fully 35 | and then gzip the file using another program.} 36 | 37 | \item{buffer_size}{the maximum buffer size. Default value is 1e8 (100MB). 
38 | Accepted values are > 52 and < 5e9} 39 | 40 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE} 41 | 42 | \item{append_warning}{if append is set, a warning about timestamp ordering is 43 | given. Set \code{append_warning = FALSE} to silence the warning. Default 44 | value is TRUE} 45 | } 46 | \value{ 47 | the filename (invisibly) 48 | } 49 | \description{ 50 | Note that additional information, e.g., columns that were added, will be 51 | dropped in the process and only ITCH-compliant information is saved. 52 | } 53 | \details{ 54 | Note that the ITCH filename contains the information for the date and exchange. 55 | This can be specified explicitly in the file argument or it is added if not 56 | turned off \code{add_meta = FALSE}. 57 | } 58 | \examples{ 59 | infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH") 60 | sys <- read_system_events(infile, quiet = TRUE) 61 | outfile <- tempfile() 62 | write_itch(sys, outfile) 63 | 64 | # create a list of events, stock directory, and orders and write to a file 65 | sdir <- read_stock_directory(infile, quiet = TRUE) 66 | od <- read_orders(infile, quiet = TRUE) 67 | 68 | ll <- list(sys, sdir, od) 69 | write_itch(ll, outfile) 70 | } 71 | -------------------------------------------------------------------------------- /simulate_dataset.R: -------------------------------------------------------------------------------- 1 | ############################## 2 | #' This script takes an existing dataset and samples and obfuscates the data 3 | #' to create a smaller testing/example dataset. 
4 | #' 5 | #' Messages that are sampled are: 6 | #' - System Event Messages 7 | #' - Stock Directory 8 | #' - Trading Status 9 | #' - Orders 10 | #' - Modifications 11 | #' - Trades 12 | #' 13 | ############################## 14 | 15 | library(RITCH) 16 | library(data.table) 17 | 18 | # take 3 most traded stocks in orders, trades 19 | file <- "20191230.BX_ITCH_50" 20 | 21 | loc_code <- read_stock_directory(file, add_meta = FALSE, quiet = TRUE) 22 | trades <- read_trades(file, add_meta = FALSE, quiet = TRUE) 23 | orders <- read_orders(file, add_meta = FALSE, quiet = TRUE) 24 | mods <- read_modifications(file, add_meta = FALSE, quiet = TRUE) 25 | 26 | names_trades <- names(trades) 27 | names_orders <- names(orders) 28 | names_mods <- names(mods) 29 | 30 | # look at the most active stocks 31 | orders[, .(n = .N), by = stock][order(-n)][1:3] 32 | trades[, .(n = .N), by = stock][order(-n)][1:3] 33 | merge( 34 | mods[, .(n = .N), by = stock_locate][order(-n)][1:3], 35 | loc_code[, .(stock_locate, stock)], by = "stock_locate", all.x = TRUE 36 | ) 37 | 38 | # take the following stocks as a base 39 | stock_select <- c("TSLA" = "ALC", "NIO" = "BOB", "BABA" = "CHAR") 40 | 41 | loc_codes <- loc_code[ 42 | stock %chin% names(stock_select) 43 | ][, 44 | .(stock_old = stock, 45 | old_loc_code = stock_locate, 46 | stock = stock_select[stock]) 47 | ][order(stock)][, stock_locate := 1:.N][] 48 | 49 | # removes price outliers outside of a given sigma range... 
50 | remove_price_outliers <- function(dt, sigma = 3) { # dt needs the columns stock, timestamp, price and buy; rows whose standardised deviation from the rolling mean is not strictly inside (-sigma, sigma) are dropped 51 | dd <- dt[] # NOTE(review): dt[] may not copy the table; if it does not, setorder() and := below also modify the caller's dt by reference - confirm, or use copy(dt) 52 | setorder(dd, stock, timestamp) 53 | dd[, rmean := frollmean(price, 100, align = "left"), by = stock][, rmean := nafill(rmean, type = "locf"), by = stock] # left-aligned 100-row rolling mean of price per stock, NAs filled with the last observation carried forward 54 | dd[, diff := (price - rmean), by = stock] 55 | dd[, diff := (diff - mean(diff, na.rm = TRUE)) / sd(diff, na.rm = TRUE), by = .(buy, stock)] # standardise the deviation within each (buy, stock) group 56 | dd <- dd[diff > -sigma & diff < sigma] 57 | 58 | dd[, -c("diff", "rmean")] # return the table without the helper columns 59 | } 60 | 61 | # obfuscates prices in a "standard" way: rescales the price column linearly with the per-stock constants of price_info (dt needs the columns stock and price), rounds to 4 decimals and drops the helper columns again 62 | obfuscate_prices <- function(dt) { 63 | price_info <- data.table(stock = c("ALC", "BOB", "CHAR"), 64 | tar_min_price = c(180, 45, 90), 65 | tar_range = c(20, 5, 15), 66 | est_min_price = c(410, 2.5, 210), 67 | est_range = c(30, 6, 6)) 68 | 69 | dd <- merge(dt, price_info, by = "stock", all.x = TRUE) 70 | # dd[, ':=' ( 71 | # min_price = min(price), 72 | # price_range = max(price) - min(price) 73 | # ), by = stock] 74 | 75 | # scale the price by the base prices: (price - est_min_price) / est_range * tar_range + tar_range. NOTE(review): tar_min_price is merged in but never used and the offset added is tar_range - confirm whether tar_min_price was intended; presumably the shipped sample file was generated with the formula as written, so changing it changes the regenerated data 76 | dd[, price := (price - est_min_price) / est_range * (tar_range) + tar_range] 77 | dd[, price := round(price, 4)] 78 | return(dd[, -c("tar_min_price", "tar_range", "est_min_price", "est_range")]) 79 | } 80 | 81 | 82 | ###################### 83 | # Prepare System Event Messages: read them and add normal noise (sd = 1e10) to the timestamps 84 | set.seed(65411235) 85 | 86 | sys_ev <- read_system_events(file, add_meta = FALSE, quiet = TRUE) 87 | sys_ev[, timestamp := timestamp + rnorm(.N, 0, 1e10)] 88 | 89 | 90 | ###################### 91 | # Prepare Stock Directory Messages: keep the selected stocks under their new names and overwrite the descriptive fields 92 | set.seed(76411948) 93 | 94 | stock_dir <- read_stock_directory(file, add_meta = FALSE, quiet = TRUE) 95 | names_dir <- names(stock_dir) 96 | sdir <- stock_dir[stock %chin% names(stock_select)][, stock := stock_select[stock]][] 97 | 98 | valid_market_cat <- c("Q", "G", "S", "N", "A", "P", "Z", "V", " ") 99 | sdir[, ':='( 100 | market_category = sample(valid_market_cat, .N, replace = TRUE), 101 | financial_status = "N", 102 | issue_classification = "A", 103 | ipo_flag = FALSE, 104 | 
luld_price_tier = 2, 105 | etp_leverage = 0, 106 | stock_locate = NULL 107 | )] 108 | sdir <- sdir[loc_codes[, .(stock, stock_locate)], on = "stock"] 109 | setorder(sdir, stock) 110 | # rearrange timestamp to fit alphabetic stock names 111 | sdir[, timestamp := sort(timestamp)] 112 | setcolorder(sdir, names_dir) 113 | 114 | ###################### 115 | # Prepare Trading Status Messages 116 | set.seed(198179841) 117 | 118 | trad_stat <- read_trading_status(file, add_meta = FALSE, quiet = TRUE) 119 | names_stat <- names(trad_stat) 120 | 121 | # shuffle the timestamps and rename the stocks 122 | trstat <- trad_stat[stock_locate %in% loc_codes$old_loc_code][ 123 | , ':='( 124 | timestamp = timestamp + rnorm(.N, 0, 1e8), 125 | stock = stock_select[stock] 126 | ) 127 | ][] 128 | 129 | # add the new stock_locates 130 | trstat <- merge(trstat[, -c("stock_locate")], 131 | loc_codes[, .(stock, stock_locate)], 132 | by = "stock", all.x = TRUE) 133 | 134 | # order the timestamps by locate code... 135 | trstat[, timestamp := timestamp[order(-stock_locate)]] 136 | 137 | setcolorder(trstat, names_stat) 138 | 139 | ###################### 140 | # Prepare Orders Messages 141 | set.seed(654918413) 142 | N_ORDERS <- 5000 143 | 144 | # rename the stock and stock_locates 145 | or <- orders[stock %chin% names(stock_select)][, stock := stock_select[stock]] 146 | or <- merge(or[, -c("stock_locate")], loc_codes[, .(stock, stock_locate)]) 147 | 148 | or <- remove_price_outliers(or, 2) 149 | 150 | # Sample N orders 151 | or <- or[sample.int(.N, N_ORDERS)] 152 | # change timestamp 153 | or <- or[, timestamp := timestamp + rnorm(.N, 0, 1e6)][order(timestamp)] 154 | 155 | # treat order_ref 156 | MIN_ORDER_REF <- min(or$order_ref) 157 | or[, order_ref := order_ref - MIN_ORDER_REF] 158 | 159 | # obfuscate prices 160 | or <- obfuscate_prices(or) 161 | setcolorder(or, names_orders) 162 | 163 | 164 | ###################### 165 | # Prepare Trades Messages 166 | set.seed(7451984) 167 | N_TRADES <- 1000 
168 | 169 | tr <- trades[stock %chin% names(stock_select)][, stock := stock_select[stock]] 170 | tr <- merge(tr[, -c("stock_locate")], loc_codes[, .(stock, stock_locate)]) 171 | 172 | tr <- remove_price_outliers(tr, 2) 173 | 174 | # Sample the trades. NOTE(review): the sample size used here is N_ORDERS; N_TRADES is defined just above this section but is not referenced anywhere in this script - confirm which count is intended before changing it, as the size of the generated sample file depends on it 175 | tr <- tr[sample.int(.N, N_ORDERS)] 176 | # change timestamp: add normal noise (sd = 1e6) and re-sort by the new timestamp 177 | tr <- tr[, timestamp := timestamp + rnorm(.N, 0, 1e6)][order(timestamp)] 178 | 179 | tr <- obfuscate_prices(tr) 180 | setcolorder(tr, names_trades) 181 | 182 | 183 | ###################### 184 | # Prepare Modifications Messages 185 | set.seed(78632176) 186 | N_MODS <- 2000 187 | 188 | md <- mods[stock_locate %in% loc_codes$old_loc_code][, old_loc_code := stock_locate] 189 | md <- merge(md[, -c("stock_locate")], 190 | loc_codes[, .(stock, stock_locate, old_loc_code)], 191 | by = "old_loc_code")[, -c("old_loc_code")] 192 | 193 | # shift order_ref by MIN_ORDER_REF (as done for the orders) and keep only modifications whose order_ref occurs in the sampled orders 194 | md[, order_ref := order_ref - MIN_ORDER_REF] 195 | md <- md[order_ref %in% or$order_ref] 196 | 197 | md <- md[sample.int(.N, N_MODS)] 198 | 199 | md <- obfuscate_prices(md) 200 | md[, stock := NULL] 201 | setcolorder(md, names_mods) 202 | 203 | 204 | ######################################## 205 | # Combine datasets and write to file 206 | 207 | ll <- list( 208 | sys_ev, 209 | sdir, 210 | trstat, 211 | or, 212 | tr, 213 | md 214 | ) 215 | 216 | # write the dataset to file, once as is and once with compress = TRUE 217 | if (!dir.exists("inst/extdata")) dir.create("inst/extdata") 218 | outfile <- "inst/extdata/ex20101224.TEST_ITCH_50" 219 | 220 | write_itch(ll, outfile, add_meta = FALSE, quiet = TRUE) 221 | write_itch(ll, outfile, compress = TRUE, add_meta = FALSE, quiet = TRUE) 222 | 223 | cat(sprintf("Wrote sample dataset to '%s' with size '%.2f'KB\n", 224 | outfile, file.info(outfile)[["size"]] / 1024)) 225 | 226 | ####################################### 227 | # Read in the dataset and compare results 228 | funcs <- list(read_system_events, read_stock_directory, read_trading_status, 229 | 
read_trades, read_modifications) 230 | 231 | ll_read <- lapply(funcs, function(f) f(outfile, quiet = TRUE, add_meta = FALSE)) 232 | all.equal(ll, ll_read, check.attributes = FALSE) 233 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_LIBS = -lz 2 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 9 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 10 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 11 | #endif 12 | 13 | // count_messages_impl 14 | Rcpp::DataFrame count_messages_impl(std::string filename, int64_t max_buffer_size, bool quiet); 15 | RcppExport SEXP _RITCH_count_messages_impl(SEXP filenameSEXP, SEXP max_buffer_sizeSEXP, SEXP quietSEXP) { 16 | BEGIN_RCPP 17 | Rcpp::RObject rcpp_result_gen; 18 | Rcpp::RNGScope rcpp_rngScope_gen; 19 | Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP); 20 | Rcpp::traits::input_parameter< int64_t >::type max_buffer_size(max_buffer_sizeSEXP); 21 | Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP); 22 | rcpp_result_gen = Rcpp::wrap(count_messages_impl(filename, max_buffer_size, quiet)); 23 | return rcpp_result_gen; 24 | END_RCPP 25 | } 26 | // filter_itch_impl 27 | void filter_itch_impl(std::string infile, std::string outfile, int64_t start, int64_t end, Rcpp::CharacterVector filter_msg_type, Rcpp::IntegerVector filter_stock_locate, Rcpp::NumericVector min_timestamp, Rcpp::NumericVector max_timestamp, bool append, int64_t max_buffer_size, bool quiet); 28 | RcppExport SEXP 
_RITCH_filter_itch_impl(SEXP infileSEXP, SEXP outfileSEXP, SEXP startSEXP, SEXP endSEXP, SEXP filter_msg_typeSEXP, SEXP filter_stock_locateSEXP, SEXP min_timestampSEXP, SEXP max_timestampSEXP, SEXP appendSEXP, SEXP max_buffer_sizeSEXP, SEXP quietSEXP) { 29 | BEGIN_RCPP 30 | Rcpp::RNGScope rcpp_rngScope_gen; 31 | Rcpp::traits::input_parameter< std::string >::type infile(infileSEXP); 32 | Rcpp::traits::input_parameter< std::string >::type outfile(outfileSEXP); 33 | Rcpp::traits::input_parameter< int64_t >::type start(startSEXP); 34 | Rcpp::traits::input_parameter< int64_t >::type end(endSEXP); 35 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type filter_msg_type(filter_msg_typeSEXP); 36 | Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type filter_stock_locate(filter_stock_locateSEXP); 37 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type min_timestamp(min_timestampSEXP); 38 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type max_timestamp(max_timestampSEXP); 39 | Rcpp::traits::input_parameter< bool >::type append(appendSEXP); 40 | Rcpp::traits::input_parameter< int64_t >::type max_buffer_size(max_buffer_sizeSEXP); 41 | Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP); 42 | filter_itch_impl(infile, outfile, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, append, max_buffer_size, quiet); 43 | return R_NilValue; 44 | END_RCPP 45 | } 46 | // gunzip_file_impl 47 | void gunzip_file_impl(std::string infile, std::string outfile, int64_t buffer_size); 48 | RcppExport SEXP _RITCH_gunzip_file_impl(SEXP infileSEXP, SEXP outfileSEXP, SEXP buffer_sizeSEXP) { 49 | BEGIN_RCPP 50 | Rcpp::RNGScope rcpp_rngScope_gen; 51 | Rcpp::traits::input_parameter< std::string >::type infile(infileSEXP); 52 | Rcpp::traits::input_parameter< std::string >::type outfile(outfileSEXP); 53 | Rcpp::traits::input_parameter< int64_t >::type buffer_size(buffer_sizeSEXP); 54 | gunzip_file_impl(infile, outfile, buffer_size); 55 | 
return R_NilValue; 56 | END_RCPP 57 | } 58 | // gzip_file_impl 59 | void gzip_file_impl(std::string infile, std::string outfile, int64_t buffer_size); 60 | RcppExport SEXP _RITCH_gzip_file_impl(SEXP infileSEXP, SEXP outfileSEXP, SEXP buffer_sizeSEXP) { 61 | BEGIN_RCPP 62 | Rcpp::RNGScope rcpp_rngScope_gen; 63 | Rcpp::traits::input_parameter< std::string >::type infile(infileSEXP); 64 | Rcpp::traits::input_parameter< std::string >::type outfile(outfileSEXP); 65 | Rcpp::traits::input_parameter< int64_t >::type buffer_size(buffer_sizeSEXP); 66 | gzip_file_impl(infile, outfile, buffer_size); 67 | return R_NilValue; 68 | END_RCPP 69 | } 70 | // read_itch_impl 71 | Rcpp::List read_itch_impl(std::vector classes, std::string filename, int64_t start, int64_t end, Rcpp::CharacterVector filter_msg_type, Rcpp::IntegerVector filter_stock_locate, Rcpp::NumericVector min_timestamp, Rcpp::NumericVector max_timestamp, int64_t max_buffer_size, bool quiet); 72 | RcppExport SEXP _RITCH_read_itch_impl(SEXP classesSEXP, SEXP filenameSEXP, SEXP startSEXP, SEXP endSEXP, SEXP filter_msg_typeSEXP, SEXP filter_stock_locateSEXP, SEXP min_timestampSEXP, SEXP max_timestampSEXP, SEXP max_buffer_sizeSEXP, SEXP quietSEXP) { 73 | BEGIN_RCPP 74 | Rcpp::RObject rcpp_result_gen; 75 | Rcpp::RNGScope rcpp_rngScope_gen; 76 | Rcpp::traits::input_parameter< std::vector >::type classes(classesSEXP); 77 | Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP); 78 | Rcpp::traits::input_parameter< int64_t >::type start(startSEXP); 79 | Rcpp::traits::input_parameter< int64_t >::type end(endSEXP); 80 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type filter_msg_type(filter_msg_typeSEXP); 81 | Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type filter_stock_locate(filter_stock_locateSEXP); 82 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type min_timestamp(min_timestampSEXP); 83 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type 
max_timestamp(max_timestampSEXP); 84 | Rcpp::traits::input_parameter< int64_t >::type max_buffer_size(max_buffer_sizeSEXP); 85 | Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP); 86 | rcpp_result_gen = Rcpp::wrap(read_itch_impl(classes, filename, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, max_buffer_size, quiet)); 87 | return rcpp_result_gen; 88 | END_RCPP 89 | } 90 | // write_itch_impl 91 | int64_t write_itch_impl(Rcpp::List ll, std::string filename, bool append, bool gz, size_t max_buffer_size, bool quiet); 92 | RcppExport SEXP _RITCH_write_itch_impl(SEXP llSEXP, SEXP filenameSEXP, SEXP appendSEXP, SEXP gzSEXP, SEXP max_buffer_sizeSEXP, SEXP quietSEXP) { 93 | BEGIN_RCPP 94 | Rcpp::RObject rcpp_result_gen; 95 | Rcpp::RNGScope rcpp_rngScope_gen; 96 | Rcpp::traits::input_parameter< Rcpp::List >::type ll(llSEXP); 97 | Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP); 98 | Rcpp::traits::input_parameter< bool >::type append(appendSEXP); 99 | Rcpp::traits::input_parameter< bool >::type gz(gzSEXP); 100 | Rcpp::traits::input_parameter< size_t >::type max_buffer_size(max_buffer_sizeSEXP); 101 | Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP); 102 | rcpp_result_gen = Rcpp::wrap(write_itch_impl(ll, filename, append, gz, max_buffer_size, quiet)); 103 | return rcpp_result_gen; 104 | END_RCPP 105 | } 106 | 107 | static const R_CallMethodDef CallEntries[] = { 108 | {"_RITCH_count_messages_impl", (DL_FUNC) &_RITCH_count_messages_impl, 3}, 109 | {"_RITCH_filter_itch_impl", (DL_FUNC) &_RITCH_filter_itch_impl, 11}, 110 | {"_RITCH_gunzip_file_impl", (DL_FUNC) &_RITCH_gunzip_file_impl, 3}, 111 | {"_RITCH_gzip_file_impl", (DL_FUNC) &_RITCH_gzip_file_impl, 3}, 112 | {"_RITCH_read_itch_impl", (DL_FUNC) &_RITCH_read_itch_impl, 10}, 113 | {"_RITCH_write_itch_impl", (DL_FUNC) &_RITCH_write_itch_impl, 6}, 114 | {NULL, NULL, 0} 115 | }; 116 | 117 | RcppExport void R_init_RITCH(DllInfo *dll) { 118 | 
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 119 | R_useDynamicSymbols(dll, FALSE); 120 | } 121 | -------------------------------------------------------------------------------- /src/count_messages.cpp: -------------------------------------------------------------------------------- 1 | #include "count_messages.h" 2 | 3 | #ifdef __APPLE__ 4 | # define fseeko64 fseeko 5 | # define ftello64 ftello 6 | #endif 7 | 8 | // counts messages in a file 9 | std::vector count_messages_internal(std::string filename, 10 | int64_t max_buffer_size) { 11 | FILE* infile; 12 | infile = fopen(filename.c_str(), "rb"); 13 | if (infile == NULL) { 14 | char buffer [50]; 15 | snprintf(buffer, sizeof(buffer), "File Error number %i!", errno); 16 | Rcpp::stop(buffer); 17 | } 18 | 19 | // get size of the file 20 | if (fseeko64(infile, 0L, SEEK_END) != 0) { 21 | Rcpp::stop("Error seeking to end of file"); 22 | } 23 | int64_t filesize = ftello64(infile); 24 | if (filesize == -1) { 25 | Rcpp::stop("Error getting file size"); 26 | } 27 | if (fseeko64(infile, 0L, SEEK_SET) != 0) { 28 | Rcpp::stop("Error seeking back to start of file"); 29 | } 30 | 31 | // create buffer 32 | int64_t buf_size = max_buffer_size > filesize ? 
filesize : max_buffer_size; 33 | unsigned char * buf; 34 | buf = (unsigned char*) malloc(buf_size); 35 | 36 | int64_t bytes_read = 0, this_buffer_size = 0; 37 | std::vector count(sizeof(MSG_SIZES)/sizeof(MSG_SIZES[0])); 38 | 39 | while (bytes_read < filesize) { 40 | Rcpp::checkUserInterrupt(); 41 | 42 | // read in buffer buffers 43 | this_buffer_size = fread(buf, 1, buf_size, infile); 44 | int64_t i = 0; 45 | 46 | int msg_size = 0; 47 | do { 48 | msg_size = get_message_size(buf[i + 2]); 49 | 50 | count[buf[i + 2] - 'A']++; 51 | i += msg_size; 52 | 53 | } while (i + msg_size <= this_buffer_size && bytes_read + i <= filesize); 54 | 55 | // align the file pointer to read in a full message again 56 | const int64_t offset = i - this_buffer_size; 57 | fseeko64(infile, offset, SEEK_CUR); 58 | bytes_read += i; 59 | } 60 | 61 | free(buf); 62 | fclose(infile); 63 | return count; 64 | } 65 | 66 | // [[Rcpp::export]] 67 | Rcpp::DataFrame count_messages_impl(std::string filename, 68 | int64_t max_buffer_size, 69 | bool quiet) { 70 | 71 | std::vector ct_raw = count_messages_internal(filename, max_buffer_size); 72 | std::vector count = take_needed_messages(ct_raw); 73 | 74 | int64_t total_msgs = 0; 75 | for (int64_t v : count) total_msgs += v; 76 | 77 | if (!quiet) Rprintf("[Counting] %s total messages found\n", 78 | format_thousands(total_msgs).c_str()); 79 | 80 | if (!quiet) Rprintf("[Converting] to data.table\n"); 81 | 82 | Rcpp::CharacterVector names; 83 | for (const unsigned char c : ACT_MSG_NAMES) names.push_back(std::string(1, c)); 84 | 85 | Rcpp::NumericVector ct(N_ACT_MSGS); 86 | ct.attr("class") = "integer64"; 87 | std::memcpy(&(ct[0]), &(count[0]), N_ACT_MSGS * sizeof(double)); 88 | 89 | Rcpp::List res = Rcpp::List::create( 90 | Rcpp::Named("msg_type") = names, 91 | Rcpp::Named("count") = ct 92 | ); 93 | 94 | res.attr("class") = Rcpp::CharacterVector::create("data.table", "data.frame"); 95 | return res; 96 | } 97 | 
-------------------------------------------------------------------------------- /src/count_messages.h: -------------------------------------------------------------------------------- 1 | #ifndef COUNTMESSAGES_H 2 | #define COUNTMESSAGES_H 3 | 4 | #include 5 | #include "specifications.h" 6 | #include "helper_functions.h" 7 | 8 | // internal main worker function that counts the messages 9 | std::vector count_messages_internal(std::string filename, 10 | int64_t max_buffer_size); 11 | 12 | // Entry function for returning the count data.frame 13 | Rcpp::DataFrame count_messages_impl(std::string filename, 14 | int64_t max_buffer_size = 1e8, 15 | bool quiet = false); 16 | 17 | #endif // COUNTMESSAGES_H -------------------------------------------------------------------------------- /src/filter_itch.cpp: -------------------------------------------------------------------------------- 1 | #include "filter_itch.h" 2 | 3 | #ifdef __APPLE__ 4 | # define fseeko64 fseeko 5 | # define ftello64 ftello 6 | #endif 7 | 8 | // [[Rcpp::export]] 9 | void filter_itch_impl(std::string infile, std::string outfile, 10 | int64_t start, int64_t end, 11 | Rcpp::CharacterVector filter_msg_type, 12 | Rcpp::IntegerVector filter_stock_locate, 13 | Rcpp::NumericVector min_timestamp, 14 | Rcpp::NumericVector max_timestamp, 15 | bool append, 16 | int64_t max_buffer_size, 17 | bool quiet) { 18 | 19 | // treat filters 20 | std::vector filter_msgs; 21 | std::vector filter_sloc; 22 | 23 | for (auto f : filter_msg_type) filter_msgs.push_back(Rcpp::as(f)); 24 | for (int s : filter_stock_locate) filter_sloc.push_back(s); 25 | 26 | const size_t ts_size = min_timestamp.size(); 27 | std::vector min_ts(ts_size); 28 | if (ts_size > 0) 29 | std::memcpy(&(min_ts[0]), &(min_timestamp[0]), ts_size * sizeof(int64_t)); 30 | 31 | std::vector max_ts(ts_size); 32 | if (ts_size > 0) 33 | std::memcpy(&(max_ts[0]), &(max_timestamp[0]), ts_size * sizeof(int64_t)); 34 | if (max_ts.size() == 1 && max_ts[0] == -1) 35 | 
max_ts[0] = std::numeric_limits::max(); 36 | 37 | // get the max_ts_value! 38 | int64_t max_ts_val = -1; 39 | for (auto t : max_ts) if (t > max_ts_val) max_ts_val = t; 40 | if (max_ts_val == -1) max_ts_val = std::numeric_limits::max(); 41 | 42 | if (end < 0) end = std::numeric_limits::max(); 43 | 44 | if (filter_msgs.size() == 0 && 45 | filter_sloc.size() == 0 && 46 | min_ts.size() == 0 && 47 | max_ts.size() == 0 && 48 | start == 0 && 49 | end == -1) 50 | Rcpp::stop("No filters where set, aborting filter process!"); 51 | 52 | // parse the messages 53 | // redirect to the correct msg types only 54 | FILE* ifile; 55 | ifile = fopen(infile.c_str(), "rb"); 56 | if (ifile == NULL) { 57 | char buffer [50]; 58 | snprintf(buffer, sizeof(buffer), "Input File Error number %i!", errno); 59 | Rcpp::stop(buffer); 60 | } 61 | 62 | FILE* ofile; 63 | std::string omode = append ? "ab" : "wb"; 64 | ofile = fopen(outfile.c_str(), omode.c_str()); 65 | if (ofile == NULL) { 66 | char buffer [50]; 67 | snprintf(buffer, sizeof(buffer), "Output File Error number %i!", errno); 68 | Rcpp::stop(buffer); 69 | } 70 | 71 | // get size of the file 72 | if (fseeko64(ifile, 0L, SEEK_END) != 0) { 73 | Rcpp::stop("Error seeking to end of file"); 74 | } 75 | int64_t filesize = ftello64(ifile); 76 | if (filesize == -1) { 77 | Rcpp::stop("Error getting file size"); 78 | } 79 | if (fseeko64(ifile, 0L, SEEK_SET) != 0) { 80 | Rcpp::stop("Error seeking back to start of file"); 81 | } 82 | 83 | // create buffer 84 | int64_t buf_size = max_buffer_size > filesize ? 
filesize : max_buffer_size; 85 | unsigned char * ibuf; 86 | unsigned char * obuf; 87 | ibuf = (unsigned char*) malloc(buf_size); 88 | obuf = (unsigned char*) malloc(buf_size); 89 | // Rprintf("Allocating buffer to size %lld\n", buf_size); 90 | 91 | int64_t bytes_read = 0, this_buffer_size = 0, bytes_written = 0; 92 | int64_t msg_read = 0, msg_count = 0; 93 | std::vector msg_reads(MSG_CLASS_SIZE, 0); 94 | 95 | int64_t o = 0; 96 | int msg_size; 97 | bool max_ts_reached = false; 98 | 99 | while (bytes_read < filesize && !max_ts_reached) { 100 | Rcpp::checkUserInterrupt(); 101 | 102 | // read in buffer buffers 103 | this_buffer_size = fread(ibuf, 1, buf_size, ifile); 104 | int64_t i = 0; 105 | msg_size = 0; 106 | 107 | do { 108 | // check early stop in max_timestamp 109 | const int64_t cur_ts = getNBytes64<6>(&ibuf[i + 2 + 5]); 110 | if (cur_ts > max_ts_val) { 111 | max_ts_reached = true; 112 | break; 113 | } 114 | 115 | const unsigned char mt = ibuf[i + 2]; 116 | // Check Filter Messages 117 | bool parse_message = true; 118 | // only check the filter if previous tests are all OK 119 | if (parse_message) 120 | parse_message = passes_filter(&ibuf[i + 2], filter_msgs); 121 | if (parse_message) 122 | parse_message = passes_filter(&ibuf[i + 2 + 1], filter_sloc); 123 | if (parse_message) 124 | parse_message = passes_filter_in(&ibuf[i + 2 + 5], min_ts, max_ts); 125 | // use TYPE_CLASS_TRANSLATOR as we count per message class not per msg_type! 
126 | if (parse_message) { 127 | // count here the msg_reads to make sure that the count is within the 128 | // other filters 129 | parse_message = msg_reads[TYPE_CLASS_TRANSLATOR[mt - 'A']] >= start && 130 | msg_reads[TYPE_CLASS_TRANSLATOR[mt - 'A']] <= end; 131 | msg_reads[TYPE_CLASS_TRANSLATOR[mt - 'A']]++; 132 | } 133 | 134 | msg_size = get_message_size(mt); 135 | 136 | if (o + msg_size > buf_size) { 137 | // write to buffer until o 138 | // Rprintf("New obuf, write %9lld bytes to ofile next msg %i\n", 139 | // o, msg_size); 140 | fwrite(obuf, sizeof(unsigned char), o, ofile); 141 | // reset obuf 142 | std::memset(obuf, 0x00, buf_size); 143 | 144 | bytes_written += o; 145 | o = 0; 146 | } 147 | 148 | if (parse_message) { 149 | // Rprintf("Filter ibuf at %lld copy into obuf at %lld\n", 150 | // i, o); 151 | msg_read++; 152 | // Rprintf("Copying '%i' from ibuf at %lld to obuf at %lld\n", 153 | // msg_size, i, o); 154 | std::memcpy(&(obuf[o]), &(ibuf[i]), msg_size); 155 | o += msg_size; 156 | // msg_reads[TYPE_CLASS_TRANSLATOR[mt - 'A']]++; 157 | } 158 | 159 | msg_count++; 160 | i += msg_size; 161 | // 50 = max msg_size 162 | } while (i + 50 <= this_buffer_size && bytes_read + i <= filesize); 163 | 164 | // offset file pointer to fit the next message into the buffer 165 | const int64_t offset = i - this_buffer_size; 166 | // Rprintf("Filter ibuf at %6lld offsetting by %3lld - Total bytes read %lld\n", 167 | // i, offset, bytes_read + i); 168 | fseeko64(ifile, offset, SEEK_CUR); 169 | bytes_read += i; 170 | } 171 | 172 | if (o > 0) { 173 | // write to buffer until o 174 | // Rprintf("Last obuf, write %9lld bytes to ofile\n", o); 175 | fwrite(obuf, sizeof(unsigned char), o, ofile); 176 | } 177 | 178 | if (!quiet) { 179 | Rprintf("[Bytes] scanned %lld, filtered %lld\n", 180 | (long long int) filesize, (long long int) bytes_written + o); 181 | Rprintf("[Messages] scanned %lld, filtered %lld\n", 182 | (long long int) msg_count, (long long int) msg_read); 183 | } 184 | 
185 | free(ibuf); 186 | fclose(ifile); 187 | 188 | free(obuf); 189 | fclose(ofile); 190 | } 191 | -------------------------------------------------------------------------------- /src/filter_itch.h: -------------------------------------------------------------------------------- 1 | #ifndef FILTERITCH_H 2 | #define FILTERITCH_H 3 | 4 | #include 5 | #include "specifications.h" 6 | #include "helper_functions.h" 7 | 8 | void filter_itch_impl(std::string infile, std::string outfile, 9 | int64_t start, int64_t end, 10 | Rcpp::CharacterVector filter_msg_type, 11 | Rcpp::IntegerVector filter_stock_locate, 12 | Rcpp::NumericVector min_timestamp, 13 | Rcpp::NumericVector max_timestamp, 14 | bool append = false, 15 | int64_t max_buffer_size = 1e8, 16 | bool quiet = false); 17 | 18 | #endif // FILTERITCH_H -------------------------------------------------------------------------------- /src/gz_functionality.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /** 5 | * @brief Inflates (uncompresses) a gz file of binary data 6 | * 7 | * @param infile The name of the compressed gz archive 8 | * @param outfile The name of the uncompressed target file (make sure it does not exist before for faster speeds!) 9 | * @param buffer_size the size of the buffer, default is 1e9 bytes. 10 | */ 11 | // [[Rcpp::export]] 12 | void gunzip_file_impl(std::string infile, 13 | std::string outfile, 14 | int64_t buffer_size = 1e9) { 15 | 16 | gzFile gzfile = gzopen(infile.c_str(), "rb"); 17 | if (gzfile == NULL) { 18 | Rcpp::stop("Could not open file '%s' for gunzip", infile.c_str()); 19 | } 20 | 21 | unsigned char* buf; 22 | int64_t buffer_char_size = sizeof(unsigned char) * buffer_size > UINT_MAX ? 
23 | UINT_MAX : 24 | sizeof(unsigned char) * buffer_size; 25 | buf = (unsigned char*) malloc(buffer_char_size); 26 | 27 | int64_t this_buffer_size; 28 | 29 | FILE* ofile = fopen(outfile.c_str(), "wb"); 30 | if (ofile == NULL) { 31 | Rcpp::stop("Could not open file '%s' for gunzip", outfile.c_str()); 32 | } 33 | // iterate over the file until the all information is gathered 34 | 35 | while (1) { 36 | // fill the buffer 37 | this_buffer_size = gzread(gzfile, buf, buffer_char_size); 38 | // write the buffer 39 | fwrite(&buf[0], 1, this_buffer_size, ofile); 40 | 41 | // check if the read buffer is smaller than the asked size 42 | if (this_buffer_size < buffer_char_size || this_buffer_size == 0) { 43 | break; 44 | } 45 | } 46 | 47 | free(buf); 48 | fclose(ofile); 49 | gzclose(gzfile); 50 | } 51 | 52 | 53 | /** 54 | * @brief Deflates (compresses) a gz file of binary data 55 | * 56 | * @param infile The name of the raw uncompressed file 57 | * @param outfile The name of the compressed target file (make sure it does not exist before for faster speeds!) 58 | * @param buffer_size the size of the buffer, default is 1e9 bytes. 59 | */ 60 | // [[Rcpp::export]] 61 | void gzip_file_impl(std::string infile, 62 | std::string outfile, 63 | int64_t buffer_size = 1e9) { 64 | 65 | FILE* file = fopen(infile.c_str(), "rb"); 66 | if (file == NULL) { 67 | Rcpp::stop("Could not open file %s for gzip", infile.c_str()); 68 | } 69 | 70 | unsigned char* buf; 71 | int64_t buffer_char_size = sizeof(unsigned char) * buffer_size > UINT_MAX ? 
72 | UINT_MAX : 73 | sizeof(unsigned char) * buffer_size; 74 | buf = (unsigned char*) malloc(buffer_char_size); 75 | 76 | int64_t this_buffer_size; 77 | 78 | gzFile ofile = gzopen(outfile.c_str(), "wb"); 79 | if (ofile == NULL) { 80 | Rcpp::stop("Could not open file %s for gzip", outfile.c_str()); 81 | } 82 | // iterate over the file until the all information is gathered 83 | 84 | while (1) { 85 | // fill the buffer 86 | this_buffer_size = fread(buf, 1, buffer_char_size, file); 87 | // write the buffer 88 | gzwrite(ofile, &buf[0], this_buffer_size); 89 | 90 | // check if the read buffer is smaller than the asked size 91 | if (this_buffer_size < buffer_char_size || this_buffer_size == 0) { 92 | break; 93 | } 94 | } 95 | 96 | free(buf); 97 | fclose(file); 98 | gzclose(ofile); 99 | } 100 | -------------------------------------------------------------------------------- /src/helper_functions.cpp: -------------------------------------------------------------------------------- 1 | #include "helper_functions.h" 2 | 3 | 4 | // small helper function to get the message size for a char 5 | int get_message_size(const unsigned char msg) { 6 | return MSG_SIZES[msg - 'A'] + 2; 7 | } 8 | 9 | // the count_messages_internal function is optimized and therefore contains 10 | // unused messages (they are used for faster access speeds!) 
11 | // (see also Specifications.h) 12 | // this function extracts the needed message classes from the raw vector 13 | std::vector take_needed_messages(std::vector &v) { 14 | std::vector res; 15 | for (const unsigned char act_msg : ACT_MSG_NAMES) { 16 | size_t i = 0; 17 | for (const unsigned char msg : MSG_NAMES) { 18 | if (msg == act_msg) { 19 | res.push_back(v[i]); 20 | break; 21 | } 22 | i++; 23 | } 24 | } 25 | return res; 26 | } 27 | 28 | /* 29 | * @brief Formats an integer number to a std::string with thousands separator 30 | * 31 | * @param num The number to format 32 | * @param sep The thousands separator, default value is a comma 33 | * @param s The return string, this is only used internally, as the function 34 | * is called recursively 35 | * 36 | * @return The number as a string 37 | */ 38 | std::string format_thousands(int64_t num, 39 | const std::string sep, 40 | const std::string s) { 41 | if (num < 1000) { 42 | return std::to_string(num) + s; 43 | } else { 44 | std::string last_three = std::to_string(num % 1000); 45 | const int num_zeros = 3 - last_three.length(); 46 | last_three = std::string(num_zeros, '0').append(last_three); 47 | 48 | const int64_t remainder = (int64_t) num / 1000; 49 | const std::string res = sep + last_three + s; 50 | return format_thousands(remainder, sep, res); 51 | } 52 | } 53 | 54 | // ############################################################################# 55 | // small internal helper function to convert bytes etc 56 | // ############################################################################# 57 | 58 | // return N bytes of a buffer as a string 59 | std::string getNBytes(unsigned char* buf, const int n, const unsigned char empty) { 60 | std::string res; 61 | for (int i = 0; i < n; ++i) if (buf[i] != empty) res += buf[i]; 62 | return res; 63 | } 64 | 65 | // converts a Numeric Vector to int64 66 | Rcpp::NumericVector to_int64(Rcpp::NumericVector v) { 67 | v.attr("class") = "integer64"; 68 | return v; 69 | } 70 | 
71 | // helper functions that check if a buffer value is in a vector of filters 72 | // equivalent of R buf_val %in% filter 73 | bool passes_filter(unsigned char* buf, std::vector &filter) { 74 | if (filter.size() == 0) return true; 75 | for (unsigned char cc : filter) if (cc == *buf) return true; 76 | return false; 77 | } 78 | // same helper function as before but for int vector 79 | bool passes_filter(unsigned char* buf, std::vector &filter) { 80 | if (filter.size() == 0) return true; 81 | const int val = (int) getNBytes32<2>(&buf[0]); 82 | for (int cc : filter) if (cc == val) return true; 83 | return false; 84 | } 85 | // check larger/smaller inclusive for 6 byte numbers (timestamp) 86 | // equivalent to R (buf_val >= lower & buf_val <= upper) 87 | bool passes_filter_in(unsigned char* buf, 88 | std::vector &lower, 89 | std::vector &upper) { 90 | // lower and upper have the same size! 91 | if (lower.size() == 0) return true; 92 | const int64_t val = getNBytes64<6>(buf); 93 | for (size_t i = 0; i < lower.size(); i++) { 94 | if (val >= lower[i] && val <= upper[i]) return true; 95 | } 96 | 97 | return false; 98 | } 99 | 100 | // sets inside a unsigned char buffer b, 2 bytes from the value val, returns number of bytes changed 101 | // i.e., convert val = 8236 to 0x202c 102 | uint64_t set2bytes(unsigned char* b, int32_t val) { 103 | b[1] = val & 0xff; 104 | b[0] = (val >> 8) & 0xff; 105 | // Rprintf("Converting: %15i -> 0x %02x %02x\n", 106 | // val, b[0], b[1]); 107 | return 2; 108 | } 109 | 110 | // sets inside a unsigned char buffer b, 4 bytes from the value val, returns number of bytes changed 111 | // i.e., convert val = 11900 to 0x00002e7c 112 | uint64_t set4bytes(unsigned char* b, int32_t val) { 113 | b[3] = val & 0xffff; 114 | b[2] = (val >> 8) & 0xffff; 115 | b[1] = (val >> 16) & 0xffff; 116 | b[0] = (val >> 24) & 0xffff; 117 | // Rprintf("Converting: %15i -> 0x %02x %02x %02x %02x\n", 118 | // val, b[0], b[1], b[2], b[3]); 119 | return 4; 120 | } 121 | // 
sets inside a unsigned char buffer b, 6 bytes from the value val, returns number of bytes changed 122 | // i.e., 25200002107428 to 0x16eb552c8824 123 | uint64_t set6bytes(unsigned char* b, int64_t val) { 124 | b[5] = val & 0xffffff; 125 | b[4] = (val >> 8) & 0xffffff; 126 | b[3] = (val >> 16) & 0xffffff; 127 | b[2] = (val >> 24) & 0xffffff; 128 | b[1] = (val >> 32) & 0xffffff; 129 | b[0] = (val >> 40) & 0xffffff; 130 | // Rprintf("Converting: %15lld -> 0x %02x %02x %02x %02x %02x %02x\n", 131 | // (long long) val, b[0], b[1], b[2], b[3], b[4], b[5]); 132 | return 6; 133 | } 134 | // sets inside a unsigned char buffer b, 8 bytes from the value val, returns number of bytes changed 135 | // i.e., 4 to 0x0000000000000004 136 | uint64_t set8bytes(unsigned char* b, int64_t val) { 137 | b[7] = val & 0xffffffff; 138 | b[6] = (val >> 8) & 0xffffffff; 139 | b[5] = (val >> 16) & 0xffffffff; 140 | b[4] = (val >> 24) & 0xffffffff; 141 | b[3] = (val >> 32) & 0xffffffff; 142 | b[2] = (val >> 40) & 0xffffffff; 143 | b[1] = (val >> 48) & 0xffffffff; 144 | b[0] = (val >> 56) & 0xffffffff; 145 | // Rprintf("Converting: %15lld -> 0x %02x %02x %02x %02x %02x %02x %02x %02x\n", 146 | // (long long) val, b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]); 147 | return 8; 148 | } 149 | // sets inside a unsigned char buffer b, n bytes from the string x, returns number of bytes changed 150 | // i.e., "UFO" with 8 to 0x55534f2020202020 (filled with whitespaces) 151 | uint64_t setCharBytes(unsigned char* b, std::string x, uint64_t n) { 152 | unsigned char *st = new unsigned char[n + 1]; 153 | if (x.size() > n) 154 | Rprintf("ERROR: setChar Bytes for string '%s' larger than capacity %llu\n", 155 | x.c_str(), (long long unsigned int) n); 156 | for (uint64_t j = 0; j < n; j++) st[j] = ' '; // fill with n spaces 157 | for (uint64_t j = 0; j < x.size(); j++) st[j] = x[j]; // copy the string x 158 | memcpy(b, st, n); 159 | // Rprintf("Set %i unsigned char Bytes from '%s' -> 0x %02x %02x %02x %02x %02x 
%02x %02x %02x\n", 160 | // n, x.c_str(), b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]); 161 | delete[] st; 162 | return n; 163 | } 164 | -------------------------------------------------------------------------------- /src/helper_functions.h: -------------------------------------------------------------------------------- 1 | #ifndef HELPERFUNCTIONS_H 2 | #define HELPERFUNCTIONS_H 3 | 4 | #include 5 | #include "specifications.h" 6 | 7 | // get the message size for a char 8 | int get_message_size(const unsigned char msg); 9 | // converts from the long form (MSG_NAMES) to the shorter used form (ACT_MST_NAMES) 10 | std::vector take_needed_messages(std::vector &v); 11 | // formats a number with thousands separator 12 | std::string format_thousands(int64_t num, 13 | const std::string sep = ",", 14 | const std::string s = ""); 15 | 16 | // get bytes functions 17 | 18 | // Converts n bytes from a buffer in big endian to an int32_t 19 | template int32_t getNBytes32(unsigned char* buff) { 20 | int32_t r = 0; 21 | for (size_t i = 0; i < size; ++i) { 22 | r = (r << 8) + *buff++; 23 | // Rprintf("i %2i, r: %15i (0x%llx), next 6: %02x %02x %02x %02x %02x %02x\n", 24 | // i, r, (long long) r, buff[0], buff[1], buff[2], buff[3], buff[4], buff[5]); 25 | } 26 | return r; 27 | } 28 | // Converts n bytes from a buffer in big endian to an int64_t 29 | template int64_t getNBytes64(unsigned char* buff) { 30 | int64_t r = 0; 31 | for (size_t i = 0; i < size; ++i) { 32 | r = (r << 8) + *buff++; 33 | // Rprintf("i %2i, r: %15i (0x%llx), next 6: %02x %02x %02x %02x %02x %02x\n", 34 | // i, r, (long long) r, buff[0], buff[1], buff[2], buff[3], buff[4], buff[5]); 35 | } 36 | return r; 37 | } 38 | 39 | std::string getNBytes(unsigned char* buf, const int n = 8, const unsigned char empty = ' '); 40 | 41 | // converts a numeric vector to integer64 42 | Rcpp::NumericVector to_int64(Rcpp::NumericVector v); 43 | 44 | // function that checks if a buffer passes a filter 45 | bool 
passes_filter(unsigned char* buf, std::vector &filter); 46 | bool passes_filter(unsigned char* buf, std::vector &filter); 47 | bool passes_filter_in(unsigned char* buf, std::vector &lower, 48 | std::vector &upper); 49 | 50 | // set functions, set X bytes in a buffer 51 | uint64_t set2bytes(unsigned char* b, int32_t val); 52 | uint64_t set4bytes(unsigned char* b, int32_t val); 53 | uint64_t set6bytes(unsigned char* b, int64_t val); 54 | uint64_t set8bytes(unsigned char* b, int64_t val); 55 | uint64_t setCharBytes(unsigned char* b, std::string x, uint64_t n); 56 | 57 | #endif //HELPERFUNCTIONS_H 58 | -------------------------------------------------------------------------------- /src/read_functions.h: -------------------------------------------------------------------------------- 1 | #ifndef READFUNCTIONS_H 2 | #define READFUNCTIONS_H 3 | 4 | #include "specifications.h" 5 | #include "count_messages.h" 6 | 7 | // Entry Function for the reading function 8 | Rcpp::List read_itch_impl(std::vector classes, 9 | std::string filename, 10 | int64_t start, int64_t end, 11 | Rcpp::CharacterVector filter_msg_type, 12 | Rcpp::IntegerVector filter_stock_locate, 13 | Rcpp::NumericVector min_timestamp, 14 | Rcpp::NumericVector max_timestamp, 15 | int64_t max_buffer_size = 1e8, 16 | bool quiet = false); 17 | 18 | /* 19 | * Message Parser class, each class holds one "class" (stock_directory, 20 | * system_events, trades, ...) and is able to parse them. 21 | * 22 | * The main usage is 23 | * 24 | * - create a MessageParser with its type (can be empty for no class) 25 | * - activate the object if messages need to be parsed later on 26 | * - init the vectors to appropriate sizes 27 | * - loop over a buffer and call parse_message on the respective messages 28 | * - convert the parsed messages to a data.frame with get_data_frame 29 | * 30 | * Note that the class holds vectors for all possible classes but only fills 31 | * and uses needed classes. 
32 | * 33 | */ 34 | class MessageParser{ 35 | public: 36 | MessageParser(std::string type, 37 | int64_t skip = 0, 38 | int64_t n_max = std::numeric_limits::max()); 39 | 40 | void activate(); 41 | void init_vectors(int64_t n); 42 | void parse_message(unsigned char * buf); 43 | Rcpp::List get_data_frame(); 44 | 45 | std::vector msg_types; 46 | bool active = false; 47 | 48 | private: 49 | void prune_vectors(); 50 | 51 | std::string type; 52 | // msg_buf_idx is only used when skip/n_max are used. 53 | // index counts the number of messages in the Parser, msg_buf_idx counts the 54 | // running number of messages of this type it has seen (but not necessarily parsed!) 55 | int64_t size = 0, index = 0, msg_buf_idx = 0, start_count, end_count; 56 | std::vector colnames; 57 | 58 | // general data vectors 59 | // NOTE: later classes may use earlier vectors as well, 60 | // e.g., noii also uses cross_type, defined under trades... 61 | 62 | Rcpp::CharacterVector msg_type; 63 | Rcpp::IntegerVector stock_locate, tracking_number; 64 | Rcpp::NumericVector timestamp; 65 | 66 | // system_events 67 | Rcpp::CharacterVector event_code; 68 | 69 | // stock_directory 70 | Rcpp::CharacterVector stock; 71 | Rcpp::CharacterVector market_category, financial_status; 72 | Rcpp::IntegerVector lot_size; 73 | Rcpp::LogicalVector round_lots_only; 74 | Rcpp::CharacterVector issue_classification; 75 | Rcpp::CharacterVector issue_subtype; 76 | Rcpp::LogicalVector authentic; 77 | Rcpp::LogicalVector short_sell_closeout; 78 | Rcpp::LogicalVector ipo_flag; 79 | Rcpp::CharacterVector luld_price_tier; 80 | Rcpp::LogicalVector etp_flag; 81 | Rcpp::IntegerVector etp_leverage; 82 | Rcpp::LogicalVector inverse; 83 | 84 | // trading_status 85 | Rcpp::CharacterVector trading_state, reserved; 86 | Rcpp::CharacterVector reason; 87 | Rcpp::CharacterVector market_code; 88 | Rcpp::LogicalVector operation_halted; 89 | 90 | // reg_sho 91 | Rcpp::CharacterVector regsho_action; 92 | 93 | // Market Participant States 94 
| Rcpp::LogicalVector primary_mm; 95 | Rcpp::CharacterVector mm_mode, participant_state; 96 | 97 | // mwcb 98 | Rcpp::NumericVector level1, level2, level3; 99 | Rcpp::IntegerVector breached_level; 100 | 101 | // ipo 102 | Rcpp::IntegerVector release_time; 103 | Rcpp::CharacterVector release_qualifier; 104 | Rcpp::NumericVector ipo_price; 105 | 106 | // luld 107 | Rcpp::NumericVector reference_price, lower_price, upper_price; 108 | Rcpp::IntegerVector extension; 109 | 110 | // orders 111 | Rcpp::NumericVector order_ref; 112 | Rcpp::LogicalVector buy; 113 | Rcpp::IntegerVector shares; 114 | Rcpp::NumericVector price; 115 | Rcpp::CharacterVector mpid; 116 | 117 | // modifications 118 | Rcpp::NumericVector new_order_ref; 119 | Rcpp::LogicalVector printable; 120 | 121 | // trades 122 | Rcpp::NumericVector match_number; 123 | Rcpp::CharacterVector cross_type; 124 | 125 | // noii 126 | Rcpp::NumericVector paired_shares, imbalance_shares; 127 | Rcpp::CharacterVector imbalance_direction; 128 | Rcpp::NumericVector far_price, near_price; 129 | Rcpp::CharacterVector variation_indicator; 130 | 131 | // rpii 132 | Rcpp::CharacterVector interest_flag; 133 | }; 134 | 135 | #endif // READFUNCTIONS_H 136 | -------------------------------------------------------------------------------- /src/specifications.h: -------------------------------------------------------------------------------- 1 | #ifndef SPECIFICATIONS_H 2 | #define SPECIFICATIONS_H 3 | 4 | // to fix windows int64_t typedef issues... 
5 | #include 6 | #include 7 | #include 8 | 9 | // Define NA_INT64 as the 64-bit value that has only its highest bit set 10 | const int64_t NA_INT64 = 1ULL << 63; 11 | 12 | // the lengths of the message types ordered based on their ASCII table positions 13 | // To get the respective size of a message 'msg' (e.g., 'Q') use MSG_SIZES[msg - 'A']; 14 | const int MSG_SIZES [] = { 15 | // A B C D E F G H I J K L M N O P Q R S T 16 | 36, 19, 36, 19, 31, 40, 0, 25, 50, 35, 28, 26, 0, 20, 0, 44, 40, 39, 12, 0, 17 | // U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 18 | 35, 35, 12, 23, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21 19 | }; 20 | // the names of the message types 21 | const unsigned char MSG_NAMES [] = { 22 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 23 | 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', 24 | '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h' 25 | }; 26 | // the number of message types in MSG_SIZES, MSG_NAMES, value is 40... 
27 | const int N_TYPES = sizeof(MSG_SIZES) / sizeof(MSG_SIZES[0]); 28 | 29 | // the names of the messages we actually use 30 | const unsigned char ACT_MSG_NAMES [] = { 31 | 'S', 'R', 'H', 'Y', 'L', 'V', 'W', 'K', 'J', 'h', 'A', 'F', 'E', 'C', 'X', 32 | 'D', 'U', 'P', 'Q', 'B', 'I', 'N' 33 | }; 34 | const int N_ACT_MSGS = sizeof(ACT_MSG_NAMES) / sizeof(ACT_MSG_NAMES[0]); 35 | 36 | // The message classes (or groups) 37 | const std::vector MSG_CLASSES { 38 | "system_events", 39 | "stock_directory", 40 | "trading_status", 41 | "reg_sho", 42 | "market_participant_states", 43 | "mwcb", 44 | "ipo", 45 | "luld", 46 | "orders", 47 | "modifications", 48 | "trades", 49 | "noii", 50 | "rpii" 51 | }; 52 | 53 | // How many classes there are 54 | const int MSG_CLASS_SIZE = MSG_CLASSES.size(); 55 | 56 | // translates msg_type to MSG_CLASSES position 57 | // e.g., msg_type 'h' has value 2, belongs to the third class in MSG_CLASSES: trading_status 58 | const int TYPE_CLASS_TRANSLATOR [] = { 59 | // A B C D E F G H I J K L M N O P Q R S T 
60 | 8, 10, 9, 9, 9, 8, -1, 2, 11, 7, 6, 4, -1, 12, -1, 10, 10, 1, 0, -1, 61 | // U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 62 | 9, 5, 5, 9, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2 63 | }; 64 | 65 | #endif //SPECIFICATIONS_H 66 | -------------------------------------------------------------------------------- /src/write_functions.h: -------------------------------------------------------------------------------- 1 | #ifndef WRITEFUNCTIONS_H 2 | #define WRITEFUNCTIONS_H 3 | 4 | #include 5 | #include 6 | #include "specifications.h" 7 | #include "helper_functions.h" 8 | 9 | 10 | // parse specific messages into a buffer 11 | uint64_t parse_orders_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 12 | uint64_t parse_trades_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 13 | uint64_t parse_modifications_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 14 | uint64_t parse_system_events_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 15 | uint64_t parse_stock_directory_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 16 | uint64_t parse_trading_status_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 17 | uint64_t parse_reg_sho_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 18 | uint64_t parse_market_participants_states_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 19 | uint64_t parse_mwcb_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 20 | uint64_t parse_ipo_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 21 | uint64_t parse_luld_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 22 | uint64_t parse_noii_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 23 | uint64_t parse_rpii_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num); 24 | 25 | // loads a data.frame at a position into a buffer 26 | int64_t load_message_to_buffer(unsigned char * buf, int64_t &msg_ct, 
Rcpp::DataFrame df); 27 | 28 | // returns the index at which the values are minimum 29 | int get_min_val_pos(std::vector &x); 30 | 31 | // writes a buffer to file 32 | void write_buffer_to_file(unsigned char* buf, int64_t size, std::string filename, 33 | bool append = false, bool gz = false); 34 | 35 | // Writes a list of data.frames (already sorted by timestamp) 36 | // to a file, if specified, the file is a gz.file 37 | int64_t write_itch_impl(Rcpp::List ll, std::string filename, 38 | bool append = false, bool gz = false, 39 | size_t max_buffer_size = 1e9, bool quiet = false); 40 | 41 | #endif // WRITEFUNCTIONS_H -------------------------------------------------------------------------------- /tests/tinytests.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("tinytest", quietly=TRUE) && 2 | utils::packageVersion("tinytest") >= "1.0.0") { 3 | 4 | ## Set a seed to make the test deterministic 5 | set.seed(42) 6 | 7 | ## R makes us do this 8 | Sys.setenv("R_TESTS" = "") 9 | 10 | ## there are several more granular ways to test files in a tinytest directory, 11 | ## see its package vignette; tests can also run once the package is installed 12 | ## using the same command `test_package(pkgName)`, or by directory or file 13 | tinytest::test_package("RITCH") 14 | } --------------------------------------------------------------------------------