├── .Rbuildignore
├── .github
    ├── .gitignore
    └── workflows
    │   ├── R-CMD-check.yaml
    │   └── pkgdown.yaml
├── .gitignore
├── CRAN-SUBMISSION
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
    ├── RcppExports.R
    ├── count_functions.R
    ├── data_samples.R
    ├── download_stock_directory.R
    ├── filter_itch.R
    ├── globals.R
    ├── gz_functions.R
    ├── helpers.R
    ├── read_functions.R
    ├── write_itch.R
    └── zzz.R
├── README.Rmd
├── README.md
├── RITCH.Rproj
├── _pkgdown.yml
├── cran-comments.md
├── debug
    ├── README.Rmd
    ├── README.md
    ├── debug_tools.cpp
    └── test_debug.R
├── inst
    ├── extdata
    │   ├── ex20101224.TEST_ITCH_50
    │   └── ex20101224.TEST_ITCH_50.gz
    └── tinytest
    │   ├── test_filename_helpers.R
    │   ├── test_filter_itch.R
    │   ├── test_gz_functions.R
    │   ├── test_read_functions.R
    │   └── test_write_itch.R
├── man
    ├── add_meta_to_filename.Rd
    ├── count_functions.Rd
    ├── count_internal.Rd
    ├── download_sample_file.Rd
    ├── download_stock_directory.Rd
    ├── ex20101224.TEST_ITCH_50.Rd
    ├── figures
    │   └── README-ETF_plot-1.png
    ├── filter_itch.Rd
    ├── format_bytes.Rd
    ├── get_date_from_filename.Rd
    ├── get_exchange_from_filename.Rd
    ├── get_msg_classes.Rd
    ├── gz_functions.Rd
    ├── list_sample_files.Rd
    ├── open_itch_sample_server.Rd
    ├── open_itch_specification.Rd
    ├── read_functions.Rd
    └── write_itch.Rd
├── simulate_dataset.R
├── src
    ├── Makevars.win
    ├── RcppExports.cpp
    ├── count_messages.cpp
    ├── count_messages.h
    ├── filter_itch.cpp
    ├── filter_itch.h
    ├── gz_functionality.cpp
    ├── helper_functions.cpp
    ├── helper_functions.h
    ├── read_functions.cpp
    ├── read_functions.h
    ├── specifications.h
    ├── write_functions.cpp
    └── write_functions.h
└── tests
    └── tinytests.R


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^LICENSE\.md$
 4 | ^README\.Rmd$
 5 | ^[^/]+_ITCH_50$
 6 | ^[^/]+_ITCH_50\.gz$
 7 | ^[^/]+_ITCH50$
 8 | ^[^/]+_ITCH50\.gz$
 9 | ^README_cache$
10 | ^NQTVITCHspecification.*\.pdf$
11 | ^debug/*$
12 | ^simulate_dataset\.R$
13 | ^\.github$
14 | ^cran-comments\.md$
15 | ^_pkgdown\.yml$
16 | ^docs$
17 | ^pkgdown$
18 | ^CRAN-SUBMISSION$
19 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macos-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v3
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 | 
47 |       - uses: r-lib/actions/check-r-package@v2
48 |         with:
49 |           upload-snapshots: true
50 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   release:
 9 |     types: [published]
10 |   workflow_dispatch:
11 | 
12 | name: pkgdown
13 | 
14 | jobs:
15 |   pkgdown:
16 |     runs-on: ubuntu-latest
17 |     # Only restrict concurrency for non-PR jobs
18 |     concurrency:
19 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20 |     env:
21 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 |     steps:
23 |       - uses: actions/checkout@v3
24 | 
25 |       - uses: r-lib/actions/setup-pandoc@v2
26 | 
27 |       - uses: r-lib/actions/setup-r@v2
28 |         with:
29 |           use-public-rspm: true
30 | 
31 |       - uses: r-lib/actions/setup-r-dependencies@v2
32 |         with:
33 |           extra-packages: any::pkgdown, local::.
34 |           needs: website
35 | 
36 |       - name: Build site
37 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
38 |         shell: Rscript {0}
39 | 
40 |       - name: Deploy to GitHub pages 🚀
41 |         if: github.event_name != 'pull_request'
42 |         uses: JamesIves/github-pages-deploy-action@v4.4.1
43 |         with:
44 |           clean: false
45 |           branch: gh-pages
46 |           folder: docs
47 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | # Created by https://www.gitignore.io/api/r
 3 | 
 4 | ### R ###
 5 | # History files
 6 | .Rhistory
 7 | .Rapp.history
 8 | 
 9 | # Session Data files
10 | .RData
11 | 
12 | # Example code in package build process
13 | *-Ex.R
14 | 
15 | # Output files from R CMD build
16 | /*.tar.gz
17 | 
18 | # Output files from R CMD check
19 | /*.Rcheck/
20 | 
21 | # RStudio files
22 | .Rproj.user/
23 | 
24 | # produced vignettes
25 | vignettes/*.html
26 | vignettes/*.pdf
27 | 
28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
29 | .httr-oauth
30 | 
31 | # knitr and R markdown default cache directories
32 | /*_cache/
33 | /cache/
34 | 
35 | # Temporary files created by R markdown
36 | *.utf8.md
37 | *.knit.md
38 | 
39 | # End of https://www.gitignore.io/api/r
40 | 
41 | 
42 | # Created by https://www.gitignore.io/api/c++
43 | 
44 | ### C++ ###
45 | # Prerequisites
46 | *.d
47 | 
48 | # Compiled Object files
49 | *.slo
50 | *.lo
51 | *.o
52 | *.obj
53 | 
54 | # Precompiled Headers
55 | *.gch
56 | *.pch
57 | 
58 | # Compiled Dynamic libraries
59 | *.so
60 | *.dylib
61 | *.dll
62 | 
63 | # Fortran module files
64 | *.mod
65 | *.smod
66 | 
67 | # Compiled Static libraries
68 | *.lai
69 | *.la
70 | *.a
71 | *.lib
72 | 
73 | # Executables
74 | *.exe
75 | *.out
76 | *.app
77 | 
78 | # End of https://www.gitignore.io/api/c++
79 | 
80 | .Rproj.user
81 | 
82 | # ITCH FILES
83 | *.*_ITCH_50
84 | *.*_ITCH_50.gz
85 | !/inst/extdata/*.*_ITCH_50
86 | !/inst/extdata/*.*_ITCH_50.gz
87 | 
88 | # ITCH Documentation
89 | NQTVITCHspecification*.pdf
90 | 
91 | docs
92 | 


--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 0.1.26
2 | Date: 2024-01-15 14:19:16 UTC
3 | SHA: 4a575a1b9627b51aa7567041a84d07dc5ca429ff
4 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: RITCH
 2 | Type: Package
 3 | Title: R Parser for the ITCH-Protocol
 4 | Version: 0.1.27
 5 | Authors@R: c(
 6 |     person("David", "Zimmermann-Kollenda", , "david_j_zimmermann@hotmail.com", role = c("aut", "cre"))
 7 |   )
 8 | Description: Allows to efficiently parse, filter, and write binary ITCH Files (Version 5.0) containing detailed financial transactions as distributed by NASDAQ to an R data.table.
 9 | License: MIT + file LICENSE
10 | URL: https://davzim.github.io/RITCH/,
11 |     https://github.com/DavZim/RITCH
12 | BugReports: https://github.com/DavZim/RITCH/issues
13 | Depends: R (>= 3.5.0)
14 | Imports: data.table,
15 |   Rcpp (>= 0.12.12),
16 |   nanotime (>= 0.3.2),
17 |   bit64 (>= 4.0.5)
18 | LinkingTo: Rcpp
19 | Encoding: UTF-8
20 | RoxygenNote: 7.2.3
21 | Suggests:
22 |     tinytest
23 | Roxygen: list(markdown = TRUE)
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2022
2 | COPYRIGHT HOLDER: David Zimmermann-Kollenda
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2022 David Zimmermann-Kollenda
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(add_meta_to_filename)
 4 | export(count_ipo)
 5 | export(count_luld)
 6 | export(count_market_participant_states)
 7 | export(count_messages)
 8 | export(count_modifications)
 9 | export(count_mwcb)
10 | export(count_noii)
11 | export(count_orders)
12 | export(count_reg_sho)
13 | export(count_rpii)
14 | export(count_stock_directory)
15 | export(count_system_events)
16 | export(count_trades)
17 | export(count_trading_status)
18 | export(download_sample_file)
19 | export(download_stock_directory)
20 | export(filter_itch)
21 | export(format_bytes)
22 | export(get_date_from_filename)
23 | export(get_exchange_from_filename)
24 | export(get_modifications)
25 | export(get_msg_classes)
26 | export(get_orders)
27 | export(get_trades)
28 | export(gunzip_file)
29 | export(gzip_file)
30 | export(list_sample_files)
31 | export(open_itch_sample_server)
32 | export(open_itch_specification)
33 | export(read_ipo)
34 | export(read_itch)
35 | export(read_luld)
36 | export(read_market_participant_states)
37 | export(read_modifications)
38 | export(read_mwcb)
39 | export(read_noii)
40 | export(read_orders)
41 | export(read_reg_sho)
42 | export(read_rpii)
43 | export(read_stock_directory)
44 | export(read_system_events)
45 | export(read_trades)
46 | export(read_trading_status)
47 | export(write_itch)
48 | import(data.table)
49 | importFrom(Rcpp,sourceCpp)
50 | importFrom(bit64,as.integer64)
51 | importFrom(nanotime,nanotime)
52 | importFrom(utils,browseURL)
53 | importFrom(utils,download.file)
54 | useDynLib(RITCH)
55 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # RITCH 0.1.27
 2 | 
 3 | * fix bug where no messages would be reported for larger files
 4 | 
 5 | # RITCH 0.1.26
 6 | 
 7 | * fix bug where gz functionality would write to user library or current directory
 8 | 
 9 | # RITCH 0.1.25
10 | 
11 | * fix Debian segfault when writing to user library
12 | 
13 | # RITCH 0.1.24
14 | 
15 | * fix printf warnings about wrong argument type
16 | 
17 | # RITCH 0.1.23
18 | 
19 | * fix compilation warning and limit test cases to two cores (CRAN...)
20 | 
21 | # RITCH 0.1.22
22 | 
23 | * fix CRAN release by shorten example runtimes
24 | 
25 | # RITCH 0.1.21
26 | 
27 | * fix long running tasks in read functions
28 | 
29 | # RITCH 0.1.20
30 | 
31 | * fix bug where tests would fail on some platforms where files are written and not cleaned up
32 | * CRAN release
33 | 
34 | # RITCH 0.1.19
35 | 
36 | * fix bug in tests on some platforms
37 | * CRAN release
38 | 
39 | # RITCH 0.1.18
40 | 
41 | * CRAN release
42 | 
43 | 
44 | # RITCH 0.1.11
45 | 
46 | * update internal C++ structure, reducing code complexity, increasing read speeds, reducing size of package
47 | * add `filter_itch(infile, outfile, ...)` to filter directly to files
48 | 
49 | 
50 | # RITCH 0.1.10
51 | 
52 | * add `write_itch()` to write ITCH files
53 | * add filters to `read_*` functions
54 | * add read functions for all classes
55 | 


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | count_messages_impl <- function(filename, max_buffer_size, quiet) {
 5 |     .Call('_RITCH_count_messages_impl', PACKAGE = 'RITCH', filename, max_buffer_size, quiet)
 6 | }
 7 | 
 8 | filter_itch_impl <- function(infile, outfile, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, append, max_buffer_size, quiet) {
 9 |     invisible(.Call('_RITCH_filter_itch_impl', PACKAGE = 'RITCH', infile, outfile, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, append, max_buffer_size, quiet))
10 | }
11 | 
12 | gunzip_file_impl <- function(infile, outfile, buffer_size = 1e9L) {
13 |     invisible(.Call('_RITCH_gunzip_file_impl', PACKAGE = 'RITCH', infile, outfile, buffer_size))
14 | }
15 | 
16 | gzip_file_impl <- function(infile, outfile, buffer_size = 1e9L) {
17 |     invisible(.Call('_RITCH_gzip_file_impl', PACKAGE = 'RITCH', infile, outfile, buffer_size))
18 | }
19 | 
20 | read_itch_impl <- function(classes, filename, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, max_buffer_size, quiet) {
21 |     .Call('_RITCH_read_itch_impl', PACKAGE = 'RITCH', classes, filename, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, max_buffer_size, quiet)
22 | }
23 | 
24 | write_itch_impl <- function(ll, filename, append, gz, max_buffer_size, quiet) {
25 |     .Call('_RITCH_write_itch_impl', PACKAGE = 'RITCH', ll, filename, append, gz, max_buffer_size, quiet)
26 | }
27 | 
28 | 


--------------------------------------------------------------------------------
/R/count_functions.R:
--------------------------------------------------------------------------------
  1 | #' @name count_functions
  2 | #' @rdname count_functions
  3 | #' @title Counts the messages of an ITCH-file
  4 | #'
  5 | #' @param file the path to the input file, either a gz-file or a plain-text file
  6 | #' @param x a file or a data.table containing the message types and the counts,
  7 | #' as outputted by `count_messages`
  8 | #' @param add_meta_data if the meta-data of the messages should be added, defaults to FALSE
  9 | #' @param buffer_size the size of the buffer in bytes, defaults to 1e8 (100 MB), if you have a large amount of RAM, 1e9 (1GB) might be faster
 10 | #' @param quiet if TRUE, the status messages are suppressed, defaults to FALSE
 11 | #' @param force_gunzip only applies if file is a gz-file and a file with the same (gunzipped) name already exists.
 12 | #'        if set to TRUE, the existing file is overwritten. Default value is FALSE
 13 | #' @param gz_dir a directory where the gz archive is extracted to.
 14 | #'        Only applies if file is a gz archive. Default is [tempdir()].    
 15 | #' @param force_cleanup only applies if file is a gz-file. If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards.
 16 | #' @return a data.table containing the message-type and their counts for `count_messages`
 17 | #'  or an integer value for the other functions.
 18 | #' @export
 19 | #'
 20 | #' @examples
 21 | #' file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
 22 | #' count_messages(file)
 23 | #' count_messages(file, add_meta_data = TRUE, quiet = TRUE)
 24 | #'
 25 | #' # file can also be a .gz file
 26 | #' gz_file <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
 27 | #' count_messages(gz_file, quiet = TRUE)
 28 | #'
 29 | #' # count only a specific class
 30 | #' msg_count <- count_messages(file, quiet = TRUE)
 31 | #'
 32 | #' # either count based on a given data.table outputted by count_messages
 33 | #' count_orders(msg_count)
 34 | #'
 35 | #' # or count orders from a file and not from a msg_count
 36 | #' count_orders(file)
 37 | #'
 38 | #' ### Specific class count functions are:
 39 | count_messages <- function(file, add_meta_data = FALSE, buffer_size = -1,
 40 |                            quiet = FALSE, force_gunzip = FALSE,
 41 |                            gz_dir = tempdir(), force_cleanup = TRUE) {
 42 |   t0 <- Sys.time()
 43 |   if (!file.exists(file))
 44 |     stop(sprintf("File '%s' not found!", file))
 45 | 
 46 |   # Set the default value of the buffer size
 47 |   buffer_size <- check_buffer_size(buffer_size, file)
 48 | 
 49 |   orig_file <- file
 50 |   # only needed for gz files; a raw file that already existed in gz_dir is never deleted
 51 |   raw_file_existed <- file.exists(file.path(gz_dir, basename(gsub("\\.gz$", "", file))))
 52 |   file <- check_and_gunzip(file, gz_dir, buffer_size, force_gunzip, quiet)
 53 |   df <- count_messages_impl(file, buffer_size, quiet)
 54 |   # NOTE(review): assumes check_and_gunzip returns the extracted path inside gz_dir - confirm
 55 |   df <- data.table::setalloccol(df)
 56 | 
 57 |   if (add_meta_data) {
 58 |     dd <- RITCH::get_msg_classes()
 59 |     df <- df[dd, on = "msg_type"]
 60 |   }
 61 |   # timing is reported against the path the caller passed in
 62 |   report_end(t0, quiet, orig_file)
 63 |   # clean up exactly the gunzipped file that was read (the path logged below), not a cwd-relative name
 64 |   if (grepl("\\.gz$", orig_file) && force_cleanup && !raw_file_existed) {
 65 |     unlink(file)
 66 |     if (!quiet) cat(sprintf("[Cleanup]    Removing file '%s'\n", file))
 67 |   }
 68 | 
 69 |   return(df)
 70 | }
 71 | 
 72 | #' Returns the message class data for the message types
 73 | #'
 74 | #' All information is handled according to the official ITCH 5.0
 75 | #' documentation as found here:
 76 | #' <http://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHSpecification.pdf>
 77 | #'
 78 | #' - `msg_type` the type of the message
 79 | #' - `msg_class` the group the message belongs to
 80 | #' - `msg_name` the official name of the message
 81 | #' - `doc_nr` the number of the message in the documentation
 82 | #'
 83 | #' @seealso `open_itch_specification()`
 84 | #'
 85 | #' @return a data.table with the information of the message-types
 86 | #' @export
 87 | #'
 88 | #' @examples
 89 | #' get_msg_classes()
 90 | get_msg_classes <- function() {
 91 |   data.table::data.table( # the four vectors are parallel: position i of each describes one message type
 92 |     msg_type = c("S", "R", "H", "Y", "L", "V", "W", "K", "J", "h", "A", "F", "E",
 93 |                  "C", "X", "D", "U", "P", "Q", "B", "I", "N"),
 94 |     msg_class = c("system_events", "stock_directory", "trading_status",
 95 |                   "reg_sho", "market_participant_states", "mwcb",
 96 |                   "mwcb", "ipo", "luld", "trading_status", "orders", "orders",
 97 |                   "modifications", "modifications", "modifications",
 98 |                   "modifications", "modifications", "trades", "trades", "trades",
 99 |                   "noii", "rpii"),
100 |     msg_name = c("System Event Message", "Stock Directory",
101 |                  "Stock Trading Action", "Reg SHO Restriction",
102 |                  "Market Participant Position", "MWCB Decline Level Message",
103 |                  "MWCB Status Message", "IPO Quoting Period Update",
104 |                  "LULD Auction Collar", "Operational Halt", "Add Order Message",
105 |                  "Add Order - MPID Attribution Message",
106 |                  "Order Executed Message",
107 |                  "Order Executed Message With Price Message",
108 |                  "Order Cancel Message", "Order Delete Message",
109 |                  "Order Replace Message", "Trade Message (Non-Cross)",
110 |                  "Cross Trade Message", "Broken Trade Message",
111 |                  "NOII Message",
112 |                  "Retail Interest Message"),
113 |     doc_nr = c("4.1", "4.2.1", "4.2.2", "4.2.3", "4.2.4", "4.2.5.1", "4.2.5.2",
114 |                "4.2.6", "4.2.7", "4.2.8", "4.3.1", "4.3.2", "4.4.1", "4.4.2", "4.4.3",
115 |                "4.4.4", "4.4.5", "4.5.1", "4.5.2", "4.5.3", "4.6", "4.7")
116 |   )
117 | }
118 | 
119 | #' Internal function to count the messages
120 | #'
121 | #' @param x a data.frame containing the message types and the counts
122 | #' @param types a vector containing the types
123 | #'
124 | #' @keywords internal
125 | #' @return a numeric value of number of orders in x
126 | #'
127 | #' @examples
128 | #' # Only used internally
129 | count_internal <- function(x, types) {
130 |   if (!is.data.frame(x)) stop("x has to be a data.table")
131 |   if (!all(c("msg_type", "count") %in% names(x)))
132 |     stop("x has to have the variables 'msg_type' and 'count'")
133 |   # base subsetting so plain data.frames (accepted by the guard above) work like data.tables
134 |   as.integer(sum(x[["count"]][x[["msg_type"]] %in% types]))
135 | }
136 | 
137 | #' @rdname count_functions
138 | #' @export
139 | #' @details
140 | #' - `count_orders`: Counts order messages. Message type `A` and `F`
141 | #'
142 | #' @examples
143 | #' count_orders(msg_count)
144 | count_orders <- function(x) {
145 |   # a character input is treated as a file path and counted first
146 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
147 |   count_internal(msg_count, c("A", "F"))
148 | }
149 | 
150 | #' @rdname count_functions
151 | #' @export
152 | #' @details
153 | #' - `count_trades`: Counts trade messages. Message type `P`, `Q` and `B`
154 | #'
155 | #' @examples
156 | #' count_trades(msg_count)
157 | count_trades <- function(x) {
158 |   # a character input is treated as a file path and counted first
159 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
160 |   count_internal(msg_count, c("P", "Q", "B"))
161 | }
162 | 
163 | #' @rdname count_functions
164 | #' @export
165 | #' @details
166 | #' - `count_modifications`: Counts order modification messages. Message
167 | #'    type `E`, `C`, `X`, `D`, and `U`
168 | #'
169 | #' @examples
170 | #' count_modifications(msg_count)
171 | count_modifications <- function(x) {
172 |   # a character input is treated as a file path and counted first
173 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
174 |   count_internal(msg_count, c("E", "C", "X", "D", "U"))
175 | }
176 | 
177 | #' @rdname count_functions
178 | #' @export
179 | #' @details
180 | #' - `count_system_events`: Counts system event messages. Message type `S`
181 | #'
182 | #' @examples
183 | #' count_system_events(msg_count)
184 | count_system_events <- function(x) {
185 |   # a character input is treated as a file path and counted first
186 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
187 |   count_internal(msg_count, "S")
188 | }
189 | 
190 | #' @rdname count_functions
191 | #' @export
192 | #' @details
193 | #' - `count_stock_directory`: Counts stock directory messages. Message
194 | #'    type `R`
195 | #'
196 | #' @examples
197 | #' count_stock_directory(msg_count)
198 | count_stock_directory <- function(x) {
199 |   # a character input is treated as a file path and counted first
200 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
201 |   count_internal(msg_count, "R")
202 | }
203 | 
204 | #' @rdname count_functions
205 | #' @export
206 | #' @details
207 | #' - `count_trading_status`: Counts trading status messages. Message
208 | #'    type `H` and `h`
209 | #'
210 | #' @examples
211 | #' count_trading_status(msg_count)
212 | count_trading_status <- function(x) {
213 |   # a character input is treated as a file path and counted first
214 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
215 |   count_internal(msg_count, c("H", "h"))
216 | }
217 | 
218 | #' @rdname count_functions
219 | #' @export
220 | #' @details
221 | #' - `count_reg_sho`: Counts messages regarding reg SHO. Message type
222 | #'    `Y`
223 | #'
224 | #' @examples
225 | #' count_reg_sho(msg_count)
226 | count_reg_sho <- function(x) {
227 |   # a character input is treated as a file path and counted first
228 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
229 |   count_internal(msg_count, "Y")
230 | }
231 | 
232 | #' @rdname count_functions
233 | #' @export
234 | #' @details
235 | #' - `count_market_participant_states`: Counts messages regarding the
236 | #'    status of market participants. Message type `L`
237 | #'
238 | #' @examples
239 | #' count_market_participant_states(msg_count)
240 | count_market_participant_states <- function(x) {
241 |   # a character input is treated as a file path and counted first
242 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
243 |   count_internal(msg_count, "L")
244 | }
245 | 
246 | #' @rdname count_functions
247 | #' @export
248 | #' @details
249 | #' - `count_mwcb`: Counts messages regarding Market-Wide-Circuit-Breakers
250 | #'    (MWCB). Message type `V` and `W`
251 | #'
252 | #' @examples
253 | #' count_mwcb(msg_count)
254 | count_mwcb <- function(x) {
255 |   # a character input is treated as a file path and counted first
256 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
257 |   count_internal(msg_count, c("V", "W"))
258 | }
259 | 
260 | #' @rdname count_functions
261 | #' @export
262 | #' @details
263 | #' - `count_ipo`: Counts messages regarding IPOs. Message type `K`
264 | #'
265 | #' @examples
266 | #' count_ipo(msg_count)
267 | count_ipo <- function(x) {
268 |   # a character input is treated as a file path and counted first
269 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
270 |   count_internal(msg_count, "K")
271 | }
272 | 
273 | #' @rdname count_functions
274 | #' @export
275 | #' @details
276 | #' - `count_luld`: Counts messages regarding LULDs (limit up-limit down)
277 | #'    auction collars. Message type `J`
278 | #'
279 | #' @examples
280 | #' count_luld(msg_count)
281 | count_luld <- function(x) {
282 |   # a character input is treated as a file path and counted first
283 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
284 |   count_internal(msg_count, "J")
285 | }
286 | 
287 | #' @rdname count_functions
288 | #' @export
289 | #' @details
290 | #' - `count_noii`: Counts Net Order Imbalance Indicator (NOII) messages.
291 | #'    Message type `I`
292 | #'
293 | #' @examples
294 | #' count_noii(msg_count)
295 | count_noii <- function(x) {
296 |   # a character input is treated as a file path and counted first
297 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
298 |   count_internal(msg_count, "I")
299 | }
300 | 
301 | #' @rdname count_functions
302 | #' @export
303 | #' @details
304 | #' - `count_rpii`: Counts Retail Price Improvement Indicator (RPII)
305 | #'    messages. Message type `N`
306 | #'
307 | #' @examples
308 | #' count_rpii(msg_count)
309 | count_rpii <- function(x) {
310 |   # a character input is treated as a file path and counted first
311 |   msg_count <- if (is.character(x)) count_messages(x, quiet = TRUE) else x
312 |   count_internal(msg_count, "N")
313 | }
314 | 


--------------------------------------------------------------------------------
/R/data_samples.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Returns a data.table of the sample files on the server
  3 | #'
  4 | #' The Server can be found at <https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/>
  5 | #'
  6 | #' @return a data.table of the files
  7 | #' @export
  8 | #'
  9 | #' @examples
 10 | #' \dontrun{
 11 | #'   list_sample_files()
 12 | #' }
 13 | list_sample_files <- function() {
 14 |   # read the server's directory listing; entries are separated by <br>
 15 |   url <- "https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/"
 16 |   raw <- suppressWarnings(readLines(url))
 17 | 
 18 |   cont <- trimws(unlist(strsplit(raw, "<br>")))
 19 |   cont <- cont[grepl("ITCH_?50\\.gz</A>$", cont)]
 20 |   cont <- strsplit(cont, " +|HREF=\"|\">|</A>")
 21 |   # tokens used per entry: 1 = date, 2 = time, 3 = AM/PM marker, 4 = size, 8 = file name
 22 |   df <- data.table::data.table(
 23 |     file = vapply(cont, function(x) x[8], character(1)),
 24 |     size = vapply(cont, function(x) x[4], character(1)),
 25 |     date = vapply(cont, function(x) x[1], character(1)),
 26 |     time = vapply(cont, function(x) x[2], character(1)),
 27 |     tt = vapply(cont, function(x) x[3], character(1))
 28 |   )
 29 |   # %I (12-hour clock) is needed for %p to take effect; with %H the AM/PM marker is ignored
 30 |   df[, ':=' (
 31 |     file_size = as.numeric(size),
 32 |     last_modified = as.POSIXct(paste(date, time, tt), format = "%m/%d/%Y %I:%M %p", tz = "GMT"),
 33 |     exchange = get_exchange_from_filename(file),
 34 |     date = get_date_from_filename(file)
 35 |   )]
 36 | 
 37 |   return(df[, .(file, exchange, date, file_size, last_modified)])
 38 | }
 39 | 
 40 | 
 41 | #' Downloads a sample ITCH File from NASDAQs Server
 42 | #'
 43 | #' The Server can be found at <https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/>
 44 | #'
 45 | #' Warning: the smallest file is around 300 MB, with the largest exceeding 5 GB.
 46 | #' There are about 17 files in total. Downloading all might take a considerable amount of time.
 47 | #'
 48 | #' @param choice which file should be chosen? One of: smallest (default), largest,
 49 | #' earliest (date-wise), latest, random, or all.
 50 | #' @param file the name of a specific file, overrules the choice and exchanges arguments
 51 | #' @param exchanges A vector of exchanges, can be NASDAQ, BX, or PSX.
 52 | #' The default value is to consider all exchanges.
 53 | #' @param dir The directory where the files will be saved to, default is current working directory.
 54 | #' @param force_download If the file should be downloaded even if it already exists locally.
 55 | #' Default value is FALSE.
 56 | #' @param check_md5sum If the md5-sum (hash-value) of the downloaded file should be checked, default value is TRUE.
 57 | #' @param quiet if TRUE, the status messages are suppressed, defaults to FALSE
 58 | #'
 59 | #' @return an invisible vector of the files
 60 | #' @export
 61 | #'
 62 | #' @examples
 63 | #' \dontrun{
 64 | #' download_sample_file()
 65 | #' file <- download_sample_file()
 66 | #' file
 67 | #'
 68 | #' # download a specific sample file
 69 | #' file <- download_sample_file(file = "2019130.BX_ITCH_50.gz")
 70 | #' file
 71 | #' }
 72 | download_sample_file <- function(choice = c("smallest", "largest", "earliest", "latest",  "random", "all"),
 73 |                                  file = NA,
 74 |                                  exchanges = NA,
 75 |                                  dir = ".",
 76 |                                  force_download = FALSE,
 77 |                                  check_md5sum = TRUE,
 78 |                                  quiet = FALSE) {
 79 |   choice <- match.arg(choice)
 80 | 
 81 |   url <- "https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/"
 82 |   df <- list_sample_files()
 83 | 
 84 |   # A single NA means "all exchanges"; anything else (one or several names)
 85 |   # restricts the listing. The sentinel is tested on the scalar case only so
 86 |   # that vectors of exchanges never reach `&&`.
 87 |   if (!(length(exchanges) == 1 && is.na(exchanges)))
 88 |     df <- df[exchange %in% toupper(exchanges), ]
 89 | 
 90 |   if (nrow(df) == 0)
 91 |     stop("No sample files available for the requested exchanges")
 92 | 
 93 |   if (!quiet) cat(paste0("Downloading '", choice, "' sample file(s)\n"))
 94 | 
 95 |   # sort descending, so row 1 is the largest/latest and the last row the
 96 |   # smallest/earliest file
 97 |   if (choice %in% c("smallest", "largest"))
 98 |     df <- df[order(file_size, decreasing = TRUE)]
 99 |   if (choice %in% c("earliest", "latest"))
100 |     df <- df[order(date, decreasing = TRUE)]
101 | 
102 |   idx <- switch(choice,
103 |                 smallest = nrow(df),
104 |                 random = sample.int(nrow(df), 1),
105 |                 largest = 1,
106 |                 earliest = nrow(df),
107 |                 latest = 1,
108 |                 all = seq_len(nrow(df)))
109 | 
110 |   # an explicitly named file overrules the choice
111 |   if (!is.na(file)) idx <- df$file == file
112 |   df_take <- df[idx, ]
113 | 
114 |   if (nrow(df_take) == 0)
115 |     stop(sprintf("File '%s' not found in the list of sample files", file))
116 | 
117 |   # iterate over the file names only; returns one file name per download
118 |   files <- vapply(as.character(df_take$file), function(fname) {
119 |     file_path <- file.path(dir, fname)
120 |     file_url <- paste0(url, fname)
121 | 
122 |     download_file <- TRUE
123 | 
124 |     if (file.exists(file_path)) {
125 |       txt <- paste0("File '", file_path, "' exists already, ")
126 | 
127 |       if (force_download) {
128 |         if (!quiet) cat(paste0(txt, "downloading!\n"))
129 |       } else {
130 |         if (!quiet) cat(paste0(txt, "not downloading it again!\n"))
131 |         download_file <- FALSE
132 |       }
133 |     }
134 | 
135 |     if (download_file) {
136 |       if (!quiet) cat(paste0("Downloading File '", file_path, "'.\n"))
137 |       download.file(file_url, destfile = file_path, mode = "wb", quiet = quiet)
138 |     }
139 | 
140 |     if (check_md5sum) {
141 |       if (!quiet) cat(paste0("Checking md5 sum of file '", file_path, "' ... "))
142 |       md5_url <- paste0(file_url, ".md5sum")
143 |       # the md5 file is optional on the server, a failed read skips the check
144 |       md5 <- try(readLines(md5_url), silent = TRUE)
145 |       if (inherits(md5, "try-error")) {
146 |         if (!quiet)
147 |           cat(sprintf("Could not find md5 file for file %s, skipping check\n",
148 |                       file_url))
149 |         return(fname)
150 |       }
151 |       # the hash is the first space-separated token of the first line
152 |       expected <- strsplit(md5, " ")[[1]][1]
153 |       got <- tools::md5sum(file_path)
154 |       if (expected != got) {
155 |         if (!quiet) cat("\n")
156 |         warning(paste0("md5 hash for file '", file_path,
157 |                        "' not matching.\nExpected '", expected, "' got '", got, "'!"))
158 |       } else {
159 |         if (!quiet) cat(paste0("matches '", expected, "' - success !\n"))
160 |       }
161 |     }
162 | 
163 |     fname
164 |   }, character(1), USE.NAMES = FALSE)
165 | 
166 |   return(invisible(files))
167 | }
153 | 


--------------------------------------------------------------------------------
/R/download_stock_directory.R:
--------------------------------------------------------------------------------
 1 | #' Downloads the stock directory (stock locate codes) for a given date and exchange
 2 | #'
 3 | #' The data is downloaded from NASDAQs server, which can be found here
 4 | #' <https://emi.nasdaq.com/ITCH/Stock_Locate_Codes/>
 5 | #'
 6 | #' @param exchange The exchange, either NASDAQ (equivalent to NDQ), BX, or PSX
 7 | #' @param date The date, should be of class Date. If not the value is converted
 8 | #' using `as.Date`.
 9 | #' @param cache If the stock directory should be cached, can be set to TRUE
10 | #' to save the stock directories in the working directory or a character for a
11 | #' target directory.
12 | #' @param quiet If the download function should be quiet, default is FALSE.
13 | #'
14 | #' @return a data.table of the tickers, the respective stock locate codes, and
15 | #' the exchange/date information
16 | #' @export
17 | #'
18 | #' @examples
19 | #' \dontrun{
20 | #'   download_stock_directory("BX", "2019-07-02")
21 | #'   download_stock_directory(c("BX", "NDQ"), c("2019-07-02", "2019-07-03"))
22 | #'   download_stock_directory("BX", "2019-07-02", cache = TRUE)
23 | #'
24 | #'   download_stock_directory("BX", "2019-07-02", cache = "stock_directory")
25 | #'   dir.exists("stock_directory")
26 | #'   list.files("stock_directory")
27 | #' }
28 | download_stock_directory <- function(exchange, date, cache = FALSE,
29 |                                      quiet = FALSE) {
30 | 
31 |   # "NASDAQ" is an alias for the "ndq" prefix used in the server file names
32 |   exchange <- ifelse(tolower(exchange) == "nasdaq", "ndq", tolower(exchange))
33 |   if (!all(exchange %in% c("ndq", "bx", "psx")))
34 |     stop("Exchange must be 'NASDAQ' ('NDQ'), 'BX', or 'PSX'")
35 |   if (length(cache) != 1) stop("cache must be of size 1")
36 | 
37 |   if (is.character(date)) date <- as.Date(date)
38 |   base_url <- "https://emi.nasdaq.com/ITCH/Stock_Locate_Codes/"
39 | 
40 |   # if multiple exchanges or dates were specified, take all possible combinations
41 |   # and call the function recursively, forwarding the cache and quiet settings
42 |   if (length(exchange) > 1 || length(date) > 1) {
43 |     vals <- expand.grid(ex = exchange, d = date, stringsAsFactors = FALSE)
44 | 
45 |     res <- lapply(seq_len(nrow(vals)), function(i) {
46 |       download_stock_directory(vals$ex[i], vals$d[i],
47 |                                cache = cache, quiet = quiet)
48 |     })
49 | 
50 |     d <- data.table::rbindlist(res)
51 | 
52 |   } else {
53 |     filename <- paste0(exchange, "_stocklocate_", format(date, "%Y%m%d"), ".txt")
54 |     url <- paste0(base_url, filename)
55 |     # without caching the file is read straight from the URL
56 |     file <- url
57 | 
58 |     # cache is either TRUE (working directory) or a target directory;
59 |     # isTRUE() keeps a logical NA from reaching the if-condition
60 |     if (is.character(cache) || isTRUE(cache)) {
61 | 
62 |       destfile <- filename
63 |       if (is.character(cache)) {
64 |         if (!dir.exists(cache)) dir.create(cache, recursive = TRUE)
65 |         destfile <- file.path(cache, filename)
66 |       }
67 | 
68 |       txt <- sprintf("for exchange '%s' and date '%s'",
69 |                      exchange, format(date, "%Y-%m-%d"))
70 |       # download or use cache
71 |       if (!file.exists(destfile)) {
72 |         if (!quiet) cat(sprintf("[Stock Locate] Downloading %s\n", txt))
73 |         download.file(url, destfile, quiet = quiet)
74 |       } else {
75 |         if (!quiet)
76 |           cat(sprintf("[Stock Locate] File %s already exists, using cache\n",
77 |                       txt))
78 |       }
79 |       file <- destfile
80 |     }
81 | 
82 |     d <- data.table::fread(file, showProgress = !quiet)
83 | 
84 |     data.table::setnames(d, c("ticker", "stock_locate"))
85 |     # exchange and date are taken from the function arguments
86 |     d[, ':=' (exchange = toupper(exchange), date = date)]
87 |   }
88 | 
89 |   return(d[])
90 | }
84 | 
85 | 


--------------------------------------------------------------------------------
/R/filter_itch.R:
--------------------------------------------------------------------------------
  1 | #' Filters an ITCH file to another ITCH file
  2 | #'
  3 | #' This function allows to perform very fast filter operations on large ITCH
  4 | #' files. The messages are written to another ITCH file.
  5 | #'
  6 | #' Note that this can be especially useful on larger files or where memory
  7 | #' is not large enough to filter the data in memory.
  8 | #'
  9 | #' As with the [read_itch()] functions, it allows to filter for
 10 | #' `msg_class`, `msg_type`, `stock_locate`/`stock`, and
 11 | #' `timestamp`.
 12 | #'
 13 | #' @inheritParams read_functions
 14 | #' @param infile the input file where the messages are taken from, can be a
 15 | #' gz-archive or a plain ITCH file.
 16 | #' @param outfile the output file where the filtered messages are written to.
 17 | #' Note that the date and exchange information from the `infile` are used,
 18 | #' see also [add_meta_to_filename()] for further information.
 19 | #' @param append if the messages should be appended to the outfile, default is
 20 | #' false. Note, this is helpful if `skip` and or `n_max` are used for
 21 | #' batch filtering.
 22 | #' @param gz if the output file should be gzip-compressed. Note that the name
 23 | #' of the output file will be appended with .gz if not already present. The
 24 | #' final output name is returned. Default value is false.
 25 | #' @param overwrite if an existing outfile with the same name should be
 26 | #' overwritten. Default value is false
 27 | #'
 28 | #' @return the name of the output file (maybe different from the inputted
 29 | #' outfile due to adding the date and exchange), silently
 30 | #' @export
 31 | #'
 32 | #' @examples
 33 | #' infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
 34 | #' outfile <- tempfile(fileext = "_20101224.TEST_ITCH_50")
 35 | #' filter_itch(
 36 | #'   infile, outfile,
 37 | #'   filter_msg_class = c("orders", "trades"),
 38 | #'   filter_msg_type = "R", # stock_directory
 39 | #'   skip = 0, n_max = 100
 40 | #' )
 41 | #'
 42 | #' # expecting 100 orders, 100 trades, and 3 stock_directory entries
 43 | #' count_messages(outfile)
 44 | #'
 45 | #' # check that the output file contains the same
 46 | #' res  <- read_itch(outfile, c("orders", "trades", "stock_directory"))
 47 | #' sapply(res, nrow)
 48 | #'
 49 | #' res2 <- read_itch(infile,  c("orders", "trades", "stock_directory"),
 50 | #'                   n_max = 100)
 51 | #'
 52 | #' all.equal(res, res2)
 53 | filter_itch <- function(infile, outfile,
 54 |                         filter_msg_class = NA_character_,
 55 |                         filter_msg_type = NA_character_,
 56 |                         filter_stock_locate = NA_integer_,
 57 |                         min_timestamp = bit64::as.integer64(NA),
 58 |                         max_timestamp = bit64::as.integer64(NA),
 59 |                         filter_stock = NA_character_, stock_directory = NA,
 60 |                         skip = 0, n_max = -1, append = FALSE, overwrite = FALSE,
 61 |                         gz = FALSE, buffer_size = -1, quiet = FALSE,
 62 |                         force_gunzip = FALSE, force_cleanup = TRUE) {
 63 |   t0 <- Sys.time()
 64 |   # message types that make up each message class
 65 |   msg_classes <- list(
 66 |     "system_events" = "S",
 67 |     "stock_directory" = "R",
 68 |     "trading_status" = c("H", "h"),
 69 |     "reg_sho" = "Y",
 70 |     "market_participant_states" = "L",
 71 |     "mwcb" = c("V", "W"),
 72 |     "ipo" = "K",
 73 |     "luld" = "J",
 74 |     "orders" = c("A", "F"),
 75 |     "modifications" = c("E", "C", "X", "D", "U"),
 76 |     "trades" = c("P", "Q", "B"),
 77 |     "noii" = "I",
 78 |     "rpii" = "N"
 79 |   )
 80 | 
 81 |   # a class filter is translated into the message types of the classes and
 82 |   # added to the explicitly given message types
 83 |   if (!any(is.na(filter_msg_class))) {
 84 |     filter_msg_type <- c(
 85 |       filter_msg_type,
 86 |       as.character(unlist(msg_classes[tolower(filter_msg_class)]))
 87 |     )
 88 |   }
 89 | 
 90 |   if (!file.exists(infile))
 91 |     stop(sprintf("File '%s' not found!", infile))
 92 | 
 93 |   # carry the date and exchange of the input over to the output filename
 94 |   date <- get_date_from_filename(infile)
 95 |   exch <- get_exchange_from_filename(infile)
 96 |   outfile <- add_meta_to_filename(outfile, date, exch)
 97 | 
 98 |   # check that the directory for outfile exists
 99 |   outfile_dir <- gsub("[^/]+$", "", outfile)
100 |   if (outfile_dir != "" && !dir.exists(outfile_dir)) {
101 |     if (overwrite) {
102 |       dir.create(outfile_dir, recursive = TRUE)
103 |     } else {
104 |       stop(sprintf(
105 |         "Directory '%s' not found, to create/overwrite use overwrite = TRUE",
106 |         outfile_dir
107 |       ))
108 |     }
109 |   }
110 | 
111 |   # first write to the unzipped file, then gzip the file later...
112 |   if (grepl("\\.gz$", outfile)) outfile <- gsub("\\.gz$", "", outfile)
113 | 
114 |   if (file.exists(outfile) && !append && !overwrite)
115 |     stop(sprintf("File '%s' already found, to overwrite use overwrite = TRUE or use append = TRUE",
116 |                  outfile))
117 | 
118 |   # sprintf() alone only builds the string, cat() is needed to print it
119 |   if (!quiet) {
120 |     cat(sprintf("[infile]     '%s'\n", infile))
121 |     cat(sprintf("[outfile]    '%s'\n", outfile))
122 |   }
123 | 
124 |   # treat n_max
125 |   if (is.data.frame(n_max))
126 |     stop("n_max cannot be a data.frame in filter_itch!")
127 | 
128 |   # +1 as we want to skip, -1 as cpp is zero indexed
129 |   start <- max(skip, 0)
130 |   end <- max(skip + n_max - 1, -1)
131 |   # an end of -1 is passed on when no upper limit applies
132 |   if (end < start) end <- -1
133 | 
134 |   if (!quiet && (start != 0 || end != -1))
135 |     cat(sprintf("[Filter]     skip: %i n_max: %i (%i - %i)\n",
136 |                 skip, n_max, start + 1, end + 1))
137 | 
138 |   # Treat filters
139 |   # Message types
140 |   filter_msg_type <- check_msg_types(filter_msg_type, quiet)
141 | 
142 |   # locate code
143 |   filter_stock_locate <- filter_stock_locate[!is.na(filter_stock_locate)]
144 |   filter_stock_locate <- as.integer(filter_stock_locate)
145 | 
146 |   # Timestamp
147 |   t <- check_timestamps(min_timestamp, max_timestamp, quiet)
148 |   min_timestamp <- t$min
149 |   max_timestamp <- t$max
150 | 
151 |   # Stock
152 |   filter_stock_locate <- check_stock_filters(filter_stock, stock_directory,
153 |                                              filter_stock_locate, infile)
154 | 
155 |   if (!quiet && length(filter_stock_locate) > 0)
156 |     cat(paste0("[Filter]     stock_locate: '",
157 |                paste(filter_stock_locate, collapse = "', '"),
158 |                "'\n"))
159 | 
160 |   # Set the default value of the buffer size
161 |   buffer_size <- check_buffer_size(buffer_size, infile)
162 | 
163 |   orig_infile <- infile
164 |   is_gz_input <- grepl("\\.gz$", orig_infile)
165 |   # only needed for gz files: the archive is unzipped next to the outfile and
166 |   # that copy is not deleted afterwards when it already existed before
167 |   raw_file <- file.path(dirname(outfile), basename(gsub("\\.gz$", "", infile)))
168 |   raw_file_existed <- file.exists(raw_file)
169 |   infile <- check_and_gunzip(infile, dirname(outfile), buffer_size, force_gunzip, quiet)
170 | 
171 |   filter_itch_impl(infile, outfile, start, end,
172 |                    filter_msg_type, filter_stock_locate,
173 |                    min_timestamp, max_timestamp,
174 |                    append, buffer_size, quiet)
175 | 
176 |   if (gz) {
177 |     if (!quiet) cat(sprintf("[gzip]       outfile\n"))
178 |     of <- outfile
179 |     outfile <- gzip_file(infile = outfile,
180 |                          outfile = paste0(outfile, ".gz"))
181 |     unlink(of) # delete the temporary file
182 |   }
183 | 
184 |   gc()
185 | 
186 |   report_end(t0, quiet, infile)
187 | 
188 |   # if the file was gzipped and the force_cleanup=TRUE, delete unzipped file
189 |   # (infile now points to the unzipped copy created by check_and_gunzip)
190 |   if (is_gz_input && force_cleanup && !raw_file_existed) {
191 |     if (!quiet) cat(sprintf("[Cleanup]    Removing file '%s'\n", infile))
192 |     unlink(infile)
193 |   }
194 | 
195 |   return(invisible(outfile))
196 | }
189 | 


--------------------------------------------------------------------------------
/R/globals.R:
--------------------------------------------------------------------------------
1 | 
2 | utils::globalVariables(  # declare names that are used without a visible binding
3 |   c("count", "datetime", "msg_type", "timestamp", "exchange", "file_size",  # e.g. column names in data.table expressions
4 |     "last_modified", ".", "size", "time", "stock", "stock_locate", "tt")
5 | )
6 | 


--------------------------------------------------------------------------------
/R/gz_functions.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' @name gz_functions
  3 | #' @rdname gz_functions
  4 | #' @title Compresses and uncompresses files to and from gz-archives
  5 | #'
  6 | #' @description
  7 | #'
  8 | #' Allows the compression and uncompression of files
  9 | #'
 10 | #' @param infile the file to be zipped or unzipped
 11 | #' @param outfile the resulting zipped or unzipped file
 12 | #' @param buffer_size the size of the buffer to read in at once, default is 4 times the file.size (max 2Gb).
 13 | #'
 14 | #' @details Functions are
 15 | #'
 16 | #' @return The filename of the unzipped file, invisibly
 17 | #'
 18 | #' @examples
 19 | #' gzfile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
 20 | #' file   <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
 21 | #'
 22 | NULL
 23 | 
 24 | #' @rdname gz_functions
 25 | #' @export
 26 | #' @details
 27 | #' - `gunzip_file`: uncompresses a gz-archive to raw binary data
 28 | #'
 29 | #' @examples
 30 | #' # uncompress file
 31 | #' (outfile <- gunzip_file(gzfile, "tmp"))
 32 | #' file.info(outfile)
 33 | #' unlink(outfile)
 34 | #'
 35 | gunzip_file <- function(infile, outfile = gsub("\\.gz$", "", infile),
 36 |                         buffer_size = min(4 * file.size(infile), 2e9)) {
 37 | 
 38 |   if (!file.exists(infile)) stop(sprintf("File '%s' not found!", infile))
 39 | 
 40 |   # The default outfile equals infile when infile has no ".gz" suffix; removing
 41 |   # the existing outfile below would then delete the input itself.
 42 |   same_file <- normalizePath(infile, mustWork = FALSE) ==
 43 |     normalizePath(outfile, mustWork = FALSE)
 44 |   if (same_file)
 45 |     stop(sprintf("infile and outfile are the same file ('%s')", infile))
 46 | 
 47 |   # an existing outfile is replaced
 48 |   if (file.exists(outfile)) unlink(outfile)
 49 | 
 50 |   gunzip_file_impl(infile, outfile, buffer_size)
 51 |   return(invisible(outfile))
 52 | }
 44 | 
 45 | #' @rdname gz_functions
 46 | #' @export
 47 | #' @details
 48 | #' - `gzip_file`: compresses a raw binary data file to a gz-archive
 49 | #'
 50 | #' @examples
 51 | #' # compress file
 52 | #' (outfile <- gzip_file(file))
 53 | #' file.info(outfile)
 54 | #' unlink(outfile)
 55 | gzip_file <- function(infile,
 56 |                       outfile = NA,
 57 |                       buffer_size = min(4 * file.size(infile), 2e9)) {
 58 | 
 59 |   if (!file.exists(infile)) stop(sprintf("File '%s' not found!", infile))
 60 | 
 61 |   # Return before any file is touched: an infile that is already an archive
 62 |   # must not be removed by the "replace existing outfile" step below.
 63 |   if (grepl("\\.gz$", infile)) {
 64 |     warning("Infile is already a gzipped-archive")
 65 |     return(invisible(infile))
 66 |   }
 67 | 
 68 |   if (is.na(outfile)) {
 69 |     # default: the name of the infile plus ".gz", without its path
 70 |     xx <- strsplit(paste0(infile, ".gz"), "\\\\|/")[[1]]
 71 |     outfile <- xx[length(xx)]
 72 |   }
 73 | 
 74 |   # writing the archive onto its own input would destroy the input
 75 |   same_file <- normalizePath(infile, mustWork = FALSE) ==
 76 |     normalizePath(outfile, mustWork = FALSE)
 77 |   if (same_file)
 78 |     stop(sprintf("infile and outfile are the same file ('%s')", infile))
 79 | 
 80 |   # an existing outfile is replaced
 81 |   if (file.exists(outfile)) unlink(outfile)
 82 | 
 83 |   gzip_file_impl(infile, outfile, buffer_size)
 84 |   return(invisible(outfile))
 85 | }
 79 | 
 80 | 
 81 | # Helper function
 82 | # Returns the path of an uncompressed file for `file`: a file without the
 83 | # ".gz" suffix is returned as is (after path.expand), a gz-archive is
 84 | # unzipped into `dir` unless an unzipped copy exists there already and
 85 | # force_gunzip is FALSE.
 86 | check_and_gunzip <- function(file, dir = dirname(file), buffer_size, force_gunzip, quiet) {
 87 |   file <- path.expand(file)
 88 |   is_archive <- grepl("\\.gz$", file)
 89 |   if (!is_archive) return(file)
 90 | 
 91 |   # the unzipped copy lives in `dir` under the archive name without ".gz"
 92 |   target <- file.path(dir, basename(gsub("\\.gz$", "", file)))
 93 |   reuse_existing <- file.exists(target) && !force_gunzip
 94 | 
 95 |   if (reuse_existing) {
 96 |     if (!quiet)
 97 |       cat(sprintf("[INFO] Unzipped file '%s' already found, using that (overwrite with force_gunzip = TRUE)\n",
 98 |                   target))
 99 |     return(target)
100 |   }
101 | 
102 |   # no unzipped copy yet, or force_gunzip is set: unzip from scratch
103 |   unlink(target)
104 |   if (!quiet)
105 |     cat(sprintf("[Decompressing] '%s' to '%s'\n", file, target))
106 | 
107 |   gunzip_file(file, target, buffer_size)
108 |   target
109 | }
112 | 


--------------------------------------------------------------------------------
/R/helpers.R:
--------------------------------------------------------------------------------
  1 | #' Returns the date from an ITCH-filename
  2 | #'
  3 | #' @param file a filename
  4 | #'
  5 | #' @return the date as POSIXct (timezone GMT), or NA if no date could be extracted
  6 | #' @export
  7 | #' @keywords internal
  8 | #'
  9 | #' @examples
 10 | #' get_date_from_filename("03302017.NASDAQ_ITCH50")
 11 | #' get_date_from_filename("20170130.BX_ITCH_50.gz")
 12 | #' get_date_from_filename("S030220-v50-bx.txt.gz")
 13 | #' get_date_from_filename("unknown_file_format")
 14 | get_date_from_filename <- function(file) {
 15 |   # names containing "S" followed by six digits carry a 6 digit date,
 16 |   # all other names are searched for an 8 digit date
 17 |   has_short_date <- grepl("S\\d{6}", file)
 18 |   digits <- data.table::fifelse(
 19 |     has_short_date,
 20 |     sub(".*(\\d{6}).*", "\\1", file),
 21 |     sub(".*(\\d{8}).*", "\\1", file)
 22 |   )
 23 | 
 24 |   # the three supported digit layouts, each rewritten to YYYY-MM-DD
 25 |   from_mmddyyyy <- gsub("(\\d{2})(\\d{2})(\\d{4})", "\\3-\\1-\\2", digits)
 26 |   from_mmddyy <- gsub("(\\d{2})(\\d{2})(\\d{2})", "20\\3-\\1-\\2", digits)
 27 |   from_yyyymmdd <- gsub("(\\d{4})(\\d{2})(\\d{2})", "\\1-\\2-\\3", digits)
 28 | 
 29 |   iso_date <- data.table::fifelse(
 30 |     grepl("NASDAQ_ITCH50(\\.gz)?$", file),
 31 |     from_mmddyyyy,
 32 |     data.table::fifelse(grepl("S\\d{6}-", file), from_mmddyy, from_yyyymmdd)
 33 |   )
 34 | 
 35 |   # a name without a parsable date makes as.POSIXct fail, which yields NA
 36 |   tryCatch(as.POSIXct(iso_date, tz = "GMT"), error = function(e) NA)
 37 | }
 37 | 
 38 | #' Returns the exchange from an ITCH-filename
 39 | #'
 40 | #' @param file a filename
 41 | #'
 42 | #' @return The exchange
 43 | #' @export
 44 | #'
 45 | #' @examples
 46 | #' get_exchange_from_filename("03302017.NASDAQ_ITCH50")
 47 | #' get_exchange_from_filename("20170130.BX_ITCH_50.gz")
 48 | #' get_exchange_from_filename("S030220-v50-bx.txt.gz")
 49 | #' get_exchange_from_filename("Unknown_file_format")
 50 | get_exchange_from_filename <- function(file) {
 51 |   # returns the matched part of every filename that matches the pattern
 52 |   extract <- function(pattern) {
 53 |     regmatches(file, regexpr(pattern, file, perl = TRUE))
 54 |   }
 55 | 
 56 |   # upper case letters between a dot and an underscore, e.g. ".BX_"
 57 |   exch <- extract("(?<=\\.)[A-Z]+(?=_)")
 58 |   # otherwise lower case letters following "-v50-", e.g. "-v50-bx"
 59 |   if (length(exch) == 0) exch <- extract("(?<=-v50-)[a-z]+")
 60 |   if (length(exch) == 0) return(NA)
 61 | 
 62 |   toupper(exch)
 63 | }
 58 | 
 59 | #' Adds meta information (date and exchange) to an itch filename
 60 | #'
 61 | #' Note that if date and exchange information are already present,
 62 | #' they are overwritten
 63 | #'
 64 | #' @param file the filename
 65 | #' @param date the date as a date-class or as a string that is understood by
 66 | #'   [base::as.Date()].
 67 | #' @param exchange the name of the exchange
 68 | #'
 69 | #' @return the filename with exchanged or added date and exchange information
 70 | #' @export
 71 | #'
 72 | #' @examples
 73 | #' add_meta_to_filename("03302017.NASDAQ_ITCH50", "2010-12-24", "TEST")
 74 | #' add_meta_to_filename("20170130.BX_ITCH_50.gz", "2010-12-24", "TEST")
 75 | #' add_meta_to_filename("S030220-v50-bx.txt.gz", "2010-12-24", "TEST")
 76 | #' add_meta_to_filename("unknown_file.ITCH_50", "2010-12-24", "TEST")
 77 | add_meta_to_filename <- function(file, date, exchange) {
 78 |   # without both pieces of meta information the name is left untouched
 79 |   if (is.na(date) || is.na(exchange)) return(file)
 80 | 
 81 |   if (!inherits(date, "POSIXct")) date <- as.Date(date)
 82 | 
 83 |   # Known naming schemes are updated in place, one scheme per guard below.
 84 | 
 85 |   # 03302017.NASDAQ_ITCH50
 86 |   if (grepl("NASDAQ_ITCH", file)) {
 87 |     with_date <- gsub("\\d{8}", format(date, "%m%d%Y"), file)
 88 |     return(gsub("NASDAQ", exchange, with_date))
 89 |   }
 90 | 
 91 |   # S030220-v50-bx.txt.gz
 92 |   if (grepl("S\\d{6}-", file)) {
 93 |     with_date <- gsub("\\d{6}", format(date, "%m%d%y"), file)
 94 |     return(gsub("(?<=v50-)[^\\.]*(?=\\.)", exchange, with_date, perl = TRUE))
 95 |   }
 96 | 
 97 |   # 20170130.BX_ITCH_50.gz
 98 |   if (grepl("(?<!NASDAQ)_ITCH", file, perl = TRUE)) {
 99 |     # the 8 digits that are followed by non-digits and "50" are the date
100 |     with_date <- gsub("\\d{8}(?=[^0-9]+50.*)", format(date, "%Y%m%d"), file,
101 |                       perl = TRUE)
102 |     return(gsub("(?<=\\d{8}\\.)[^_]+", exchange, with_date, perl = TRUE))
103 |   }
104 | 
105 |   # Unknown format... use 20101224.TEST_ITCH_50, appended to the given name
106 |   has_gz <- grepl("\\.gz$", file)
107 |   stem <- if (has_gz) gsub("\\.gz$", "", file) else file
108 |   stem <- gsub("\\.?_?ITCH_?50", "", stem)
109 | 
110 |   renamed <- paste0(stem, "_", format(date, "%Y%m%d"), ".", exchange, "_ITCH_50")
111 | 
112 |   if (has_gz) paste0(renamed, ".gz") else renamed
113 | }
121 | 
122 | 
123 | #' Opens the ITCH Specification PDF
124 | #'
125 | #' The specifications can be found as a PDF <https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf>.
126 | #'
127 | #' @return the URL (invisible)
128 | #' @export
129 | #'
130 | #' @examples
131 | #' \dontrun{
132 | #' open_itch_specification()
133 | #' }
134 | open_itch_specification <- function() {
135 |   # address of the specification PDF, handed to browseURL()
136 |   url <- "https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf"
137 |   browseURL(url)
138 |   invisible(url)
139 | }
139 | 
140 | #' Opens the ITCH sample page
141 | #'
142 | #' The server can be found at <https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/>.
143 | #'
144 | #' @return the URL (invisible)
145 | #' @export
146 | #'
147 | #' @examples
148 | #' \dontrun{
149 | #' open_itch_sample_server()
150 | #' }
151 | open_itch_sample_server <- function() {
152 |   # address of the sample file listing, handed to browseURL()
153 |   url <- "https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/"
154 |   browseURL(url)
155 |   invisible(url)
156 | }
156 | 
157 | # Normalises the message type filter to a vector of unique single characters.
158 | # Accepted inputs: 'AF' (multi-character values are split),
159 | # c('A', 'F'), c(NA, 'A') (NAs are omitted).
160 | # Prints the resulting filter unless quiet is TRUE and returns it.
161 | check_msg_types <- function(filter_msg_type, quiet) {
162 |   filter_msg_type <- filter_msg_type[!is.na(filter_msg_type)]
163 | 
164 |   # split entries such as "AF" into their single characters
165 |   if (any(nchar(filter_msg_type) > 1)) {
166 |     parts <- strsplit(as.character(filter_msg_type), split = "")
167 |     filter_msg_type <- as.character(unlist(parts))
168 |   }
169 | 
170 |   # deduplicate after splitting, so that c("AF", "A") yields each type once
171 |   filter_msg_type <- unique(filter_msg_type)
172 | 
173 |   if (!quiet && length(filter_msg_type) > 0)
174 |     cat(paste0("[Filter]     msg_type: '",
175 |                paste(filter_msg_type, collapse = "', '"),
176 |                "'\n"))
177 | 
178 |   return(filter_msg_type)
179 | }
176 | 
177 | # Validates the timestamp filters and converts them to integer64.
178 | # Returns list(min, max); prints the filter unless quiet is TRUE.
179 | check_timestamps <- function(min_timestamp, max_timestamp, quiet) {
180 |   # NA entries are dropped up front
181 |   lower <- min_timestamp[!is.na(min_timestamp)]
182 |   upper <- max_timestamp[!is.na(max_timestamp)]
183 |   n_lower <- length(lower)
184 |   n_upper <- length(upper)
185 | 
186 |   msg <- "[Filter]     timestamp: "
187 |   if (n_lower == n_upper) {
188 |     # bounds of equal length are reported pairwise as "min - max"
189 |     ranges <- paste(bit64::as.integer64(lower),
190 |                     bit64::as.integer64(upper),
191 |                     sep = " - ", collapse = ", ")
192 |     msg <- paste0(msg, ranges)
193 |   } else if (n_lower == 0 && n_upper == 1) {
194 |     # only an upper bound is given, 0 is used as the lower bound
195 |     lower <- 0
196 |     msg <- paste0(msg, "<= ", bit64::as.integer64(upper))
197 |   } else if (n_lower == 1 && n_upper == 0) {
198 |     # only a lower bound is given, -1 is used as the upper bound
199 |     upper <- -1
200 |     msg <- paste0(msg, ">= ", bit64::as.integer64(lower))
201 |   } else {
202 |     stop(paste("min_ and and max_timestamp have to have the same length",
203 |                "or only one has to have size 1!"))
204 |   }
205 | 
206 |   if (length(lower) != 0 && !quiet) cat(msg, "\n")
207 | 
208 |   list(min = bit64::as.integer64(lower), max = bit64::as.integer64(upper))
209 | }
213 | 
214 | # Translates a stock (ticker) filter into stock locate codes and appends them
215 | # to filter_stock_locate. A single NA as filter_stock means "no stock filter".
216 | check_stock_filters <- function(filter_stock, stock_directory,
217 |                                 filter_stock_locate, infile) {
218 | 
219 |   no_stock_filter <- length(filter_stock) == 1 && is.na(filter_stock)
220 |   if (no_stock_filter) return(filter_stock_locate)
221 | 
222 |   # without a stock directory, fall back to the one stored in the file
223 |   if (length(stock_directory) == 1 && is.na(stock_directory)) {
224 |     warning("filter_stock is given, but no stock_directory is specified. Trying to extract stock directory from file\n")
225 |     stock_directory <- read_stock_directory(infile, quiet = TRUE)
226 |   }
227 | 
228 |   unknown <- filter_stock[!filter_stock %chin% stock_directory$stock]
229 |   if (length(unknown) > 0) {
230 |     stop(paste0("Not all stocks found in stock_directory, missing: '",
231 |                 paste(unknown, collapse = "', '"),
232 |                 "'"))
233 |   }
234 | 
235 |   # extend locate code by the stocks:
236 |   matched_locate <- stock_directory[stock %chin%filter_stock, stock_locate]
237 |   c(filter_stock_locate, matched_locate)
238 | }
235 | 
236 | # Validates the buffer size (in bytes) and fills in the default value.
237 | # A negative, NA, or non-numeric buffer_size selects the default: three times
238 | # the file size (at most 1e9) for gz-archives, 1e8 otherwise.
239 | # Stops for sizes below 50 bytes and warns for sizes above 5e9.
240 | check_buffer_size <- function(buffer_size, file) {
241 |   # integers and doubles are both valid; everything else selects the default
242 |   if (!is.numeric(buffer_size) || length(buffer_size) != 1)
243 |     buffer_size <- NA_real_
244 | 
245 |   if (is.na(buffer_size) || buffer_size < 0) {
246 |     buffer_size <- if (grepl("\\.gz$", file)) {
247 |       min(3 * file.size(file), 1e9)
248 |     } else {
249 |       1e8
250 |     }
251 |     # file.size() is NA for a file that cannot be found
252 |     if (is.na(buffer_size)) buffer_size <- 1e8
253 |   }
254 | 
255 |   if (buffer_size < 50)
256 |     stop(paste("buffer_size has to be at least 50 bytes, otherwise the",
257 |                "messages won't fit"))
258 | 
259 |   if (buffer_size > 5e9)
260 |     warning(paste("You are trying to allocate a large array on the heap, if",
261 |                   "the function crashes, try to use a smaller buffer_size"))
262 |   return(buffer_size)
263 | }
253 | 
254 | #' Formats a number of bytes
255 | #'
256 | #' @param x the values
257 | #' @param digits the number of digits to display, default value is 2
258 | #' @param unit_suffix the unit suffix, default value is 'B' (for bytes),
259 | #' useful is also 'B/s' if you have read/write speeds
260 | #' @param base the base for kilo, mega, ... definition, default is 1000
261 | #'
262 | #' @return the values as a character
263 | #' @export
264 | #'
265 | #' @examples
266 | #' format_bytes(1234)
267 | #' format_bytes(1234567890)
268 | #' format_bytes(123456789012, unit_suffix = "iB", base = 1024)
269 | format_bytes <- function(x, digits = 2, unit_suffix = "B", base = 1000) {
270 |   # a single non-finite value makes the whole result NA
271 |   if (!all(is.finite(x))) return(rep(NA, length(x)))
272 | 
273 |   # future proof it :)
274 |   mtch <- c("", "K", "M", "G", "T", "P", "E", "Z", "Y")
275 | 
276 |   # The magnitude is taken from the absolute value and clamped to the known
277 |   # prefixes: 0 (log = -Inf) and values below 1 are shown without a prefix,
278 |   # values beyond the last prefix use the last prefix.
279 |   nr <- floor(log(abs(x), base))
280 |   nr <- pmin(pmax(nr, 0), length(mtch) - 1)
281 | 
282 |   units <- paste0(mtch[nr + 1], unit_suffix)
283 |   val <- x / base^nr
284 | 
285 |   res <- sprintf(sprintf("%%.%if%%s", digits), val, units)
286 |   names(res) <- names(x)
287 |   res
288 | }
281 | 
282 | # Prints the "[Done]" line with the elapsed time since t0 unless quiet is TRUE.
283 | # file is either a path (its size is used) or a number of bytes; when given,
284 | # the throughput in bytes per second is added to the line.
285 | report_end <- function(t0, quiet, file = NA) {
286 |   diff_secs <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
287 | 
288 |   txt <- ""
289 |   if (!is.na(file)) {
290 |     # file.exists() only accepts character input, so numbers are handled first;
291 |     # a path that does not exist has no size and no speed is reported
292 |     size <- if (is.numeric(file)) {
293 |       file
294 |     } else if (file.exists(file)) {
295 |       file.size(file)
296 |     } else {
297 |       NA_real_
298 |     }
299 | 
300 |     if (is.finite(size) && diff_secs > 0) {
301 |       speed_txt <- format_bytes(size / diff_secs, digits = 2,
302 |                                 unit_suffix = "B/s")
303 |       txt <- sprintf(" at %s", speed_txt)
304 |     }
305 |   }
306 |   if (!quiet) cat(sprintf("[Done]       in %.2f secs%s\n", diff_secs, txt))
307 | }
295 | 


--------------------------------------------------------------------------------
/R/write_itch.R:
--------------------------------------------------------------------------------
  1 | #' Writes a data.frame or a list of data.frames of ITCH messages to file
  2 | #'
  3 | #' Note that additional information, e.g., columns that were added, will be
  4 | #' dropped in the process and only ITCH-compliant information is saved.
  5 | #'
  6 | #' Note that the ITCH filename contains the information for the date and exchange.
  7 | #' This can be specified explicitly in the file argument or it is added if not
  8 | #' turned off `add_meta = FALSE`.
  9 | #'
 10 | #' @param ll a data.frame or a list of data.frames of ITCH messages, in the format
 11 | #'  that the [read_functions()] return
 12 | #' @param file the filename of the target file. If the folder to the file does
 13 | #'   not exist, it will be created recursively
 14 | #' @param add_meta if date and exchange information should be added to the filename.
 15 | #'   Default value is TRUE. Note that adding meta information changes the filename.
 16 | #' @param append if the information should be appended to the file. Default value
 17 | #'   is FALSE
 18 | #' @param compress if the file should be gzipped. Default value is FALSE.
 19 | #'   Note that if you compress a file, buffer_size matters a lot, with larger
 20 | #'   buffers you are more likely to get smaller filesizes in the end.
 21 | #'   Alternatively, but slower, is to write the file without compression fully
 22 | #'   and then gzip the file using another program.
 23 | #' @param buffer_size the maximum buffer size. Default value is 1e8 (100MB).
 24 | #'   Accepted values are > 52 and < 5e9
 25 | #' @param quiet if TRUE, the status messages are suppressed, defaults to FALSE
 26 | #' @param append_warning if append is set, a warning about timestamp ordering is
 27 | #'  given. Set `append_warning = FALSE` to silence the warning. Default
 28 | #'  value is TRUE
 29 | #'
 30 | #' @return the filename (invisibly)
 31 | #' @export
 32 | #'
 33 | #' @examples
 34 | #' infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
 35 | #' sys <- read_system_events(infile, quiet = TRUE)
 36 | #' outfile <- tempfile()
 37 | #' write_itch(sys, outfile)
 38 | #'
 39 | #' # create a list of events, stock directory, and orders and write to a file
 40 | #' sdir <- read_stock_directory(infile, quiet = TRUE)
 41 | #' od   <- read_orders(infile, quiet = TRUE)
 42 | #'
 43 | #' ll <- list(sys, sdir, od)
 44 | #' write_itch(ll, outfile)
 45 | write_itch <- function(ll, file, add_meta = TRUE,
 46 |                        append = FALSE, compress = FALSE,
 47 |                        buffer_size = 1e8, quiet = FALSE,
 48 |                        append_warning = TRUE) {
 49 | 
 50 |   t0 <- Sys.time()
 51 |   if (is.data.frame(ll)) ll <- list(ll)
 52 | 
 53 |   if (add_meta) {
 54 |     exchange <- NA
 55 |     date <- NA
 56 | 
 57 |     has_exchange <- sapply(ll, function(x) "exchange" %in% names(x))
 58 |     has_name <- sapply(ll, function(x) "date" %in% names(x))
 59 | 
 60 |     if (any(has_exchange) && any(has_name)) {
 61 |       idx <- seq_along(ll)[has_exchange][1]
 62 |       exchange <- ll[[idx]]$exchange[1]
 63 | 
 64 |       idx <- seq_along(ll)[has_name][1]
 65 |       date <- ll[[idx]]$date[1]
 66 |     } else {
 67 |       warning("add_meta = TRUE but no exchange or date variable found in ll")
 68 |     }
 69 | 
 70 |     file <- add_meta_to_filename(file, date, exchange)
 71 |   }
 72 | 
 73 |   if (append && append_warning)
 74 |     warning(paste("ITCH files are sorted by timestamp, by appending to an",
 75 |                   "existing file, this is likely not guaranteed!"))
 76 | 
 77 |   # check that all lls are about correct
 78 |   chk <- sapply(ll, function(x)
 79 |     is.data.frame(x) &&
 80 |       all(c("msg_type", "stock_locate", "tracking_number", "timestamp") %in% names(x)))
 81 |   if (!all(chk))
 82 |     stop("All elements in ll need to be a data.frame of ITCH messages")
 83 | 
 84 |   ll <- lapply(ll, data.table::setorder, timestamp)
 85 | 
 86 |   # check and correct filename .gz ending...
 87 |   if (compress && !substr(file, nchar(file) - 2, nchar(file)) == ".gz")
 88 |     file <- paste0(file, ".gz")
 89 | 
 90 |   # check that the file-folder exists
 91 |   folder <- gsub("[/\\][^/\\]+$", "", file)
 92 |   if (folder != file && !dir.exists(folder))
 93 |     dir.create(folder, recursive = TRUE)
 94 | 
 95 |   bytes <- write_itch_impl(ll, file, append = append, gz = compress,
 96 |                            max_buffer_size = buffer_size, quiet = quiet)
 97 | 
 98 |   if (!quiet) cat(sprintf("[Outfile]    '%s'\n", file))
 99 | 
100 |   report_end(t0, quiet, file)
101 | 
102 |   return(invisible(file))
103 | }
104 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | #' @useDynLib RITCH
 2 | #' @importFrom Rcpp sourceCpp
 3 | #' @import data.table
 4 | #' @importFrom nanotime nanotime
 5 | #' @importFrom bit64 as.integer64
 6 | #' @importFrom utils browseURL download.file
 7 | NULL
 8 | 
 9 | #' @title ITCH 50 Example Testing Dataset
10 | #' @name ex20101224.TEST_ITCH_50
11 | #'
12 | #' @section ex20101224.TEST_ITCH_50:
13 | #'
14 | #' The test dataset contains artificial trading data for three made up stocks:
15 | #'  `ALC`, `BOB`, and `CHAR`.
16 | #'
17 | #' The dataset is used in the examples and unit tests of the package.
18 | #'
19 | #' The data contains the following count of messages:
20 | #'
21 | #' - 6 system event (message type `S`)
22 | #' - 3 stock directory (message type `R`)
23 | #' - 3 trading status (message type `H`)
24 | #' - 5000 orders (4997 message type `A` and 3 `F`)
25 | #' - 2000 modifications (198 `E`, 45 `X`, 1745 `D`, and 12 `U` message types)
26 | #' - 5000 trades (message type `P`)
27 | #'
28 | #' The file is also available as `ex20101224.TEST_ITCH_50.gz`.
29 | #'
30 | #' To get real sample ITCH datasets, see the [download_sample_file()]
31 | #' function.
32 | #' @examples
33 | #' file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
34 | #'
35 | #' sys <- read_system_events(file)
36 | NULL
37 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, include = FALSE}
  8 | options(width = 120)
  9 | knitr::opts_chunk$set(
 10 |   collapse = TRUE,
 11 |   comment = "#>",
 12 |   fig.path = "man/figures/README-",
 13 |   out.width = "100%"
 14 | )
 15 | ```
 16 | 
 17 | # RITCH - an R interface to the ITCH Protocol
 18 | 
 19 | <!-- badges: start -->
 20 | [![CRAN status](https://www.r-pkg.org/badges/version/RITCH)](https://CRAN.R-project.org/package=RITCH) [![CRAN RStudio mirror downloads](https://cranlogs.r-pkg.org/badges/RITCH)](https://www.r-pkg.org/pkg/RITCH) [![R-CMD-check](https://github.com/DavZim/RITCH/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/DavZim/RITCH/actions/workflows/R-CMD-check.yaml)
 21 | <!-- badges: end -->
 22 | 
 23 | The `RITCH` library provides an `R` interface to NASDAQ's ITCH protocol, which is used to distribute financial messages to participants.
 24 | Messages include orders, trades, market status, and much more financial information.
 25 | A full list of messages is shown later.
 26 | The main purpose of this package is to parse the binary ITCH files to a [`data.table`](https://CRAN.R-project.org/package=data.table) in `R`.
 27 | 
 28 | The package leverages [`Rcpp`](https://CRAN.R-project.org/package=Rcpp) and `C++` for efficient message parsing.
 29 | 
 30 | Note that the package provides a small simulated sample dataset in the `ITCH_50` format for testing and example purposes.
 31 | Helper functions are provided to list and download sample files from NASDAQ's official server.
 32 | 
 33 | ## Install
 34 | 
 35 | To install `RITCH` you can use the following
 36 | 
 37 | ```R
 38 | # stable version:
 39 | install.packages("RITCH")
 40 | 
 41 | # development version:
 42 | # install.packages("remotes")
 43 | remotes::install_github("DavZim/RITCH")
 44 | ```
 45 | 
 46 | ## Quick Overview
 47 | 
 48 | The main functions of `RITCH` are read-related and are easily identified by their `read_` prefix.
 49 | 
 50 | Due to the inherent structural differences between message classes, each class has its own read function.
 51 | A list of message types and the respective classes are provided later in this Readme.
 52 | 
 53 | Example message classes used in this example are *orders* and *trades*.
 54 | First we define the file to load and count the messages, then we read in the orders and the first 100 trades
 55 | 
 56 | ```{r}
 57 | library(RITCH)
 58 | # use built in example dataset
 59 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
 60 | 
 61 | # count the number of messages in the file
 62 | msg_count <- count_messages(file)
 63 | dim(msg_count)
 64 | names(msg_count)
 65 | 
 66 | # read the orders into a data.table
 67 | orders <- read_orders(file)
 68 | dim(orders)
 69 | names(orders)
 70 | 
 71 | # read the first 100 trades
 72 | trades <- read_trades(file, n_max = 100)
 73 | dim(trades)
 74 | names(trades)
 75 | ```
 76 | Note that the file can be a plain `ITCH_50` file or a gzipped `ITCH_50.gz` file, which will be decompressed to the current directory.
 77 | You may also note that the output reports quite a low read speed in the `MB/s`.
 78 | This lowish number arises because the reported speed includes the parsing process; furthermore, because of the overhead of setup code, the number gets higher on larger files.
 79 | 
 80 | If you want to know more about the functions of the package, read on.
 81 | 
 82 | ## Main Functions
 83 | 
 84 | `RITCH` provides the following main functions:
 85 | 
 86 | - `read_itch(file, ...)` to read an ITCH file
 87 | Convenient wrappers for different message classes such as `orders`, `trades`, etc are also provided as `read_orders()`, `read_trades()`, ...
 88 | - `filter_itch(infile, outfile, ...)` to filter an ITCH file and write directly to another file without loading the data into R
 89 | - `write_itch(data, file, ...)` to write a dataset to an ITCH file
 90 | 
 91 | There are also some helper functions provided, a selection is:
 92 | 
 93 | - `download_sample_file(choice)` to download a sample file from the NASDAQ server and `list_sample_files()` to get a list of all available sample files
 94 | - `download_stock_directory(exchange, date)` to download the stock locate information for a given exchange and date
 95 | - `open_itch_sample_server()` to open the official NASDAQ server in your browser, which hosts among other things example data files
 96 | - `gzip_file(infile, outfile)` and `gunzip_file(infile, outfile)` for gzip functionality
 97 | - `open_itch_specification()` to open the official NASDAQ ITCH specification PDF in your browser
 98 | 
 99 | ## Writing ITCH Files
100 | 
101 | `RITCH` also provides functionality for writing ITCH files.
102 | Although it could be stored in other file formats (for example a database or a [`qs`](https://CRAN.R-project.org/package=qs) file), ITCH files are quite optimized regarding size as well as write/read speeds.
103 | Thus the `write_itch()` function allows you to write a single or multiple types of message to an `ITCH_50` file.
104 | Note however, that only the standard columns are supported.
105 | Additional columns will not be written to file!
106 | 
107 | Additional information can be saved in the filename.
108 | By default the date, exchange, and fileformat information is added to the filename unless you specify `add_meta = FALSE`, in which case the given name is used.
109 | 
110 | As a last note: if you write your data to an ITCH file and want to filter for stocks later on, make sure to save the stock directory of that day/exchange, either externally or in the ITCH file directly (see example below).
111 | 
112 | ### Simple Write Example
113 | 
114 | A simple write example would be to read all modifications from an ITCH file and save it to a separate file to save space, reduce read times later on, etc.
115 | 
116 | ```{r}
117 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
118 | md <- read_modifications(file, quiet = TRUE)
119 | dim(md)
120 | names(md)
121 | 
122 | outfile <- write_itch(md, "modifications", compress = TRUE)
123 | 
124 | # compare file sizes
125 | files <- c(full_file = file, subset_file = outfile)
126 | format_bytes(sapply(files, file.size))
127 | ```
128 | ```{r, include = FALSE}
129 | unlink(outfile)
130 | ```
131 | 
132 | 
133 | ### Comprehensive Write Example
134 | 
135 | A typical work flow would look like this:
136 | 
137 | - read in some message classes from file and filter for certain stocks
138 | - save the results for later analysis, also compress to save disk space
139 | 
140 | ```{r}
141 | ## Read in the different message classes
142 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
143 | 
144 | # read in the different message types
145 | data <- read_itch(file,
146 |                   c("system_events", "stock_directory", "orders"),
147 |                   filter_stock_locate = c(1, 3),
148 |                   quiet = TRUE)
149 | 
150 | str(data, max.level = 1)
151 | 
152 | 
153 | ## Write the different message classes
154 | outfile <- write_itch(data,
155 |                       "alc_char_subset",
156 |                       compress = TRUE)
157 | outfile
158 | 
159 | # compare file sizes
160 | format_bytes(
161 |   sapply(c(full_file = file, subset_file = outfile),
162 |          file.size)
163 | )
164 | 
165 | 
166 | ## Lastly, compare the two datasets to see if they are identical
167 | data2 <- read_itch(outfile, quiet = TRUE)
168 | all.equal(data, data2)
169 | ```
170 | ```{r, include=FALSE}
171 | # remove files from write_itch again...
172 | unlink(outfile)
173 | outfile_unz <- gsub("\\.gz$", "", outfile)
174 | unlink(outfile_unz)
175 | ```
176 | 
177 | For comparison, the same data stored in the [`qs`](https://CRAN.R-project.org/package=qs) format results in `44788` bytes.
178 | <!---qs::qsave(data, "data.qs", preset = "archive");file.info("data.qs")[["size"]];unlink("data.qs")-->
179 | 
180 | ## ITCH Messages
181 | 
182 | There are a total of 22 different message types which are grouped into 13 classes by `RITCH`.
183 | 
184 | The messages and their respective classes are:
185 | ```{r, echo=FALSE}
186 | d <- get_msg_classes()
187 | d$msg_type <- paste0("<code>", d$msg_type, "</code>")
188 | d$read_function <- paste0("<code>", "read_", d$msg_class, "()", "</code>")
189 | 
190 | data.table::setcolorder(d, c("msg_type", "msg_class", "read_function",
191 |                              "msg_name", "doc_nr"))
192 | data.table::setnames(d, c("Type", "<code>RITCH</code> Class",
193 |                           "<code>RITCH</code> Read Function", "ITCH Name",
194 |                           "ITCH Spec Section"))
195 | 
196 | knitr::kable(d, escape = FALSE)
197 | ```
198 | 
199 | Note that if you are interested in the exact definition of the messages and its components, you should look into the [official ITCH specification](https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf), which can also be opened by calling `open_itch_specification()`.
200 | 
201 | 
202 | ## Data
203 | 
204 | The `RITCH` package provides a small, artificial dataset in the ITCH format for example and test purposes.
205 | To learn more about the dataset check `?ex20101224.TEST_ITCH_50`.
206 | 
207 | To access the dataset use:
208 | ```{r}
209 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
210 | count_messages(file, add_meta_data = TRUE, quiet = TRUE)
211 | ```
212 | Note that the example dataset does not contain messages from all classes but is limited to 6 system messages, 3 stock directory, 3 stock trading action, 5000 trade, 5000 order, and 2000 order modification messages.
213 | As seen by the 3 stock directory messages, the file contains data about 3 made up stocks (see also the plot later in the Readme).
214 | 
215 | NASDAQ provides sample ITCH files on their official server at <https://emi.nasdaq.com/ITCH/Nasdaq%20ITCH/> (or in R use `open_itch_sample_server()`) which can be used to test code on larger datasets.
216 | Note that the sample files are up to 5GB compressed, which inflate to about 13GB.
217 | To interact with the sample files, use `list_sample_files()` and `download_sample_file()`.
218 | 
219 | 
220 | ## Notes on Memory and Speed
221 | 
222 | There are some tweaks available to deal with memory and speed issues.
223 | For faster reading speeds, you can increase the buffer size of the `read_` functions to something around 1 GB or more (`buffer_size = 1e9`).
224 | 
225 | ### Provide Message Counts
226 | 
227 | If you have to read from a single file multiple times, for example because you want to extract orders and trades, you can count the messages beforehand and provide the counts to each read's `n_max` argument, so that the reads do not need to pass over the file again just to count the number of messages.
228 | ```{r}
229 | # count messages once
230 | n_msgs <- count_messages(file, quiet = TRUE)
231 | 
232 | # use counted messages multiple times, saving file passes
233 | orders <- read_orders(file, quiet = TRUE, n_max = n_msgs)
234 | trades <- read_trades(file, quiet = TRUE, n_max = n_msgs)
235 | ```
236 | 
237 | ### Batch Read
238 | 
239 | If the dataset does not fit entirely into RAM, you can do a partial read specifying `skip` and `n_max`, similar to this:
240 | 
241 | ```{r}
242 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
243 | 
244 | n_messages <- count_orders(count_messages(file, quiet = TRUE))
245 | n_messages
246 | 
247 | # read 1000 messages at a time
248 | n_batch <- 1000
249 | n_parsed <- 0
250 | 
251 | while (n_parsed < n_messages) {
252 |   cat(sprintf("Parsing Batch %04i - %04i", n_parsed, n_parsed + n_batch))
253 |   # read in a batch
254 |   df <- read_orders(file, quiet = TRUE, skip = n_parsed, n_max = n_batch)
255 |   cat(sprintf(": with %04i orders\n", nrow(df)))
256 |   # use the data
257 |   # ...
258 |   n_parsed <- n_parsed + n_batch
259 | }
260 | ```
261 | 
262 | ### Filter when Reading Data
263 | 
264 | You can also filter a dataset directly while reading messages for `msg_type`, `stock_locate`, `timestamp` range, as well as `stock`.
265 | Note that filtering for a specific stock is just a shorthand lookup for the stock's `stock_locate` code, therefore a `stock_directory` needs to be supplied (either by providing the output from `read_stock_directory()` or `download_stock_directory()`) or the function will try to extract the stock directory from the file (might take some time depending on the size of the file).
266 | 
267 | ```{r}
268 | # read in the stock directory as we filter for stock names later on
269 | sdir <- read_stock_directory(file, quiet = TRUE)
270 | 
271 | od <- read_orders(
272 |   file,
273 |   filter_msg_type = "A",          # take only 'No MPID add orders'
274 |   min_timestamp = 43200000000000, # start at 12:00:00.000000
275 |   max_timestamp = 55800000000000, # end at 15:30:00.000000
276 |   filter_stock_locate = 1,        # take only stock with code 1
277 |   filter_stock = "CHAR",          # but also take stock CHAR
278 |   stock_directory = sdir          # provide the stock_directory to match stock names to stock_locates
279 | )
280 | 
281 | # count the different message types
282 | od[, .(n = .N), by = msg_type]
283 | # see if the timestamp is in the specified range
284 | range(od$timestamp)
285 | # count the stock/stock-locate codes
286 | od[, .(n = .N), by = .(stock_locate, stock)]
287 | ```
288 | 
289 | ### Filter Data to File
290 | 
291 | On larger files, reading the data into memory might not be the best idea, especially if only a small subset is actually needed.
292 | In this case, the `filter_itch` function will come in handy.
293 | 
294 | The basic design is identical to the `read_itch` function but instead of reading the messages into memory, they are immediately written to a file.
295 | 
296 | Taking the filter data example from above, we can do the following
297 | 
298 | ```{r}
299 | # the function returns the final name of the output file
300 | outfile <- filter_itch(
301 |   infile = file,
302 |   outfile = "filtered",
303 |   filter_msg_type = "A",          # take only 'No MPID add orders'
304 |   min_timestamp = 43200000000000, # start at 12:00:00.000000
305 |   max_timestamp = 55800000000000, # end at 15:30:00.000000
306 |   filter_stock_locate = 1,        # take only stock with code 1
307 |   filter_stock = "CHAR",          # but also take stock CHAR
308 |   stock_directory = sdir          # provide the stock_directory to match stock names to stock_locates
309 | )
310 | 
311 | format_bytes(file.size(outfile))
312 | 
313 | # read in the orders from the filtered file
314 | od2 <- read_orders(outfile)
315 | 
316 | # check that the filtered dataset contains the same information as in the example above
317 | all.equal(od, od2)
318 | ```
319 | ```{r, include=FALSE}
320 | # remove files from filter_itch again...
321 | unlink(outfile)
322 | ```
323 | 
324 | 
325 | ## Create a Plot with Trades and Orders of the largest ETFs
326 | 
327 | As a last step, a quick visualization of the example dataset
328 | 
329 | ```{r ETF_plot}
330 | library(ggplot2)
331 | 
332 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
333 | 
334 | # load the data
335 | orders <- read_orders(file, quiet = TRUE)
336 | trades <- read_trades(file, quiet = TRUE)
337 | 
338 | # replace the buy-factor with something more useful
339 | orders[, buy := ifelse(buy, "Bid", "Ask")]
340 | 
341 | ggplot() +
342 |   geom_point(data = orders,
343 |              aes(x = as.POSIXct(datetime), y = price, color = buy), alpha = 0.2) +
344 |   geom_step(data = trades, aes(x = as.POSIXct(datetime), y = price), size = 0.2) +
345 |   facet_grid(stock~., scales = "free_y") +
346 |   theme_light() +
347 |   labs(title = "Orders and Trades of Three Simulated Stocks",
348 |        subtitle = "Date: 2010-12-24 | Exchange: TEST",
349 |        caption = "Source: RITCH package", x = "Time", y = "Price", color = "Side") +
350 |   scale_y_continuous(labels = scales::dollar) +
351 |   scale_color_brewer(palette = "Set1")
352 | ```
353 | 
354 | 
355 | ## Other Notes
356 | 
357 | If you find this package useful or have any other kind of feedback, I'd be happy if you let me know. Otherwise, if you need more functionality, please feel free to create an issue or a pull request.
358 | 
359 | Citation and CRAN release are WIP.
360 | 
361 | If you are interested in gaining a better understanding of the internal data structures, converting data to and from binary, have a look at the `debug` folder and its contents (only available on the [RITCH's Github page](https://github.com/DavZim/RITCH/)).
362 | 


--------------------------------------------------------------------------------
/RITCH.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://davzim.github.io/RITCH/
2 | template:
3 |   bootstrap: 5
4 | 
5 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | Fix bug where the gz functionality would write to the current directory or to the user library.


--------------------------------------------------------------------------------
/debug/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | # Debug Tools for `RITCH`
  6 | 
  7 | This document quickly outlines the debugging tools of the `RITCH` library.
  8 | 
  9 | ## Building
 10 | 
 11 | These tools are used for debugging and understanding the data format. They are not shipped with the package itself but need to be sourced independently.
 12 | 
 13 | If you want to play around with the tools, clone the git repository and source the `debug/debug_tools.cpp` script:
 14 | 
 15 | ```{r, include=FALSE}
 16 | Sys.setenv("PKG_LIBS" = "-lz")
 17 | Rcpp::sourceCpp("debug_tools.cpp")
 18 | ```
 19 | ```{r, eval=FALSE}
 20 | Sys.setenv("PKG_LIBS" = "-lz")
 21 | Rcpp::sourceCpp("debug/debug_tools.cpp")
 22 | ```
 23 | 
 24 | Note that `debug_tools.cpp` includes `../src/RITCH.h` as well as `../src/MessageTypes.h` (relative from the `debug_tools.cpp` script), if you have cloned the repository as is, it should work out of the box, otherwise, make sure that the two header files are found.
 25 | 
 26 | ## Debug Tools
 27 | 
 28 | - `dbg_get_message_length(msgs)` returns the size of Messages in bytes. Note that each message adds 2 bytes that are not used
 29 | ```{r}
 30 | dbg_get_message_length(c("A", "F"))
 31 | ```
 32 | - `dbg_itch_file(filename)` allows you to interactively list messages in a file.
 33 |   You are asked for input inside the function, which can be:
 34 | 
 35 |   - msg type, e.g., `A`, `H`, `h` to see the next instance of that message type
 36 |   - numeric value, e.g., `3` to see the next N values
 37 | 
 38 |   For example:
 39 | 
 40 | ```r
 41 | file <- "20191230.BX_ITCH_50"
 42 | dbg_itch_file(file)
 43 | ## Debugging File '20191230.BX_ITCH_50' (.gz-file? no)
 44 | ## Usage:
 45 | ## - Empty: next message
 46 | ## - Number: for next N messages
 47 | ## - Character: if valid message type, print the next message, e.g., 'A' for add order
 48 | ## - non valid Character: exits the debugging tool
 49 | ## Note: Bytes in parenthesis show the first two bytes, which are not used!
 50 | ## Number of Messages:
 51 | ## - 'S': 6
 52 | ## - 'R': 8906
 53 | ## - 'H': 8961
 54 | ## - 'Y': 9013
 55 | ## - 'L': 6171
 56 | ## - 'V': 1
 57 | ## - 'W': 0
 58 | ## - 'K': 0
 59 | ## - 'J': 0
 60 | ## - 'h': 0
 61 | ## - 'A': 12210139
 62 | ## - 'F': 45058
 63 | ## - 'E': 578839
 64 | ## - 'C': 2686
 65 | ## - 'X': 348198
 66 | ## - 'D': 11821540
 67 | ## - 'U': 1741672
 68 | ## - 'P': 134385
 69 | ## - 'Q': 0
 70 | ## - 'B': 0
 71 | ## - 'I': 0
 72 | ## - 'N': 2241182
 73 | ## =============================
 74 | ## 'S' (len 2 + 12) idx    0 at offset     0 (0x0000) | (00 0c) 53 00 00 00 00 0a 2d f4 92 1d 67 4f
 75 | #RITCH> 3
 76 | ## Showing next 3 messages
 77 | ## 'R' (len 2 + 39) idx    1 at offset    14 (0x000e) | (00 27) 52 00 01 00 00 0a 66 a0 e0 dc 44 41 20 20 20 20 20 20 20 4e 20 00 00 00 64 4e 43 5a 20 50 4e 20 31 4e 00 00 00 00 4e
 78 | ## 'R' (len 2 + 39) idx    2 at offset    55 (0x0037) | (00 27) 52 00 02 00 00 0a 66 a0 e2 c8 6c 41 41 20 20 20 20 20 20 4e 20 00 00 00 64 4e 43 5a 20 50 4e 20 31 4e 00 00 00 01 4e
 79 | ## 'H' (len 2 + 25) idx    3 at offset    96 (0x0060) | (00 19) 48 00 01 00 00 0a 66 a0 e4 ff bd 41 20 20 20 20 20 20 20 54 20 20 20 20 20
 80 | #RITCH> A
 81 | ## Applied filter to message type 'A'
 82 | ## 'A' (len 2 + 36) idx 32873 at offset 973915 (0xedc5b) | (00 24) 41 20 2c 00 00 16 eb 55 2c 88 24 00 00 00 00 00 00 00 04 42 00 00 2e 7c 55 53 4f 20 20 20 20 20 00 01 fa 40
 83 | #RITCH> q
 84 | ## Stopping Printing Messages
 85 | ```
 86 | 
 87 | - `dbg_hex_to_char(hex_string)` converts a hex value to character
 88 | - `dbg_hex_to_int(hex_string)` converts a hex value to integer
 89 | - `dbg_hex_to_dbl(hex_string)` converts a hex value to dbl
 90 | ```{r}
 91 | dbg_hex_to_char("52 49 54 43 48 20 20 20") # 'RITCH   '
 92 | dbg_hex_to_int("01 23 45 67") # 19088743
 93 | dbg_hex_to_dbl("00 01 fa 40") # 12.96
 94 | ```
 95 | - `dbg_hex_compare(x, y)` to get a quick comparison of two hex strings
 96 | ```{r}
 97 | x <- "00 01 02 03 04"
 98 | y <- "00 01 00 03 0a"
 99 | dbg_hex_compare(x, y)
100 | ```
101 | 
102 | - `dbg_hex_count_messages(hex_string)` counts the number of messages by type in a hex string
103 | ```{r}
104 | incomplete_hex_string <- "00 00 53" # . . S
105 | dbg_hex_count_messages(incomplete_hex_string)
106 | ```
107 | - `dbg_hex_to_*()` to convert hexadecimal strings to message `data.table`s (* can be `orders`, `trades`, `modifications`, `system_events`, `stock_directory`, `trading_status`, `reg_sho`, `market_participant_states`, `mwcb`, `ipo`, `luld`, `noii`, or `rpii`)
108 | ```{r}
109 | hex_string <- paste(
110 |   "00 00", # first 2 bytes, which are not used
111 |   "46", # message type 'F'
112 |   "20 2c", # stock locate 8236
113 |   "00 00", # tracking number 0
114 |   "16 eb 55 2c 88 24", # timestamp 25200002107428
115 |   "00 00 00 00 00 00 00 04", # order ref 4
116 |   "42", # buy == TRUE -> 'B'
117 |   "00 00 2e 7c", # shares 11900
118 |   "55 53 4f 20 20 20 20 20", # stock 'USO     ' (length 8)
119 |   "00 01 fa 40", # price 129600 (12.96)
120 |   "56 49 52 54" # mpid/attribution 'VIRT'
121 | )
122 | 
123 | dbg_hex_to_orders(hex_string)
124 | ```
125 | 
126 | - `dbg_messages_to_hex()` to convert the message `data.table`s to a hexadecimal string
127 | ```{r}
128 | od <- data.table::data.table(
129 |   msg_type = "F",
130 |   stock_locate = 8236L,
131 |   tracking_number = 0L,
132 |   timestamp = bit64::as.integer64(25200002107428),
133 |   order_ref = bit64::as.integer64(4),
134 |   buy = TRUE,
135 |   shares = 11900L,
136 |   stock = "USO",
137 |   price = 12.96,
138 |   mpid = "VIRT"
139 | )
140 | hex_order <- dbg_messages_to_hex(od)
141 | hex_order
142 | 
143 | # convert back to a data.table and see if they are identical
144 | od2 <- dbg_hex_to_orders(hex_order)
145 | all.equal(od, od2)
146 | ```
147 | 


--------------------------------------------------------------------------------
/debug/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Debug Tools for `RITCH`
  3 | 
  4 | This document quickly outlines the debugging tools of the `RITCH`
  5 | library.
  6 | 
  7 | ## Building
  8 | 
  9 | These tools are used for debugging and understanding the data format.
 10 | They are not shipped with the package itself but need to be sourced
 11 | independently.
 12 | 
 13 | If you want to play around with the tools, clone the git repository and
 14 | source the `debug/debug_tools.cpp` script:
 15 | 
 16 | ``` r
 17 | Sys.setenv("PKG_LIBS" = "-lz")
 18 | Rcpp::sourceCpp("debug/debug_tools.cpp")
 19 | ```
 20 | 
 21 | Note that `debug_tools.cpp` includes `../src/RITCH.h` as well as
 22 | `../src/MessageTypes.h` (relative from the `debug_tools.cpp` script), if
 23 | you have cloned the repository as is, it should work out of the box,
 24 | otherwise, make sure that the two header files are found.
 25 | 
 26 | ## Debug Tools
 27 | 
 28 | - `dbg_get_message_length(msgs)` returns the size of Messages in bytes.
 29 |   Note that each message adds 2 bytes that are not used
 30 | 
 31 | ``` r
 32 | dbg_get_message_length(c("A", "F"))
 33 | ```
 34 | 
 35 |     ##  A  F 
 36 |     ## 38 42
 37 | 
 38 | - `dbg_itch_file(filename)` allows you to interactively list messages in
 39 |   a file. You are asked for input inside the function, which can be:
 40 | 
 41 |   - msg type, e.g., `A`, `H`, `h` to see the next instance of that
 42 |     message type
 43 |   - numeric value, e.g., `3` to see the next N values
 44 | 
 45 |   For example:
 46 | 
 47 | ``` r
 48 | file <- "20191230.BX_ITCH_50"
 49 | dbg_itch_file(file)
 50 | ## Debugging File '20191230.BX_ITCH_50' (.gz-file? no)
 51 | ## Usage:
 52 | ## - Empty: next message
 53 | ## - Number: for next N messages
 54 | ## - Character: if valid message type, print the next message, e.g., 'A' for add order
 55 | ## - non valid Character: exits the debugging tool
 56 | ## Note: Bytes in parenthesis show the first two bytes, which are not used!
 57 | ## Number of Messages:
 58 | ## - 'S': 6
 59 | ## - 'R': 8906
 60 | ## - 'H': 8961
 61 | ## - 'Y': 9013
 62 | ## - 'L': 6171
 63 | ## - 'V': 1
 64 | ## - 'W': 0
 65 | ## - 'K': 0
 66 | ## - 'J': 0
 67 | ## - 'h': 0
 68 | ## - 'A': 12210139
 69 | ## - 'F': 45058
 70 | ## - 'E': 578839
 71 | ## - 'C': 2686
 72 | ## - 'X': 348198
 73 | ## - 'D': 11821540
 74 | ## - 'U': 1741672
 75 | ## - 'P': 134385
 76 | ## - 'Q': 0
 77 | ## - 'B': 0
 78 | ## - 'I': 0
 79 | ## - 'N': 2241182
 80 | ## =============================
 81 | ## 'S' (len 2 + 12) idx    0 at offset     0 (0x0000) | (00 0c) 53 00 00 00 00 0a 2d f4 92 1d 67 4f
 82 | #RITCH> 3
 83 | ## Showing next 3 messages
 84 | ## 'R' (len 2 + 39) idx    1 at offset    14 (0x000e) | (00 27) 52 00 01 00 00 0a 66 a0 e0 dc 44 41 20 20 20 20 20 20 20 4e 20 00 00 00 64 4e 43 5a 20 50 4e 20 31 4e 00 00 00 00 4e
 85 | ## 'R' (len 2 + 39) idx    2 at offset    55 (0x0037) | (00 27) 52 00 02 00 00 0a 66 a0 e2 c8 6c 41 41 20 20 20 20 20 20 4e 20 00 00 00 64 4e 43 5a 20 50 4e 20 31 4e 00 00 00 01 4e
 86 | ## 'H' (len 2 + 25) idx    3 at offset    96 (0x0060) | (00 19) 48 00 01 00 00 0a 66 a0 e4 ff bd 41 20 20 20 20 20 20 20 54 20 20 20 20 20
 87 | #RITCH> A
 88 | ## Applied filter to message type 'A'
 89 | ## 'A' (len 2 + 36) idx 32873 at offset 973915 (0xedc5b) | (00 24) 41 20 2c 00 00 16 eb 55 2c 88 24 00 00 00 00 00 00 00 04 42 00 00 2e 7c 55 53 4f 20 20 20 20 20 00 01 fa 40
 90 | #RITCH> q
 91 | ## Stopping Printing Messages
 92 | ```
 93 | 
 94 | - `dbg_hex_to_char(hex_string)` converts a hex value to character
 95 | - `dbg_hex_to_int(hex_string)` converts a hex value to integer
 96 | - `dbg_hex_to_dbl(hex_string)` converts a hex value to dbl
 97 | 
 98 | ``` r
 99 | dbg_hex_to_char("52 49 54 43 48 20 20 20") # 'RITCH   '
100 | ```
101 | 
102 |     ## [1] "RITCH   "
103 | 
104 | ``` r
105 | dbg_hex_to_int("01 23 45 67") # 19088743
106 | ```
107 | 
108 |     ## integer64
109 |     ## [1] 19088743
110 | 
111 | ``` r
112 | dbg_hex_to_dbl("00 01 fa 40") # 12.96
113 | ```
114 | 
115 |     ## [1] 12.96
116 | 
117 | - `dbg_hex_compare(x, y)` to get a quick comparison of two hex strings
118 | 
119 | ``` r
120 | x <- "00 01 02 03 04"
121 | y <- "00 01 00 03 0a"
122 | dbg_hex_compare(x, y)
123 | ```
124 | 
125 |     ##  idx |    x |    y | diff
126 |     ## -------------------------
127 |     ##    1 | 0x00 | 0x00 |     
128 |     ##    2 | 0x01 | 0x01 |     
129 |     ##    3 | 0x02 | 0x00 |  XXX
130 |     ##    4 | 0x03 | 0x03 |     
131 |     ##    5 | 0x04 | 0x0a |  XXX
132 | 
133 | - `dbg_hex_count_messages(hex_string)` counts the number of messages by
134 |   type in a hex string
135 | 
136 | ``` r
137 | incomplete_hex_string <- "00 00 53" # . . S
138 | dbg_hex_count_messages(incomplete_hex_string)
139 | ```
140 | 
141 |     ##     msg_type count
142 |     ##  1:        S     1
143 |     ##  2:        R     0
144 |     ##  3:        H     0
145 |     ##  4:        Y     0
146 |     ##  5:        L     0
147 |     ##  6:        V     0
148 |     ##  7:        W     0
149 |     ##  8:        K     0
150 |     ##  9:        J     0
151 |     ## 10:        h     0
152 |     ## 11:        A     0
153 |     ## 12:        F     0
154 |     ## 13:        E     0
155 |     ## 14:        C     0
156 |     ## 15:        X     0
157 |     ## 16:        D     0
158 |     ## 17:        U     0
159 |     ## 18:        P     0
160 |     ## 19:        Q     0
161 |     ## 20:        B     0
162 |     ## 21:        I     0
163 |     ## 22:        N     0
164 |     ##     msg_type count
165 | 
166 | - `dbg_hex_to_*()` to convert hexadecimal strings to message
167 |   `data.table`s (\* can be `orders`, `trades`, `modifications`,
168 |   `system_events`, `stock_directory`, `trading_status`, `reg_sho`,
169 |   `market_participant_states`, `mwcb`, `ipo`, `luld`, `noii`, or `rpii`)
170 | 
171 | ``` r
172 | hex_string <- paste(
173 |   "00 00", # first 2 empty bytes
174 |   "46", # message type 'F'
175 |   "20 2c", # stock locate 8236
176 |   "00 00", # tracking number 0
177 |   "16 eb 55 2c 88 24", # timestamp 25200002107428
178 |   "00 00 00 00 00 00 00 04", # order ref 4
179 |   "42", # buy == TRUE -> 'B'
180 |   "00 00 2e 7c", # shares 11900
181 |   "55 53 4f 20 20 20 20 20", # stock 'USO     ' (length 8)
182 |   "00 01 fa 40", # price 129600 (12.96)
183 |   "56 49 52 54" # mpid/attribution 'VIRT'
184 | )
185 | 
186 | dbg_hex_to_orders(hex_string)
187 | ```
188 | 
189 |     ##    msg_type stock_locate tracking_number      timestamp order_ref  buy shares
190 |     ## 1:        F         8236               0 25200002107428         4 TRUE  11900
191 |     ##    stock price mpid
192 |     ## 1:   USO 12.96 VIRT
193 | 
194 | - `dbg_messages_to_hex()` to convert the message `data.table`s to a
195 |   hexadecimal string
196 | 
197 | ``` r
198 | od <- data.table::data.table(
199 |   msg_type = "F",
200 |   stock_locate = 8236L,
201 |   tracking_number = 0L,
202 |   timestamp = bit64::as.integer64(25200002107428),
203 |   order_ref = bit64::as.integer64(4),
204 |   buy = TRUE,
205 |   shares = 11900L,
206 |   stock = "USO",
207 |   price = 12.96,
208 |   mpid = "VIRT"
209 | )
210 | hex_order <- dbg_messages_to_hex(od)
211 | hex_order
212 | ```
213 | 
214 |     ## [1] "00 00 46 20 2c 00 00 16 eb 55 2c 88 24 00 00 00 00 00 00 00 04 42 00 00 2e 7c 55 53 4f 20 20 20 20 20 00 01 fa 40 56 49 52 54"
215 | 
216 | ``` r
217 | # convert back to a data.table and see if they are identical
218 | od2 <- dbg_hex_to_orders(hex_order)
219 | all.equal(od, od2)
220 | ```
221 | 
222 |     ## [1] TRUE
223 | 


--------------------------------------------------------------------------------
/debug/debug_tools.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * #######################################################
  3 |  * This file holds debug functions to look at and write
  4 |  * ITCH hex buffers
  5 |  *
  6 |  * Functions include:
  7 |  * - dbg_get_message_length to get the length of a message
  8 |  * - dbg_itch_file to open an interactive mode in which the
  9 |  *   file is shown as hex code, one message at a time, also
 10 |  *   includes modes to quickly see a certain message type
 11 |  * Hex related functions to convert hex codes into R types
 12 |  * - dbg_hex_to_char
 13 |  * - dbg_hex_to_int
 14 |  * - dbg_hex_to_dbl
 15 |  * - dbg_hex_count_messages to count the orders in a hex string
 16 |  * - dbg_hex_compare to compare two hex strings
 17 |  *
 18 |  * Convert Hex Strings into data.tables and vice versa
 19 |  * - orders: dbg_hex_to_orders and dbg_messages_to_hex
 20 |  * - trades:
 21 |  * - modifications:
 22 |  *
 23 |  *
 24 |  * #######################################################
 25 |  */
 26 | 
 27 | // TODO: messages_to_bin: a function that takes in a list of dataframes
 28 | // for each list, have a running index (which messages have already been parsed in this element?)
 29 | // then find the next smallest timestamp, write one message, find next timestamp... until all are fully written
 30 | // write stream to file
 31 | 
 32 | #include <Rcpp.h>
 33 | #include <zlib.h>
 34 | #include "../src/specifications.h"
 35 | #include "../src/helper_functions.h"
 36 | #include "../src/read_functions.h"
 37 | #include "../src/write_functions.h"
 38 | 
 39 | // get_message_length(c("A", "B"))
 40 | // [[Rcpp::export]]
 41 | int dbg_get_message_length_impl(std::string m) {
 42 |   unsigned char msg = m[0];
 43 |   return get_message_size(msg);
 44 | }
 45 | 
 46 | /*** R
 47 | dbg_get_message_length <- function(x) {
 48 |   sapply(x, dbg_get_message_length_impl)
 49 | }
 50 | */
 51 | 
 52 | // counts message types in a buffer
 53 | std::vector<int64_t> count_messages_buffer(unsigned char* buf,
 54 |                                            const uint64_t n_bytes) {
 55 |   std::vector<int64_t> count(N_TYPES, 0);
 56 |   uint64_t i = 0;
 57 |   while (i < n_bytes) {
 58 |     const unsigned char mt = buf[i + 2];
 59 | 
 60 |     count[mt - 'A']++;
 61 |     i += get_message_size(mt);
 62 |   }
 63 | 
 64 |   return take_needed_messages(count);
 65 | }
 66 | int64_t sum_messages(std::vector<int64_t>& count, unsigned char msg) {
 67 |   return count[msg - 'A'];
 68 | }
 69 | 
 70 | /*
 71 |  * Prints the bytes of each message of an ITCH file
 72 |  * Inputs are either
 73 |  *  - numeric which result in printing the next N values
 74 |  *  - a single character which corresponds to the message types and prints the next instance of the message
 75 |  */
 76 | // [[Rcpp::export]]
 77 | void dbg_itch_file(std::string filename = "inst/extdata/ex20101224.TEST_ITCH_50",
 78 |                    int64_t buffer_size = 1e9) {
 79 | 
 80 |   // to allow readline / user feedbakc
 81 |   Rcpp::Environment base = Rcpp::Environment("package:base");
 82 |   Rcpp::Function readline = base["readline"];
 83 |   Rcpp::Function as_character = base["as.character"];
 84 | 
 85 |   const bool is_gz = filename.substr(filename.size() - 3, filename.size()) == ".gz";
 86 | 
 87 |   // only one buffer is used...
 88 |   unsigned char* bufferPtr;
 89 |   int64_t bufferCharSize = sizeof(unsigned char) * buffer_size;
 90 |   bufferPtr = (unsigned char*) malloc(bufferCharSize);
 91 | 
 92 |   FILE* rawfile;
 93 |   gzFile gzfile;
 94 | 
 95 |   if (is_gz) {
 96 |     gzfile = gzopen(filename.c_str(), "rb");
 97 |   } else {
 98 |     rawfile = fopen(filename.c_str(), "rb");
 99 |   }
100 | 
101 |   int64_t buf_size;
102 |   if (is_gz) {
103 |     buf_size = gzread(gzfile, bufferPtr, bufferCharSize);
104 |   } else {
105 |     buf_size = fread(bufferPtr, 1, bufferCharSize, rawfile);
106 |   }
107 | 
108 |   std::vector<int64_t> counts_all = count_messages_buffer(bufferPtr, buf_size);
109 |   std::vector<int64_t> counts = take_needed_messages(counts_all);
110 | 
111 |   Rprintf("Debugging File '%s' (.gz-file? %s)\n", filename.c_str(), is_gz ? "yes" : "no");
112 |   Rprintf("Usage:\n");
113 |   Rprintf("- Empty: next message\n");
114 |   Rprintf("- Number: for next N messages\n");
115 |   Rprintf("- Character: if valid message type, print the next message, e.g., 'A' for add order\n");
116 |   Rprintf("- non valid Character: exits the debugging tool\n");
117 |   Rprintf("Note: Bytes in parenthesis show the first two bytes, which are not used!\n");
118 | 
119 |   Rprintf("Number of Messages:\n");
120 |   for (int j = 0; j < N_ACT_MSGS; j++) {
121 |     Rprintf("- '%c': %ld\n", ACT_MSG_NAMES[j], counts[j]);
122 |   }
123 |   Rprintf("=============================\n");
124 |   // Use the Buffer
125 |   int64_t idx;
126 | 
127 |   int i = 0;
128 |   idx = 0;
129 |   std::string exit_code = "";
130 |   int skip_end = 0;
131 |   bool skip_print = false;
132 |   unsigned char msg_filter = ' ';
133 | 
134 |   // to enable multiple buffers: use this logic...
135 |   // while ((thisBufferSize = fread(bufferPtr, 1, bufferCharSize, infile)) > 0) {
136 |   //  while (true) {
137 |   while (true) {
138 |     if (idx > buf_size) {
139 |       Rprintf("Reached end of buffer, increase buffer size to read more\n");
140 |       return;
141 |   }
142 |     unsigned char num = bufferPtr[idx + 2];
143 |     const int l = get_message_size(num);
144 |     // Rprintf("At offset '0x%04x' msg '%c' msg len %i (0x%04x)\n", idx, num, l, l);
145 | 
146 |     if (skip_print) {
147 |       if (num != msg_filter) {
148 |         // if the current message is not equal to the message filter, skip printing and advance
149 |         idx += l;
150 |         i++;
151 |         continue;
152 |       } else {
153 |         skip_print = false;
154 |       }
155 |     }
156 | 
157 |     Rprintf("'%c' (len 2 + %i) idx %4i at offset %5ld (0x%04lx) | ", num, l - 2, i, idx, idx);
158 |     Rprintf("(%02x %02x) ", bufferPtr[idx], bufferPtr[idx + 1]);
159 |     for (int x = 2; x < l; x++) Rprintf("%02x ", bufferPtr[idx + x]);
160 |     Rprintf("\n");
161 | 
162 |     // interactive element, allow numeric input (for N messages),
163 |     // Message Types for the next message type, or other non empty for quit
164 |     if (i >= skip_end) {
165 |       exit_code = Rcpp::as<std::string>(as_character(readline("#RITCH> ")));
166 | 
167 |       if (exit_code != "") {
168 |         // check if all numeric, than skip N
169 |         const bool only_numeric = exit_code.find_first_not_of("0123456789") == std::string::npos;
170 |         if (only_numeric) {
171 |           const int n = std::stoi(exit_code);
172 |           skip_end = i + n;
173 |           Rprintf("Showing next %i messages\n", n);
174 |         } else {
175 |           // check messages
176 |           unsigned char exit_msg = exit_code.at(0);
177 | 
178 |           // check if the input is an itch message
179 | 
180 |           bool is_itch_message = false;
181 |           for (const unsigned char c : ACT_MSG_NAMES) if (c == exit_msg) {
182 |             is_itch_message = true;
183 |             break;
184 |           }
185 | 
186 |           if (is_itch_message) {
187 |             const bool has_message = sum_messages(counts, exit_msg) > 0;
188 |             if (!has_message) {
189 |               Rprintf("No messages found for type '%c' increase buffer size or use different message type.\n", exit_msg);
190 |               continue;
191 |             }
192 |             skip_print = true;
193 |             msg_filter = exit_code[0];
194 | 
195 |             Rcpp::Rcout << "Applied filter to message type '" << msg_filter << "'\n";
196 |           } else {
197 |             // else break
198 |             Rprintf("Stopping Printing Messages\n");
199 |             break;
200 |           }
201 |         }
202 |       } // else: continue with next message
203 |     }
204 | 
205 |     idx += l;
206 |     i++;
207 |   }
208 | 
209 |   free(bufferPtr);
210 |   if (is_gz) {
211 |     gzclose(gzfile);
212 |   } else {
213 |     fclose(rawfile);
214 |   }
215 | }
216 | 
217 | /*** R
# Converts a hex string into char
# i.e., dbg_hex_to_char("4f") == "O"
# Spaces between the bytes are ignored; an empty input results in "".
dbg_hex_to_char <- function(h) {
  h <- gsub(" +", "", h)
  # seq(1, 0, by = 2) would raise an error, so handle the empty case up front
  if (!nzchar(h)) {
    return("")
  }
  starts <- seq(1L, nchar(h), by = 2L)
  # substring() is vectorised over the start positions, one byte per element
  bytes <- substring(h, starts, starts + 1L)
  rawToChar(as.raw(strtoi(bytes, 16L)))
}
# dbg_hex_to_int("01 23 45 67") == 19088743
# dbg_hex_to_int("0a 2d f4 92 1d 67") == 11192493022567
# Returns an integer64. The value is assembled from the lowest 8 hex digits
# (32 bit) and the remaining upper digits, as converting the whole string via
# a double would round values above 2^53.
dbg_hex_to_int <- function(h) {
  h <- gsub(" +", "", h)
  n <- nchar(h)
  lo <- substr(h, pmax(n - 7L, 1L), n)
  hi <- substr(h, 1L, n - 8L)
  # inputs of up to 8 digits have no upper part
  hi[!nzchar(hi)] <- "0"

  hex_to_int64 <- function(s) {
    bit64::as.integer64(as.numeric(paste0("0x", s)))
  }
  # 4294967296 == 2^32, shifts the upper part past the lower 32 bit
  hex_to_int64(hi) * bit64::as.integer64(4294967296) + hex_to_int64(lo)
}
# dbg_hex_to_dbl("00 01 fa 40") == 12.96
# dbg_hex_to_dbl("00 00 00 46 28 21 94 40", prec = 8) == 3013.21
# The hex string is read as an integer and divided by 10^prec.
dbg_hex_to_dbl <- function(h, prec = 4) {
  divisor <- 10^prec
  int_value <- dbg_hex_to_int(h)
  int_value / divisor
}
237 | */
238 | 
// converts a std::string of hex values to a buffer
//
// Whitespace is removed first, then every two characters are read as one
// byte (a trailing single character is dropped).
// The buffer is allocated with calloc, the caller has to free() it.
unsigned char * to_buffer(std::string x) {
  // isspace must not be called with a negative value, hence the unsigned
  // char parameter
  x.erase(std::remove_if(x.begin(), x.end(),
                         [](unsigned char c) { return isspace(c) != 0; }),
          x.end());
  const uint64_t n_bytes = x.size() / 2;
  unsigned char * buf = (unsigned char*) calloc(n_bytes, sizeof(unsigned char));

  for (uint64_t j = 0; j < n_bytes; j++)
    buf[j] = std::stoul(x.substr(j * 2, 2), nullptr, 16);
  return buf;
}
251 | 
252 | // ##############################
253 | // User Functions...
254 | // ##############################
255 | 
256 | //[[Rcpp::export]]
257 | Rcpp::DataFrame hex_count_messages_impl(std::string x) {
258 |   // remove whitespaces
259 |   x.erase(remove_if(x.begin(), x.end(), isspace), x.end());
260 |   const uint64_t n_bytes = x.size() / 2;
261 |   unsigned char * buf = to_buffer(x);
262 | 
263 |   std::vector<int64_t> count = count_messages_buffer(buf, n_bytes);
264 | 
265 |   Rcpp::StringVector types;
266 |   for (unsigned char c : ACT_MSG_NAMES) types.push_back(std::string(1, c));
267 | 
268 |   Rcpp::List df(2);
269 |   df.names() = Rcpp::CharacterVector::create("msg_type", "count");
270 |   df["msg_type"] = types;
271 |   const int len = types.size();
272 |   Rcpp::NumericVector ct(len);
273 |   std::memcpy(&(ct[0]), &(count[0]), len * sizeof(double));
274 |   ct.attr("class") = "integer64";
275 |   df["count"] = ct;
276 | 
277 |   df.attr("class") = Rcpp::CharacterVector::create("data.table", "data.frame");
278 | 
279 |   return df;
280 | }
281 | /***R
# Prints a byte-by-byte comparison of two hex strings
# Spaces are ignored; the bytes are compared up to the length of the shorter
# input and rows that differ are flagged with "XXX".
# Returns NULL invisibly.
dbg_hex_compare <- function(x, y) {
  # splits a hex string into its two-character bytes
  split_bytes <- function(h) {
    chars <- strsplit(gsub(" ", "", h), split = "")[[1]]
    paste0(chars[c(TRUE, FALSE)], chars[c(FALSE, TRUE)])
  }
  bytes_x <- split_bytes(x)
  bytes_y <- split_bytes(y)

  idx <- seq_len(min(length(bytes_x), length(bytes_y)))
  bytes_x <- bytes_x[idx]
  bytes_y <- bytes_y[idx]

  cat(sprintf(" %3s | %4s | %4s | %4s\n%s\n", "idx", "x", "y", "diff",
              paste(rep("-", 25), collapse = "")))
  # sprintf() is vectorised, so all rows are formatted in one call
  rows <- sprintf(" %3s | 0x%2s | 0x%2s | %4s\n", idx, bytes_x, bytes_y,
                  ifelse(bytes_x == bytes_y, "", "XXX"))
  cat(rows, sep = "")
  invisible(NULL)
}
# Counts the messages by type in a hex string
# dbg_hex_count_messages("00 00 41")
# The result of hex_count_messages_impl() is passed through
# data.table::setalloccol() before it is returned.
dbg_hex_count_messages <- function(x) {
  data.table::setalloccol(hex_count_messages_impl(x))
}
306 | */
307 | 
308 | /*
309 |  * HEX to Ordertypes
310 |  */
311 | 
312 | Rcpp::DataFrame dbg_hex_to_df(std::string x, std::string msg_class) {
313 |   // create buffer
314 |   x.erase(remove_if(x.begin(), x.end(), isspace), x.end());
315 |   const uint64_t n_bytes = x.size() / 2;
316 |   unsigned char * buf = to_buffer(x);
317 |   std::vector<int64_t> count = count_messages_buffer(buf, n_bytes);
318 | 
319 |   int64_t n_messages = 0;
320 |   for (const int64_t p : count) n_messages += p;
321 | 
322 |   MessageParser mp(msg_class, 0, 100); // take max 100 messages...
323 |   mp.activate();
324 |   mp.init_vectors(n_messages + 100);
325 |   uint64_t i = 2;
326 | 
327 |   while (i < n_bytes) {
328 |     mp.parse_message(&buf[i]);
329 |     i += get_message_size(buf[i]);
330 |   }
331 | 
332 |   return mp.get_data_frame();
333 | }
334 | //[[Rcpp::export]]
335 | Rcpp::DataFrame dbg_hex_to_orders(std::string x) {
336 |   return dbg_hex_to_df(x, "orders");
337 | }
338 | //[[Rcpp::export]]
339 | Rcpp::DataFrame dbg_hex_to_trades(std::string x) {
340 |   return dbg_hex_to_df(x, "trades");
341 | }
342 | //[[Rcpp::export]]
343 | Rcpp::DataFrame dbg_hex_to_modifications(std::string x) {
344 |   return dbg_hex_to_df(x, "modifications");
345 | }
346 | //[[Rcpp::export]]
347 | Rcpp::DataFrame dbg_hex_to_system_events(std::string x) {
348 | return dbg_hex_to_df(x, "system_events");
349 | }
350 | //[[Rcpp::export]]
351 | Rcpp::DataFrame dbg_hex_to_stock_directory(std::string x) {
352 | return dbg_hex_to_df(x, "stock_directory");
353 | }
354 | //[[Rcpp::export]]
355 | Rcpp::DataFrame dbg_hex_to_trading_status(std::string x) {
356 |   return dbg_hex_to_df(x, "trading_status");
357 | }
358 | //[[Rcpp::export]]
359 | Rcpp::DataFrame dbg_hex_to_reg_sho(std::string x) {
360 |   return dbg_hex_to_df(x, "reg_sho");
361 | }
362 | //[[Rcpp::export]]
363 | Rcpp::DataFrame dbg_hex_to_market_participant_states(std::string x) {
364 |   return dbg_hex_to_df(x, "market_participant_states");
365 | }
366 | //[[Rcpp::export]]
367 | Rcpp::DataFrame dbg_hex_to_mwcb(std::string x) {
368 |   return dbg_hex_to_df(x, "mwcb");
369 | }
370 | //[[Rcpp::export]]
371 | Rcpp::DataFrame dbg_hex_to_ipo(std::string x) {
372 |   return dbg_hex_to_df(x, "ipo");
373 | }
374 | //[[Rcpp::export]]
375 | Rcpp::DataFrame dbg_hex_to_luld(std::string x) {
376 |   return dbg_hex_to_df(x, "luld");
377 | }
378 | //[[Rcpp::export]]
379 | Rcpp::DataFrame dbg_hex_to_noii(std::string x) {
380 |   return dbg_hex_to_df(x, "noii");
381 | }
382 | //[[Rcpp::export]]
383 | Rcpp::DataFrame dbg_hex_to_rpii(std::string x) {
384 |   return dbg_hex_to_df(x, "rpii");
385 | }
386 | 
387 | 
388 | /*
389 |  * ############################################################################
390 |  * Messages to hex
391 |  * The function takes one data.frame, deduces the type based on the message types
392 |  * and converts it into binary (hex) data
393 |  * ############################################################################
394 |  */
395 | //[[Rcpp::export]]
396 | std::string dbg_messages_to_hex(Rcpp::DataFrame df,
397 |                                 size_t max_buffer_size = 1e8) {
398 |   Rcpp::CharacterVector msgs = df["msg_type"];
399 |   const int total_messages = msgs.length();
400 |   // Rprintf("Found %i order messages\n", total_messages);
401 |   unsigned char * buf;
402 | 
403 |   size_t req_size = 0;
404 |   for (int i = 0; i < total_messages; i++) {
405 |     const unsigned char msg = Rcpp::as<char>(msgs[i]);
406 |     req_size += get_message_size(msg);
407 |   }
408 | 
409 |   req_size = req_size > max_buffer_size ? max_buffer_size : req_size;
410 |   // Rprintf("Need %u bytes for the messages\n", req_size);
411 |   // allocate memory to the buffer and initialise it to 0
412 |   buf = (unsigned char*) calloc(req_size, sizeof(unsigned char));
413 | 
414 |   int64_t i = 0;
415 |   int64_t msg_ct = 0;
416 |   while (msg_ct < total_messages) {
417 |     // Rprintf("Parsing Message %i\n", msg_ct);
418 |     i += load_message_to_buffer(&(buf[i]), msg_ct, df);
419 |   }
420 | 
421 |   std::stringstream ss;
422 |   for(int j = 0; j < i; ++j)
423 |     ss <<
424 |       std::setfill('0') <<
425 |         std::setw(2) <<
426 |           std::hex <<
427 |            (int) (((int) buf[j] >> (8*0)) & 0xff) << // (int) buf[j]
428 |               " ";
429 |   std::string res = ss.str();
430 | 
431 |   return res.substr(0, res.size() - 1);
432 | }
433 | 


--------------------------------------------------------------------------------
/inst/extdata/ex20101224.TEST_ITCH_50:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavZim/RITCH/9bd51af48d26703bd95ab4f0db6532a497c104c1/inst/extdata/ex20101224.TEST_ITCH_50


--------------------------------------------------------------------------------
/inst/extdata/ex20101224.TEST_ITCH_50.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavZim/RITCH/9bd51af48d26703bd95ab4f0db6532a497c104c1/inst/extdata/ex20101224.TEST_ITCH_50.gz


--------------------------------------------------------------------------------
/inst/tinytest/test_filename_helpers.R:
--------------------------------------------------------------------------------
 1 | library(RITCH)
 2 | library(tinytest)
 3 | 
 4 | # Get date from filename
 5 | expect_equal(
 6 |   get_date_from_filename("03302017.NASDAQ_ITCH50"),
 7 |   as.POSIXct("2017-03-30", "GMT")
 8 | )
 9 | expect_equal(
10 |   get_date_from_filename("20170130.BX_ITCH_50.gz"),
11 |   as.POSIXct("2017-01-30", "GMT")
12 | )
13 | expect_equal(
14 |   get_date_from_filename("S030220-v50-bx.txt.gz"),
15 |   as.POSIXct("2020-03-02", "GMT")
16 | )
17 | expect_equal(
18 |   get_date_from_filename("unknown_file_format"),
19 |   NA
20 | )
21 | 
22 | ## Get exchange from filename
23 | expect_equal(
24 |   get_exchange_from_filename("03302017.NASDAQ_ITCH50"),
25 |   "NASDAQ"
26 | )
27 | expect_equal(
28 |   get_exchange_from_filename("20170130.BX_ITCH_50.gz"),
29 |   "BX"
30 | )
31 | expect_equal(
32 |   get_exchange_from_filename("S030220-v50-bx.txt.gz"),
33 |   "BX"
34 | )
35 | expect_equal(
36 |   get_exchange_from_filename("unknown_file_format"),
37 |   NA
38 | )
39 | 
40 | ## Add meta to filename
41 | expect_equal(
42 |   add_meta_to_filename("03302017.NASDAQ_ITCH50", "2010-12-24", "TEST"),
43 |   "12242010.TEST_ITCH50"
44 | )
45 | 
46 | expect_equal(
47 |   add_meta_to_filename("20170130.BX_ITCH_50.gz", "2010-12-24", "TEST"),
48 |   "20101224.TEST_ITCH_50.gz"
49 | )
50 | expect_equal(
51 |   add_meta_to_filename("S030220-v50-bx.txt.gz", "2010-12-24", "TEST"),
52 |   "S122410-v50-TEST.txt.gz"
53 | )
54 | expect_equal(
55 |   add_meta_to_filename("unknown_file.ITCH_50", "2010-12-24", "TEST"),
56 |   "unknown_file_20101224.TEST_ITCH_50"
57 | )
58 | expect_equal(
59 |   add_meta_to_filename("some_folder/unknown_file.ITCH_50", "2010-12-24", "TEST"),
60 |   "some_folder/unknown_file_20101224.TEST_ITCH_50"
61 | )
62 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_filter_itch.R:
--------------------------------------------------------------------------------
  1 | library(RITCH)
  2 | library(tinytest)
  3 | library(data.table)
  4 | suppressPackageStartupMessages(library(bit64))
  5 | setDTthreads(2)
  6 | 
  7 | infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
  8 | outfile <- file.path(tempdir(), "testfile_20101224.TEST_ITCH_50")
  9 | 
 10 | 
 11 | ################################################################################
 12 | # Test that filtering for all trades returns all data entries
 13 | orig <- read_itch(infile, quiet = TRUE)
 14 | trades <- read_trades(infile, quiet = TRUE)
 15 | expect_equal(orig$trades, trades)
 16 | 
 17 | res <- read_itch(infile, quiet = TRUE, filter_msg_class = "trades",
 18 |                  filter_stock_locate = c(1, 2, 3), min_timestamp = 1,
 19 |                  max_timestamp = 6e14, filter_msg_type = "P")
 20 | expect_equal(res, trades)
 21 | 
 22 | filter_itch(infile, outfile, filter_msg_class = "trades", quiet = TRUE)
 23 | res <- read_itch(outfile, quiet = TRUE)
 24 | expect_equal(res$trades, trades)
 25 | unlink(outfile)
 26 | 
 27 | 
 28 | ################################################################################
 29 | # Test that the first and last messages are parsed
 30 | of <- filter_itch(infile, outfile, filter_msg_class = "system_events", quiet = TRUE)
 31 | 
 32 | # the filename is not changed
 33 | expect_equal(of, outfile)
 34 | 
 35 | unlink(of)
 36 | tmpfile <- tempfile("testfile")
 37 | of <- filter_itch(infile, tmpfile, filter_msg_class = "system_events", quiet = TRUE)
 38 | 
 39 | # the outfile name is correctly constructed!
 40 | expect_equal(of, paste0(tmpfile, "_20101224.TEST_ITCH_50"))
 41 | 
 42 | # test file contents
 43 | expect_equal(file.size(of), 84)
 44 | df <- read_system_events(of, quiet = TRUE)
 45 | expect_equal(nrow(df), 6)
 46 | unlink(of)
 47 | 
 48 | ################################################################################
 49 | ################################################################################
 50 | # Test Message Class
 51 | filter_itch(infile, outfile, filter_msg_class = "orders", quiet = TRUE)
 52 | 
 53 | # calling it on the file again causes error unless overwrite = TRUE
 54 | expect_error(
 55 |   filter_itch(infile, outfile, filter_msg_class = "orders", quiet = TRUE)
 56 | )
 57 | 
 58 | # test overwrite = TRUE
 59 | filter_itch(infile, outfile, filter_msg_class = "orders", overwrite = TRUE,
 60 |             quiet = TRUE)
 61 | 
 62 | expect_equal(file.size(outfile), 190012)
 63 | 
 64 | # check that the output file contains only orders
 65 | df  <- read_orders(outfile, quiet = TRUE)
 66 | expect_equal(nrow(df), 5000)
 67 | 
 68 | df2 <- read_orders(infile, quiet = TRUE)
 69 | expect_equal(df, df2)
 70 | 
 71 | # writing again to the same fail results in error
 72 | expect_error(
 73 |   filter_itch(infile, outfile, filter_msg_class = "orders", quiet = TRUE)
 74 | )
 75 | unlink(outfile)
 76 | 
 77 | ################################################################################
 78 | # Test Append
 79 | filter_itch(infile, outfile, filter_msg_class = "orders", quiet = TRUE)
 80 | filter_itch(infile, outfile, filter_msg_class = "orders", append = TRUE,
 81 |             quiet = TRUE)
 82 | 
 83 | df <- read_orders(outfile, quiet = TRUE)
 84 | dforig <- read_orders(infile, quiet = TRUE)
 85 | 
 86 | expect_equal(
 87 |   df,
 88 |   rbindlist(list(dforig, dforig))
 89 | )
 90 | unlink(outfile)
 91 | 
 92 | ################################################################################
 93 | # Test smaller buffer_size
 94 | 
 95 | filter_itch(infile, outfile, filter_msg_class = "orders",
 96 |             buffer_size = 50,
 97 |             quiet = TRUE)
 98 | 
 99 | expect_equal(file.size(outfile), 190012)
100 | 
101 | # check that the output file contains only orders
102 | df  <- read_orders(outfile, quiet = TRUE)
103 | expect_equal(nrow(df), 5000)
104 | 
105 | df2 <- read_orders(infile, quiet = TRUE)
106 | expect_equal(df, df2)
107 | unlink(outfile)
108 | 
109 | ################################################################################
110 | ################################################################################
111 | # Test Msg Type
112 | filter_itch(infile, outfile, filter_msg_type = "S", quiet = TRUE)
113 | 
114 | expect_equal(file.size(outfile), 84)
115 | # check that the output file contains only orders
116 | df <- read_system_events(outfile, quiet = TRUE)
117 | expect_equal(nrow(df), 6)
118 | 
119 | df2 <- read_system_events(infile, quiet = TRUE)
120 | expect_equal(df, df2)
121 | unlink(outfile)
122 | 
123 | 
124 | ################################################################################
125 | ################################################################################
126 | # Test Stock Locate
127 | filter_itch(infile, outfile, filter_stock_locate = c(2, 3), quiet = TRUE)
128 | 
129 | expect_equal(file.size(outfile), 333876)
130 | # check that the output file contains only orders
131 | df <- read_itch(outfile, quiet = TRUE)
132 | exp_count <- c(
133 |   stock_directory = 2L, trading_status = 2L,
134 |   orders = 4050L, modifications = 1626L, trades = 3115L
135 | )
136 | expect_equal(sapply(df, nrow), exp_count)
137 | 
138 | df2 <- read_itch(infile, filter_stock_locate = c(2, 3), quiet = TRUE)
139 | expect_equal(df, df2)
140 | unlink(outfile)
141 | 
142 | 
143 | ################################################################################
144 | ################################################################################
145 | # Test filter_stock
146 | stock_sel <- c("BOB", "CHAR")
147 | sdir <- data.table(stock = stock_sel,
148 |                    stock_locate = c(2, 3))
149 | filter_itch(infile, outfile, filter_stock = stock_sel, stock_directory = sdir,
150 |             quiet = TRUE)
151 | 
152 | expect_equal(file.size(outfile), 333876)
153 | # check that the output file contains only orders
154 | df <- read_itch(outfile, quiet = TRUE)
155 | exp_count <- c(
156 |   stock_directory = 2L, trading_status = 2L,
157 |   orders = 4050L, modifications = 1626L, trades = 3115L
158 | )
159 | expect_equal(sapply(df, nrow), exp_count)
160 | 
161 | df2 <- read_itch(infile, filter_stock = stock_sel, stock_directory = sdir,
162 |                  quiet = TRUE)
163 | expect_equal(df, df2)
164 | unlink(outfile)
165 | 
166 | 
167 | ################################################################################
168 | ################################################################################
169 | # Test Timestamps
170 | 
171 | get_func_of_ts <- function(ll, func = min) {
172 |   ll <- ll[sapply(ll, nrow) != 0]
173 |   mm <- lapply(ll, function(d) list(func(d$timestamp)))
174 |   x <- unlist(mm)
175 |   class(x) <- "integer64"
176 |   func(x)
177 | }
178 | 
179 | # check errors
180 | # either min & max timestamp have the same size or 0 and 1
181 | expect_error(
182 |   filter_itch(infile, outfile, min_timestamp = 1:2, quiet = TRUE)
183 | )
184 | expect_error(
185 |   filter_itch(infile, outfile, min_timestamp = 1:2, max_timestamp = 1:3,
186 |               quiet = TRUE)
187 | )
188 | expect_error(
189 |   filter_itch(infile, outfile, min_timestamp = 1, max_timestamp = 1:3,
190 |               quiet = TRUE)
191 | )
192 | 
193 | 
194 | ################################################################################
195 | ## Min only
196 | ms <- as.integer64(45463537089764)
197 | filter_itch(infile, outfile, min_timestamp = ms, quiet = TRUE)
198 | 
199 | expect_equal(file.size(outfile), 236547)
200 | # check that the output file contains only orders
201 | df <- read_itch(outfile, quiet = TRUE)
202 | exp_count <- c(
203 |   system_events = 3L, orders = 2501L, modifications = 979L, trades = 2598L
204 | )
205 | expect_equal(sapply(df, nrow), exp_count)
206 | 
207 | # read-in all data and filter the data manually
208 | df_all <- read_itch(infile, quiet = TRUE)
209 | df_all_f <- lapply(df, function(d) d[timestamp >= ms, ])
210 | expect_equal(df_all_f, df)
211 | 
212 | # check that for all classes the min timestamp is larger than the expected value
213 | expect_true(get_func_of_ts(df, min) >= ms)
214 | 
215 | df2 <- read_itch(infile, min_timestamp = ms, quiet = TRUE)
216 | expect_equal(df, df2)
217 | unlink(outfile)
218 | 
219 | 
220 | ################################################################################
221 | ## Max only: keep all messages with timestamp <= ms
222 | ms <- as.integer64(45463537089764)
223 | filter_itch(infile, outfile, max_timestamp = ms, quiet = TRUE)
224 | 
225 | expect_equal(file.size(outfile), 228539)
226 | # check the number of messages per class in the filtered output file
227 | df <- read_itch(outfile, quiet = TRUE)
228 | exp_count <- c(
229 |   system_events = 3L, stock_directory = 3L, trading_status = 3L,
230 |   orders = 2500L, modifications = 1021L, trades = 2402L
231 | )
232 | expect_equal(sapply(df, nrow), exp_count)
233 | 
234 | # reference: filter the complete input manually (only classes found in df)
235 | df_all <- read_itch(infile, quiet = TRUE)
236 | df_all_f <- lapply(df_all[names(df)], function(d) d[timestamp <= ms, ])
237 | expect_equal(df_all_f, df)
238 | 
239 | # check that over all classes the max timestamp is not larger than ms
240 | expect_true(get_func_of_ts(df, max) <= ms)
241 | 
242 | df2 <- read_itch(infile, max_timestamp = ms, quiet = TRUE) # same filter on read
243 | expect_equal(df, df2)
244 | unlink(outfile)
245 | 
246 | 
247 | ################################################################################
248 | ## min and max: keep all messages with min_ts <= timestamp <= max_ts
249 | min_ts <- as.integer64(45463537089764)
250 | max_ts <- as.integer64(51233773867238)
251 | filter_itch(infile, outfile, min_timestamp = min_ts, max_timestamp = max_ts,
252 |             quiet = TRUE)
253 | 
254 | expect_equal(file.size(outfile), 138558)
255 | 
256 | # check the number of messages per class in the filtered output file
257 | df <- read_itch(outfile, quiet = TRUE)
258 | exp_count <- c(orders = 1501L, modifications = 598L, trades = 1477L)
259 | expect_equal(sapply(df, nrow), exp_count)
260 | 
261 | # reference: filter the complete input manually (only classes found in df)
262 | df_all <- read_itch(infile, quiet = TRUE)
263 | df_all_f <- lapply(df_all[names(df)],
264 |                    function(d) d[timestamp >= min_ts & timestamp <= max_ts, ])
265 | expect_equal(df_all_f, df)
266 | 
267 | 
268 | # check that over all classes the timestamps lie within [min_ts, max_ts]
269 | expect_true(get_func_of_ts(df, min) >= min_ts)
270 | expect_true(get_func_of_ts(df, max) <= max_ts)
271 | 
272 | df2 <- read_itch(infile, min_timestamp = min_ts, max_timestamp = max_ts,
273 |                  quiet = TRUE)
274 | expect_equal(df, df2)
275 | unlink(outfile)
276 | 
277 | 
278 | ################################################################################
279 | ################################################################################
280 | # Test n_max (the limit applies to each message class separately)
281 | 
282 | # the largest message class has 5000 messages, n_max = 5000 keeps the whole file
283 | filter_itch(infile, outfile, n_max = 5000, quiet = TRUE)
284 | expect_equal(file.size(infile), file.size(outfile))
285 | unlink(outfile)
286 | 
287 | # take the first 100 messages for each message class
288 | filter_itch(infile, outfile, n_max = 100, quiet = TRUE)
289 | df <- read_itch(outfile, quiet = TRUE)
290 | exp_count <- c(system_events = 6, stock_directory = 3, trading_status = 3,
291 |                orders = 100, modifications = 100, trades = 100) # small classes stay complete
292 | expect_equal(sapply(df, nrow), exp_count)
293 | 
294 | df2 <- read_itch(infile, n_max = 100, quiet = TRUE) # same limit applied on read
295 | expect_equal(df, df2)
296 | unlink(outfile)
297 | 
298 | ################################################################################
299 | # Test skip (the offset applies to each message class separately)
300 | 
301 | # skipping 0 messages results in the same file
302 | filter_itch(infile, outfile, skip = 0, quiet = TRUE)
303 | expect_equal(file.size(infile), file.size(outfile))
304 | unlink(outfile)
305 | 
306 | # skip the first 1000 messages for each message class
307 | filter_itch(infile, outfile, skip = 1000, quiet = TRUE)
308 | df <- read_itch(outfile, quiet = TRUE)
309 | exp_count <- c(orders = 4000, modifications = 1000, trades = 4000)
310 | expect_equal(sapply(df, nrow), exp_count)
311 | 
312 | df2 <- read_itch(infile, skip = 1000, quiet = TRUE) # same offset applied on read
313 | expect_equal(df, df2)
314 | unlink(outfile)
315 | 
316 | 
317 | # skip the first 4000 messages for each message class
318 | # expect to see 5000-4000 trades and 5000-4000 orders, no other class remains
319 | filter_itch(
320 |   infile, outfile,
321 |   skip = 4000,
322 |   quiet = TRUE
323 | )
324 | df <- read_itch(outfile, quiet = TRUE)
325 | exp_count <- c(orders = 1000, trades = 1000)
326 | expect_equal(sapply(df, nrow), exp_count)
327 | 
328 | df2 <- read_itch(infile, skip = 4000, quiet = TRUE) # same offset applied on read
329 | expect_equal(df, df2)
330 | unlink(outfile)
331 | 
332 | 
333 | ################################################################################
334 | ################################################################################
335 | # Test more complex filter (class, type, locate code, timestamp, and n_max)
336 | min_ts <- 40505246803501 # Q1 of all orders
337 | max_ts <- 49358420393946 # Q3 of all orders
338 | 
339 | filter_itch(
340 |   infile, outfile,
341 |   filter_msg_class = c("orders", "trades"),
342 |   filter_stock_locate = c(1, 3),
343 |   filter_msg_type = "D",
344 |   skip = 0, n_max = 100,
345 |   min_timestamp = min_ts,
346 |   max_timestamp = max_ts,
347 |   quiet = TRUE
348 | )
349 | expect_equal(file.size(outfile), 10500)
350 | 
351 | # check that the output file contains 100 messages for each expected class
352 | filtered_res <- read_itch(outfile, c("orders", "trades", "modifications"),
353 |                           quiet = TRUE)
354 | expect_equal(sapply(filtered_res, nrow),
355 |              c(orders = 100, trades = 100, modifications = 100))
356 | 
357 | # read in the original file, and apply the same filters to each class
358 | df_orig <- read_itch(infile, c("orders", "trades", "modifications"),
359 |                      quiet = TRUE)
360 | # message types used as reference: 'D' plus the order and trade types
361 | msg_types <- c('D', 'A', 'F', 'P', 'Q', 'B')
362 | df_orig_res <- lapply(df_orig, function(d)
363 |   d[msg_type %in% msg_types &
364 |       stock_locate %in% c(1, 3) &
365 |       timestamp >= min_ts & timestamp <= max_ts][1:100, ] # inclusive bounds
366 | )
367 | 
368 | expect_equal(filtered_res, df_orig_res)
369 | unlink(outfile)
370 | 
371 | 
372 | ################################################################################
373 | # filter_itch works on gz input files
374 | gzinfile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
375 | tmpoutfile <- file.path(tempdir(), "gz_testfile_20101224.TEST_ITCH_50")
376 | # filter the orders of the gz archive into an uncompressed output file
377 | rawoutfile <- filter_itch(gzinfile, tmpoutfile, filter_msg_class = "orders",
378 |                           quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
379 | expect_equal(rawoutfile, tmpoutfile) # the returned name is the requested name
380 | expect_equal(file.size(rawoutfile), 190012)
381 | # the orders of the output file must equal the orders of the gz input file
382 | odf <- read_orders(rawoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
383 | idf <- read_orders(gzinfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
384 | expect_equal(odf, idf)
385 | unlink(rawoutfile)
386 | 
387 | 
388 | ################################################################################
389 | # works also on gz-output files (gz = TRUE)
390 | rawoutfile <- filter_itch(gzinfile, tmpoutfile, filter_msg_class = "orders", gz = TRUE,
391 |                           quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
392 | # the returned file name is expected to carry the appended .gz extension
393 | expect_equal(rawoutfile, paste0(tmpoutfile, ".gz"))
394 | expect_true(file.exists(rawoutfile))
395 | expect_equal(file.size(rawoutfile), 72619)
396 | # the orders of the gz output must equal the orders of the gz input file
397 | odf <- read_orders(rawoutfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
398 | idf <- read_orders(gzinfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
399 | 
400 | expect_equal(odf, idf)
401 | unlink(rawoutfile)
402 | unlink(tmpoutfile)
403 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_gz_functions.R:
--------------------------------------------------------------------------------
 1 | library(RITCH)
 2 | library(tinytest)
 3 | setDTthreads(2)
 4 | 
 5 | # check that gunzip_file and gzip_file reproduce the contents of the originals
 6 | raw_file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
 7 | gz_file  <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
 8 | 
 9 | tmpfile <- file.path(tempdir(), "raw_20101224.TEST_ITCH_50")
10 | tmpfile2 <- file.path(tempdir(), "gz_20101224.TEST_ITCH_50.gz")
11 | 
12 | expect_true(file.exists(raw_file))
13 | expect_true(file.exists(gz_file))
14 | 
15 | gunzip_file(gz_file, tmpfile)
16 | expect_equal( # the gunzipped archive must be byte-identical to the raw file
17 |   tools::md5sum(raw_file)[[1]],
18 |   tools::md5sum(tmpfile)[[1]]
19 | )
20 | 
21 | gzip_file(raw_file, tmpfile2)
22 | 
23 | # check that the re-compressed file contains the same messages as the raw file
24 | expect_equal(
25 |   read_itch(raw_file, quiet = TRUE),
26 |   read_itch(tmpfile2, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
27 | )
28 | expect_equal( # the shipped gz archive must contain the same messages, too
29 |   read_itch(raw_file, quiet = TRUE),
30 |   read_itch(gz_file, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
31 | )
32 | 
33 | unlink(c(tmpfile, tmpfile2))
34 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_write_itch.R:
--------------------------------------------------------------------------------
  1 | library(RITCH)
  2 | library(tinytest)
  3 | library(data.table)
  4 | setDTthreads(2)
  5 | 
  6 | infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
  7 | # ll holds the data of the example file and is the reference for the write tests
  8 | ll <- read_itch(infile, quiet = TRUE)
  9 | 
 10 | ################################################################################
 11 | ################################################################################
 12 | #### Testing base write functionality
 13 | outfile_base <- file.path(tempdir(), "testfile")
 14 | outfile <- write_itch(ll, outfile_base, quiet = TRUE) # returns the written file name
 15 | 
 16 | expect_equal(file.size(infile)[[1]], file.size(outfile)[[1]])
 17 | 
 18 | ################################################################################
 19 | # expect identical files (same md5 hash as the input file)
 20 | expect_equal(tools::md5sum(infile)[[1]],
 21 |              tools::md5sum(outfile)[[1]])
 22 | 
 23 | # read in the written file again and compare it to the original data
 24 | ll2 <- read_itch(outfile, quiet = TRUE)
 25 | expect_equal(ll, ll2)
 26 | 
 27 | 
 28 | ################################################################################
 29 | ################################################################################
 30 | # Appending doubles file size
 31 | # appending is expected to throw a warning
 32 | outfile <- write_itch(ll, outfile, quiet = TRUE, add_meta = FALSE)
 33 | expect_warning(
 34 |   outfile <- write_itch(ll, outfile, quiet = TRUE, add_meta = FALSE,
 35 |                         append = TRUE)
 36 | )
 37 | expect_equal(file.size(outfile), 465048 * 2) # two copies of 465048 bytes each
 38 | 
 39 | ################################################################################
 40 | # read in again and compare to the original data with every class stacked twice
 41 | ll3 <- lapply(ll, function(x) rbindlist(list(x, x)))
 42 | ll4 <- read_itch(outfile, quiet = TRUE)
 43 | expect_equal(ll3, ll4)
 44 | 
 45 | 
 46 | ################################################################################
 47 | ################################################################################
 48 | #### Testing buffer_size
 49 | # buffer too large: a warning is expected
 50 | expect_warning(
 51 |   outfile <- write_itch(ll, outfile, buffer_size = 5e9 + 1,
 52 |                         quiet = TRUE, add_meta = FALSE)
 53 | )
 54 | ################################################################################
 55 | # buffer too small: a warning is expected
 56 | expect_warning(
 57 |   outfile <- write_itch(ll, outfile, buffer_size = 51,
 58 |                         quiet = TRUE, add_meta = FALSE)
 59 | )
 60 | ################################################################################
 61 | # small but ok buffer: 52 bytes must work and produce the complete file
 62 | outfile <- write_itch(ll, outfile, buffer_size = 52,
 63 |                       quiet = TRUE, add_meta = FALSE)
 64 | 
 65 | expect_equal(file.size(outfile), 465048)
 66 | # read in the written file again and compare it to the original data
 67 | ll2 <- read_itch(outfile, quiet = TRUE)
 68 | expect_equal(ll, ll2)
 69 | 
 70 | unlink(outfile)
 71 | 
 72 | 
 73 | ################################################################################
 74 | ################################################################################
 75 | #### Test gz compression file (compress = TRUE)
 76 | outfile <- write_itch(ll, outfile_base, compress = TRUE, quiet = TRUE)
 77 | 
 78 | expect_equal(file.size(outfile), 159965)
 79 | 
 80 | # read in the compressed file again and compare it to the original data
 81 | ll2 <- read_itch(outfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
 82 | expect_equal(ll, ll2)
 83 | 
 84 | ################################################################################
 85 | # test gz with smaller buffer size
 86 | outfile <- write_itch(ll, outfile_base, compress = TRUE, buffer_size = 100,
 87 |                       quiet = TRUE)
 88 | 
 89 | # with smaller buffer sizes when using compress = TRUE, the filesize will increase!
 90 | expect_equal(file.size(outfile), 419608)
 91 | # read in the compressed file again and compare it to the original data
 92 | ll2 <- read_itch(outfile, quiet = TRUE, force_gunzip = TRUE, force_cleanup = TRUE)
 93 | expect_equal(ll, ll2)
 94 | 
 95 | unlink(outfile)
 96 | 
 97 | 
 98 | ################################################################################
 99 | ################################################################################
100 | #### check append and compress
101 | write_itch(ll, outfile, compress = TRUE, buffer_size = 100, add_meta = FALSE,
102 |            quiet = TRUE)
103 | expect_equal(file.size(outfile), 419608)
104 | # appending to the existing compressed file is expected to throw a warning
105 | expect_warning(
106 |   outfile <- write_itch(ll, outfile, compress = TRUE, append = TRUE,
107 |                         buffer_size = 100, add_meta = FALSE, quiet = TRUE)
108 | )
109 | 
110 | # note that appending to a gzipped file will linearly increase file size...
111 | # only the buffers are compressed!
112 | expect_equal(file.size(outfile), 419608 * 2)
113 | # the appended file must contain every message class twice
114 | expect_equal(lapply(ll, function(x) rbindlist(list(x, x))),
115 |              read_itch(outfile, quiet = TRUE, force_gunzip = TRUE,
116 |                        force_cleanup = TRUE))
117 | 
118 | unlink(outfile)
119 | 


--------------------------------------------------------------------------------
/man/add_meta_to_filename.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helpers.R
 3 | \name{add_meta_to_filename}
 4 | \alias{add_meta_to_filename}
 5 | \title{Adds meta information (date and exchange) to an itch filename}
 6 | \usage{
 7 | add_meta_to_filename(file, date, exchange)
 8 | }
 9 | \arguments{
10 | \item{file}{the filename}
11 | 
12 | \item{date}{the date as a date-class or as a string that is understood by
13 | \code{\link[base:as.Date]{base::as.Date()}}.}
14 | 
15 | \item{exchange}{the name of the exchange}
16 | }
17 | \value{
18 | the filename with exchanged or added date and exchange information
19 | }
20 | \description{
21 | Note that if date and exchange information are already present,
22 | they are overwritten
23 | }
24 | \examples{
25 | add_meta_to_filename("03302017.NASDAQ_ITCH50", "2010-12-24", "TEST")
26 | add_meta_to_filename("20170130.BX_ITCH_50.gz", "2010-12-24", "TEST")
27 | add_meta_to_filename("S030220-v50-bx.txt.gz", "2010-12-24", "TEST")
28 | add_meta_to_filename("unknown_file.ITCH_50", "2010-12-24", "TEST")
29 | }
30 | 


--------------------------------------------------------------------------------
/man/count_functions.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/count_functions.R
  3 | \name{count_functions}
  4 | \alias{count_functions}
  5 | \alias{count_messages}
  6 | \alias{count_orders}
  7 | \alias{count_trades}
  8 | \alias{count_modifications}
  9 | \alias{count_system_events}
 10 | \alias{count_stock_directory}
 11 | \alias{count_trading_status}
 12 | \alias{count_reg_sho}
 13 | \alias{count_market_participant_states}
 14 | \alias{count_mwcb}
 15 | \alias{count_ipo}
 16 | \alias{count_luld}
 17 | \alias{count_noii}
 18 | \alias{count_rpii}
 19 | \title{Counts the messages of an ITCH-file}
 20 | \usage{
 21 | count_messages(
 22 |   file,
 23 |   add_meta_data = FALSE,
 24 |   buffer_size = -1,
 25 |   quiet = FALSE,
 26 |   force_gunzip = FALSE,
 27 |   gz_dir = tempdir(),
 28 |   force_cleanup = TRUE
 29 | )
 30 | 
 31 | count_orders(x)
 32 | 
 33 | count_trades(x)
 34 | 
 35 | count_modifications(x)
 36 | 
 37 | count_system_events(x)
 38 | 
 39 | count_stock_directory(x)
 40 | 
 41 | count_trading_status(x)
 42 | 
 43 | count_reg_sho(x)
 44 | 
 45 | count_market_participant_states(x)
 46 | 
 47 | count_mwcb(x)
 48 | 
 49 | count_ipo(x)
 50 | 
 51 | count_luld(x)
 52 | 
 53 | count_noii(x)
 54 | 
 55 | count_rpii(x)
 56 | }
 57 | \arguments{
 58 | \item{file}{the path to the input file, either a gz-file or a plain-text file}
 59 | 
 60 | \item{add_meta_data}{if the meta-data of the messages should be added, defaults to FALSE}
 61 | 
 62 | \item{buffer_size}{the size of the buffer in bytes, defaults to 1e8 (100 MB), if you have a large amount of RAM, 1e9 (1GB) might be faster}
 63 | 
 64 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE}
 65 | 
 66 | \item{force_gunzip}{only applies if file is a gz-file and a file with the same (gunzipped) name already exists.
 67 | if set to TRUE, the existing file is overwritten. Default value is FALSE}
 68 | 
 69 | \item{gz_dir}{a directory where the gz archive is extracted to.
 70 | Only applies if file is a gz archive. Default is \code{\link[=tempdir]{tempdir()}}.}
 71 | 
 72 | \item{force_cleanup}{only applies if file is a gz-file. If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards.}
 73 | 
 74 | \item{x}{a file or a data.table containing the message types and the counts,
 75 | as outputted by \code{count_messages}}
 76 | }
 77 | \value{
 78 | a data.table containing the message-type and their counts for \code{count_messages}
 79 | or an integer value for the other functions.
 80 | }
 81 | \description{
 82 | Counts the messages of an ITCH-file
 83 | }
 84 | \details{
 85 | \itemize{
 86 | \item \code{count_orders}: Counts order messages. Message type \code{A} and \code{F}
 87 | }
 88 | 
 89 | \itemize{
 90 | \item \code{count_trades}: Counts trade messages. Message type \code{P}, \code{Q} and \code{B}
 91 | }
 92 | 
 93 | \itemize{
 94 | \item \code{count_modifications}: Counts order modification messages. Message
 95 | type \code{E}, \code{C}, \code{X}, \code{D}, and \code{U}
 96 | }
 97 | 
 98 | \itemize{
 99 | \item \code{count_system_events}: Counts system event messages. Message type \code{S}
100 | }
101 | 
102 | \itemize{
103 | \item \code{count_stock_directory}: Counts stock directory messages. Message
104 | type \code{R}
105 | }
106 | 
107 | \itemize{
108 | \item \code{count_trading_status}: Counts trading status messages. Message
109 | type \code{H} and \code{h}
110 | }
111 | 
112 | \itemize{
113 | \item \code{count_reg_sho}: Counts messages regarding reg SHO. Message type
114 | \code{Y}
115 | }
116 | 
117 | \itemize{
118 | \item \code{count_market_participant_states}: Counts messages regarding the
119 | status of market participants. Message type \code{L}
120 | }
121 | 
122 | \itemize{
123 | \item \code{count_mwcb}: Counts messages regarding Market-Wide-Circuit-Breakers
124 | (MWCB). Message type \code{V} and \code{W}
125 | }
126 | 
127 | \itemize{
128 | \item \code{count_ipo}: Counts messages regarding IPOs. Message type \code{K}
129 | }
130 | 
131 | \itemize{
132 | \item \code{count_luld}: Counts messages regarding LULDs (limit up-limit down)
133 | auction collars. Message type \code{J}
134 | }
135 | 
136 | \itemize{
137 | \item \code{count_noii}: Counts Net Order Imbalance Indicator (NOII) messages.
138 | Message type \code{I}
139 | }
140 | 
141 | \itemize{
142 | \item \code{count_rpii}: Counts Retail Price Improvement Indicator (RPII)
143 | messages. Message type \code{N}
144 | }
145 | }
146 | \examples{
147 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
148 | count_messages(file)
149 | count_messages(file, add_meta_data = TRUE, quiet = TRUE)
150 | 
151 | # file can also be a .gz file
152 | gz_file <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
153 | count_messages(gz_file, quiet = TRUE)
154 | 
155 | # count only a specific class
156 | msg_count <- count_messages(file, quiet = TRUE)
157 | 
158 | # either count based on a given data.table outputted by count_messages
159 | count_orders(msg_count)
160 | 
161 | # or count orders from a file and not from a msg_count
162 | count_orders(file)
163 | 
164 | ### Specific class count functions are:
165 | count_orders(msg_count)
166 | count_trades(msg_count)
167 | count_modifications(msg_count)
168 | count_system_events(msg_count)
169 | count_stock_directory(msg_count)
170 | count_trading_status(msg_count)
171 | count_reg_sho(msg_count)
172 | count_market_participant_states(msg_count)
173 | count_mwcb(msg_count)
174 | count_ipo(msg_count)
175 | count_luld(msg_count)
176 | count_noii(msg_count)
177 | count_rpii(msg_count)
178 | }
179 | 


--------------------------------------------------------------------------------
/man/count_internal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_functions.R
 3 | \name{count_internal}
 4 | \alias{count_internal}
 5 | \title{Internal function to count the messages}
 6 | \usage{
 7 | count_internal(x, types)
 8 | }
 9 | \arguments{
10 | \item{x}{a data.frame containing the message types and the counts}
11 | 
12 | \item{types}{a vector containing the types}
13 | }
14 | \value{
15 | a numeric value of number of orders in x
16 | }
17 | \description{
18 | Internal function to count the messages
19 | }
20 | \examples{
21 | # Only used internally
22 | }
23 | \keyword{internal}
24 | 


--------------------------------------------------------------------------------
/man/download_sample_file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_samples.R
 3 | \name{download_sample_file}
 4 | \alias{download_sample_file}
 5 | \title{Downloads a sample ITCH File from NASDAQs Server}
 6 | \usage{
 7 | download_sample_file(
 8 |   choice = c("smallest", "largest", "earliest", "latest", "random", "all"),
 9 |   file = NA,
10 |   exchanges = NA,
11 |   dir = ".",
12 |   force_download = FALSE,
13 |   check_md5sum = TRUE,
14 |   quiet = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{choice}{which file should be chosen? One of: smallest (default), largest,
19 | earliest (date-wise), latest, random, or all.}
20 | 
21 | \item{file}{the name of a specific file, overrules the choice and exchanges arguments}
22 | 
23 | \item{exchanges}{A vector of exchanges, can be NASDAQ, BX, or PSX.
24 | The default value is to consider all exchanges.}
25 | 
26 | \item{dir}{The directory where the files will be saved to, default is current working directory.}
27 | 
28 | \item{force_download}{If the file should be downloaded even if it already exists locally.
29 | Default value is FALSE.}
30 | 
31 | \item{check_md5sum}{If the md5-sum (hash-value) of the downloaded file should be checked, default value is TRUE.}
32 | 
33 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE}
34 | }
35 | \value{
36 | an invisible vector of the files
37 | }
38 | \description{
39 | The Server can be found at \url{https://emi.nasdaq.com/ITCH/Nasdaq\%20ITCH/}
40 | }
41 | \details{
42 | Warning: the smallest file is around 300 MB, with the largest exceeding 5 GB.
43 | There are about 17 files in total. Downloading all might take a considerable amount of time.
44 | }
45 | \examples{
46 | \dontrun{
47 | download_sample_file()
48 | file <- download_sample_file()
49 | file
50 | 
51 | # download a specific sample file
52 | file <- download_sample_file(file = "2019130.BX_ITCH_50.gz")
53 | file
54 | }
55 | }
56 | 


--------------------------------------------------------------------------------
/man/download_stock_directory.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/download_stock_directory.R
 3 | \name{download_stock_directory}
 4 | \alias{download_stock_directory}
 5 | \title{Downloads the stock directory (stock locate codes) for a given date and exchange}
 6 | \usage{
 7 | download_stock_directory(exchange, date, cache = FALSE, quiet = FALSE)
 8 | }
 9 | \arguments{
10 | \item{exchange}{The exchange, either NASDAQ (equivalent to NDQ), BX, or PSX}
11 | 
12 | \item{date}{The date, should be of class Date. If not the value is converted
13 | using \code{as.Date}.}
14 | 
15 | \item{cache}{If the stock directory should be cached, can be set to TRUE
16 | to save the stock directories in the working directory or a character for a
17 | target directory.}
18 | 
19 | \item{quiet}{If the download function should be quiet, default is FALSE.}
20 | }
21 | \value{
22 | a data.table of the tickers, the respective stock locate codes, and
23 | the exchange/date information
24 | }
25 | \description{
26 | The data is downloaded from NASDAQs server, which can be found here
27 | \url{https://emi.nasdaq.com/ITCH/Stock_Locate_Codes/}
28 | }
29 | \examples{
30 | \dontrun{
31 |   download_stock_directory("BX", "2019-07-02")
32 |   download_stock_directory(c("BX", "NDQ"), c("2019-07-02", "2019-07-03"))
33 |   download_stock_directory("BX", "2019-07-02", cache = TRUE)
34 | 
35 |   download_stock_directory("BX", "2019-07-02", cache = "stock_directory")
36 |   dir.exists("stock_directory")
37 |   list.files("stock_directory")
38 | }
39 | }
40 | 


--------------------------------------------------------------------------------
/man/ex20101224.TEST_ITCH_50.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/zzz.R
 3 | \name{ex20101224.TEST_ITCH_50}
 4 | \alias{ex20101224.TEST_ITCH_50}
 5 | \title{ITCH 50 Example Testing Dataset}
 6 | \description{
 7 | ITCH 50 Example Testing Dataset
 8 | }
 9 | \section{ex20101224.TEST_ITCH_50}{
10 | 
11 | 
12 | The test dataset contains artificial trading data for three made up stocks:
13 | \code{ALC}, \code{BOB}, and \code{CHAR}.
14 | 
15 | The dataset is used in the examples and unit tests of the package.
16 | 
17 | The data contains the following count of messages:
18 | \itemize{
19 | \item 6 system event (message type \code{S})
20 | \item 3 stock directory (message type \code{R})
21 | \item 3 trading status (message type \code{H})
22 | \item 5000 orders (4997 message type \code{A} and 3 \code{F})
23 | \item 2000 modifications (198 \code{E}, 45 \code{X}, 1745 \code{D}, and 12 \code{U} message types)
24 | \item 5000 trades (message type \code{P})
25 | }
26 | 
27 | The file is also available as \code{ex20101224.TEST_ITCH_50.gz}.
28 | 
29 | To get real sample ITCH datasets, see the \code{\link[=download_sample_file]{download_sample_file()}}
30 | function.
31 | }
32 | 
33 | \examples{
34 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
35 | 
36 | sys <- read_system_events(file)
37 | }
38 | 


--------------------------------------------------------------------------------
/man/figures/README-ETF_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavZim/RITCH/9bd51af48d26703bd95ab4f0db6532a497c104c1/man/figures/README-ETF_plot-1.png


--------------------------------------------------------------------------------
/man/filter_itch.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/filter_itch.R
  3 | \name{filter_itch}
  4 | \alias{filter_itch}
  5 | \title{Filters an ITCH file to another ITCH file}
  6 | \usage{
  7 | filter_itch(
  8 |   infile,
  9 |   outfile,
 10 |   filter_msg_class = NA_character_,
 11 |   filter_msg_type = NA_character_,
 12 |   filter_stock_locate = NA_integer_,
 13 |   min_timestamp = bit64::as.integer64(NA),
 14 |   max_timestamp = bit64::as.integer64(NA),
 15 |   filter_stock = NA_character_,
 16 |   stock_directory = NA,
 17 |   skip = 0,
 18 |   n_max = -1,
 19 |   append = FALSE,
 20 |   overwrite = FALSE,
 21 |   gz = FALSE,
 22 |   buffer_size = -1,
 23 |   quiet = FALSE,
 24 |   force_gunzip = FALSE,
 25 |   force_cleanup = TRUE
 26 | )
 27 | }
 28 | \arguments{
 29 | \item{infile}{the input file where the messages are taken from, can be a
 30 | gz-archive or a plain ITCH file.}
 31 | 
 32 | \item{outfile}{the output file where the filtered messages are written to.
 33 | Note that the date and exchange information from the \code{infile} are used,
 34 | see also \code{\link[=add_meta_to_filename]{add_meta_to_filename()}} for further information.}
 35 | 
 36 | \item{filter_msg_class}{a vector of classes to load, can be "orders", "trades",
 37 | "modifications", ... see also \code{\link[=get_msg_classes]{get_msg_classes()}}.
 38 | Default value is to take all message classes.}
 39 | 
 40 | \item{filter_msg_type}{a character vector, specifying a filter for message types.
 41 | Note that this can be used to only return 'A' orders for instance.}
 42 | 
 43 | \item{filter_stock_locate}{an integer vector, specifying a filter for locate codes.
 44 | The locate codes can be looked up by calling \code{\link[=read_stock_directory]{read_stock_directory()}}
 45 | or by downloading from NASDAQ by using \code{\link[=download_stock_directory]{download_stock_directory()}}.
 46 | Note that some message types (e.g., system events, MWCB, and IPO) do not use
 47 | a locate code.}
 48 | 
 49 | \item{min_timestamp}{a 64 bit integer vector (see also \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}})
 50 | of minimum timestamp (inclusive).
 51 | Note: min and max timestamp must be supplied with the same length or left empty.}
 52 | 
 53 | \item{max_timestamp}{a 64 bit integer vector (see also \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}})
 54 | of maximum timestamp (inclusive).
 55 | Note: min and max timestamp must be supplied with the same length or left empty.}
 56 | 
 57 | \item{filter_stock}{a character vector, specifying a filter for stocks.
 58 | Note that this is a shorthand for the \code{filter_stock_locate} argument, as it
 59 | tries to find the stock_locate based on the \code{stock_directory} argument,
 60 | if this is not found, it will try to extract the stock directory from the file,
 61 | else an error is thrown.}
 62 | 
 63 | \item{stock_directory}{A data.frame containing the stock-locate code relationship.
 64 | As outputted by \code{\link[=read_stock_directory]{read_stock_directory()}}.
 65 | Only used if \code{filter_stock} is set. To download the stock directory from
 66 | NASDAQs server, use \code{\link[=download_stock_directory]{download_stock_directory()}}.}
 67 | 
 68 | \item{skip}{Number of messages to skip before starting parsing messages,
 69 | note the skip parameter applies to the specific message class, i.e., it would
 70 | skip the messages for each type (e.g., skip the first 10 messages for each class).}
 71 | 
 72 | \item{n_max}{Maximum number of messages to parse, default is to read all values.
 73 | Can also be a data.frame of msg_types and counts, as returned by
 74 | \code{\link[=count_messages]{count_messages()}}.
 75 | Note the n_max parameter applies to the specific message class not the whole
 76 | file.}
 77 | 
 78 | \item{append}{if the messages should be appended to the outfile, default is
 79 | false. Note, this is helpful if \code{skip} and or \code{n_max} are used for
 80 | batch filtering.}
 81 | 
 82 | \item{overwrite}{if an existing outfile with the same name should be
 83 | overwritten. Default value is false}
 84 | 
 85 | \item{gz}{if the output file should be gzip-compressed. Note that the name
 86 | of the output file will be appended with .gz if not already present. The
 87 | final output name is returned. Default value is false.}
 88 | 
 89 | \item{buffer_size}{the size of the buffer in bytes, defaults to 1e8 (100 MB),
 90 | if you have a large amount of RAM, 1e9 (1GB) might be faster}
 91 | 
 92 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE}
 93 | 
 94 | \item{force_gunzip}{only applies if the input file is a gz-archive and a file with the same (gunzipped) name already exists.
 95 | if set to TRUE, the existing file is overwritten. Default value is FALSE}
 96 | 
 97 | \item{force_cleanup}{only applies if the input file is a gz-archive.
 98 | If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards.
 99 | Only applies when the gunzipped raw file did not exist before.}
100 | }
101 | \value{
102 | the name of the output file (may be different from the inputted
103 | outfile due to adding the date and exchange), silently
104 | }
105 | \description{
106 | This function allows to perform very fast filter operations on large ITCH
107 | files. The messages are written to another ITCH file.
108 | }
109 | \details{
110 | Note that this can be especially useful on larger files or where the
111 | available memory is too small to load and filter the data in R.
112 | 
113 | As with the \code{\link[=read_itch]{read_itch()}} functions, it allows to filter for
114 | \code{msg_class}, \code{msg_type}, \code{stock_locate}/\code{stock}, and
115 | \code{timestamp}.
116 | }
117 | \examples{
118 | infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
119 | outfile <- tempfile(fileext = "_20101224.TEST_ITCH_50")
120 | filter_itch(
121 |   infile, outfile,
122 |   filter_msg_class = c("orders", "trades"),
123 |   filter_msg_type = "R", # stock_directory
124 |   skip = 0, n_max = 100
125 | )
126 | 
127 | # expecting 100 orders, 100 trades, and 3 stock_directory entries
128 | count_messages(outfile)
129 | 
130 | # check that the output file contains the same
131 | res  <- read_itch(outfile, c("orders", "trades", "stock_directory"))
132 | sapply(res, nrow)
133 | 
134 | res2 <- read_itch(infile,  c("orders", "trades", "stock_directory"),
135 |                   n_max = 100)
136 | 
137 | all.equal(res, res2)
138 | }
139 | 


--------------------------------------------------------------------------------
/man/format_bytes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helpers.R
 3 | \name{format_bytes}
 4 | \alias{format_bytes}
 5 | \title{Formats a number of bytes}
 6 | \usage{
 7 | format_bytes(x, digits = 2, unit_suffix = "B", base = 1000)
 8 | }
 9 | \arguments{
10 | \item{x}{the values}
11 | 
12 | \item{digits}{the number of digits to display, default value is 2}
13 | 
 14 | \item{unit_suffix}{the unit suffix, default value is 'B' (for bytes);
 15 | 'B/s' is also useful for read/write speeds}
16 | 
17 | \item{base}{the base for kilo, mega, ... definition, default is 1000}
18 | }
19 | \value{
20 | the values as a character
21 | }
22 | \description{
23 | Formats a number of bytes
24 | }
25 | \examples{
26 | format_bytes(1234)
27 | format_bytes(1234567890)
28 | format_bytes(123456789012, unit_suffix = "iB", base = 1024)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/get_date_from_filename.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helpers.R
 3 | \name{get_date_from_filename}
 4 | \alias{get_date_from_filename}
 5 | \title{Returns the date from an ITCH-filename}
 6 | \usage{
 7 | get_date_from_filename(file)
 8 | }
 9 | \arguments{
10 | \item{file}{a filename}
11 | }
12 | \value{
13 | the date as fastPOSIXct
14 | }
15 | \description{
16 | Returns the date from an ITCH-filename
17 | }
18 | \examples{
19 | get_date_from_filename("03302017.NASDAQ_ITCH50")
20 | get_date_from_filename("20170130.BX_ITCH_50.gz")
21 | get_date_from_filename("S030220-v50-bx.txt.gz")
22 | get_date_from_filename("unknown_file_format")
23 | }
24 | \keyword{internal}
25 | 


--------------------------------------------------------------------------------
/man/get_exchange_from_filename.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helpers.R
 3 | \name{get_exchange_from_filename}
 4 | \alias{get_exchange_from_filename}
 5 | \title{Returns the exchange from an ITCH-filename}
 6 | \usage{
 7 | get_exchange_from_filename(file)
 8 | }
 9 | \arguments{
10 | \item{file}{a filename}
11 | }
12 | \value{
13 | The exchange
14 | }
15 | \description{
16 | Returns the exchange from an ITCH-filename
17 | }
18 | \examples{
19 | get_exchange_from_filename("03302017.NASDAQ_ITCH50")
20 | get_exchange_from_filename("20170130.BX_ITCH_50.gz")
21 | get_exchange_from_filename("S030220-v50-bx.txt.gz")
22 | get_exchange_from_filename("Unknown_file_format")
23 | }
24 | 


--------------------------------------------------------------------------------
/man/get_msg_classes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_functions.R
 3 | \name{get_msg_classes}
 4 | \alias{get_msg_classes}
 5 | \title{Returns the message class data for the message types}
 6 | \usage{
 7 | get_msg_classes()
 8 | }
 9 | \value{
10 | a data.table with the information of the message-types
11 | }
12 | \description{
13 | All information is handled according to the official ITCH 5.0
14 | documentation as found here:
15 | \url{http://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHSpecification.pdf}
16 | }
17 | \details{
18 | \itemize{
19 | \item \code{msg_type} the type of the message
20 | \item \code{msg_class} the group the message belongs to
21 | \item \code{msg_name} the official name of the message
22 | \item \code{doc_nr} the number of the message in the documentation
23 | }
24 | }
25 | \examples{
26 | get_msg_classes()
27 | }
28 | \seealso{
29 | \code{open_itch_specification()}
30 | }
31 | 


--------------------------------------------------------------------------------
/man/gz_functions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gz_functions.R
 3 | \name{gz_functions}
 4 | \alias{gz_functions}
 5 | \alias{gunzip_file}
 6 | \alias{gzip_file}
 7 | \title{Compresses and uncompresses files to and from gz-archives}
 8 | \usage{
 9 | gunzip_file(
10 |   infile,
11 |   outfile = gsub("\\\\.gz$", "", infile),
12 |   buffer_size = min(4 * file.size(infile), 2e+09)
13 | )
14 | 
15 | gzip_file(
16 |   infile,
17 |   outfile = NA,
18 |   buffer_size = min(4 * file.size(infile), 2e+09)
19 | )
20 | }
21 | \arguments{
22 | \item{infile}{the file to be zipped or unzipped}
23 | 
24 | \item{outfile}{the resulting zipped or unzipped file}
25 | 
26 | \item{buffer_size}{the size of the buffer to read in at once, default is 4 times the file.size (max 2Gb).}
27 | }
28 | \value{
29 | The filename of the unzipped file, invisibly
30 | }
31 | \description{
32 | Allows the compression and uncompression of files
33 | }
34 | \details{
35 | Functions are
36 | 
 37 | \itemize{
 38 | \item \code{gunzip_file}: uncompresses a gz-archive to raw binary data
 39 | 
 40 | \item \code{gzip_file}: compresses a raw binary data file to a gz-archive
 41 | }
42 | }
43 | \examples{
44 | gzfile <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
45 | file   <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
46 | 
47 | # uncompress file
48 | (outfile <- gunzip_file(gzfile, "tmp"))
49 | file.info(outfile)
50 | unlink(outfile)
51 | 
52 | # compress file
53 | (outfile <- gzip_file(file))
54 | file.info(outfile)
55 | unlink(outfile)
56 | }
57 | 


--------------------------------------------------------------------------------
/man/list_sample_files.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_samples.R
 3 | \name{list_sample_files}
 4 | \alias{list_sample_files}
 5 | \title{Returns a data.table of the sample files on the server}
 6 | \usage{
 7 | list_sample_files()
 8 | }
 9 | \value{
10 | a data.table of the files
11 | }
12 | \description{
13 | The Server can be found at \url{https://emi.nasdaq.com/ITCH/Nasdaq\%20ITCH/}
14 | }
15 | \examples{
16 | \dontrun{
17 |   list_sample_files()
18 | }
19 | }
20 | 


--------------------------------------------------------------------------------
/man/open_itch_sample_server.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helpers.R
 3 | \name{open_itch_sample_server}
 4 | \alias{open_itch_sample_server}
 5 | \title{Opens the ITCH sample page}
 6 | \usage{
 7 | open_itch_sample_server()
 8 | }
 9 | \value{
10 | the URL (invisible)
11 | }
12 | \description{
13 | The server can be found at \url{https://emi.nasdaq.com/ITCH/Nasdaq\%20ITCH/}.
14 | }
15 | \examples{
16 | \dontrun{
17 | open_itch_sample_server()
18 | }
19 | }
20 | 


--------------------------------------------------------------------------------
/man/open_itch_specification.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helpers.R
 3 | \name{open_itch_specification}
 4 | \alias{open_itch_specification}
 5 | \title{Opens the ITCH Specification PDF}
 6 | \usage{
 7 | open_itch_specification()
 8 | }
 9 | \value{
10 | the URL (invisible)
11 | }
12 | \description{
13 | The specifications can be found as a PDF \url{https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf}.
14 | }
15 | \examples{
16 | \dontrun{
17 | open_itch_specification()
18 | }
19 | }
20 | 


--------------------------------------------------------------------------------
/man/read_functions.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/read_functions.R
  3 | \name{read_functions}
  4 | \alias{read_functions}
  5 | \alias{read_itch}
  6 | \alias{read_system_events}
  7 | \alias{read_stock_directory}
  8 | \alias{read_trading_status}
  9 | \alias{read_reg_sho}
 10 | \alias{read_market_participant_states}
 11 | \alias{read_mwcb}
 12 | \alias{read_ipo}
 13 | \alias{read_luld}
 14 | \alias{read_orders}
 15 | \alias{read_modifications}
 16 | \alias{read_trades}
 17 | \alias{read_noii}
 18 | \alias{read_rpii}
 19 | \alias{get_orders}
 20 | \alias{get_trades}
 21 | \alias{get_modifications}
 22 | \title{Reads certain messages of an ITCH-file into a data.table}
 23 | \usage{
 24 | read_itch(
 25 |   file,
 26 |   filter_msg_class = NA,
 27 |   skip = 0,
 28 |   n_max = -1,
 29 |   filter_msg_type = NA_character_,
 30 |   filter_stock_locate = NA_integer_,
 31 |   min_timestamp = bit64::as.integer64(NA),
 32 |   max_timestamp = bit64::as.integer64(NA),
 33 |   filter_stock = NA_character_,
 34 |   stock_directory = NA,
 35 |   buffer_size = -1,
 36 |   quiet = FALSE,
 37 |   add_meta = TRUE,
 38 |   force_gunzip = FALSE,
 39 |   gz_dir = tempdir(),
 40 |   force_cleanup = TRUE
 41 | )
 42 | 
 43 | read_system_events(file, ..., add_descriptions = FALSE)
 44 | 
 45 | read_stock_directory(file, ..., add_descriptions = FALSE)
 46 | 
 47 | read_trading_status(file, ..., add_descriptions = FALSE)
 48 | 
 49 | read_reg_sho(file, ..., add_descriptions = FALSE)
 50 | 
 51 | read_market_participant_states(file, ..., add_descriptions = FALSE)
 52 | 
 53 | read_mwcb(file, ...)
 54 | 
 55 | read_ipo(file, ..., add_descriptions = FALSE)
 56 | 
 57 | read_luld(file, ...)
 58 | 
 59 | read_orders(file, ...)
 60 | 
 61 | read_modifications(file, ...)
 62 | 
 63 | read_trades(file, ...)
 64 | 
 65 | read_noii(file, ..., add_descriptions = FALSE)
 66 | 
 67 | read_rpii(file, ..., add_descriptions = FALSE)
 68 | 
 69 | get_orders(file, ...)
 70 | 
 71 | get_trades(file, ...)
 72 | 
 73 | get_modifications(file, ...)
 74 | }
 75 | \arguments{
 76 | \item{file}{the path to the input file, either a gz-archive or a plain ITCH file}
 77 | 
 78 | \item{filter_msg_class}{a vector of classes to load, can be "orders", "trades",
 79 | "modifications", ... see also \code{\link[=get_msg_classes]{get_msg_classes()}}.
 80 | Default value is to take all message classes.}
 81 | 
 82 | \item{skip}{Number of messages to skip before starting parsing messages,
 83 | note the skip parameter applies to the specific message class, i.e., it would
 84 | skip the messages for each type (e.g., skip the first 10 messages for each class).}
 85 | 
 86 | \item{n_max}{Maximum number of messages to parse, default is to read all values.
 87 | Can also be a data.frame of msg_types and counts, as returned by
 88 | \code{\link[=count_messages]{count_messages()}}.
 89 | Note the n_max parameter applies to the specific message class not the whole
 90 | file.}
 91 | 
 92 | \item{filter_msg_type}{a character vector, specifying a filter for message types.
 93 | Note that this can be used to only return 'A' orders for instance.}
 94 | 
 95 | \item{filter_stock_locate}{an integer vector, specifying a filter for locate codes.
 96 | The locate codes can be looked up by calling \code{\link[=read_stock_directory]{read_stock_directory()}}
 97 | or by downloading from NASDAQ by using \code{\link[=download_stock_directory]{download_stock_directory()}}.
 98 | Note that some message types (e.g., system events, MWCB, and IPO) do not use
 99 | a locate code.}
100 | 
101 | \item{min_timestamp}{a 64-bit integer vector (see also \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}})
102 | of minimum timestamp (inclusive).
103 | Note: min and max timestamp must be supplied with the same length or left empty.}
104 | 
105 | \item{max_timestamp}{a 64-bit integer vector (see also \code{\link[bit64:as.integer64.character]{bit64::as.integer64()}})
106 | of maximum timestamp (inclusive).
107 | Note: min and max timestamp must be supplied with the same length or left empty.}
108 | 
109 | \item{filter_stock}{a character vector, specifying a filter for stocks.
110 | Note that this is a shorthand for the \code{filter_stock_locate} argument, as it
111 | tries to find the stock_locate based on the \code{stock_directory} argument,
112 | if this is not found, it will try to extract the stock directory from the file,
113 | else an error is thrown.}
114 | 
115 | \item{stock_directory}{A data.frame containing the stock-locate code relationship.
116 | As outputted by \code{\link[=read_stock_directory]{read_stock_directory()}}.
117 | Only used if \code{filter_stock} is set. To download the stock directory from
118 | NASDAQs server, use \code{\link[=download_stock_directory]{download_stock_directory()}}.}
119 | 
120 | \item{buffer_size}{the size of the buffer in bytes, defaults to 1e8 (100 MB),
121 | if you have a large amount of RAM, 1e9 (1GB) might be faster}
122 | 
123 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE}
124 | 
125 | \item{add_meta}{if TRUE, the date and exchange information of the file are added, defaults to TRUE}
126 | 
127 | \item{force_gunzip}{only applies if the input file is a gz-archive and a file with the same (gunzipped) name already exists.
128 | if set to TRUE, the existing file is overwritten. Default value is FALSE}
129 | 
130 | \item{gz_dir}{a directory where the gz archive is extracted to.
131 | Only applies if file is a gz archive. Default is \code{\link[=tempdir]{tempdir()}}.}
132 | 
133 | \item{force_cleanup}{only applies if the input file is a gz-archive.
134 | If force_cleanup=TRUE, the gunzipped raw file will be deleted afterwards.
135 | Only applies when the gunzipped raw file did not exist before.}
136 | 
137 | \item{...}{Additional arguments passed to \code{read_itch}}
138 | 
139 | \item{add_descriptions}{add longer descriptions to shortened variables.
140 | The added information is taken from the official ITCH documentation
141 | see also \code{\link[=open_itch_specification]{open_itch_specification()}}}
142 | }
143 | \value{
144 | a data.table containing the messages
145 | }
146 | \description{
147 | For faster file-reads (at the tradeoff of increased memory usages), you can
148 | increase the \code{buffer_size} to 1GB (1e9) or more.
149 | 
150 | If you access the same file multiple times, you can provide the message
151 | counts as outputted from \code{\link[=count_messages]{count_messages()}} to the \code{n_max}
152 | argument, this allows skipping one pass over the file per read instruction.
153 | 
154 | If you need to read in multiple message classes, you can specify multiple
155 | message classes to \code{read_itch}, which results in only a single file pass.
156 | 
157 | If the file is too large to be loaded into the workspace at once, you can
158 | specify different \code{skip} and \code{n_max} to load only
159 | a specific range of messages.
160 | Alternatively, you can filter certain messages to another file using
161 | \code{\link[=filter_itch]{filter_itch()}}, which is substantially faster than parsing a file
162 | and filtering it.
163 | 
164 | Note that all read functions allow both plain ITCH files as well as gzipped
165 | files.
166 | If a gzipped file is found, it will look for a plain ITCH file with
167 | the same name and use that instead.
168 | If this file is not found, it will be created by unzipping the archive.
169 | Note that, by default, the archive is extracted to \code{gz_dir} (a temporary
170 | directory) and the unzipped file is deleted afterwards
171 | (\code{force_cleanup = TRUE}).
172 | Setting \code{force_cleanup = FALSE} keeps the unzipped file, which increases
173 | disk usage but reduces future read times for that specific file.
174 | Note that \code{force_cleanup} only deletes files that did not exist before
175 | the call and never removes the archive itself.
176 | }
177 | \details{
178 | The details of the different messages types can be found in the official
179 | ITCH specification (see also \code{\link[=open_itch_specification]{open_itch_specification()}})
180 | 
181 | \itemize{
182 | \item \code{read_itch}: Reads a message class message, can also read multiple
183 | classes in one file-pass.
184 | }
185 | 
186 | \itemize{
187 | \item \code{read_system_events}: Reads system event messages. Message type \code{S}
188 | }
189 | 
190 | \itemize{
191 | \item \code{read_stock_directory}: Reads stock directory messages. Message type \code{R}
192 | }
193 | 
194 | \itemize{
195 | \item \code{read_trading_status}: Reads trading status messages. Message type \code{H}
196 | and \code{h}
197 | }
198 | 
199 | \itemize{
200 | \item \code{read_reg_sho}: Reads messages regarding reg SHO. Message type \code{Y}
201 | }
202 | 
203 | \itemize{
204 | \item \code{read_market_participant_states}: Reads messages regarding the
205 | status of market participants. Message type \code{L}
206 | }
207 | 
208 | \itemize{
209 | \item \code{read_mwcb}: Reads messages regarding Market-Wide-Circuit-Breakers
210 | (MWCB). Message type \code{V} and \code{W}
211 | }
212 | 
213 | \itemize{
214 | \item \code{read_ipo}: Reads messages regarding IPOs. Message type \code{K}
215 | }
216 | 
217 | \itemize{
218 | \item \code{read_luld}: Reads messages regarding LULDs (limit up-limit down)
219 | auction collars. Message type \code{J}
220 | }
221 | 
222 | \itemize{
223 | \item \code{read_orders}: Reads order messages. Message type \code{A} and \code{F}
224 | }
225 | 
226 | \itemize{
227 | \item \code{read_modifications}: Reads order modification messages. Message
228 | type \code{E}, \code{C}, \code{X}, \code{D}, and \code{U}
229 | }
230 | 
231 | \itemize{
232 | \item \code{read_trades}: Reads trade messages. Message type \code{P}, \code{Q} and \code{B}
233 | }
234 | 
235 | \itemize{
236 | \item \code{read_noii}: Reads Net Order Imbalance Indicator (NOII) messages.
237 | Message type \code{I}
238 | }
239 | 
240 | \itemize{
241 | \item \code{read_rpii}: Reads Retail Price Improvement Indicator (RPII)
242 | messages. Message type \code{N}
243 | }
244 | 
245 | For backwards compatibility reasons, the following functions are provided as
246 | well:
247 | \itemize{
248 | \item \code{get_orders}: Redirects to \code{read_orders}
249 | }
250 | 
251 | \itemize{
252 | \item \code{get_trades}: Redirects to \code{read_trades}
253 | }
254 | 
255 | \itemize{
256 | \item \code{get_modifications}: Redirects to \code{read_modifications}
257 | }
258 | }
259 | \examples{
260 | \dontshow{
261 | data.table::setDTthreads(2)
262 | }
263 | file <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
264 | od <- read_orders(file, quiet = FALSE) # note quiet = FALSE is the default
265 | tr <- read_trades(file, quiet = TRUE)
266 | 
267 | ## Alternatively
268 | od <- read_itch(file, "orders", quiet = TRUE)
269 | 
270 | ll <- read_itch(file, c("orders", "trades"), quiet = TRUE)
271 | 
272 | od
273 | tr
274 | str(ll, max.level = 1)
275 | 
276 | ## additional options:
277 | 
278 | # take only subset of messages
279 | od <- read_orders(file, skip = 3, n_max = 10)
280 | 
281 | # a message count can be provided for slightly faster reads
282 | msg_count <- count_messages(file, quiet = TRUE)
283 | od <- read_orders(file, n_max = msg_count)
284 | 
285 | ## .gz archive functionality
286 | # .gz archives will be automatically unzipped
287 | gz_file <- system.file("extdata", "ex20101224.TEST_ITCH_50.gz", package = "RITCH")
288 | od <- read_orders(gz_file)
289 | # force a decompress and delete the decompressed file afterwards
290 | od <- read_orders(gz_file, force_gunzip = TRUE, force_cleanup = TRUE)
291 | 
292 | ## read_itch()
293 | otm <- read_itch(file, c("orders", "trades"), quiet = TRUE)
294 | str(otm, max.level = 1)
295 | 
296 | ## read_system_events()
297 | se <- read_system_events(file, add_descriptions = TRUE, quiet = TRUE)
298 | se
299 | 
300 | ## read_stock_directory()
301 | sd <- read_stock_directory(file, add_descriptions = TRUE, quiet = TRUE)
302 | sd
303 | 
304 | ## read_trading_status()
305 | ts <- read_trading_status(file, add_descriptions = TRUE, quiet = TRUE)
306 | ts
307 | 
308 | ## read_reg_sho()
309 | \dontrun{
310 | # note the example file has no reg SHO messages
311 | rs <- read_reg_sho(file, add_descriptions = TRUE, quiet = TRUE)
312 | rs
313 | }
314 | 
315 | ## read_market_participant_states()
316 | \dontrun{
317 | # note the example file has no market participant states
318 | mps <- read_market_participant_states(file, add_descriptions = TRUE,
319 |                                       quiet = TRUE)
320 | mps
321 | }
322 | 
323 | ## read_mwcb()
324 | \dontrun{
325 | # note the example file has no circuit breakers messages
326 | mwcb <- read_mwcb(file, quiet = TRUE)
327 | mwcb
328 | }
329 | 
330 | ## read_ipo()
331 | \dontrun{
332 | # note the example file has no IPOs
333 | ipo <- read_ipo(file, add_descriptions = TRUE, quiet = TRUE)
334 | ipo
335 | }
336 | 
337 | ## read_luld()
338 | \dontrun{
339 | # note the example file has no LULD messages
340 | luld <- read_luld(file, quiet = TRUE)
341 | luld
342 | }
343 | 
344 | ## read_orders()
345 | od <- read_orders(file, quiet = TRUE)
346 | od
347 | 
348 | ## read_modifications()
349 | mod <- read_modifications(file, quiet = TRUE)
350 | mod
351 | 
352 | ## read_trades()
353 | tr <- read_trades(file, quiet = TRUE)
354 | tr
355 | 
356 | ## read_noii()
357 | \dontrun{
358 | # note the example file has no NOII messages
359 | noii <- read_noii(file, add_descriptions = TRUE, quiet = TRUE)
360 | noii
361 | }
362 | 
363 | ## read_rpii()
364 | \dontrun{
365 | # note the example file has no RPII messages
366 | rpii <- read_rpii(file, add_descriptions = TRUE, quiet = TRUE)
367 | rpii
368 | }
369 | }
370 | \references{
371 | \url{https://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/NQTVITCHspecification.pdf}
372 | }
373 | 


--------------------------------------------------------------------------------
/man/write_itch.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/write_itch.R
 3 | \name{write_itch}
 4 | \alias{write_itch}
 5 | \title{Writes a data.frame or a list of data.frames of ITCH messages to file}
 6 | \usage{
 7 | write_itch(
 8 |   ll,
 9 |   file,
10 |   add_meta = TRUE,
11 |   append = FALSE,
12 |   compress = FALSE,
13 |   buffer_size = 1e+08,
14 |   quiet = FALSE,
15 |   append_warning = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{ll}{a data.frame or a list of data.frames of ITCH messages, in the format
20 | that the \code{\link[=read_functions]{read_functions()}} return}
21 | 
22 | \item{file}{the filename of the target file. If the folder to the file does
23 | not exist, it will be created recursively}
24 | 
25 | \item{add_meta}{if date and file information should be added to the filename.
26 | Default value is TRUE. Note that adding meta information changes the filename.}
27 | 
28 | \item{append}{if the information should be appended to the file. Default value
29 | is FALSE}
30 | 
31 | \item{compress}{if the file should be gzipped. Default value is FALSE.
32 | Note that if you compress a file, buffer_size matters a lot, with larger
33 | buffers you are more likely to get smaller filesizes in the end.
34 | Alternatively, but slower, is to write the file without compression fully
35 | and then gzip the file using another program.}
36 | 
37 | \item{buffer_size}{the maximum buffer size. Default value is 1e8 (100MB).
38 | Accepted values are > 52 and < 5e9}
39 | 
40 | \item{quiet}{if TRUE, the status messages are suppressed, defaults to FALSE}
41 | 
42 | \item{append_warning}{if append is set, a warning about timestamp ordering is
43 | given. Set \code{append_warning = FALSE} to silence the warning. Default
44 | value is TRUE}
45 | }
46 | \value{
47 | the filename (invisibly)
48 | }
49 | \description{
50 | Note that additional information, e.g., columns that were added, will be
51 | dropped in the process and only ITCH-compliant information is saved.
52 | }
53 | \details{
54 | Note that the ITCH filename contains the information for the date and exchange.
55 | This can be specified explicitly in the file argument or it is added if not
56 | turned off \code{add_meta = FALSE}.
57 | }
58 | \examples{
59 | infile <- system.file("extdata", "ex20101224.TEST_ITCH_50", package = "RITCH")
60 | sys <- read_system_events(infile, quiet = TRUE)
61 | outfile <- tempfile()
62 | write_itch(sys, outfile)
63 | 
64 | # create a list of events, stock directory, and orders and write to a file
65 | sdir <- read_stock_directory(infile, quiet = TRUE)
66 | od   <- read_orders(infile, quiet = TRUE)
67 | 
68 | ll <- list(sys, sdir, od)
69 | write_itch(ll, outfile)
70 | }
71 | 


--------------------------------------------------------------------------------
/simulate_dataset.R:
--------------------------------------------------------------------------------
  1 | ##############################
  2 | #' This script takes an existing dataset and samples and obfuscates the data
  3 | #' to create a smaller testing/example dataset.
  4 | #'
  5 | #' Messages that are sampled are:
  6 | #' - System Event Messages
  7 | #' - Stock Directory
  8 | #' - Trading Status
  9 | #' - Orders
 10 | #' - Modifications
 11 | #' - Trades
 12 | #'
 13 | ##############################
 14 | 
 15 | library(RITCH)
 16 | library(data.table)
 17 | 
 18 | # take 3 most traded stocks in orders, trades
 19 | file <- "20191230.BX_ITCH_50"
 20 | 
 21 | loc_code <- read_stock_directory(file, add_meta = FALSE, quiet = TRUE)
 22 | trades   <- read_trades(file, add_meta = FALSE, quiet = TRUE)
 23 | orders   <- read_orders(file, add_meta = FALSE, quiet = TRUE)
 24 | mods     <- read_modifications(file, add_meta = FALSE, quiet = TRUE)
 25 | 
 26 | names_trades <- names(trades)
 27 | names_orders <- names(orders)
 28 | names_mods   <- names(mods)
 29 | 
 30 | # look at the most active stocks
 31 | orders[, .(n = .N), by = stock][order(-n)][1:3]
 32 | trades[, .(n = .N), by = stock][order(-n)][1:3]
 33 | merge(
 34 |   mods[, .(n = .N), by = stock_locate][order(-n)][1:3],
 35 |   loc_code[, .(stock_locate, stock)], by = "stock_locate", all.x = TRUE
 36 | )
 37 | 
 38 | # take the following stocks as a base
 39 | stock_select <- c("TSLA" = "ALC", "NIO" = "BOB", "BABA" = "CHAR")
 40 | 
 41 | loc_codes <- loc_code[
 42 |   stock %chin% names(stock_select)
 43 | ][,
 44 |   .(stock_old = stock,
 45 |     old_loc_code = stock_locate,
 46 |     stock = stock_select[stock])
 47 | ][order(stock)][, stock_locate := 1:.N][]
 48 | 
 49 | # removes price outliers outside of a given sigma range...
 50 | remove_price_outliers <- function(dt, sigma = 3) {
 51 |   dd <- dt[]
 52 |   setorder(dd, stock, timestamp)
 53 |   dd[, rmean := frollmean(price, 100, align = "left"), by = stock][, rmean := nafill(rmean, type = "locf"), by = stock]
 54 |   dd[, diff := (price - rmean), by = stock]
 55 |   dd[, diff := (diff - mean(diff, na.rm = TRUE)) / sd(diff, na.rm = TRUE), by = .(buy, stock)]
 56 |   dd <- dd[diff > -sigma & diff < sigma]
 57 | 
 58 |   dd[, -c("diff", "rmean")]
 59 | }
 60 | 
 61 | # obfuscates prices in a "standard" way
 62 | obfuscate_prices <- function(dt) {
 63 |   price_info <- data.table(stock = c("ALC", "BOB", "CHAR"),
 64 |                            tar_min_price = c(180, 45,  90),
 65 |                            tar_range     = c(20,  5,   15),
 66 |                            est_min_price = c(410, 2.5, 210),
 67 |                            est_range     = c(30,  6,   6))
 68 | 
 69 |   dd <- merge(dt, price_info, by = "stock", all.x = TRUE)
 70 |   # dd[, ':=' (
 71 |   #   min_price = min(price),
 72 |   #   price_range = max(price) - min(price)
 73 |   # ), by = stock]
 74 | 
 75 |   # scale the price by the base prices...
 76 |   dd[, price := (price - est_min_price) / est_range * (tar_range) + tar_range]
 77 |   dd[, price := round(price, 4)]
 78 |   return(dd[, -c("tar_min_price", "tar_range", "est_min_price", "est_range")])
 79 | }
 80 | 
 81 | 
 82 | ######################
 83 | # Prepare System Event Messages
 84 | set.seed(65411235)
 85 | 
 86 | sys_ev <- read_system_events(file, add_meta = FALSE, quiet = TRUE)
 87 | sys_ev[, timestamp := timestamp + rnorm(.N, 0, 1e10)]
 88 | 
 89 | 
 90 | ######################
 91 | # Prepare Stock Directory Messages
 92 | set.seed(76411948)
 93 | 
 94 | stock_dir <- read_stock_directory(file, add_meta = FALSE, quiet = TRUE)
 95 | names_dir <- names(stock_dir)
 96 | sdir <- stock_dir[stock %chin% names(stock_select)][, stock := stock_select[stock]][]
 97 | 
 98 | valid_market_cat <- c("Q", "G", "S", "N", "A", "P", "Z", "V", " ")
 99 | sdir[, ':='(
100 |   market_category = sample(valid_market_cat, .N, replace = TRUE),
101 |   financial_status = "N",
102 |   issue_classification = "A",
103 |   ipo_flag = FALSE,
104 |   luld_price_tier = 2,
105 |   etp_leverage = 0,
106 |   stock_locate = NULL
107 | )]
108 | sdir <- sdir[loc_codes[, .(stock, stock_locate)], on = "stock"]
109 | setorder(sdir, stock)
110 | # rearrange timestamp to fit alphabetic stock names
111 | sdir[, timestamp := sort(timestamp)]
112 | setcolorder(sdir, names_dir)
113 | 
######################
# Trading Status Messages: keep the selected stocks, jitter timestamps, rename
# seed first so that the random timestamp noise is reproducible
set.seed(198179841)

trad_stat <- read_trading_status(file, quiet = TRUE, add_meta = FALSE)
names_stat <- names(trad_stat)

# keep the messages of the selected stocks (matched by their old locate code),
# then add noise (sd 1e8) to the timestamps and swap in the obfuscated tickers
trstat <- trad_stat[stock_locate %in% loc_codes$old_loc_code]
trstat[, `:=`(
  timestamp = timestamp + rnorm(n = .N, mean = 0, sd = 1e8),
  stock = stock_select[stock]
)]

# replace the old locate codes by the new ones
trstat <- merge(
  x = trstat[, -c("stock_locate")],
  y = loc_codes[, list(stock, stock_locate)],
  by = "stock",
  all.x = TRUE
)

# permute the timestamps by the order of decreasing locate code
trstat[, timestamp := timestamp[order(-stock_locate)]]

# restore the column order of the raw trading status messages
setcolorder(trstat, names_stat)
138 | 
######################
# Prepare Orders Messages
# Builds the sample of order messages from `orders` (created earlier in this
# script): selected stocks only, renamed, price outliers removed, N_ORDERS rows
# sampled, timestamps jittered and prices obfuscated.
set.seed(654918413)
N_ORDERS <- 5000

# rename the stock and stock_locates
or <- orders[stock %chin% names(stock_select)][, stock := stock_select[stock]]
# merge() without `by` joins on the shared column(s), here the stock name
or <- merge(or[, -c("stock_locate")], loc_codes[, .(stock, stock_locate)])

or <- remove_price_outliers(or, 2)

# Sample N orders
# (sample.int() errors if fewer than N_ORDERS rows are left at this point)
or <- or[sample.int(.N, N_ORDERS)]
# change timestamp
# (Gaussian noise with sd 1e6, afterwards the rows are sorted by timestamp)
or <- or[, timestamp := timestamp + rnorm(.N, 0, 1e6)][order(timestamp)]

# treat order_ref
# shift the references so that the smallest one becomes 0; MIN_ORDER_REF is
# reused for the modifications further below
MIN_ORDER_REF <- min(or$order_ref)
or[, order_ref := order_ref - MIN_ORDER_REF]

# obfuscate prices
or <- obfuscate_prices(or)
setcolorder(or, names_orders)
162 | 
163 | 
######################
# Prepare Trades Messages
# Builds the sample of trade messages from `trades` (created earlier in this
# script): selected stocks only, renamed, price outliers removed, rows sampled,
# timestamps jittered and prices obfuscated.
set.seed(7451984)
N_TRADES <- 1000

# rename the stock and stock_locates
tr <- trades[stock %chin% names(stock_select)][, stock := stock_select[stock]]
# merge() without `by` joins on the shared column(s), here the stock name
tr <- merge(tr[, -c("stock_locate")], loc_codes[, .(stock, stock_locate)])

tr <- remove_price_outliers(tr, 2)

# Sample the trades
# NOTE(review): this draws N_ORDERS (5000) rows although N_TRADES (1000) is
# defined above and otherwise unused. Confirm which size is intended before
# changing it, as it determines the content of the generated sample file.
tr <- tr[sample.int(.N, N_ORDERS)]
# change timestamp
# (Gaussian noise with sd 1e6, afterwards the rows are sorted by timestamp)
tr <- tr[, timestamp := timestamp + rnorm(.N, 0, 1e6)][order(timestamp)]

tr <- obfuscate_prices(tr)
setcolorder(tr, names_trades)
181 | 
182 | 
######################
# Prepare Modifications Messages
# Builds the sample of modification messages from `mods` (created earlier in
# this script). Only modifications that refer to a sampled order are kept.
set.seed(78632176)
N_MODS <- 2000

# select by the old locate code and keep it as the merge key
md <- mods[stock_locate %in% loc_codes$old_loc_code][, old_loc_code := stock_locate]
# replace the old locate code by the new one and add the stock name
# (the stock name is removed again at the end of this section)
md <- merge(md[, -c("stock_locate")],
            loc_codes[, .(stock, stock_locate, old_loc_code)],
            by = "old_loc_code")[, -c("old_loc_code")]

# subset only for stocks that are also in the orders
# (order_ref is shifted by the same offset as in the orders, so both match)
md[, order_ref := order_ref - MIN_ORDER_REF]
md <- md[order_ref %in% or$order_ref]

# sample.int() errors if fewer than N_MODS rows are left at this point
md <- md[sample.int(.N, N_MODS)]

md <- obfuscate_prices(md)
md[, stock := NULL]
setcolorder(md, names_mods)
202 | 
203 | 
########################################
# Combine datasets and write to file

# The order of the elements has to match the order of the reader functions in
# `funcs` below, as both lists are compared element by element.
ll <- list(
  sys_ev,
  sdir,
  trstat,
  or,
  tr,
  md
)

# write the dataset to file
# recursive = TRUE: also create "inst" if it does not exist yet, otherwise
# dir.create() fails and the writes below cannot succeed
if (!dir.exists("inst/extdata")) dir.create("inst/extdata", recursive = TRUE)
outfile <- "inst/extdata/ex20101224.TEST_ITCH_50"

# once as plain file and once compressed
write_itch(ll, outfile, add_meta = FALSE, quiet = TRUE)
write_itch(ll, outfile, compress = TRUE, add_meta = FALSE, quiet = TRUE)

cat(sprintf("Wrote sample dataset to '%s' with size '%.2f'KB\n",
            outfile, file.info(outfile)[["size"]] / 1024))

#######################################
# Read in the dataset and compare results
funcs <- list(read_system_events, read_stock_directory, read_trading_status,
              read_orders, read_trades, read_modifications)

ll_read <- lapply(funcs, function(f) f(outfile, quiet = TRUE, add_meta = FALSE))

# A bare all.equal() call is silently discarded when the script is run via
# source(), therefore report the outcome of the round trip explicitly.
round_trip <- all.equal(ll, ll_read, check.attributes = FALSE)
if (isTRUE(round_trip)) {
  cat("Round trip check passed: the data read back equals the written data\n")
} else {
  warning("Round trip check found differences:\n",
          paste(round_trip, collapse = "\n"),
          call. = FALSE)
}
233 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | PKG_LIBS = -lz
2 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
  1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
  2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
  3 | 
  4 | #include <Rcpp.h>
  5 | 
  6 | using namespace Rcpp;
  7 | 
  8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
  9 | Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
 10 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 11 | #endif
 12 | 
 13 | // count_messages_impl
 14 | Rcpp::DataFrame count_messages_impl(std::string filename, int64_t max_buffer_size, bool quiet);
 15 | RcppExport SEXP _RITCH_count_messages_impl(SEXP filenameSEXP, SEXP max_buffer_sizeSEXP, SEXP quietSEXP) {
 16 | BEGIN_RCPP
 17 |     Rcpp::RObject rcpp_result_gen;
 18 |     Rcpp::RNGScope rcpp_rngScope_gen;
 19 |     Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP);
 20 |     Rcpp::traits::input_parameter< int64_t >::type max_buffer_size(max_buffer_sizeSEXP);
 21 |     Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP);
 22 |     rcpp_result_gen = Rcpp::wrap(count_messages_impl(filename, max_buffer_size, quiet));
 23 |     return rcpp_result_gen;
 24 | END_RCPP
 25 | }
 26 | // filter_itch_impl
 27 | void filter_itch_impl(std::string infile, std::string outfile, int64_t start, int64_t end, Rcpp::CharacterVector filter_msg_type, Rcpp::IntegerVector filter_stock_locate, Rcpp::NumericVector min_timestamp, Rcpp::NumericVector max_timestamp, bool append, int64_t max_buffer_size, bool quiet);
 28 | RcppExport SEXP _RITCH_filter_itch_impl(SEXP infileSEXP, SEXP outfileSEXP, SEXP startSEXP, SEXP endSEXP, SEXP filter_msg_typeSEXP, SEXP filter_stock_locateSEXP, SEXP min_timestampSEXP, SEXP max_timestampSEXP, SEXP appendSEXP, SEXP max_buffer_sizeSEXP, SEXP quietSEXP) {
 29 | BEGIN_RCPP
 30 |     Rcpp::RNGScope rcpp_rngScope_gen;
 31 |     Rcpp::traits::input_parameter< std::string >::type infile(infileSEXP);
 32 |     Rcpp::traits::input_parameter< std::string >::type outfile(outfileSEXP);
 33 |     Rcpp::traits::input_parameter< int64_t >::type start(startSEXP);
 34 |     Rcpp::traits::input_parameter< int64_t >::type end(endSEXP);
 35 |     Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type filter_msg_type(filter_msg_typeSEXP);
 36 |     Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type filter_stock_locate(filter_stock_locateSEXP);
 37 |     Rcpp::traits::input_parameter< Rcpp::NumericVector >::type min_timestamp(min_timestampSEXP);
 38 |     Rcpp::traits::input_parameter< Rcpp::NumericVector >::type max_timestamp(max_timestampSEXP);
 39 |     Rcpp::traits::input_parameter< bool >::type append(appendSEXP);
 40 |     Rcpp::traits::input_parameter< int64_t >::type max_buffer_size(max_buffer_sizeSEXP);
 41 |     Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP);
 42 |     filter_itch_impl(infile, outfile, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, append, max_buffer_size, quiet);
 43 |     return R_NilValue;
 44 | END_RCPP
 45 | }
 46 | // gunzip_file_impl
 47 | void gunzip_file_impl(std::string infile, std::string outfile, int64_t buffer_size);
 48 | RcppExport SEXP _RITCH_gunzip_file_impl(SEXP infileSEXP, SEXP outfileSEXP, SEXP buffer_sizeSEXP) {
 49 | BEGIN_RCPP
 50 |     Rcpp::RNGScope rcpp_rngScope_gen;
 51 |     Rcpp::traits::input_parameter< std::string >::type infile(infileSEXP);
 52 |     Rcpp::traits::input_parameter< std::string >::type outfile(outfileSEXP);
 53 |     Rcpp::traits::input_parameter< int64_t >::type buffer_size(buffer_sizeSEXP);
 54 |     gunzip_file_impl(infile, outfile, buffer_size);
 55 |     return R_NilValue;
 56 | END_RCPP
 57 | }
 58 | // gzip_file_impl
 59 | void gzip_file_impl(std::string infile, std::string outfile, int64_t buffer_size);
 60 | RcppExport SEXP _RITCH_gzip_file_impl(SEXP infileSEXP, SEXP outfileSEXP, SEXP buffer_sizeSEXP) {
 61 | BEGIN_RCPP
 62 |     Rcpp::RNGScope rcpp_rngScope_gen;
 63 |     Rcpp::traits::input_parameter< std::string >::type infile(infileSEXP);
 64 |     Rcpp::traits::input_parameter< std::string >::type outfile(outfileSEXP);
 65 |     Rcpp::traits::input_parameter< int64_t >::type buffer_size(buffer_sizeSEXP);
 66 |     gzip_file_impl(infile, outfile, buffer_size);
 67 |     return R_NilValue;
 68 | END_RCPP
 69 | }
 70 | // read_itch_impl
 71 | Rcpp::List read_itch_impl(std::vector<std::string> classes, std::string filename, int64_t start, int64_t end, Rcpp::CharacterVector filter_msg_type, Rcpp::IntegerVector filter_stock_locate, Rcpp::NumericVector min_timestamp, Rcpp::NumericVector max_timestamp, int64_t max_buffer_size, bool quiet);
 72 | RcppExport SEXP _RITCH_read_itch_impl(SEXP classesSEXP, SEXP filenameSEXP, SEXP startSEXP, SEXP endSEXP, SEXP filter_msg_typeSEXP, SEXP filter_stock_locateSEXP, SEXP min_timestampSEXP, SEXP max_timestampSEXP, SEXP max_buffer_sizeSEXP, SEXP quietSEXP) {
 73 | BEGIN_RCPP
 74 |     Rcpp::RObject rcpp_result_gen;
 75 |     Rcpp::RNGScope rcpp_rngScope_gen;
 76 |     Rcpp::traits::input_parameter< std::vector<std::string> >::type classes(classesSEXP);
 77 |     Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP);
 78 |     Rcpp::traits::input_parameter< int64_t >::type start(startSEXP);
 79 |     Rcpp::traits::input_parameter< int64_t >::type end(endSEXP);
 80 |     Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type filter_msg_type(filter_msg_typeSEXP);
 81 |     Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type filter_stock_locate(filter_stock_locateSEXP);
 82 |     Rcpp::traits::input_parameter< Rcpp::NumericVector >::type min_timestamp(min_timestampSEXP);
 83 |     Rcpp::traits::input_parameter< Rcpp::NumericVector >::type max_timestamp(max_timestampSEXP);
 84 |     Rcpp::traits::input_parameter< int64_t >::type max_buffer_size(max_buffer_sizeSEXP);
 85 |     Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP);
 86 |     rcpp_result_gen = Rcpp::wrap(read_itch_impl(classes, filename, start, end, filter_msg_type, filter_stock_locate, min_timestamp, max_timestamp, max_buffer_size, quiet));
 87 |     return rcpp_result_gen;
 88 | END_RCPP
 89 | }
 90 | // write_itch_impl
 91 | int64_t write_itch_impl(Rcpp::List ll, std::string filename, bool append, bool gz, size_t max_buffer_size, bool quiet);
 92 | RcppExport SEXP _RITCH_write_itch_impl(SEXP llSEXP, SEXP filenameSEXP, SEXP appendSEXP, SEXP gzSEXP, SEXP max_buffer_sizeSEXP, SEXP quietSEXP) {
 93 | BEGIN_RCPP
 94 |     Rcpp::RObject rcpp_result_gen;
 95 |     Rcpp::RNGScope rcpp_rngScope_gen;
 96 |     Rcpp::traits::input_parameter< Rcpp::List >::type ll(llSEXP);
 97 |     Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP);
 98 |     Rcpp::traits::input_parameter< bool >::type append(appendSEXP);
 99 |     Rcpp::traits::input_parameter< bool >::type gz(gzSEXP);
100 |     Rcpp::traits::input_parameter< size_t >::type max_buffer_size(max_buffer_sizeSEXP);
101 |     Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP);
102 |     rcpp_result_gen = Rcpp::wrap(write_itch_impl(ll, filename, append, gz, max_buffer_size, quiet));
103 |     return rcpp_result_gen;
104 | END_RCPP
105 | }
106 | 
107 | static const R_CallMethodDef CallEntries[] = {
108 |     {"_RITCH_count_messages_impl", (DL_FUNC) &_RITCH_count_messages_impl, 3},
109 |     {"_RITCH_filter_itch_impl", (DL_FUNC) &_RITCH_filter_itch_impl, 11},
110 |     {"_RITCH_gunzip_file_impl", (DL_FUNC) &_RITCH_gunzip_file_impl, 3},
111 |     {"_RITCH_gzip_file_impl", (DL_FUNC) &_RITCH_gzip_file_impl, 3},
112 |     {"_RITCH_read_itch_impl", (DL_FUNC) &_RITCH_read_itch_impl, 10},
113 |     {"_RITCH_write_itch_impl", (DL_FUNC) &_RITCH_write_itch_impl, 6},
114 |     {NULL, NULL, 0}
115 | };
116 | 
117 | RcppExport void R_init_RITCH(DllInfo *dll) {
118 |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
119 |     R_useDynamicSymbols(dll, FALSE);
120 | }
121 | 


--------------------------------------------------------------------------------
/src/count_messages.cpp:
--------------------------------------------------------------------------------
 1 | #include "count_messages.h"
 2 | 
 3 | #ifdef __APPLE__
 4 | #  define fseeko64 fseeko
 5 | #  define ftello64 ftello
 6 | #endif
 7 | 
 8 | // counts messages in a file
 9 | std::vector<int64_t> count_messages_internal(std::string filename,
10 |                                              int64_t max_buffer_size) {
11 |   FILE* infile;
12 |   infile = fopen(filename.c_str(), "rb");
13 |   if (infile == NULL) {
14 |     char buffer [50];
15 |     snprintf(buffer, sizeof(buffer), "File Error number %i!", errno);
16 |     Rcpp::stop(buffer);
17 |   }
18 | 
19 |   // get size of the file
20 |   if (fseeko64(infile, 0L, SEEK_END) != 0) {
21 |     Rcpp::stop("Error seeking to end of file");
22 |   }
23 |   int64_t filesize = ftello64(infile);
24 |   if (filesize == -1) {
25 |     Rcpp::stop("Error getting file size");
26 |   }
27 |   if (fseeko64(infile, 0L, SEEK_SET) != 0) {
28 |     Rcpp::stop("Error seeking back to start of file");
29 |   }
30 | 
31 |   // create buffer
32 |   int64_t buf_size = max_buffer_size > filesize ? filesize : max_buffer_size;
33 |   unsigned char * buf;
34 |   buf = (unsigned char*) malloc(buf_size);
35 | 
36 |   int64_t bytes_read = 0, this_buffer_size = 0;
37 |   std::vector<int64_t> count(sizeof(MSG_SIZES)/sizeof(MSG_SIZES[0]));
38 | 
39 |   while (bytes_read < filesize) {
40 |     Rcpp::checkUserInterrupt();
41 | 
42 |     // read in buffer buffers
43 |     this_buffer_size = fread(buf, 1, buf_size, infile);
44 |     int64_t i = 0;
45 | 
46 |     int msg_size = 0;
47 |     do {
48 |       msg_size = get_message_size(buf[i + 2]);
49 | 
50 |       count[buf[i + 2] - 'A']++;
51 |       i += msg_size;
52 | 
53 |     } while (i + msg_size <= this_buffer_size && bytes_read + i <= filesize);
54 | 
55 |     // align the file pointer to read in a full message again
56 |     const int64_t offset = i - this_buffer_size;
57 |     fseeko64(infile, offset, SEEK_CUR);
58 |     bytes_read += i;
59 |   }
60 | 
61 |   free(buf);
62 |   fclose(infile);
63 |   return count;
64 | }
65 | 
66 | // [[Rcpp::export]]
67 | Rcpp::DataFrame count_messages_impl(std::string filename,
68 |                                     int64_t max_buffer_size,
69 |                                     bool quiet) {
70 | 
71 |   std::vector<int64_t> ct_raw = count_messages_internal(filename, max_buffer_size);
72 |   std::vector<int64_t> count = take_needed_messages(ct_raw);
73 | 
74 |   int64_t total_msgs = 0;
75 |   for (int64_t v : count) total_msgs += v;
76 | 
77 |   if (!quiet) Rprintf("[Counting]   %s total messages found\n",
78 |       format_thousands(total_msgs).c_str());
79 | 
80 |   if (!quiet) Rprintf("[Converting] to data.table\n");
81 | 
82 |   Rcpp::CharacterVector names;
83 |   for (const unsigned char c : ACT_MSG_NAMES) names.push_back(std::string(1, c));
84 | 
85 |   Rcpp::NumericVector ct(N_ACT_MSGS);
86 |   ct.attr("class") = "integer64";
87 |   std::memcpy(&(ct[0]), &(count[0]), N_ACT_MSGS * sizeof(double));
88 | 
89 |   Rcpp::List res = Rcpp::List::create(
90 |     Rcpp::Named("msg_type") = names,
91 |     Rcpp::Named("count") = ct
92 |   );
93 | 
94 |   res.attr("class") = Rcpp::CharacterVector::create("data.table", "data.frame");
95 |   return res;
96 | }
97 | 


--------------------------------------------------------------------------------
/src/count_messages.h:
--------------------------------------------------------------------------------
 1 | #ifndef COUNTMESSAGES_H
 2 | #define COUNTMESSAGES_H
 3 | 
 4 | #include <Rcpp.h>
 5 | #include "specifications.h"
 6 | #include "helper_functions.h"
 7 | 
// internal main worker function that counts the messages
// Returns one counter per entry of MSG_SIZES (see count_messages.cpp);
// max_buffer_size limits the size of the read buffer in bytes.
std::vector<int64_t> count_messages_internal(std::string filename,
                                             int64_t max_buffer_size);

// Entry function for returning the count data.frame
// Columns are `msg_type` and `count`; set quiet = true to suppress the
// progress messages.
Rcpp::DataFrame count_messages_impl(std::string filename,
                                    int64_t max_buffer_size = 1e8,
                                    bool quiet = false);
16 | 
17 | #endif // COUNTMESSAGES_H


--------------------------------------------------------------------------------
/src/filter_itch.cpp:
--------------------------------------------------------------------------------
  1 | #include "filter_itch.h"
  2 | 
  3 | #ifdef __APPLE__
  4 | #  define fseeko64 fseeko
  5 | #  define ftello64 ftello
  6 | #endif
  7 | 
  8 | // [[Rcpp::export]]
  9 | void filter_itch_impl(std::string infile, std::string outfile,
 10 |                       int64_t start, int64_t end,
 11 |                       Rcpp::CharacterVector filter_msg_type,
 12 |                       Rcpp::IntegerVector filter_stock_locate,
 13 |                       Rcpp::NumericVector min_timestamp,
 14 |                       Rcpp::NumericVector max_timestamp,
 15 |                       bool append,
 16 |                       int64_t max_buffer_size,
 17 |                       bool quiet) {
 18 | 
 19 |   // treat filters
 20 |   std::vector<char> filter_msgs;
 21 |   std::vector<int>  filter_sloc;
 22 | 
 23 |   for (auto f : filter_msg_type) filter_msgs.push_back(Rcpp::as<char>(f));
 24 |   for (int s : filter_stock_locate) filter_sloc.push_back(s);
 25 | 
 26 |   const size_t ts_size = min_timestamp.size();
 27 |   std::vector<int64_t> min_ts(ts_size);
 28 |   if (ts_size > 0)
 29 |     std::memcpy(&(min_ts[0]), &(min_timestamp[0]), ts_size * sizeof(int64_t)); 
 30 | 
 31 |   std::vector<int64_t> max_ts(ts_size);
 32 |   if (ts_size > 0)
 33 |     std::memcpy(&(max_ts[0]), &(max_timestamp[0]), ts_size * sizeof(int64_t));
 34 |   if (max_ts.size() == 1 && max_ts[0] == -1)
 35 |     max_ts[0] = std::numeric_limits<int64_t>::max();
 36 | 
 37 |   // get the max_ts_value!
 38 |   int64_t max_ts_val = -1;
 39 |   for (auto t : max_ts) if (t > max_ts_val) max_ts_val = t;
 40 |   if (max_ts_val == -1) max_ts_val = std::numeric_limits<int64_t>::max();
 41 | 
 42 |   if (end < 0) end = std::numeric_limits<int64_t>::max();
 43 | 
 44 |   if (filter_msgs.size() == 0 &&
 45 |       filter_sloc.size() == 0 &&
 46 |       min_ts.size() == 0 &&
 47 |       max_ts.size() == 0 &&
 48 |       start == 0 &&
 49 |       end == -1)
 50 |     Rcpp::stop("No filters where set, aborting filter process!");
 51 | 
 52 |   // parse the messages
 53 |   // redirect to the correct msg types only
 54 |   FILE* ifile;
 55 |   ifile = fopen(infile.c_str(), "rb");
 56 |   if (ifile == NULL) {
 57 |     char buffer [50];
 58 |     snprintf(buffer, sizeof(buffer), "Input File Error number %i!", errno);
 59 |     Rcpp::stop(buffer);
 60 |   }
 61 | 
 62 |   FILE* ofile;
 63 |   std::string omode = append ? "ab" : "wb";
 64 |   ofile = fopen(outfile.c_str(), omode.c_str());
 65 |   if (ofile == NULL)  {
 66 |     char buffer [50];
 67 |     snprintf(buffer, sizeof(buffer), "Output File Error number %i!", errno);
 68 |     Rcpp::stop(buffer);
 69 |   }
 70 | 
 71 |   // get size of the file
 72 |   if (fseeko64(ifile, 0L, SEEK_END) != 0) {
 73 |     Rcpp::stop("Error seeking to end of file");
 74 |   }
 75 |   int64_t filesize = ftello64(ifile);
 76 |   if (filesize == -1) {
 77 |     Rcpp::stop("Error getting file size");
 78 |   }
 79 |   if (fseeko64(ifile, 0L, SEEK_SET) != 0) {
 80 |     Rcpp::stop("Error seeking back to start of file");
 81 |   }
 82 | 
 83 |   // create buffer
 84 |   int64_t buf_size = max_buffer_size > filesize ? filesize : max_buffer_size;
 85 |   unsigned char * ibuf;
 86 |   unsigned char * obuf;
 87 |   ibuf = (unsigned char*) malloc(buf_size);
 88 |   obuf = (unsigned char*) malloc(buf_size);
 89 |   // Rprintf("Allocating buffer to size %lld\n", buf_size);
 90 | 
 91 |   int64_t bytes_read = 0, this_buffer_size = 0, bytes_written = 0;
 92 |   int64_t msg_read = 0, msg_count = 0;
 93 |   std::vector<int64_t> msg_reads(MSG_CLASS_SIZE, 0);
 94 | 
 95 |   int64_t o = 0;
 96 |   int msg_size;
 97 |   bool max_ts_reached = false;
 98 | 
 99 |   while (bytes_read < filesize && !max_ts_reached) {
100 |     Rcpp::checkUserInterrupt();
101 | 
102 |     // read in buffer buffers
103 |     this_buffer_size = fread(ibuf, 1, buf_size, ifile);
104 |     int64_t i = 0;
105 |     msg_size = 0;
106 | 
107 |     do {
108 |       // check early stop in max_timestamp
109 |       const int64_t cur_ts = getNBytes64<6>(&ibuf[i + 2 + 5]);
110 |       if (cur_ts > max_ts_val) {
111 |         max_ts_reached = true;
112 |         break;
113 |       }
114 | 
115 |       const unsigned char mt = ibuf[i + 2];
116 |       // Check Filter Messages
117 |       bool parse_message = true;
118 |       // only check the filter if previous tests are all OK
119 |       if (parse_message)
120 |         parse_message = passes_filter(&ibuf[i + 2], filter_msgs);
121 |       if (parse_message)
122 |         parse_message = passes_filter(&ibuf[i + 2 + 1], filter_sloc);
123 |       if (parse_message)
124 |         parse_message = passes_filter_in(&ibuf[i + 2 + 5], min_ts, max_ts);
125 |       // use TYPE_CLASS_TRANSLATOR as we count per message class not per msg_type!
126 |       if (parse_message) {
127 |         // count here the msg_reads to make sure that the count is within the
128 |         // other filters
129 |         parse_message = msg_reads[TYPE_CLASS_TRANSLATOR[mt - 'A']] >= start &&
130 |           msg_reads[TYPE_CLASS_TRANSLATOR[mt - 'A']] <= end;
131 |         msg_reads[TYPE_CLASS_TRANSLATOR[mt - 'A']]++;
132 |       }
133 | 
134 |       msg_size = get_message_size(mt);
135 | 
136 |       if (o + msg_size > buf_size) {
137 |         // write to buffer until o
138 |         // Rprintf("New obuf, write  %9lld bytes to ofile next msg %i\n",
139 |         //         o, msg_size);
140 |         fwrite(obuf, sizeof(unsigned char), o, ofile);
141 |         // reset obuf
142 |         std::memset(obuf, 0x00, buf_size);
143 | 
144 |         bytes_written += o;
145 |         o = 0;
146 |       }
147 | 
148 |       if (parse_message) {
149 |         // Rprintf("Filter ibuf at %lld copy into obuf at %lld\n",
150 |         // i, o);
151 |         msg_read++;
152 |         // Rprintf("Copying '%i' from ibuf at %lld to obuf at %lld\n",
153 |         //         msg_size, i, o);
154 |         std::memcpy(&(obuf[o]), &(ibuf[i]), msg_size);
155 |         o += msg_size;
156 |         // msg_reads[TYPE_CLASS_TRANSLATOR[mt - 'A']]++;
157 |       }
158 | 
159 |       msg_count++;
160 |       i += msg_size;
161 |       // 50 = max msg_size
162 |     } while (i + 50 <= this_buffer_size && bytes_read + i <= filesize);
163 | 
164 |     // offset file pointer to fit the next message into the buffer
165 |     const int64_t offset = i - this_buffer_size;
166 |     // Rprintf("Filter ibuf at %6lld offsetting by %3lld - Total bytes read %lld\n",
167 |     //         i, offset, bytes_read + i);
168 |     fseeko64(ifile, offset, SEEK_CUR);
169 |     bytes_read += i;
170 |   }
171 | 
172 |   if (o > 0) {
173 |     // write to buffer until o
174 |     // Rprintf("Last obuf, write %9lld bytes to ofile\n", o);
175 |     fwrite(obuf, sizeof(unsigned char), o, ofile);
176 |   }
177 | 
178 |   if (!quiet) {
179 |     Rprintf("[Bytes]      scanned %lld, filtered %lld\n",
180 |             (long long int) filesize, (long long int) bytes_written + o);
181 |     Rprintf("[Messages]   scanned %lld, filtered %lld\n",
182 |             (long long int) msg_count, (long long int) msg_read);
183 |   }
184 | 
185 |   free(ibuf);
186 |   fclose(ifile);
187 | 
188 |   free(obuf);
189 |   fclose(ofile);
190 | }
191 | 


--------------------------------------------------------------------------------
/src/filter_itch.h:
--------------------------------------------------------------------------------
 1 | #ifndef FILTERITCH_H
 2 | #define FILTERITCH_H
 3 | 
 4 | #include <Rcpp.h>
 5 | #include "specifications.h"
 6 | #include "helper_functions.h"
 7 | 
// Copies the messages of infile that pass the filters to outfile
// (implemented in filter_itch.cpp).
// start/end limit the per message class counter, the filter_* vectors and the
// timestamp ranges select the messages; empty vectors do not filter.
// The default values for append, max_buffer_size and quiet are declared here.
void filter_itch_impl(std::string infile, std::string outfile,
                      int64_t start, int64_t end,
                      Rcpp::CharacterVector filter_msg_type,
                      Rcpp::IntegerVector filter_stock_locate,
                      Rcpp::NumericVector min_timestamp,
                      Rcpp::NumericVector max_timestamp,
                      bool append = false,
                      int64_t max_buffer_size = 1e8,
                      bool quiet = false);
17 | 
18 | #endif // FILTERITCH_H


--------------------------------------------------------------------------------
/src/gz_functionality.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | #include <zlib.h>
  3 | 
  4 | /**
  5 |  * @brief Inflates (uncompresses) a gz file of binary data
  6 |  *
  7 |  * @param infile The name of the compressed gz archive
  8 |  * @param outfile The name of the uncompressed target file (make sure it does not exist before for faster speeds!)
  9 |  * @param buffer_size the size of the buffer, default is 1e9 bytes.
 10 |  */
 11 | // [[Rcpp::export]]
 12 | void gunzip_file_impl(std::string infile,
 13 |                       std::string outfile,
 14 |                       int64_t buffer_size = 1e9) {
 15 | 
 16 |   gzFile gzfile = gzopen(infile.c_str(), "rb");
 17 |   if (gzfile == NULL) {
 18 |     Rcpp::stop("Could not open file '%s' for gunzip", infile.c_str());
 19 |   }
 20 | 
 21 |   unsigned char* buf;
 22 |   int64_t buffer_char_size = sizeof(unsigned char) * buffer_size > UINT_MAX ?
 23 |     UINT_MAX :
 24 |     sizeof(unsigned char) * buffer_size;
 25 |   buf = (unsigned char*) malloc(buffer_char_size);
 26 | 
 27 |   int64_t this_buffer_size;
 28 | 
 29 |   FILE* ofile = fopen(outfile.c_str(), "wb");
 30 |   if (ofile == NULL) {
 31 |     Rcpp::stop("Could not open file '%s' for gunzip", outfile.c_str());
 32 |   }
 33 |   // iterate over the file until the all information is gathered
 34 | 
 35 |   while (1) {
 36 |     // fill the buffer
 37 |     this_buffer_size = gzread(gzfile, buf, buffer_char_size);
 38 |     // write the buffer
 39 |     fwrite(&buf[0], 1, this_buffer_size, ofile);
 40 | 
 41 |     // check if the read buffer is smaller than the asked size
 42 |     if (this_buffer_size < buffer_char_size || this_buffer_size == 0) {
 43 |       break;
 44 |     }
 45 |   }
 46 | 
 47 |   free(buf);
 48 |   fclose(ofile);
 49 |   gzclose(gzfile);
 50 | }
 51 | 
 52 | 
 53 | /**
 54 |  * @brief Deflates (compresses) a gz file of binary data
 55 |  *
 56 |  * @param infile The name of the raw uncompressed file
 57 |  * @param outfile The name of the compressed target file (make sure it does not exist before for faster speeds!)
 58 |  * @param buffer_size the size of the buffer, default is 1e9 bytes.
 59 |  */
 60 | // [[Rcpp::export]]
 61 | void gzip_file_impl(std::string infile,
 62 |                     std::string outfile,
 63 |                     int64_t buffer_size = 1e9) {
 64 | 
 65 |   FILE* file = fopen(infile.c_str(), "rb");
 66 |   if (file == NULL) {
 67 |     Rcpp::stop("Could not open file %s for gzip", infile.c_str());
 68 |   }
 69 | 
 70 |   unsigned char* buf;
 71 |   int64_t buffer_char_size = sizeof(unsigned char) * buffer_size > UINT_MAX ?
 72 |     UINT_MAX :
 73 |     sizeof(unsigned char) * buffer_size;
 74 |   buf = (unsigned char*) malloc(buffer_char_size);
 75 | 
 76 |   int64_t this_buffer_size;
 77 | 
 78 |   gzFile ofile = gzopen(outfile.c_str(), "wb");
 79 |   if (ofile == NULL) {
 80 |     Rcpp::stop("Could not open file %s for gzip", outfile.c_str());
 81 |   }
 82 |   // iterate over the file until the all information is gathered
 83 | 
 84 |   while (1) {
 85 |     // fill the buffer
 86 |     this_buffer_size = fread(buf, 1, buffer_char_size, file);
 87 |     // write the buffer
 88 |     gzwrite(ofile, &buf[0], this_buffer_size);
 89 | 
 90 |     // check if the read buffer is smaller than the asked size
 91 |     if (this_buffer_size < buffer_char_size || this_buffer_size == 0) {
 92 |       break;
 93 |     }
 94 |   }
 95 | 
 96 |   free(buf);
 97 |   fclose(file);
 98 |   gzclose(ofile);
 99 | }
100 | 


--------------------------------------------------------------------------------
/src/helper_functions.cpp:
--------------------------------------------------------------------------------
  1 | #include "helper_functions.h"
  2 | 
  3 | 
  4 | // small helper function to get the message size for a char
  5 | int get_message_size(const unsigned char msg) {
  6 |   return MSG_SIZES[msg - 'A'] + 2;
  7 | }
  8 | 
  9 | // the count_messages_internal function is optimized and therefore contains
 10 | // unused messages (they are used for faster access speeds!)
 11 | // (see also Specifications.h)
 12 | // this function extracts the needed message classes from the raw vector
 13 | std::vector<int64_t> take_needed_messages(std::vector<int64_t> &v) {
 14 |   std::vector<int64_t> res;
 15 |   for (const unsigned char act_msg : ACT_MSG_NAMES) {
 16 |     size_t i = 0;
 17 |     for (const unsigned char msg : MSG_NAMES) {
 18 |       if (msg == act_msg) {
 19 |         res.push_back(v[i]);
 20 |         break;
 21 |       }
 22 |       i++;
 23 |     }
 24 |   }
 25 |   return res;
 26 | }
 27 | 
 28 | /*
 29 |  * @brief      Formats an integer number to a std::string with thousands separator
 30 |  *
 31 |  * @param      num    The number to format
 32 |  * @param      sep    The thousands separator, default value is a comma
 33 |  * @param      s      The return string, this is only used internally, as the function
 34 |  *                    is called recursively
 35 |  *
 36 |  * @return       The number as a string
 37 |  */
 38 | std::string format_thousands(int64_t num,
 39 |                              const std::string sep,
 40 |                              const std::string s) {
 41 |   if (num < 1000) {
 42 |     return std::to_string(num) + s;
 43 |   } else {
 44 |     std::string last_three = std::to_string(num % 1000);
 45 |     const int num_zeros = 3 - last_three.length();
 46 |     last_three = std::string(num_zeros, '0').append(last_three);
 47 | 
 48 |     const int64_t remainder = (int64_t) num / 1000;
 49 |     const std::string res = sep + last_three + s;
 50 |     return format_thousands(remainder, sep, res);
 51 |   }
 52 | }
 53 | 
 54 | // #############################################################################
 55 | // small internal helper function to convert bytes etc
 56 | // #############################################################################
 57 | 
 58 | // return N bytes of a buffer as a string
 59 | std::string getNBytes(unsigned char* buf, const int n, const unsigned char empty) {
 60 |   std::string res;
 61 |   for (int i = 0; i < n; ++i) if (buf[i] != empty) res += buf[i];
 62 |   return res;
 63 | }
 64 | 
 65 | // converts a Numeric Vector to int64
 66 | Rcpp::NumericVector to_int64(Rcpp::NumericVector v) {
 67 |   v.attr("class") = "integer64";
 68 |   return v;
 69 | }
 70 | 
 71 | // helper functions that check if a buffer value is in a vector of filters
 72 | // equivalent of R buf_val %in% filter
 73 | bool passes_filter(unsigned char* buf, std::vector<char> &filter) {
 74 |   if (filter.size() == 0) return true;
 75 |   for (unsigned char cc : filter) if (cc == *buf) return true;
 76 |   return false;
 77 | }
 78 | // same helper function as before but for int vector
 79 | bool passes_filter(unsigned char* buf, std::vector<int> &filter) {
 80 |   if (filter.size() == 0) return true;
 81 |   const int val = (int) getNBytes32<2>(&buf[0]);
 82 |   for (int cc : filter) if (cc == val) return true;
 83 |   return false;
 84 | }
 85 | // check larger/smaller inclusive for 6 byte numbers (timestamp)
 86 | // equivalent to R (buf_val >= lower & buf_val <= upper)
 87 | bool passes_filter_in(unsigned char* buf,
 88 |                       std::vector<int64_t> &lower,
 89 |                       std::vector<int64_t> &upper) {
 90 |   // lower and upper have the same size!
 91 |   if (lower.size() == 0) return true;
 92 |   const int64_t val = getNBytes64<6>(buf);
 93 |   for (size_t i = 0; i < lower.size(); i++) {
 94 |     if (val >= lower[i] && val <= upper[i]) return true;
 95 |   }
 96 | 
 97 |   return false;
 98 | }
 99 | 
// Stores the low 16 bits of val in b as 2 bytes, most significant byte first
// (big endian), e.g. val = 8236 becomes 0x20 0x2c.
//   b   - destination buffer with room for at least 2 bytes
//   val - value to store; bits above the low 16 are ignored
// Returns the number of bytes written (always 2).
uint64_t set2bytes(unsigned char* b, int32_t val) {
  const unsigned char high_byte = (val >> 8) & 0xff;
  const unsigned char low_byte  = val & 0xff;
  b[0] = high_byte;
  b[1] = low_byte;
  // Rprintf("Converting: %15i -> 0x %02x %02x\n",
  //         val, b[0], b[1]);
  return 2;
}
109 | 
// Stores val in b as 4 bytes, most significant byte first (big endian),
// e.g. val = 11900 becomes 0x00 0x00 0x2e 0x7c.
//   b   - destination buffer with room for at least 4 bytes
//   val - value to store; negative values are written as their
//         two's-complement bit pattern
// Returns the number of bytes written (always 4).
uint64_t set4bytes(unsigned char* b, int32_t val) {
  // shift an unsigned copy: right-shifting a negative signed value is
  // implementation-defined before C++20
  const uint32_t bits = static_cast<uint32_t>(val);
  // each output byte is masked with 0xff so exactly one byte is selected
  b[0] = static_cast<unsigned char>((bits >> 24) & 0xff);
  b[1] = static_cast<unsigned char>((bits >> 16) & 0xff);
  b[2] = static_cast<unsigned char>((bits >> 8)  & 0xff);
  b[3] = static_cast<unsigned char>(bits         & 0xff);
  // Rprintf("Converting: %15i -> 0x %02x %02x %02x %02x\n",
  //         val, b[0], b[1], b[2], b[3]);
  return 4;
}
// Stores the low 48 bits of val in b as 6 bytes, most significant byte first
// (big endian), e.g. 25200002107428 becomes 0x16 0xeb 0x55 0x2c 0x88 0x24.
//   b   - destination buffer with room for at least 6 bytes
//   val - value to store; bits above the low 48 are ignored
// Returns the number of bytes written (always 6).
uint64_t set6bytes(unsigned char* b, int64_t val) {
  // work on an unsigned copy: right-shifting a negative signed value is
  // implementation-defined before C++20
  uint64_t bits = static_cast<uint64_t>(val);
  // fill from the last (least significant) byte towards the first,
  // masking with 0xff so exactly one byte is selected per position
  for (int pos = 5; pos >= 0; --pos) {
    b[pos] = static_cast<unsigned char>(bits & 0xff);
    bits >>= 8;
  }
  // Rprintf("Converting: %15lld -> 0x %02x %02x %02x %02x %02x %02x\n",
  //         (long long) val, b[0], b[1], b[2], b[3], b[4], b[5]);
  return 6;
}
// Stores val in b as 8 bytes, most significant byte first (big endian),
// e.g. 4 becomes 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x04.
//   b   - destination buffer with room for at least 8 bytes
//   val - value to store; negative values are written as their
//         two's-complement bit pattern
// Returns the number of bytes written (always 8).
uint64_t set8bytes(unsigned char* b, int64_t val) {
  // work on an unsigned copy: right-shifting a negative signed value is
  // implementation-defined before C++20
  uint64_t bits = static_cast<uint64_t>(val);
  // fill from the last (least significant) byte towards the first,
  // masking with 0xff so exactly one byte is selected per position
  for (int pos = 7; pos >= 0; --pos) {
    b[pos] = static_cast<unsigned char>(bits & 0xff);
    bits >>= 8;
  }
  // Rprintf("Converting: %15lld -> 0x %02x %02x %02x %02x %02x %02x %02x %02x\n",
  //         (long long) val, b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]);
  return 8;
}
149 | // sets inside a unsigned char buffer b, n bytes from the string x, returns number of bytes changed
150 | // i.e., "UFO" with 8 to 0x55534f2020202020 (filled with whitespaces)
151 | uint64_t setCharBytes(unsigned char* b, std::string x, uint64_t n) {
152 |   unsigned char *st = new unsigned char[n + 1];
153 |   if (x.size() > n)
154 |     Rprintf("ERROR: setChar Bytes for string '%s' larger than capacity %llu\n",
155 |             x.c_str(), (long long unsigned int) n);
156 |   for (uint64_t j = 0; j < n; j++) st[j] = ' '; // fill with n spaces
157 |   for (uint64_t j = 0; j < x.size(); j++) st[j] = x[j]; // copy the string x
158 |   memcpy(b, st, n);
159 |   // Rprintf("Set %i unsigned char Bytes from '%s' -> 0x %02x %02x %02x %02x %02x %02x %02x %02x\n",
160 |   //         n, x.c_str(), b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]);
161 |   delete[] st;
162 |   return n;
163 | }
164 | 


--------------------------------------------------------------------------------
/src/helper_functions.h:
--------------------------------------------------------------------------------
 1 | #ifndef HELPERFUNCTIONS_H
 2 | #define HELPERFUNCTIONS_H
 3 | 
 4 | #include <Rcpp.h>
 5 | #include "specifications.h"
 6 | 
// Returns the size of the message whose type is the character msg.
// NOTE(review): presumably a lookup in MSG_SIZES (specifications.h); the
// definition is not part of this header - confirm there.
int get_message_size(const unsigned char msg);
// converts from the long form (MSG_NAMES) to the shorter used form (ACT_MSG_NAMES)
std::vector<int64_t> take_needed_messages(std::vector<int64_t> &v);
// Formats a number with a thousands separator.
//   num - number to format
//   sep - separator placed between groups of three digits (default ",")
//   s   - text appended after the formatted number (default: nothing)
std::string format_thousands(int64_t num,
                             const std::string sep = ",",
                             const std::string s = "");
15 | 
16 | // get bytes functions
17 | 
// Reads `size` bytes from buff as one big-endian (most significant byte
// first) integer and returns it as an int32_t.
//   size - number of bytes to read (template argument), meant for 1 to 4
//   buff - source buffer, must hold at least `size` readable bytes
// The value is accumulated unsigned because left-shifting a signed value
// into the sign bit is not well-defined before C++20; this matters for
// 4-byte values whose top bit is set.
template<size_t size> int32_t getNBytes32(unsigned char* buff) {
    uint32_t acc = 0;
    for (size_t i = 0; i < size; ++i) {
        acc = (acc << 8) | buff[i];
    }
    return static_cast<int32_t>(acc);
}
// Reads `size` bytes from buff as one big-endian (most significant byte
// first) integer and returns it as an int64_t.
//   size - number of bytes to read (template argument), meant for 1 to 8
//   buff - source buffer, must hold at least `size` readable bytes
// The value is accumulated unsigned because left-shifting a signed value
// into the sign bit is not well-defined before C++20; this matters for
// 8-byte values whose top bit is set.
template<size_t size> int64_t getNBytes64(unsigned char* buff) {
    uint64_t acc = 0;
    for (size_t i = 0; i < size; ++i) {
        acc = (acc << 8) | buff[i];
    }
    return static_cast<int64_t>(acc);
}
38 | 
// Returns the first n bytes of buf as a string, skipping every byte equal to
// `empty` (defaults: 8 bytes, spaces skipped)
std::string getNBytes(unsigned char* buf, const int n = 8, const unsigned char empty = ' ');

// converts a numeric vector to integer64 (sets its class attribute)
Rcpp::NumericVector to_int64(Rcpp::NumericVector v);

// functions that check if a buffer passes a filter;
// an empty filter lets every value pass
// char version: tests the single byte at buf against the filter entries
bool passes_filter(unsigned char* buf, std::vector<char> &filter);
// int version: tests the 2-byte big-endian value at buf against the filter entries
bool passes_filter(unsigned char* buf, std::vector<int> &filter);
// range version: true if the 6-byte value at buf lies within any
// [lower[i], upper[i]] (inclusive); lower and upper must have the same size
bool passes_filter_in(unsigned char* buf, std::vector<int64_t> &lower,
                      std::vector<int64_t> &upper);

// set functions, set X bytes in a buffer (most significant byte first),
// each returns the number of bytes written
uint64_t set2bytes(unsigned char* b, int32_t val);
uint64_t set4bytes(unsigned char* b, int32_t val);
uint64_t set6bytes(unsigned char* b, int64_t val);
uint64_t set8bytes(unsigned char* b, int64_t val);
// writes the string x into b, padded with spaces to n bytes, returns n
uint64_t setCharBytes(unsigned char* b, std::string x, uint64_t n);
56 | 
57 | #endif //HELPERFUNCTIONS_H
58 | 


--------------------------------------------------------------------------------
/src/read_functions.h:
--------------------------------------------------------------------------------
  1 | #ifndef READFUNCTIONS_H
  2 | #define READFUNCTIONS_H
  3 | 
  4 | #include "specifications.h"
  5 | #include "count_messages.h"
  6 | 
// Entry function for the reading functionality; returns an Rcpp::List.
// The definition is not part of this header.
// NOTE(review): the parameter descriptions below are inferred from names and
// types only - confirm against the implementation.
//   classes             - message classes to read (presumably names from MSG_CLASSES)
//   filename            - file to read from
//   start, end          - presumably the range of messages to read
//   filter_msg_type     - message types to keep
//   filter_stock_locate - stock locate codes to keep
//   min_timestamp,
//   max_timestamp       - presumably paired lower/upper timestamp bounds
//   max_buffer_size     - buffer size limit, defaults to 1e8
//   quiet               - defaults to false; presumably silences status output
Rcpp::List read_itch_impl(std::vector<std::string> classes,
                          std::string filename,
                          int64_t start, int64_t end,
                          Rcpp::CharacterVector filter_msg_type,
                          Rcpp::IntegerVector filter_stock_locate,
                          Rcpp::NumericVector min_timestamp,
                          Rcpp::NumericVector max_timestamp,
                          int64_t max_buffer_size = 1e8,
                          bool quiet = false);
 17 | 
/*
 * Message Parser class, each object holds one "class" (stock_directory,
 *   system_events, trades, ...) and is able to parse its messages.
 *
 * The main usage is
 *
 * - create a MessageParser with its type (can be empty for no class)
 * - activate the object if messages need to be parsed later on
 * - init the vectors to appropriate sizes
 * - loop over a buffer and call parse_message on the respective messages
 * - convert the parsed messages to a data.frame with get_data_frame
 *
 * Note that the class holds vectors for all possible classes but only fills
 * and uses the vectors needed for its own class.
 *
 */
class MessageParser{
public:
  // type: name of the message class handled by this parser;
  // skip and n_max default to 0 and the largest int64_t value
  // NOTE(review): presumably skip/n_max select a window of messages of this
  // type (see msg_buf_idx below) - confirm in the implementation
  MessageParser(std::string type,
                int64_t skip = 0,
                int64_t n_max = std::numeric_limits<int64_t>::max());

  void activate();
  void init_vectors(int64_t n);
  void parse_message(unsigned char * buf);
  Rcpp::List get_data_frame();

  // message type characters belonging to this parser
  std::vector<char> msg_types;
  // parsers start inactive, see activate()
  bool active = false;

private:
  void prune_vectors();

  std::string type;
  // msg_buf_idx is only used when the skip/n_max is used.
  // index counts the number of messages in the Parser, msg_buf_idx counts the
  // running number of messages of this type it has seen (but not necessarily parsed!)
  // NOTE(review): start_count and end_count have no in-class initializer,
  // presumably the constructor sets them from skip/n_max - confirm
  int64_t size = 0, index = 0, msg_buf_idx = 0, start_count, end_count;
  std::vector<std::string> colnames;

  // general data vectors
  // NOTE: later classes may use earlier vectors as well,
  // e.g., noii also uses cross_type, defined under trades...

  Rcpp::CharacterVector msg_type;
  Rcpp::IntegerVector stock_locate, tracking_number;
  Rcpp::NumericVector timestamp;

  // system_events
  Rcpp::CharacterVector    event_code;

  // stock_directory
  Rcpp::CharacterVector stock;
  Rcpp::CharacterVector market_category, financial_status;
  Rcpp::IntegerVector lot_size;
  Rcpp::LogicalVector round_lots_only;
  Rcpp::CharacterVector issue_classification;
  Rcpp::CharacterVector issue_subtype;
  Rcpp::LogicalVector authentic;
  Rcpp::LogicalVector short_sell_closeout;
  Rcpp::LogicalVector ipo_flag;
  Rcpp::CharacterVector luld_price_tier;
  Rcpp::LogicalVector etp_flag;
  Rcpp::IntegerVector etp_leverage;
  Rcpp::LogicalVector inverse;

  // trading_status
  Rcpp::CharacterVector trading_state, reserved;
  Rcpp::CharacterVector reason;
  Rcpp::CharacterVector market_code;
  Rcpp::LogicalVector operation_halted;

  // reg_sho
  Rcpp::CharacterVector regsho_action;

  // market_participant_states
  Rcpp::LogicalVector primary_mm;
  Rcpp::CharacterVector mm_mode, participant_state;

  // mwcb
  Rcpp::NumericVector level1, level2, level3;
  Rcpp::IntegerVector breached_level;

  // ipo
  Rcpp::IntegerVector release_time;
  Rcpp::CharacterVector release_qualifier;
  Rcpp::NumericVector ipo_price;

  // luld
  Rcpp::NumericVector reference_price, lower_price, upper_price;
  Rcpp::IntegerVector extension;

  // orders
  Rcpp::NumericVector order_ref;
  Rcpp::LogicalVector buy;
  Rcpp::IntegerVector shares;
  Rcpp::NumericVector price;
  Rcpp::CharacterVector mpid;

  // modifications
  Rcpp::NumericVector new_order_ref;
  Rcpp::LogicalVector printable;

  // trades
  Rcpp::NumericVector match_number;
  Rcpp::CharacterVector cross_type;

  // noii
  Rcpp::NumericVector paired_shares, imbalance_shares;
  Rcpp::CharacterVector imbalance_direction;
  Rcpp::NumericVector far_price, near_price;
  Rcpp::CharacterVector variation_indicator;

  // rpii
  Rcpp::CharacterVector interest_flag;
};
134 | 
135 | #endif // READFUNCTIONS_H
136 | 


--------------------------------------------------------------------------------
/src/specifications.h:
--------------------------------------------------------------------------------
 1 | #ifndef SPECIFICATIONS_H
 2 | #define SPECIFICATIONS_H
 3 | 
 4 | // to fix windows int64_t typedef issues...
 5 | #include <stdint.h>
 6 | #include <string>
 7 | #include <vector>
 8 | 
 9 | // Define NA_INT64
10 | const int64_t NA_INT64 = 1ULL << 63;
11 | 
// the lengths of the message types ordered based on their ASCII table positions
// To get the size of a message 'msg' (e.g., 'Q') use MSG_SIZES[msg - 'A'];
// characters that are not a message type have size 0
const int MSG_SIZES [] = {
  // A   B   C   D   E   F  G   H   I   J   K   L  M   N  O   P   Q   R   S  T
  36, 19, 36, 19, 31, 40, 0, 25, 50, 35, 28, 26, 0, 20, 0, 44, 40, 39, 12, 0,
  // U   V   W   X   Y  Z  [  \  ]  ^  _  `  a  b  c  d  e  f  g   h
  35, 35, 12, 23, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21
};
// the names of the message types, one per entry of MSG_SIZES
// (every ASCII character from 'A' to 'h')
const unsigned char MSG_NAMES [] = {
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^',
  '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'
};
// the number of message types in MSG_SIZES, MSG_NAMES, value is 40...
const int N_TYPES = sizeof(MSG_SIZES) / sizeof(MSG_SIZES[0]);

// the names of the messages we actually use
// (exactly the types with a non-zero entry in MSG_SIZES)
const unsigned char ACT_MSG_NAMES [] = {
  'S', 'R', 'H', 'Y', 'L', 'V', 'W', 'K', 'J', 'h', 'A', 'F', 'E', 'C', 'X',
  'D', 'U', 'P', 'Q', 'B', 'I', 'N'
};
// the number of message types in ACT_MSG_NAMES
const int N_ACT_MSGS = sizeof(ACT_MSG_NAMES) / sizeof(ACT_MSG_NAMES[0]);

// The message classes (or groups); the position in this vector is the class
// index used by TYPE_CLASS_TRANSLATOR below
const std::vector<std::string> MSG_CLASSES {
  "system_events",
  "stock_directory",
  "trading_status",
  "reg_sho",
  "market_participant_states",
  "mwcb",
  "ipo",
  "luld",
  "orders",
  "modifications",
  "trades",
  "noii",
  "rpii"
};

// How many classes there are
const int MSG_CLASS_SIZE = MSG_CLASSES.size();

// translates msg_type to MSG_CLASSES position, indexed like MSG_SIZES
// (msg_type - 'A'); -1 marks characters that belong to no class
// e.g., msg_type 'h' has value 2, belongs to the third class in MSG_CLASSES: trading_status
const int TYPE_CLASS_TRANSLATOR [] = {
// A   B  C  D  E  F   G  H   I  J  K  L   M   N   O   P   Q  R  S  T
   8, 10, 9, 9, 9, 8, -1, 2, 11, 7, 6, 4, -1, 12, -1, 10, 10, 1, 0, -1,
// U  V  W  X  Y   Z   [   \   ]   ^   _   `   a   b   c   d   e   f   g  h
   9, 5, 5, 9, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2
};
64 | 
65 | #endif //SPECIFICATIONS_H
66 | 


--------------------------------------------------------------------------------
/src/write_functions.h:
--------------------------------------------------------------------------------
 1 | #ifndef WRITEFUNCTIONS_H
 2 | #define WRITEFUNCTIONS_H
 3 | 
 4 | #include <zlib.h>
 5 | #include <Rcpp.h>
 6 | #include "specifications.h"
 7 | #include "helper_functions.h"
 8 | 
 9 | 
// parse specific messages into a buffer, one function per message class
// NOTE(review): the definitions are not part of this header; presumably each
// function writes message number msg_num of the data.frame df into buf and
// returns the number of bytes written - confirm in write_functions.cpp
uint64_t parse_orders_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_trades_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_modifications_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_system_events_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_stock_directory_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_trading_status_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_reg_sho_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_market_participants_states_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_mwcb_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_ipo_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_luld_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_noii_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);
uint64_t parse_rpii_at(unsigned char * buf, Rcpp::DataFrame df, uint64_t msg_num);

// loads a data.frame at a position into a buffer
// NOTE(review): msg_ct is passed by reference, presumably the running
// message counter that gets advanced - confirm
int64_t load_message_to_buffer(unsigned char * buf, int64_t &msg_ct, Rcpp::DataFrame df);

// returns the index at which the values are minimum
int get_min_val_pos(std::vector<int64_t> &x);

// writes a buffer of `size` bytes to file;
// by default the file is not appended to and not gz-compressed
void write_buffer_to_file(unsigned char* buf, int64_t size, std::string filename,
                          bool append = false, bool gz = false);

// Writes a list of data.frames (already sorted by timestamp)
// to a file, if specified, the file is a gz.file
// max_buffer_size defaults to 1e9, quiet defaults to false
int64_t write_itch_impl(Rcpp::List ll, std::string filename,
                        bool append = false, bool gz = false,
                        size_t max_buffer_size = 1e9, bool quiet = false);
40 | 
41 | #endif // WRITEFUNCTIONS_H


--------------------------------------------------------------------------------
/tests/tinytests.R:
--------------------------------------------------------------------------------
## Run the tinytest suite of the RITCH package, but only when tinytest
## (>= 1.0.0) is installed; otherwise this script does nothing.
if (requireNamespace("tinytest", quietly=TRUE) &&
    utils::packageVersion("tinytest") >= "1.0.0") {

  ## Set a seed to make the test deterministic
  set.seed(42)

  ## R makes us do this
  Sys.setenv("R_TESTS" = "")

  ## there are several more granular ways to test files in a tinytest directory,
  ## see its package vignette; tests can also run once the package is installed
  ## using the same command `test_package(pkgName)`, or by directory or file
  tinytest::test_package("RITCH")
}


--------------------------------------------------------------------------------