├── .Rbuildignore
├── .github
│   ├── .gitignore
│   └── workflows
│       ├── R-CMD-check.yaml
│       └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── R
│   ├── RcppExports.R
│   ├── bigreadr-package.R
│   ├── bind.R
│   ├── nlines-split.R
│   ├── read.R
│   └── zzz.R
├── README.md
├── _pkgdown.yml
├── bigreadr.Rproj
├── codecov.yml
├── docs
│   ├── 404.html
│   ├── articles
│   │   ├── csv2sqlite.html
│   │   └── index.html
│   ├── authors.html
│   ├── bootstrap-toc.css
│   ├── bootstrap-toc.js
│   ├── docsearch.css
│   ├── docsearch.js
│   ├── index.html
│   ├── link.svg
│   ├── pkgdown.css
│   ├── pkgdown.js
│   ├── pkgdown.yml
│   ├── reference
│   │   ├── Rplot001.png
│   │   ├── big_fread1.html
│   │   ├── big_fread2.html
│   │   ├── bigreadr-package.html
│   │   ├── cbind_df.html
│   │   ├── fread2.html
│   │   ├── fwrite2.html
│   │   ├── index.html
│   │   ├── nlines.html
│   │   ├── rbind_df.html
│   │   └── split_file.html
│   └── sitemap.xml
├── inst
│   ├── WORDLIST
│   └── testdata
│       ├── cars_with_newline.csv
│       ├── cars_without_newline.csv
│       └── wrong_string.rds
├── man
│   ├── big_fread1.Rd
│   ├── big_fread2.Rd
│   ├── bigreadr-package.Rd
│   ├── cbind_df.Rd
│   ├── fread2.Rd
│   ├── fwrite2.Rd
│   ├── nlines.Rd
│   ├── rbind_df.Rd
│   └── split_file.Rd
├── src
│   ├── .gitignore
│   ├── RcppExports.cpp
│   └── nlines-split.cpp
├── tests
│   ├── spelling.R
│   ├── testthat.R
│   └── testthat
│       ├── test-bind.R
│       ├── test-nlines.R
│       ├── test-read.R
│       └── test-split.R
├── tmp-save
│   └── nlines.cpp
├── tmp-tests
│   ├── bench-acc.R
│   ├── bench-rbind.R
│   ├── bench-read.R
│   ├── bench-read2.R
│   ├── bench-read3.R
│   ├── bench-read4.R
│   ├── bench-read5.R
│   ├── bench-read6.R
│   ├── bench-read7.R
│   ├── has-header.R
│   ├── split.cpp
│   ├── test-file2string.cpp
│   ├── test-mmap-nlines.cpp
│   ├── test-parallel.R
│   ├── test-parallel2.R
│   ├── test-setvbuf.cpp
│   ├── test-setvbuf2.cpp
│   ├── test-setvbuf3.cpp
│   ├── test-setvbuf4.cpp
│   ├── test-setvbuf5.cpp
│   ├── test-setvbuf6.cpp
│   ├── test-string.cpp
│   ├── text-write.txt
│   └── text-write2.txt
└── vignettes
    └── csv2sqlite.Rmd
/.Rbuildignore: --------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^tmp-tests$
 4 | ^tmp-data$
 5 | ^\.travis\.yml$
 6 | ^appveyor\.yml$
 7 | ^codecov\.yml$
 8 | ^tmp-save$
 9 | ^_pkgdown\.yml$
10 | ^docs$
11 | ^vignettes$
12 | ^\.github$
13 |
-------------------------------------------------------------------------------- /.github/.gitignore: --------------------------------------------------------------------------------
1 | *.html
2 |
-------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: --------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures?
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v3 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | tmp-data/ 6 | tmp-tests/tmp/ 7 | tmp-tests/tmp2/ 8 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bigreadr 2 | Version: 0.2.5 3 | Date: 2022-12-06 4 | Title: Read Large Text Files 5 | Description: Read large text files by splitting them in smaller files. 6 | Package 'bigreadr' also provides some convenient wrappers around fread() 7 | and fwrite() from package 'data.table'. 
8 | Authors@R: 9 | person(given = "Florian", 10 | family = "Privé", 11 | role = c("aut", "cre"), 12 | email = "florian.prive.21@gmail.com") 13 | License: GPL-3 14 | Encoding: UTF-8 15 | ByteCompile: true 16 | Roxygen: list(markdown = TRUE) 17 | RoxygenNote: 6.1.0 18 | Imports: 19 | bigassertr (>= 0.1.1), 20 | data.table, 21 | parallelly, 22 | Rcpp, 23 | utils 24 | Suggests: 25 | spelling, 26 | testthat, 27 | covr, 28 | RSQLite 29 | LinkingTo: 30 | Rcpp 31 | Language: en-US 32 | URL: https://github.com/privefl/bigreadr 33 | BugReports: https://github.com/privefl/bigreadr/issues 34 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(big_fread1) 4 | export(big_fread2) 5 | export(cbind_df) 6 | export(fread2) 7 | export(fwrite2) 8 | export(get_split_files) 9 | export(nlines) 10 | export(rbind_df) 11 | export(split_file) 12 | importFrom(Rcpp,sourceCpp) 13 | importFrom(bigassertr,assert_exist) 14 | importFrom(bigassertr,assert_int) 15 | importFrom(bigassertr,assert_pos) 16 | importFrom(bigassertr,message2) 17 | importFrom(bigassertr,stop2) 18 | importFrom(bigassertr,warning2) 19 | useDynLib(bigreadr, .registration = TRUE) 20 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | nlines_cpp <- function(file) { 5 | .Call(`_bigreadr_nlines_cpp`, file) 6 | } 7 | 8 | split_every_nlines <- function(name_in, prefix_out, every_nlines, repeat_header) { 9 | .Call(`_bigreadr_split_every_nlines`, name_in, prefix_out, every_nlines, repeat_header) 10 | } 11 | 12 | -------------------------------------------------------------------------------- /R/bigreadr-package.R: -------------------------------------------------------------------------------- 1 | #' @useDynLib bigreadr, .registration = TRUE 2 | #' @importFrom Rcpp sourceCpp 3 | #' @importFrom bigassertr message2 warning2 stop2 assert_exist assert_int assert_pos 4 | #' @keywords internal 5 | "_PACKAGE" 6 | -------------------------------------------------------------------------------- /R/bind.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | 3 | #' Merge data frames 4 | #' 5 | #' @param list_df A list of multiple data frames with the same variables in the 6 | #' same order. 7 | #' 8 | #' @return One merged data frame with the names of the first input data frame. 
9 | #' @export 10 | #' 11 | #' @examples 12 | #' str(iris) 13 | #' str(rbind_df(list(iris, iris))) 14 | #' 15 | rbind_df <- function(list_df) { 16 | 17 | first_df <- list_df[[1]] 18 | if (data.table::is.data.table(first_df)) { 19 | data.table::rbindlist(list_df) 20 | } else if (is.data.frame(first_df)) { 21 | list_df_merged <- lapply(seq_along(first_df), function(k) { 22 | unlist(lapply(list_df, function(l) l[[k]]), recursive = FALSE) 23 | }) 24 | list_df_merged_named <- stats::setNames(list_df_merged, names(list_df[[1]])) 25 | as.data.frame(list_df_merged_named, stringsAsFactors = FALSE) 26 | } else { 27 | stop2("'list_df' should contain data tables or data frames.") 28 | } 29 | } 30 | 31 | ################################################################################ 32 | 33 | #' Merge data frames 34 | #' 35 | #' @param list_df A list of multiple data frames with the same observations in 36 | #' the same order. 37 | #' 38 | #' @return One merged data frame. 39 | #' @export 40 | #' 41 | #' @examples 42 | #' str(iris) 43 | #' str(cbind_df(list(iris, iris))) 44 | #' 45 | cbind_df <- function(list_df) { 46 | do.call(cbind, list_df) 47 | } 48 | 49 | ################################################################################ 50 | -------------------------------------------------------------------------------- /R/nlines-split.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | 3 | #' Number of lines 4 | #' 5 | #' Get the number of lines of a file. 6 | #' 7 | #' @param file Path of the file. 8 | #' 9 | #' @return The number of lines as one integer. 10 | #' @export 11 | #' 12 | #' @examples 13 | #' tmp <- fwrite2(iris) 14 | #' nlines(tmp) 15 | #' 16 | nlines <- function(file) { 17 | nlines_cpp( normalizePath(file, mustWork = TRUE) ) 18 | } 19 | 20 | ################################################################################ 21 | 22 | #' Split file every nlines 23 | #' 24 | #' @param file Path to file that you want to split. 25 | #' @param every_nlines Maximum number of lines in new file parts. 26 | #' @param prefix_out Prefix for created files. Default uses `tempfile()`. 27 | #' @param repeat_header Whether to repeat the header row in each file. 28 | #' Default is `FALSE`. 29 | #' 30 | #' @return A list with 31 | #' - `name_in`: input parameter `file`, 32 | #' - `prefix_out`: input parameter `prefix_out``, 33 | #' - `nfiles`: Number of files (parts) created, 34 | #' - `nlines_part`: input parameter `every_nlines`, 35 | #' - `nlines_all`: total number of lines of `file`. 36 | #' @export 37 | #' 38 | #' @examples 39 | #' tmp <- fwrite2(iris) 40 | #' infos <- split_file(tmp, 100) 41 | #' str(infos) 42 | #' get_split_files(infos) 43 | split_file <- function(file, every_nlines, 44 | prefix_out = tempfile(), 45 | repeat_header = FALSE) { 46 | 47 | split_every_nlines( 48 | name_in = normalizePath(file, mustWork = TRUE), 49 | prefix_out = path.expand(prefix_out), 50 | every_nlines = every_nlines, 51 | repeat_header = repeat_header 52 | ) 53 | } 54 | 55 | ################################################################################ 56 | 57 | #' Get files from splitting. 58 | #' 59 | #' @param split_file_out Output of [split_file]. 60 | #' 61 | #' @return Vector of file paths created by [split_file]. 
62 | #' @export 63 | #' @rdname split_file 64 | #' 65 | get_split_files <- function(split_file_out) { 66 | 67 | sprintf("%s_%s.txt", 68 | split_file_out[["prefix_out"]], 69 | seq_len(split_file_out[["nfiles"]])) 70 | } 71 | 72 | ################################################################################ 73 | -------------------------------------------------------------------------------- /R/read.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | 3 | #' Read text file(s) 4 | #' 5 | #' @param input Path to the file(s) that you want to read from. 6 | #' This can also be a command, some text or an URL. 7 | #' If a vector of inputs is provided, resulting data frames are appended. 8 | #' @param ... Other arguments to be passed to [data.table::fread]. 9 | #' @param data.table Whether to return a `data.table` or just a `data.frame`? 10 | #' Default is `FALSE` (and is the opposite of [data.table::fread]). 11 | #' @param nThread Number of threads to use. Default uses all threads minus one. 12 | #' 13 | #' @return A `data.frame` by default; a `data.table` when `data.table = TRUE`. 14 | #' @export 15 | #' 16 | #' @examples 17 | #' tmp <- fwrite2(iris) 18 | #' iris2 <- fread2(tmp) 19 | #' all.equal(iris2, iris) ## fread doesn't use factors 20 | fread2 <- function(input, ..., 21 | data.table = FALSE, 22 | nThread = getOption("bigreadr.nThread")) { 23 | 24 | if (missing(input)) { 25 | data.table::fread(..., data.table = data.table, nThread = nThread) 26 | } else if (length(input) > 1) { 27 | rbind_df(lapply(input, fread2, ..., data.table = data.table, nThread = nThread)) 28 | } else { 29 | data.table::fread(input, ..., data.table = data.table, nThread = nThread) 30 | } 31 | } 32 | 33 | ################################################################################ 34 | 35 | #' Write a data frame to a text file 36 | #' 37 | #' @param x Data frame to write. 38 | #' @param file Path to the file that you want to write to. 39 | #' Defaults uses `tempfile()`. 40 | #' @param ... Other arguments to be passed to [data.table::fwrite]. 41 | #' @param quote Whether to quote strings (default is `FALSE`). 42 | #' @param nThread Number of threads to use. Default uses all threads minus one. 43 | #' 44 | #' @return Input parameter `file`, invisibly. 45 | #' @export 46 | #' 47 | #' @examples 48 | #' tmp <- fwrite2(iris) 49 | #' iris2 <- fread2(tmp) 50 | #' all.equal(iris2, iris) ## fread doesn't use factors 51 | fwrite2 <- function(x, file = tempfile(), ..., 52 | quote = FALSE, 53 | nThread = getOption("bigreadr.nThread")) { 54 | 55 | data.table::fwrite(x, file, ..., quote = quote, nThread = nThread) 56 | invisible(file) 57 | } 58 | 59 | ################################################################################ 60 | 61 | #' Read large text file 62 | #' 63 | #' Read large text file by splitting lines. 64 | #' 65 | #' @param file Path to file that you want to read. 66 | #' @inheritParams split_file 67 | #' @param .transform Function to transform each data frame corresponding to each 68 | #' part of the `file`. Default doesn't change anything. 69 | #' @param .combine Function to combine results (list of data frames). 70 | #' @param skip Number of lines to skip at the beginning of `file`. 71 | #' @param ... Other arguments to be passed to [data.table::fread], 72 | #' excepted `input`, `file`, `skip`, `col.names` and `showProgress`. 73 | #' @param print_timings Whether to print timings? 
Default is `TRUE`. 74 | #' 75 | #' @inherit fread2 return 76 | #' @export 77 | #' 78 | big_fread1 <- function(file, every_nlines, 79 | .transform = identity, .combine = rbind_df, 80 | skip = 0, ..., 81 | print_timings = TRUE) { 82 | 83 | begin <- proc.time()[3] 84 | print_proc <- function(action) { 85 | if (print_timings) { 86 | reset <- proc.time()[3] 87 | message2("%s: %s seconds.", action, round(reset - begin, 1)) 88 | begin <<- reset 89 | } 90 | } 91 | 92 | ## Split file 93 | infos_split <- split_file(file, every_nlines = every_nlines) 94 | file_parts <- get_split_files(infos_split) 95 | on.exit(unlink(file_parts), add = TRUE) 96 | 97 | print_proc("Splitting") 98 | 99 | ## Read first part to get names and to skip some lines 100 | part1 <- fread2(file_parts[1], skip = skip, ..., showProgress = FALSE) 101 | names_df <- names(part1) 102 | part1 <- .transform(part1) 103 | 104 | print_proc("Reading + transforming first part") 105 | 106 | ## Read + transform other parts 107 | other_parts <- lapply(file_parts[-1], function(file_part) { 108 | .transform(fread2(file_part, skip = 0, col.names = names_df, 109 | ..., showProgress = FALSE)) 110 | }) 111 | 112 | print_proc("Reading + transforming other parts") 113 | 114 | ## Combine 115 | all_parts <- unname(c(list(part1), other_parts)) 116 | res <- tryCatch(.combine(all_parts), error = function(e) { 117 | warning2("Combining failed. Returning list of parts instead..") 118 | all_parts 119 | }) 120 | 121 | print_proc("Combining") 122 | 123 | res 124 | } 125 | 126 | ################################################################################ 127 | 128 | cut_in_nb <- function(x, nb) { 129 | split(x, sort(rep_len(seq_len(nb), length(x)))) 130 | } 131 | 132 | #' Read large text file 133 | #' 134 | #' Read large text file by splitting columns. 135 | #' 136 | #' @param file Path to file that you want to read. 137 | #' @param nb_parts Number of parts in which to split reading (and transforming). 138 | #' Parts are referring to blocks of selected columns. 139 | #' Default uses `part_size` to set a good value. 140 | #' @param .transform Function to transform each data frame corresponding to each 141 | #' block of selected columns. Default doesn't change anything. 142 | #' @param .combine Function to combine results (list of data frames). 143 | #' @param skip Number of lines to skip at the beginning of `file`. 144 | #' @param select Indices of columns to keep (sorted). Default keeps them all. 145 | #' @param ... Other arguments to be passed to [data.table::fread], 146 | #' excepted `input`, `file`, `skip`, `select` and `showProgress`. 147 | #' @param progress Show progress? Default is `FALSE`. 148 | #' @param part_size Size of the parts if `nb_parts` is not supplied. 149 | #' Default is `500 * 1024^2` (500 MB). 150 | #' 151 | #' @return The outputs of `fread2` + `.transform`, combined with `.combine`. 152 | #' @export 153 | #' 154 | big_fread2 <- function(file, nb_parts = NULL, 155 | .transform = identity, 156 | .combine = cbind_df, 157 | skip = 0, 158 | select = NULL, 159 | progress = FALSE, 160 | part_size = 500 * 1024^2, ## 500 MB 161 | ...) 
{ 162 | 163 | assert_exist(file) 164 | ## Split selected columns in nb_parts 165 | if (is.null(select)) { 166 | nb_cols <- ncol(fread2(file, nrows = 1, skip = skip, ...)) 167 | select <- seq_len(nb_cols) 168 | } else { 169 | assert_int(select); assert_pos(select) 170 | if (is.unsorted(select, strictly = TRUE)) 171 | stop2("Argument 'select' should be sorted.") 172 | } 173 | # Number of parts 174 | if (is.null(nb_parts)) { 175 | nb_parts <- ceiling(file.size(file) / part_size) 176 | if (progress) message2("Will read the file in %d parts.", nb_parts) 177 | } 178 | split_cols <- cut_in_nb(select, nb_parts) 179 | 180 | if (progress) { 181 | pb <- utils::txtProgressBar(min = 0, max = length(select), style = 3) 182 | on.exit(close(pb), add = TRUE) 183 | } 184 | 185 | ## Read + transform other parts 186 | already_read <- 0 187 | all_parts <- lapply(split_cols, function(cols) { 188 | part <- .transform( 189 | fread2(file, skip = skip, select = cols, ..., showProgress = FALSE) 190 | ) 191 | already_read <<- already_read + length(cols) 192 | if (progress) utils::setTxtProgressBar(pb, already_read) 193 | part 194 | }) 195 | all_parts <- unname(all_parts) 196 | 197 | ## Combine 198 | tryCatch(.combine(all_parts), error = function(e) { 199 | warning2("Combining failed. Returning list of parts instead..") 200 | all_parts 201 | }) 202 | } 203 | 204 | ################################################################################ 205 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | 3 | .onLoad <- function(libname, pkgname) { 4 | options( 5 | bigreadr.nThread = max(parallelly::availableCores() - 1L, 1L) 6 | ) 7 | } 8 | 9 | ################################################################################ 10 | 11 | .onUnload <- function(libpath) { 12 | options( 13 | bigreadr.nThread = NULL 14 | ) 15 | } 16 | 17 | ################################################################################ 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![R-CMD-check](https://github.com/privefl/bigreadr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/privefl/bigreadr/actions/workflows/R-CMD-check.yaml) 3 | [![CRAN status](https://www.r-pkg.org/badges/version/bigreadr)](https://cran.r-project.org/package=bigreadr) 4 | [![Codecov test coverage](https://codecov.io/gh/privefl/bigreadr/branch/master/graph/badge.svg)](https://app.codecov.io/gh/privefl/bigreadr?branch=master) 5 | 6 | 7 | 8 | # R package {bigreadr} 9 | 10 | Read large text files based on splitting + `data.table::fread` 11 | 12 | 13 | ## Example 14 | 15 | ```r 16 | # remotes::install_github("privefl/bigreadr") 17 | library(bigreadr) 18 | 19 | # Create a temporary file of ~141 MB (just as an example) 20 | csv <- fwrite2(iris[rep(seq_len(nrow(iris)), 1e4), rep(1:5, 4)], tempfile()) 21 | format(file.size(csv), big.mark = ",") 22 | 23 | ## Splitting lines (1) 24 | # Read (by parts) all data -> using `fread` would be faster 25 | nlines(csv) ## 1M5 lines -> split every 500,000 26 | big_iris1 <- big_fread1(csv, every_nlines = 5e5) 27 | # Read and subset (by parts) 28 | big_iris1_setosa <- big_fread1(csv, every_nlines = 5e5, .transform = function(df) { 29 | dplyr::filter(df, Species == "setosa") 30 | }) 31 
| 32 | ## Splitting columns (2) 33 | big_iris2 <- big_fread2(csv, nb_parts = 3) 34 | # Read and subset (by parts) 35 | species_setosa <- (fread2(csv, select = 5)[[1]] == "setosa") 36 | big_iris2_setosa <- big_fread2(csv, nb_parts = 3, .transform = function(df) { 37 | dplyr::filter(df, species_setosa) 38 | }) 39 | 40 | ## Verification 41 | identical(big_iris1_setosa, dplyr::filter(big_iris1, Species == "setosa")) 42 | identical(big_iris2, big_iris1) 43 | identical(big_iris2_setosa, big_iris1_setosa) 44 | ``` 45 | 46 | ## Use cases 47 | 48 | Please send me your use cases! 49 | 50 | - [Convert a CSV to SQLite by parts](https://privefl.github.io/bigreadr/articles/csv2sqlite.html) 51 | 52 | - [Read a text file as a disk.frame](https://diskframe.com/articles/ingesting-data.html) 53 | 54 | - [Read a text file as a Filebacked Big Matrix](https://privefl.github.io/bigstatsr/reference/big_read.html) 55 | 56 | - [Read a text file as a Filebacked Data Frame](https://privefl.github.io/bigdfr/reference/FDF_read.html) 57 | 58 | - Read multiple files at once using `bigreadr::fread2()`. 59 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/privefl/bigreadr/2d8806f1067b19610a2d633bf2e863b910570d5d/_pkgdown.yml -------------------------------------------------------------------------------- /bigreadr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • bigreadr 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 21 | 22 | 23 |
[Remainder of the pkgdown-generated 404 page: navbar links, the message "Content not found. Please use links in the navbar.", and footer markup; stripped to fragments in this dump.]
-------------------------------------------------------------------------------- /docs/articles/index.html: --------------------------------------------------------------------------------
[pkgdown-generated articles index (title: "Articles"): under "All vignettes" it lists the package's single vignette, "Convert a CSV to SQLite by parts" (csv2sqlite.html); markup stripped in this dump. A sketch of that workflow follows below.]
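To give a concrete flavor of that vignette's workflow, here is a minimal sketch of converting a CSV to SQLite by parts. It assumes the RSQLite package (listed in Suggests) is installed; the table name "iris", the chunk size, and the generated example CSV are illustrative choices, and the vignette itself may proceed differently.

```r
library(bigreadr)
library(RSQLite)

csv <- fwrite2(iris[rep(1:150, 100), ])                   # example CSV (~15,000 rows)
con <- dbConnect(SQLite(), tempfile(fileext = ".sqlite"))

# Read the CSV in chunks of 5000 lines; each chunk is appended to a SQLite
# table and NULL is returned so that no chunk stays in memory.
big_fread1(csv, every_nlines = 5000, .transform = function(df) {
  dbWriteTable(con, "iris", df, append = TRUE)
  NULL
}, .combine = unlist)

dbGetQuery(con, "SELECT COUNT(*) AS n FROM iris")         # 15000
dbDisconnect(con)
```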
-------------------------------------------------------------------------------- /docs/authors.html: --------------------------------------------------------------------------------
[pkgdown-generated page "Authors and Citation". Author and maintainer: Florian Privé. Citation (source: DESCRIPTION): Privé F (2022). bigreadr: Read Large Text Files. R package version 0.2.5, https://github.com/privefl/bigreadr. The corresponding BibTeX entry:]

@Manual{,
  title = {bigreadr: Read Large Text Files},
  author = {Florian Privé},
  year = {2022},
  note = {R package version 0.2.5},
  url = {https://github.com/privefl/bigreadr},
}
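The same citation can also be pulled from an R session once the package is installed; citation() and toBibtex() are standard utils functions, shown here only as a convenience.

```r
# Print the citation for the installed version, and its BibTeX form
citation("bigreadr")
toBibtex(citation("bigreadr"))
```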
98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. 
"?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body { 21 | position: relative; 22 | } 23 | 24 | body > .container { 25 | display: flex; 26 | height: 100%; 27 | flex-direction: column; 28 | } 29 | 30 | body > .container .row { 31 | flex: 1 0 auto; 32 | } 33 | 34 | footer { 35 | margin-top: 45px; 36 | padding: 35px 0 36px; 37 | border-top: 1px solid #e5e5e5; 38 | color: #666; 39 | display: flex; 40 | flex-shrink: 0; 41 | } 42 | footer p { 43 | margin-bottom: 0; 44 | } 45 | footer div { 46 | flex: 1; 47 | } 48 | footer .pkgdown { 49 | text-align: right; 50 | } 51 | footer p { 52 | margin-bottom: 0; 53 | } 54 | 55 | img.icon { 56 | float: right; 57 | } 58 | 59 | /* Ensure in-page images don't run 
outside their container */ 60 | .contents img { 61 | max-width: 100%; 62 | height: auto; 63 | } 64 | 65 | /* Fix bug in bootstrap (only seen in firefox) */ 66 | summary { 67 | display: list-item; 68 | } 69 | 70 | /* Typographic tweaking ---------------------------------*/ 71 | 72 | .contents .page-header { 73 | margin-top: calc(-60px + 1em); 74 | } 75 | 76 | dd { 77 | margin-left: 3em; 78 | } 79 | 80 | /* Section anchors ---------------------------------*/ 81 | 82 | a.anchor { 83 | display: none; 84 | margin-left: 5px; 85 | width: 20px; 86 | height: 20px; 87 | 88 | background-image: url(./link.svg); 89 | background-repeat: no-repeat; 90 | background-size: 20px 20px; 91 | background-position: center center; 92 | } 93 | 94 | h1:hover .anchor, 95 | h2:hover .anchor, 96 | h3:hover .anchor, 97 | h4:hover .anchor, 98 | h5:hover .anchor, 99 | h6:hover .anchor { 100 | display: inline-block; 101 | } 102 | 103 | /* Fixes for fixed navbar --------------------------*/ 104 | 105 | .contents h1, .contents h2, .contents h3, .contents h4 { 106 | padding-top: 60px; 107 | margin-top: -40px; 108 | } 109 | 110 | /* Navbar submenu --------------------------*/ 111 | 112 | .dropdown-submenu { 113 | position: relative; 114 | } 115 | 116 | .dropdown-submenu>.dropdown-menu { 117 | top: 0; 118 | left: 100%; 119 | margin-top: -6px; 120 | margin-left: -1px; 121 | border-radius: 0 6px 6px 6px; 122 | } 123 | 124 | .dropdown-submenu:hover>.dropdown-menu { 125 | display: block; 126 | } 127 | 128 | .dropdown-submenu>a:after { 129 | display: block; 130 | content: " "; 131 | float: right; 132 | width: 0; 133 | height: 0; 134 | border-color: transparent; 135 | border-style: solid; 136 | border-width: 5px 0 5px 5px; 137 | border-left-color: #cccccc; 138 | margin-top: 5px; 139 | margin-right: -10px; 140 | } 141 | 142 | .dropdown-submenu:hover>a:after { 143 | border-left-color: #ffffff; 144 | } 145 | 146 | .dropdown-submenu.pull-left { 147 | float: none; 148 | } 149 | 150 | .dropdown-submenu.pull-left>.dropdown-menu { 151 | left: -100%; 152 | margin-left: 10px; 153 | border-radius: 6px 0 6px 6px; 154 | } 155 | 156 | /* Sidebar --------------------------*/ 157 | 158 | #pkgdown-sidebar { 159 | margin-top: 30px; 160 | position: -webkit-sticky; 161 | position: sticky; 162 | top: 70px; 163 | } 164 | 165 | #pkgdown-sidebar h2 { 166 | font-size: 1.5em; 167 | margin-top: 1em; 168 | } 169 | 170 | #pkgdown-sidebar h2:first-child { 171 | margin-top: 0; 172 | } 173 | 174 | #pkgdown-sidebar .list-unstyled li { 175 | margin-bottom: 0.5em; 176 | } 177 | 178 | /* bootstrap-toc tweaks ------------------------------------------------------*/ 179 | 180 | /* All levels of nav */ 181 | 182 | nav[data-toggle='toc'] .nav > li > a { 183 | padding: 4px 20px 4px 6px; 184 | font-size: 1.5rem; 185 | font-weight: 400; 186 | color: inherit; 187 | } 188 | 189 | nav[data-toggle='toc'] .nav > li > a:hover, 190 | nav[data-toggle='toc'] .nav > li > a:focus { 191 | padding-left: 5px; 192 | color: inherit; 193 | border-left: 1px solid #878787; 194 | } 195 | 196 | nav[data-toggle='toc'] .nav > .active > a, 197 | nav[data-toggle='toc'] .nav > .active:hover > a, 198 | nav[data-toggle='toc'] .nav > .active:focus > a { 199 | padding-left: 5px; 200 | font-size: 1.5rem; 201 | font-weight: 400; 202 | color: inherit; 203 | border-left: 2px solid #878787; 204 | } 205 | 206 | /* Nav: second level (shown on .active) */ 207 | 208 | nav[data-toggle='toc'] .nav .nav { 209 | display: none; /* Hide by default, but at >768px, show it */ 210 | padding-bottom: 10px; 211 | } 212 | 213 
| nav[data-toggle='toc'] .nav .nav > li > a { 214 | padding-left: 16px; 215 | font-size: 1.35rem; 216 | } 217 | 218 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 219 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 220 | padding-left: 15px; 221 | } 222 | 223 | nav[data-toggle='toc'] .nav .nav > .active > a, 224 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 225 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 226 | padding-left: 15px; 227 | font-weight: 500; 228 | font-size: 1.35rem; 229 | } 230 | 231 | /* orcid ------------------------------------------------------------------- */ 232 | 233 | .orcid { 234 | font-size: 16px; 235 | color: #A6CE39; 236 | /* margins are required by official ORCID trademark and display guidelines */ 237 | margin-left:4px; 238 | margin-right:4px; 239 | vertical-align: middle; 240 | } 241 | 242 | /* Reference index & topics ----------------------------------------------- */ 243 | 244 | .ref-index th {font-weight: normal;} 245 | 246 | .ref-index td {vertical-align: top; min-width: 100px} 247 | .ref-index .icon {width: 40px;} 248 | .ref-index .alias {width: 40%;} 249 | .ref-index-icons .alias {width: calc(40% - 40px);} 250 | .ref-index .title {width: 60%;} 251 | 252 | .ref-arguments th {text-align: right; padding-right: 10px;} 253 | .ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} 254 | .ref-arguments .name {width: 20%;} 255 | .ref-arguments .desc {width: 80%;} 256 | 257 | /* Nice scrolling for wide elements --------------------------------------- */ 258 | 259 | table { 260 | display: block; 261 | overflow: auto; 262 | } 263 | 264 | /* Syntax highlighting ---------------------------------------------------- */ 265 | 266 | pre, code, pre code { 267 | background-color: #f8f8f8; 268 | color: #333; 269 | } 270 | pre, pre code { 271 | white-space: pre-wrap; 272 | word-break: break-all; 273 | overflow-wrap: break-word; 274 | } 275 | 276 | pre { 277 | border: 1px solid #eee; 278 | } 279 | 280 | pre .img, pre .r-plt { 281 | margin: 5px 0; 282 | } 283 | 284 | pre .img img, pre .r-plt img { 285 | background-color: #fff; 286 | } 287 | 288 | code a, pre a { 289 | color: #375f84; 290 | } 291 | 292 | a.sourceLine:hover { 293 | text-decoration: none; 294 | } 295 | 296 | .fl {color: #1514b5;} 297 | .fu {color: #000000;} /* function */ 298 | .ch,.st {color: #036a07;} /* string */ 299 | .kw {color: #264D66;} /* keyword */ 300 | .co {color: #888888;} /* comment */ 301 | 302 | .error {font-weight: bolder;} 303 | .warning {font-weight: bolder;} 304 | 305 | /* Clipboard --------------------------*/ 306 | 307 | .hasCopyButton { 308 | position: relative; 309 | } 310 | 311 | .btn-copy-ex { 312 | position: absolute; 313 | right: 0; 314 | top: 0; 315 | visibility: hidden; 316 | } 317 | 318 | .hasCopyButton:hover button.btn-copy-ex { 319 | visibility: visible; 320 | } 321 | 322 | /* headroom.js ------------------------ */ 323 | 324 | .headroom { 325 | will-change: transform; 326 | transition: transform 200ms linear; 327 | } 328 | .headroom--pinned { 329 | transform: translateY(0%); 330 | } 331 | .headroom--unpinned { 332 | transform: translateY(-100%); 333 | } 334 | 335 | /* mark.js ----------------------------*/ 336 | 337 | mark { 338 | background-color: rgba(255, 255, 51, 0.5); 339 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 340 | padding: 1px; 341 | } 342 | 343 | /* vertical spacing after htmlwidgets */ 344 | .html-widget { 345 | margin-bottom: 10px; 346 | } 347 | 348 | /* fontawesome ------------------------ */ 349 | 350 | .fab 
{ 351 | font-family: "Font Awesome 5 Brands" !important; 352 | } 353 | 354 | /* don't display links in code chunks when printing */ 355 | /* source: https://stackoverflow.com/a/10781533 */ 356 | @media print { 357 | code a:link:after, code a:visited:after { 358 | content: ""; 359 | } 360 | } 361 | 362 | /* Section anchors --------------------------------- 363 | Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 364 | */ 365 | 366 | div.csl-bib-body { } 367 | div.csl-entry { 368 | clear: both; 369 | } 370 | .hanging-indent div.csl-entry { 371 | margin-left:2em; 372 | text-indent:-2em; 373 | } 374 | div.csl-left-margin { 375 | min-width:2em; 376 | float:left; 377 | } 378 | div.csl-right-inline { 379 | margin-left:2em; 380 | padding-left:1em; 381 | } 382 | div.csl-indent { 383 | margin-left: 2em; 384 | } 385 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
  • , and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | clipboardBtnCopies.on('error', function() { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.19.2 2 | pkgdown: 2.0.6 3 | pkgdown_sha: ~ 4 | articles: 5 | csv2sqlite: csv2sqlite.html 6 | last_built: 2022-12-06T14:39Z 7 | 8 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/privefl/bigreadr/2d8806f1067b19610a2d633bf2e863b910570d5d/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/big_fread1.html: -------------------------------------------------------------------------------- 1 | 2 | Read large text file — big_fread1 • bigreadr 6 | 7 | 8 |
[Remainder of the pkgdown reference page for big_fread1() (title: "Read large text file", i.e. read a large text file by splitting lines). Usage: big_fread1(file, every_nlines, .transform = identity, .combine = rbind_df, skip = 0, ..., print_timings = TRUE). The Arguments and Value sections mirror the roxygen documentation of big_fread1() in R/read.R; navbar and footer markup ("Site built with pkgdown 2.0.6.") stripped in this dump.]
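Since no example is rendered on this page, here is a small self-contained sketch of a typical big_fread1() call; the chunk size and the filtering step are arbitrary choices for illustration.

```r
library(bigreadr)

# Build a CSV of 15,000 rows so that splitting actually has something to do
csv <- fwrite2(iris[rep(1:150, 100), ])
nlines(csv)  # 15001 (header included)

# Read it back in parts of 5000 lines, keeping only one species in each part;
# the parts are recombined with rbind_df(), the default .combine
setosa_only <- big_fread1(csv, every_nlines = 5e3,
                          .transform = function(df) df[df$Species == "setosa", ])
str(setosa_only)
```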
-------------------------------------------------------------------------------- /docs/reference/big_fread2.html: --------------------------------------------------------------------------------
[Remainder of the pkgdown reference page for big_fread2() (title: "Read large text file", i.e. read a large text file by splitting columns). Usage: big_fread2(file, nb_parts = NULL, .transform = identity, .combine = cbind_df, skip = 0, select = NULL, progress = FALSE, part_size = 500 * 1024^2, ...). The Arguments and Value sections mirror the roxygen documentation of big_fread2() in R/read.R; navbar and footer markup stripped in this dump.]
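This page has no rendered example either, so below is a short sketch of big_fread2(), which splits the selected columns into blocks rather than splitting lines; using two parts and keeping only numeric columns are arbitrary choices for illustration.

```r
library(bigreadr)
csv <- fwrite2(iris)

# Read the five columns in two blocks, dropping non-numeric columns in each
# block; blocks are recombined with cbind_df(), the default .combine
num_only <- big_fread2(csv, nb_parts = 2,
                       .transform = function(df) df[sapply(df, is.numeric)])
str(num_only)  # 150 obs. of 4 numeric variables (Species dropped)
```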
-------------------------------------------------------------------------------- /docs/reference/bigreadr-package.html: --------------------------------------------------------------------------------
[Remainder of the pkgdown page for the package itself (title: "bigreadr: Read Large Text Files"). It repeats the DESCRIPTION text (read large text files by splitting them into smaller files, plus convenient wrappers around fread() and fwrite() from data.table), a See also section, and lists Florian Privé <florian.prive.21@gmail.com> as author and maintainer; markup stripped in this dump.]
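To complement this overview, here is a short self-contained tour of the lower-level helpers that big_fread1() builds on (nlines(), split_file(), get_split_files()); the chunk size is arbitrary and the exact number of parts depends on it.

```r
library(bigreadr)

csv <- fwrite2(iris)   # write iris to a temporary CSV
nlines(csv)            # 151 (150 rows + header)

# Split into parts of at most 50 lines, repeating the header in each part
infos <- split_file(csv, every_nlines = 50, repeat_header = TRUE)
parts <- get_split_files(infos)
length(parts)          # number of part files created
sapply(parts, nlines)  # lines per part

# Each part can then be read (and processed) independently
str(fread2(parts[1]))
```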
-------------------------------------------------------------------------------- /docs/reference/cbind_df.html: --------------------------------------------------------------------------------
[Remainder of the pkgdown reference page for cbind_df() (title: "Merge data frames"). Usage: cbind_df(list_df). The Arguments and Value sections mirror the roxygen documentation in R/bind.R, and the rendered example shows str(iris) (150 obs. of 5 variables) and str(cbind_df(list(iris, iris))) (150 obs. of 10 variables); markup stripped in this dump.]
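As a complement to the rendered example, the sketch below shows cbind_df() together with its row-wise counterpart rbind_df() on plain in-memory data frames; nothing here depends on files.

```r
library(bigreadr)

# Stack data frames that share the same variables (row-wise merge)
rbind_df(list(head(iris, 2), tail(iris, 2)))

# Bind data frames that share the same observations (column-wise merge)
str(cbind_df(list(iris[1:2], iris["Species"])))
```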
-------------------------------------------------------------------------------- /docs/reference/fread2.html: --------------------------------------------------------------------------------
    9 |
    47 | 48 | 49 | 50 |
    51 |
    52 | 57 | 58 |
    59 |

    Read text file(s)

    60 |
    61 | 62 |
    63 |
    fread2(input, ..., data.table = FALSE,
     64 |   nThread = getOption("bigreadr.nThread"))
    65 |
    66 | 67 |
    68 |

    Arguments

    69 |
    input
    70 |

Path to the file(s) that you want to read from. 71 | This can also be a command, some text or a URL. 72 | If a vector of inputs is provided, resulting data frames are appended.

    73 | 74 | 75 |
    ...
    76 |

    Other arguments to be passed to data.table::fread.

    77 | 78 | 79 |
    data.table
    80 |

    Whether to return a data.table or just a data.frame? 81 | Default is FALSE (and is the opposite of data.table::fread).

    82 | 83 | 84 |
    nThread
    85 |

    Number of threads to use. Default uses all threads minus one.

    86 | 87 |
    88 |
    89 |

    Value

    90 | 91 | 92 |

    A data.frame by default; a data.table when data.table = TRUE.

    93 |
    94 | 95 |
    96 |

    Examples

    97 |
    tmp <- fwrite2(iris)
     98 | iris2 <- fread2(tmp)
     99 | all.equal(iris2, iris)  ## fread doesn't use factors
    100 | #> [1] "Component \"Species\": Modes: character, numeric"                      
    101 | #> [2] "Component \"Species\": Attributes: < target is NULL, current is list >"
    102 | #> [3] "Component \"Species\": target is character, current is factor"         
    103 | 
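Because a vector of inputs is appended row-wise, fread2() can read a set of file parts in a single call. A minimal sketch, assuming the inputs share the same columns:

library(bigreadr)
tmp <- fwrite2(iris)
both <- fread2(c(tmp, tmp))  # both inputs are read, then appended
nrow(both)                   # 300 rows, i.e. iris twice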
    104 |
    105 |
    106 | 109 |
    110 | 111 | 112 |
    115 | 116 |
    117 |


    118 |
    119 | 120 |
    121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /docs/reference/fwrite2.html: -------------------------------------------------------------------------------- 1 | 2 | Write a data frame to a text file — fwrite2 • bigreadr 6 | 7 | 8 |
    9 |
    47 | 48 | 49 | 50 |
    51 |
    52 | 57 | 58 |
    59 |

    Write a data frame to a text file

    60 |
    61 | 62 |
    63 |
    fwrite2(x, file = tempfile(), ..., quote = FALSE,
     64 |   nThread = getOption("bigreadr.nThread"))
    65 |
    66 | 67 |
    68 |

    Arguments

    69 |
    x
    70 |

    Data frame to write.

    71 | 72 | 73 |
    file
    74 |

Path to the file that you want to write to. 75 | Default uses tempfile().

    76 | 77 | 78 |
    ...
    79 |

    Other arguments to be passed to data.table::fwrite.

    80 | 81 | 82 |
    quote
    83 |

    Whether to quote strings (default is FALSE).

    84 | 85 | 86 |
    nThread
    87 |

    Number of threads to use. Default uses all threads minus one.

    88 | 89 |
    90 |
    91 |

    Value

    92 | 93 | 94 |

    Input parameter file, invisibly.

    95 |
    96 | 97 |
    98 |

    Examples

    99 |
    tmp <- fwrite2(iris)
    100 | iris2 <- fread2(tmp)
    101 | all.equal(iris2, iris)  ## fread doesn't use factors
    102 | #> [1] "Component \"Species\": Modes: character, numeric"                      
    103 | #> [2] "Component \"Species\": Attributes: < target is NULL, current is list >"
    104 | #> [3] "Component \"Species\": target is character, current is factor"         
    105 | 
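Arguments other than quote and nThread are forwarded to data.table::fwrite(), so for instance the field separator can be changed. A minimal sketch; the ';' separator is only for illustration:

library(bigreadr)
csv <- fwrite2(iris, tempfile(fileext = ".csv"), sep = ";")
readLines(csv, n = 1)  # header row written with ';' as separator
iris2 <- fread2(csv)   # fread() detects the separator automatically
dim(iris2)             # 150 x 5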
    106 |
    107 |
    108 | 111 |
    112 | 113 | 114 |
    117 | 118 |
    119 |


    120 |
    121 | 122 |
    123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /docs/reference/index.html: -------------------------------------------------------------------------------- 1 | 2 | Function reference • bigreadr 6 | 7 | 8 |
    9 |
    47 | 48 | 49 | 50 |
    51 |
    52 | 55 | 56 | 60 | 63 | 64 | 67 | 68 | 71 | 72 | 75 | 76 | 79 | 80 | 83 | 84 | 87 | 88 | 91 | 92 |
    57 |

    All functions

    58 |

    59 |
    61 |

    big_fread1()

    62 |

    Read large text file

    65 |

    big_fread2()

    66 |

    Read large text file

    69 |

    cbind_df()

    70 |

    Merge data frames

    73 |

    fread2()

    74 |

    Read text file(s)

    77 |

    fwrite2()

    78 |

    Write a data frame to a text file

    81 |

    nlines()

    82 |

    Number of lines

    85 |

    rbind_df()

    86 |

    Merge data frames

    89 |

    split_file() get_split_files()

    90 |

    Split file every nlines

    93 | 94 | 97 |
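As a complement to this index, a minimal sketch contrasting the two big_fread*() entry points (splitting by lines vs. by columns); the chunk sizes are only illustrative:

library(bigreadr)
csv <- fwrite2(iris, tempfile(fileext = ".csv"))
df1 <- big_fread1(csv, every_nlines = 50, print_timings = FALSE)  # split by lines, parts are row-bound
df2 <- big_fread2(csv, nb_parts = 2)                              # split by columns, parts are column-bound
identical(dim(df1), dim(df2))  # both give 150 x 5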
    98 | 99 | 100 |
    103 | 104 |
    105 |


    106 |
    107 | 108 |
    109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /docs/reference/nlines.html: -------------------------------------------------------------------------------- 1 | 2 | Number of lines — nlines • bigreadr 6 | 7 | 8 |
    9 |
    47 | 48 | 49 | 50 |
    51 |
    52 | 57 | 58 |
    59 |

    Get the number of lines of a file.

    60 |
    61 | 62 |
    63 |
    nlines(file)
    64 |
    65 | 66 |
    67 |

    Arguments

    68 |
    file
    69 |

    Path of the file.

    70 | 71 |
    72 |
    73 |

    Value

    74 | 75 | 76 |

    The number of lines as one integer.

    77 |
    78 | 79 |
    80 |

    Examples

    81 |
    tmp <- fwrite2(iris)
     82 | nlines(tmp)
     83 | #> [1] 151
     84 | 
     85 | 
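nlines() is convenient for choosing every_nlines when a file should be split into a target number of parts. A minimal sketch aiming for roughly three parts:

library(bigreadr)
tmp <- fwrite2(iris)
n <- nlines(tmp)  # 151 (header + 150 rows)
infos <- split_file(tmp, every_nlines = ceiling(n / 3))
length(get_split_files(infos))  # 3 parts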
    86 |
    87 |
    88 | 91 |
    92 | 93 | 94 |
    97 | 98 |
    99 |


    100 |
    101 | 102 |
    103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /docs/reference/rbind_df.html: -------------------------------------------------------------------------------- 1 | 2 | Merge data frames — rbind_df • bigreadr 6 | 7 | 8 |
    9 |
    47 | 48 | 49 | 50 |
    51 |
    52 | 57 | 58 |
    59 |

    Merge data frames

    60 |
    61 | 62 |
    63 |
    rbind_df(list_df)
    64 |
    65 | 66 |
    67 |

    Arguments

    68 |
    list_df
    69 |

    A list of multiple data frames with the same variables in the 70 | same order.

    71 | 72 |
    73 |
    74 |

    Value

    75 | 76 | 77 |

    One merged data frame with the names of the first input data frame.

    78 |
    79 | 80 |
    81 |

    Examples

    82 |
    str(iris)
     83 | #> 'data.frame':	150 obs. of  5 variables:
     84 | #>  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
     85 | #>  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
     86 | #>  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
     87 | #>  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
     88 | #>  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
     89 | str(rbind_df(list(iris, iris)))
     90 | #> 'data.frame':	300 obs. of  5 variables:
     91 | #>  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
     92 | #>  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
     93 | #>  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
     94 | #>  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
     95 | #>  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
     96 | 
     97 | 
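rbind_df() is the natural way to put the parts of a split file back together (it is also the default .combine of big_fread1()). A minimal sketch, using repeat_header = TRUE so that each part can be read on its own:

library(bigreadr)
tmp <- fwrite2(iris, tempfile(fileext = ".csv"))
infos <- split_file(tmp, every_nlines = 60, repeat_header = TRUE)
parts <- lapply(get_split_files(infos), fread2)  # read each part separately
full  <- rbind_df(parts)
dim(full)  # 150 x 5 again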
    98 |
    99 |
    100 | 103 |
    104 | 105 | 106 |
    109 | 110 |
    111 |


    112 |
    113 | 114 |
    115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /docs/reference/split_file.html: -------------------------------------------------------------------------------- 1 | 2 | Split file every nlines — split_file • bigreadr 7 | 8 | 9 |
    10 |
    48 | 49 | 50 | 51 |
    52 |
    53 | 58 | 59 |
    60 |

    Split file every nlines

    61 |

    Get files from splitting.

    62 |
    63 | 64 |
    65 |
    split_file(file, every_nlines, prefix_out = tempfile(),
     66 |   repeat_header = FALSE)
     67 | 
     68 | get_split_files(split_file_out)
    69 |
    70 | 71 |
    72 |

    Arguments

    73 |
    file
    74 |

    Path to file that you want to split.

    75 | 76 | 77 |
    every_nlines
    78 |

    Maximum number of lines in new file parts.

    79 | 80 | 81 |
    prefix_out
    82 |

    Prefix for created files. Default uses tempfile().

    83 | 84 | 85 |
    repeat_header
    86 |

    Whether to repeat the header row in each file. 87 | Default is FALSE.

    88 | 89 | 90 |
    split_file_out
    91 |

    Output of split_file.

    92 | 93 |
    94 |
    95 |

    Value

    96 | 97 | 98 |

    A list with

    • name_in: input parameter file,

    • 99 |
• prefix_out: input parameter prefix_out,

    • 100 |
    • nfiles: Number of files (parts) created,

    • 101 |
    • nlines_part: input parameter every_nlines,

    • 102 |
• nlines_all: total number of lines of file,
• repeat_header: input parameter repeat_header.

    • 103 |

    Vector of file paths created by split_file.

    104 |
    105 | 106 |
    107 |

    Examples

    108 |
    tmp <- fwrite2(iris)
    109 | infos <- split_file(tmp, 100)
    110 | str(infos)
    111 | #> List of 6
    112 | #>  $ name_in      : chr "C:\\Users\\au639593\\AppData\\Local\\Temp\\Rtmpq2HStE\\file40f821d7102d"
    113 | #>  $ prefix_out   : chr "C:\\Users\\au639593\\AppData\\Local\\Temp\\Rtmpq2HStE\\file40f855f46bc3"
    114 | #>  $ nfiles       : int 2
    115 | #>  $ nlines_part  : int 100
    116 | #>  $ nlines_all   : num 151
    117 | #>  $ repeat_header: logi FALSE
    118 | get_split_files(infos)
    119 | #> [1] "C:\\Users\\au639593\\AppData\\Local\\Temp\\Rtmpq2HStE\\file40f855f46bc3_1.txt"
    120 | #> [2] "C:\\Users\\au639593\\AppData\\Local\\Temp\\Rtmpq2HStE\\file40f855f46bc3_2.txt"
    121 | 
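With repeat_header = TRUE each part gets its own header row, so the parts can be read back directly, here in a single fread2() call (a vector of inputs is appended). A minimal sketch; the part size of 50 lines is arbitrary:

library(bigreadr)
csv <- fwrite2(iris, tempfile(fileext = ".csv"))
infos <- split_file(csv, every_nlines = 50, repeat_header = TRUE)
parts <- get_split_files(infos)
iris2 <- fread2(parts)  # parts are read and appended row-wise
dim(iris2)              # 150 x 5, the original data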
    122 |
    123 |
    124 | 127 |
    128 | 129 | 130 |
    133 | 134 |
    135 |


    136 |
    137 | 138 |
    139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /articles/csv2sqlite.html 8 | 9 | 10 | /articles/index.html 11 | 12 | 13 | /authors.html 14 | 15 | 16 | /index.html 17 | 18 | 19 | /reference/bigreadr-package.html 20 | 21 | 22 | /reference/big_fread1.html 23 | 24 | 25 | /reference/big_fread2.html 26 | 27 | 28 | /reference/cbind_df.html 29 | 30 | 31 | /reference/fread2.html 32 | 33 | 34 | /reference/fwrite2.html 35 | 36 | 37 | /reference/index.html 38 | 39 | 40 | /reference/nlines.html 41 | 42 | 43 | /reference/rbind_df.html 44 | 45 | 46 | /reference/split_file.html 47 | 48 | 49 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | Filebacked 2 | Florian 3 | fpeek 4 | fread 5 | fwrite 6 | nlines 7 | Privà 8 | Privé 9 | -------------------------------------------------------------------------------- /inst/testdata/cars_with_newline.csv: -------------------------------------------------------------------------------- 1 | speed,dist 2 | 4,2 3 | 4,10 4 | 7,4 5 | 7,22 6 | 8,16 7 | 9,10 8 | 10,18 9 | 10,26 10 | 10,34 11 | 11,17 12 | 11,28 13 | 12,14 14 | 12,20 15 | 12,24 16 | 12,28 17 | 13,26 18 | 13,34 19 | 13,34 20 | 13,46 21 | 14,26 22 | 14,36 23 | 14,60 24 | 14,80 25 | 15,20 26 | 15,26 27 | 15,54 28 | 16,32 29 | 16,40 30 | 17,32 31 | 17,40 32 | 17,50 33 | 18,42 34 | 18,56 35 | 18,76 36 | 18,84 37 | 19,36 38 | 19,46 39 | 19,68 40 | 20,32 41 | 20,48 42 | 20,52 43 | 20,56 44 | 20,64 45 | 22,66 46 | 23,54 47 | 24,70 48 | 24,92 49 | 24,93 50 | 24,120 51 | 25,85 52 | -------------------------------------------------------------------------------- /inst/testdata/cars_without_newline.csv: -------------------------------------------------------------------------------- 1 | speed,dist 2 | 4,2 3 | 4,10 4 | 7,4 5 | 7,22 6 | 8,16 7 | 9,10 8 | 10,18 9 | 10,26 10 | 10,34 11 | 11,17 12 | 11,28 13 | 12,14 14 | 12,20 15 | 12,24 16 | 12,28 17 | 13,26 18 | 13,34 19 | 13,34 20 | 13,46 21 | 14,26 22 | 14,36 23 | 14,60 24 | 14,80 25 | 15,20 26 | 15,26 27 | 15,54 28 | 16,32 29 | 16,40 30 | 17,32 31 | 17,40 32 | 17,50 33 | 18,42 34 | 18,56 35 | 18,76 36 | 18,84 37 | 19,36 38 | 19,46 39 | 19,68 40 | 20,32 41 | 20,48 42 | 20,52 43 | 20,56 44 | 20,64 45 | 22,66 46 | 23,54 47 | 24,70 48 | 24,92 49 | 24,93 50 | 24,120 51 | 25,85 -------------------------------------------------------------------------------- /inst/testdata/wrong_string.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/privefl/bigreadr/2d8806f1067b19610a2d633bf2e863b910570d5d/inst/testdata/wrong_string.rds -------------------------------------------------------------------------------- /man/big_fread1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read.R 3 | \name{big_fread1} 4 | \alias{big_fread1} 5 | \title{Read large text file} 6 | \usage{ 7 | big_fread1(file, every_nlines, .transform = identity, 8 | .combine = rbind_df, skip = 0, ..., print_timings = TRUE) 9 | } 10 | \arguments{ 11 | \item{file}{Path to file that you want to read.} 12 | 13 | \item{every_nlines}{Maximum number of lines in new file parts.} 14 | 15 
| \item{.transform}{Function to transform each data frame corresponding to each 16 | part of the \code{file}. Default doesn't change anything.} 17 | 18 | \item{.combine}{Function to combine results (list of data frames).} 19 | 20 | \item{skip}{Number of lines to skip at the beginning of \code{file}.} 21 | 22 | \item{...}{Other arguments to be passed to \link[data.table:fread]{data.table::fread}, 23 | excepted \code{input}, \code{file}, \code{skip}, \code{col.names} and \code{showProgress}.} 24 | 25 | \item{print_timings}{Whether to print timings? Default is \code{TRUE}.} 26 | } 27 | \value{ 28 | A \code{data.frame} by default; a \code{data.table} when \code{data.table = TRUE}. 29 | } 30 | \description{ 31 | Read large text file by splitting lines. 32 | } 33 | -------------------------------------------------------------------------------- /man/big_fread2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read.R 3 | \name{big_fread2} 4 | \alias{big_fread2} 5 | \title{Read large text file} 6 | \usage{ 7 | big_fread2(file, nb_parts = NULL, .transform = identity, 8 | .combine = cbind_df, skip = 0, select = NULL, progress = FALSE, 9 | part_size = 500 * 1024^2, ...) 10 | } 11 | \arguments{ 12 | \item{file}{Path to file that you want to read.} 13 | 14 | \item{nb_parts}{Number of parts in which to split reading (and transforming). 15 | Parts are referring to blocks of selected columns. 16 | Default uses \code{part_size} to set a good value.} 17 | 18 | \item{.transform}{Function to transform each data frame corresponding to each 19 | block of selected columns. Default doesn't change anything.} 20 | 21 | \item{.combine}{Function to combine results (list of data frames).} 22 | 23 | \item{skip}{Number of lines to skip at the beginning of \code{file}.} 24 | 25 | \item{select}{Indices of columns to keep (sorted). Default keeps them all.} 26 | 27 | \item{progress}{Show progress? Default is \code{FALSE}.} 28 | 29 | \item{part_size}{Size of the parts if \code{nb_parts} is not supplied. 30 | Default is \code{500 * 1024^2} (500 MB).} 31 | 32 | \item{...}{Other arguments to be passed to \link[data.table:fread]{data.table::fread}, 33 | excepted \code{input}, \code{file}, \code{skip}, \code{select} and \code{showProgress}.} 34 | } 35 | \value{ 36 | The outputs of \code{fread2} + \code{.transform}, combined with \code{.combine}. 37 | } 38 | \description{ 39 | Read large text file by splitting columns. 40 | } 41 | -------------------------------------------------------------------------------- /man/bigreadr-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bigreadr-package.R 3 | \docType{package} 4 | \name{bigreadr-package} 5 | \alias{bigreadr} 6 | \alias{bigreadr-package} 7 | \title{bigreadr: Read Large Text Files} 8 | \description{ 9 | Read large text files by splitting them in smaller files. 10 | Package 'bigreadr' also provides some convenient wrappers around fread() 11 | and fwrite() from package 'data.table'. 
12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://github.com/privefl/bigreadr} 17 | \item Report bugs at \url{https://github.com/privefl/bigreadr/issues} 18 | } 19 | 20 | } 21 | \author{ 22 | \strong{Maintainer}: Florian Privé \email{florian.prive.21@gmail.com} 23 | 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/cbind_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bind.R 3 | \name{cbind_df} 4 | \alias{cbind_df} 5 | \title{Merge data frames} 6 | \usage{ 7 | cbind_df(list_df) 8 | } 9 | \arguments{ 10 | \item{list_df}{A list of multiple data frames with the same observations in 11 | the same order.} 12 | } 13 | \value{ 14 | One merged data frame. 15 | } 16 | \description{ 17 | Merge data frames 18 | } 19 | \examples{ 20 | str(iris) 21 | str(cbind_df(list(iris, iris))) 22 | 23 | } 24 | -------------------------------------------------------------------------------- /man/fread2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read.R 3 | \name{fread2} 4 | \alias{fread2} 5 | \title{Read text file(s)} 6 | \usage{ 7 | fread2(input, ..., data.table = FALSE, 8 | nThread = getOption("bigreadr.nThread")) 9 | } 10 | \arguments{ 11 | \item{input}{Path to the file(s) that you want to read from. 12 | This can also be a command, some text or an URL. 13 | If a vector of inputs is provided, resulting data frames are appended.} 14 | 15 | \item{...}{Other arguments to be passed to \link[data.table:fread]{data.table::fread}.} 16 | 17 | \item{data.table}{Whether to return a \code{data.table} or just a \code{data.frame}? 18 | Default is \code{FALSE} (and is the opposite of \link[data.table:fread]{data.table::fread}).} 19 | 20 | \item{nThread}{Number of threads to use. Default uses all threads minus one.} 21 | } 22 | \value{ 23 | A \code{data.frame} by default; a \code{data.table} when \code{data.table = TRUE}. 24 | } 25 | \description{ 26 | Read text file(s) 27 | } 28 | \examples{ 29 | tmp <- fwrite2(iris) 30 | iris2 <- fread2(tmp) 31 | all.equal(iris2, iris) ## fread doesn't use factors 32 | } 33 | -------------------------------------------------------------------------------- /man/fwrite2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read.R 3 | \name{fwrite2} 4 | \alias{fwrite2} 5 | \title{Write a data frame to a text file} 6 | \usage{ 7 | fwrite2(x, file = tempfile(), ..., quote = FALSE, 8 | nThread = getOption("bigreadr.nThread")) 9 | } 10 | \arguments{ 11 | \item{x}{Data frame to write.} 12 | 13 | \item{file}{Path to the file that you want to write to. 14 | Defaults uses \code{tempfile()}.} 15 | 16 | \item{...}{Other arguments to be passed to \link[data.table:fwrite]{data.table::fwrite}.} 17 | 18 | \item{quote}{Whether to quote strings (default is \code{FALSE}).} 19 | 20 | \item{nThread}{Number of threads to use. Default uses all threads minus one.} 21 | } 22 | \value{ 23 | Input parameter \code{file}, invisibly. 
24 | } 25 | \description{ 26 | Write a data frame to a text file 27 | } 28 | \examples{ 29 | tmp <- fwrite2(iris) 30 | iris2 <- fread2(tmp) 31 | all.equal(iris2, iris) ## fread doesn't use factors 32 | } 33 | -------------------------------------------------------------------------------- /man/nlines.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/nlines-split.R 3 | \name{nlines} 4 | \alias{nlines} 5 | \title{Number of lines} 6 | \usage{ 7 | nlines(file) 8 | } 9 | \arguments{ 10 | \item{file}{Path of the file.} 11 | } 12 | \value{ 13 | The number of lines as one integer. 14 | } 15 | \description{ 16 | Get the number of lines of a file. 17 | } 18 | \examples{ 19 | tmp <- fwrite2(iris) 20 | nlines(tmp) 21 | 22 | } 23 | -------------------------------------------------------------------------------- /man/rbind_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bind.R 3 | \name{rbind_df} 4 | \alias{rbind_df} 5 | \title{Merge data frames} 6 | \usage{ 7 | rbind_df(list_df) 8 | } 9 | \arguments{ 10 | \item{list_df}{A list of multiple data frames with the same variables in the 11 | same order.} 12 | } 13 | \value{ 14 | One merged data frame with the names of the first input data frame. 15 | } 16 | \description{ 17 | Merge data frames 18 | } 19 | \examples{ 20 | str(iris) 21 | str(rbind_df(list(iris, iris))) 22 | 23 | } 24 | -------------------------------------------------------------------------------- /man/split_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/nlines-split.R 3 | \name{split_file} 4 | \alias{split_file} 5 | \alias{get_split_files} 6 | \title{Split file every nlines} 7 | \usage{ 8 | split_file(file, every_nlines, prefix_out = tempfile(), 9 | repeat_header = FALSE) 10 | 11 | get_split_files(split_file_out) 12 | } 13 | \arguments{ 14 | \item{file}{Path to file that you want to split.} 15 | 16 | \item{every_nlines}{Maximum number of lines in new file parts.} 17 | 18 | \item{prefix_out}{Prefix for created files. Default uses \code{tempfile()}.} 19 | 20 | \item{repeat_header}{Whether to repeat the header row in each file. 21 | Default is \code{FALSE}.} 22 | 23 | \item{split_file_out}{Output of \link{split_file}.} 24 | } 25 | \value{ 26 | A list with 27 | \itemize{ 28 | \item \code{name_in}: input parameter \code{file}, 29 | \item \code{prefix_out}: input parameter `prefix_out``, 30 | \item \code{nfiles}: Number of files (parts) created, 31 | \item \code{nlines_part}: input parameter \code{every_nlines}, 32 | \item \code{nlines_all}: total number of lines of \code{file}. 33 | } 34 | 35 | Vector of file paths created by \link{split_file}. 36 | } 37 | \description{ 38 | Split file every nlines 39 | 40 | Get files from splitting. 
41 | } 42 | \examples{ 43 | tmp <- fwrite2(iris) 44 | infos <- split_file(tmp, 100) 45 | str(infos) 46 | get_split_files(infos) 47 | } 48 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 9 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 10 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 11 | #endif 12 | 13 | // nlines_cpp 14 | double nlines_cpp(std::string file); 15 | RcppExport SEXP _bigreadr_nlines_cpp(SEXP fileSEXP) { 16 | BEGIN_RCPP 17 | Rcpp::RObject rcpp_result_gen; 18 | Rcpp::RNGScope rcpp_rngScope_gen; 19 | Rcpp::traits::input_parameter< std::string >::type file(fileSEXP); 20 | rcpp_result_gen = Rcpp::wrap(nlines_cpp(file)); 21 | return rcpp_result_gen; 22 | END_RCPP 23 | } 24 | // split_every_nlines 25 | List split_every_nlines(std::string name_in, std::string prefix_out, int every_nlines, bool repeat_header); 26 | RcppExport SEXP _bigreadr_split_every_nlines(SEXP name_inSEXP, SEXP prefix_outSEXP, SEXP every_nlinesSEXP, SEXP repeat_headerSEXP) { 27 | BEGIN_RCPP 28 | Rcpp::RObject rcpp_result_gen; 29 | Rcpp::RNGScope rcpp_rngScope_gen; 30 | Rcpp::traits::input_parameter< std::string >::type name_in(name_inSEXP); 31 | Rcpp::traits::input_parameter< std::string >::type prefix_out(prefix_outSEXP); 32 | Rcpp::traits::input_parameter< int >::type every_nlines(every_nlinesSEXP); 33 | Rcpp::traits::input_parameter< bool >::type repeat_header(repeat_headerSEXP); 34 | rcpp_result_gen = Rcpp::wrap(split_every_nlines(name_in, prefix_out, every_nlines, repeat_header)); 35 | return rcpp_result_gen; 36 | END_RCPP 37 | } 38 | 39 | static const R_CallMethodDef CallEntries[] = { 40 | {"_bigreadr_nlines_cpp", (DL_FUNC) &_bigreadr_nlines_cpp, 1}, 41 | {"_bigreadr_split_every_nlines", (DL_FUNC) &_bigreadr_split_every_nlines, 4}, 42 | {NULL, NULL, 0} 43 | }; 44 | 45 | RcppExport void R_init_bigreadr(DllInfo *dll) { 46 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 47 | R_useDynamicSymbols(dll, FALSE); 48 | } 49 | -------------------------------------------------------------------------------- /src/nlines-split.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************************************/ 2 | 3 | #include 4 | using namespace Rcpp; 5 | 6 | #define INIT_SIZE 64 7 | 8 | /******************************************************************************/ 9 | 10 | char * fgets_full_line(char * str, FILE * stream, size_t * p_size) { 11 | 12 | while (true) { 13 | 14 | str = fgets(str, *p_size, stream); 15 | if (str == NULL) return NULL; 16 | // Rcout << *p_size << " -> " << (str[strlen(str) - 1] == '\n') << std::endl; 17 | 18 | if (feof(stream) | (str[strlen(str) - 1] == '\n')) { // reached EOF or EOL 19 | 20 | // Rcout << strlen(str) << " / " << (str[strlen(str) - 1] == '\n') << std::endl; 21 | return str; 22 | 23 | } else { // increase size of str and try again 24 | 25 | fseek(stream , 1 - *p_size, SEEK_CUR); 26 | *p_size *= 2; 27 | 28 | 
delete [] str; 29 | str = new char[*p_size]; 30 | 31 | } 32 | } 33 | } 34 | 35 | /******************************************************************************/ 36 | 37 | // [[Rcpp::export]] 38 | double nlines_cpp(std::string file) { 39 | 40 | FILE *fp_in = fopen(file.c_str(), "r"); 41 | if (fp_in == NULL) Rcpp::stop("Error while opening file '%s'.", file); 42 | 43 | size_t size = INIT_SIZE; 44 | 45 | char *line = new char[size]; 46 | size_t nline_all = 0; 47 | 48 | while (!feof(fp_in)) { 49 | 50 | line = fgets_full_line(line, fp_in, &size); 51 | 52 | if (ferror(fp_in)) { 53 | delete [] line; 54 | Rcpp::stop("Error while reading file '%s'.", file); 55 | } 56 | 57 | if (line != NULL) nline_all++; 58 | } 59 | 60 | fclose(fp_in); 61 | delete [] line; 62 | 63 | return nline_all; 64 | } 65 | 66 | /******************************************************************************/ 67 | 68 | // [[Rcpp::export]] 69 | List split_every_nlines(std::string name_in, 70 | std::string prefix_out, 71 | int every_nlines, 72 | bool repeat_header) { 73 | 74 | FILE *fp_in = fopen(name_in.c_str(), "r"), *fp_out; 75 | if (fp_in == NULL) 76 | Rcpp::stop("Error while opening file '%s'.", name_in); 77 | 78 | const char *fn_out = prefix_out.c_str(); 79 | size_t max_len = strlen(fn_out) + 20; 80 | char *name_out = new char[max_len]; 81 | 82 | size_t size = INIT_SIZE; 83 | 84 | char *line = new char[size]; 85 | 86 | // read header once and store it 87 | line = fgets_full_line(line, fp_in, &size); 88 | char *head = new char[size]; 89 | strcpy(head, line); 90 | rewind(fp_in); 91 | 92 | bool not_eof = true, header_added = false; 93 | int nfile = 0; 94 | size_t nline_all = 0; 95 | 96 | while (not_eof) { 97 | 98 | // Open file number 'nfile' 99 | snprintf(name_out, max_len, "%s_%d.txt", fn_out, ++nfile); 100 | fp_out = fopen(name_out, "w"); 101 | 102 | // Fill it with 'every_nlines' lines 103 | int nline_file = 0; 104 | while (nline_file < every_nlines) { 105 | 106 | if ( (line = fgets_full_line(line, fp_in, &size)) == NULL ) { 107 | not_eof = false; 108 | break; 109 | } 110 | 111 | if (repeat_header & (nline_file == 0) & (nfile > 1)) { 112 | fputs(head, fp_out); 113 | header_added = true; 114 | }; 115 | 116 | fputs(line, fp_out); 117 | nline_file++; 118 | } 119 | 120 | // Close file number 'nfile' 121 | fflush(fp_out); 122 | fclose(fp_out); 123 | if (nline_file == 0) { 124 | // nothing has been written because of EOF -> remove file 125 | remove(name_out); 126 | nfile--; 127 | } else { 128 | nline_all += nline_file + header_added; 129 | } 130 | } 131 | 132 | fclose(fp_in); 133 | 134 | delete[] name_out; 135 | delete[] line; 136 | delete[] head; 137 | 138 | return List::create( 139 | _["name_in"] = name_in, 140 | _["prefix_out"] = prefix_out, 141 | _["nfiles"] = nfile, 142 | _["nlines_part"] = every_nlines, 143 | _["nlines_all"] = nline_all, 144 | _["repeat_header"] = repeat_header 145 | ); 146 | } 147 | 148 | /******************************************************************************/ 149 | -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | spelling::spell_check_test(vignettes = TRUE, error = FALSE) 2 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bigreadr) 3 | 4 | test_check("bigreadr") 5 | 
-------------------------------------------------------------------------------- /tests/testthat/test-bind.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | 3 | context("test-bind.R") 4 | 5 | ################################################################################ 6 | 7 | test_that("'cbind_df' works", { 8 | 9 | # No copies with 'cbind.data.frame' 10 | iris$Species <- as.character(iris$Species) 11 | addr <- sapply(iris, data.table::address) 12 | iris2 <- cbind_df(list(iris, iris)) 13 | expect_identical(sapply(iris2, data.table::address), c(addr, addr)) 14 | 15 | # Data frame with factors 16 | df <- datasets::iris 17 | df2 <- cbind_df(list(df)) 18 | expect_identical(df2, df) 19 | df3 <- cbind_df(list(df, df, df)) 20 | expect_equal(dim(df3), c(150, 15)) 21 | expect_identical(class(df3), "data.frame") 22 | 23 | # Data table 24 | dt <- data.table::as.data.table(df) 25 | dt2 <- cbind_df(list(dt)) 26 | expect_identical(class(dt2), c("data.table", "data.frame")) 27 | expect_identical(dt2, dt) 28 | dt3 <- cbind_df(list(dt, dt, dt)) 29 | expect_equal(dim(dt3), c(150, 15)) 30 | expect_identical(class(dt3), c("data.table", "data.frame")) 31 | 32 | # Data frame without factors 33 | df$Species <- as.character(df$Species) 34 | df2 <- cbind_df(list(df)) 35 | expect_identical(df2, df) 36 | df3 <- cbind_df(list(df, df, df)) 37 | expect_equal(dim(df3), c(150, 15)) 38 | expect_identical(class(df3), "data.frame") 39 | }) 40 | 41 | ################################################################################ 42 | 43 | test_that("'rbind_df' works", { 44 | 45 | # Data frame with factors 46 | df <- datasets::iris 47 | df2 <- rbind_df(list(df)) 48 | expect_identical(df2, df) 49 | df3 <- rbind_df(list(df, df, df)) 50 | expect_equal(dim(df3), c(450, 5)) 51 | expect_identical(class(df3), "data.frame") 52 | 53 | # Data table 54 | dt <- data.table::as.data.table(df) 55 | dt2 <- rbind_df(list(dt)) 56 | expect_identical(class(dt2), c("data.table", "data.frame")) 57 | expect_identical(dt2, dt) 58 | dt3 <- rbind_df(list(dt, dt, dt)) 59 | expect_equal(dim(dt3), c(450, 5)) 60 | expect_identical(class(dt3), c("data.table", "data.frame")) 61 | 62 | # Data frame without factors 63 | df$Species <- as.character(df$Species) 64 | df2 <- rbind_df(list(df)) 65 | expect_identical(df2, df) 66 | df3 <- rbind_df(list(df, df, df)) 67 | expect_equal(dim(df3), c(450, 5)) 68 | expect_identical(class(df3), "data.frame") 69 | 70 | # Error 71 | expect_error(rbind_df(list(as.matrix(iris), iris)), 72 | "'list_df' should contain data tables or data frames.", fixed = TRUE) 73 | }) 74 | 75 | ################################################################################ 76 | -------------------------------------------------------------------------------- /tests/testthat/test-nlines.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | 3 | context("test-nlines.R") 4 | 5 | ################################################################################ 6 | 7 | test_that("'nlines()' works", { 8 | 9 | expect_error(nlines("does_not_exist.txt")) 10 | 11 | strings <- readRDS(system.file("testdata", "wrong_string.rds", package = "bigreadr")) 12 | writeLines(strings, tmp <- tempfile()) 13 | expect_equal(nlines(tmp), 24) 14 | 15 | strings <- c("", "", " ", sapply(10^(seq(0, 4, by = 0.2)), function(i) { 16 | 
paste(as.matrix(iris)[sample(nrow(iris), i, TRUE), ], collapse = " ") 17 | })) 18 | replicate(100, { 19 | writeLines(sample(strings, replace = TRUE), tmp <- tempfile()) 20 | expect_equal(nlines(tmp), length(readLines(tmp))) 21 | }) 22 | }) 23 | 24 | ################################################################################ 25 | 26 | test_that("'nlines()' works with or without newline", { 27 | csv1 <- system.file("testdata", "cars_with_newline.csv", package = "bigreadr") 28 | expect_identical(nlines(csv1), 51) 29 | csv2 <- system.file("testdata", "cars_without_newline.csv", package = "bigreadr") 30 | expect_identical(nlines(csv2), 51) 31 | }) 32 | 33 | ################################################################################ 34 | -------------------------------------------------------------------------------- /tests/testthat/test-read.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | 3 | context("test-read.R") 4 | 5 | iris$Species <- as.character(iris$Species) 6 | csv <- fwrite2(iris, tempfile(fileext = ".csv")) 7 | 8 | ################################################################################ 9 | 10 | test_that("'fread2' changes default", { 11 | no_dt <- fread2(csv) 12 | expect_equal(no_dt, iris) 13 | expect_s3_class(no_dt, "data.frame") 14 | expect_failure(expect_s3_class(no_dt, "data.table")) 15 | expect_s3_class(fread2(csv, data.table = TRUE), "data.table") 16 | }) 17 | 18 | test_that("'fread2' works with multiple files", { 19 | csv2 <- rep(csv, 3) 20 | no_dt <- fread2(csv2) 21 | expect_equal(no_dt, rbind(iris, iris, iris)) 22 | expect_s3_class(no_dt, "data.frame") 23 | expect_failure(expect_s3_class(no_dt, "data.table")) 24 | expect_s3_class(fread2(csv2, data.table = TRUE), "data.table") 25 | 26 | expect_equal(dim(fread2(csv2, nrows = 5)), c(15, 5)) 27 | expect_equal(dim(fread2(csv2, select = "Species")), c(450, 1)) 28 | }) 29 | 30 | test_that("'fread2' can use different types of input", { 31 | 32 | cmd <- sprintf("grep -v setosa %s", fwrite2(datasets::iris)) 33 | expect_equal(fread2(cmd), data.table::fread(cmd, data.table = FALSE)) 34 | 35 | url <- "https://raw.githubusercontent.com/privefl/bigsnpr/master/inst/extdata/example.fam" 36 | expect_equal(fread2(url), data.table::fread(url, data.table = FALSE)) 37 | 38 | text <- paste(readLines(url), collapse = "\n") 39 | expect_equal(fread2(text), data.table::fread(url, data.table = FALSE)) 40 | }) 41 | 42 | ################################################################################ 43 | 44 | test_that("'big_fread1' works", { 45 | 46 | iris1 <- big_fread1(file = csv, 50, print_timings = FALSE) 47 | expect_equal(iris1, iris) 48 | 49 | expect_warning( 50 | iris1 <- big_fread1(file = csv, 50, print_timings = FALSE, 51 | .combine = function() stop("ERROR")), 52 | "Combining failed.") 53 | expect_length(iris1, 4) 54 | expect_equal(rbind_df(iris1), iris) 55 | 56 | iris2 <- big_fread1(file = csv, 250, print_timings = FALSE) 57 | expect_equal(iris2, iris) 58 | 59 | ind3 <- 1:4 60 | iris3 <- big_fread1(file = csv, 7, select = ind3, skip = 1, print_timings = FALSE) 61 | expect_equal(iris3, iris[ind3], check.attributes = FALSE) 62 | expect_identical(names(iris3), paste0("V", ind3)) 63 | 64 | iris4 <- big_fread1(file = csv, 50, print_timings = FALSE, 65 | .transform = function(df) subset(df, Species == "virginica")) 66 | expect_equal(iris4, subset(iris, Species == "virginica"), check.attributes = FALSE) 67 | 
68 | expect_message(big_fread1(file = csv, 50, print_timings = TRUE), "seconds") 69 | }) 70 | 71 | ################################################################################ 72 | 73 | test_that("'big_fread2' works", { 74 | 75 | for (nb_parts in 1:7) { 76 | 77 | iris1 <- big_fread2(file = csv, nb_parts) 78 | expect_equal(iris1, iris) 79 | 80 | expect_warning( 81 | iris1 <- big_fread2(file = csv, nb_parts, 82 | .combine = function() stop("ERROR")), 83 | "Combining failed.") 84 | expect_length(iris1, min(nb_parts, ncol(iris))) 85 | expect_equal(cbind_df(iris1), iris) 86 | 87 | ind2 <- 1 88 | iris2 <- big_fread2(file = csv, nb_parts, select = ind2, skip = 0) 89 | expect_equal(iris2, iris[ind2]) 90 | 91 | ind3 <- 1:4 92 | iris3 <- big_fread2(file = csv, nb_parts, select = ind3, skip = 1) 93 | expect_equal(iris3, iris[ind3], check.attributes = FALSE) 94 | expect_identical(names(iris3), paste0("V", ind3)) 95 | 96 | expect_error(big_fread2(file = csv, nb_parts, select = c(4, 1:3), skip = 0), 97 | "Argument 'select' should be sorted.", fixed = TRUE) 98 | } 99 | }) 100 | 101 | ################################################################################ 102 | 103 | test_that("Same column accessor", { 104 | iris_dt <- data.table::as.data.table(iris) 105 | expect_equal(iris[, 1:3], as.data.frame(iris_dt[, 1:3])) 106 | expect_equal(iris[, 3, drop = FALSE], 107 | as.data.frame(iris_dt[, 3, drop = FALSE])) 108 | }) 109 | 110 | ################################################################################ 111 | 112 | test_that("Use 'scan' correctly", { 113 | expect_identical(scan(csv, "", skip = 0, nlines = 1, sep = "\n", quiet = TRUE), 114 | paste(names(iris), collapse = ",")) 115 | expect_identical(scan(csv, "", skip = 1, nlines = 1, sep = "\n", quiet = TRUE), 116 | paste(as.matrix(iris)[1, ], collapse = ",")) 117 | }) 118 | 119 | ################################################################################ 120 | -------------------------------------------------------------------------------- /tests/testthat/test-split.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | 3 | context("test-split.R") 4 | 5 | ################################################################################ 6 | 7 | test_that("'split_every_nlines()' works", { 8 | 9 | tmp <- bigreadr::fwrite2(iris) 10 | test <- bigreadr:::split_every_nlines(tmp, tmp, 20, TRUE) 11 | files <- list.files(tempdir(), basename(tmp), full.names = TRUE) 12 | files2 <- c(tmp, paste0(tmp, "_", 1:8, ".txt")) 13 | expect_identical(normalizePath(sort(files)), normalizePath(files2)) 14 | }) 15 | 16 | ################################################################################ 17 | 18 | test_that("'split_file()' works", { 19 | 20 | strings <- c("", "", " ", sapply(10^(seq(0, 4, by = 0.2)), function(i) { 21 | paste(as.matrix(iris)[sample(nrow(iris), i, TRUE), ], collapse = " ") 22 | })) 23 | for (every in c(1, 2, 4, 12, 24, 25)) { 24 | writeLines(sample(strings, replace = TRUE), tmp <- tempfile()) 25 | # Infos are correct 26 | infos <- split_file(tmp, every, tmp2 <- tempfile()) 27 | expect_identical(infos[["name_in"]], normalizePath(tmp)) 28 | expect_identical(infos[["prefix_out"]], path.expand(tmp2)) 29 | expect_identical(infos[["repeat_header"]], FALSE) 30 | expect_equal(ceiling(infos[["nlines_all"]] / infos[["nlines_part"]]), 31 | infos[["nfiles"]]) 32 | expect_equal(infos[["nlines_all"]], 24) 33 | # New files all exist 
34 | files <- get_split_files(infos) 35 | expect_true(all(file.exists(files))) 36 | # Number of lines and size is summing to whole input file 37 | expect_identical(sum(sapply(files, nlines)), nlines(tmp)) 38 | expect_identical(sum(file.size(files)), file.size(tmp)) 39 | # Content is the same 40 | expect_identical(do.call('c', lapply(files, readLines)), readLines(tmp)) 41 | } 42 | }) 43 | 44 | ################################################################################ 45 | 46 | test_that("'split_file()' works with a repeated header", { 47 | 48 | # Reading splitted files is easier 49 | tf <- fwrite2(cars, tempfile(fileext = ".csv")) 50 | sf1 <- split_file(tf, 10) 51 | gsf1 <- get_split_files(sf1) 52 | expect_equal(sum(sapply(gsf1, nlines)), 51) 53 | expect_error(Reduce(rbind, lapply(gsf1, fread2)), 54 | "names do not match previous names") 55 | 56 | sf2 <- split_file(tf, 10, repeat_header = TRUE) 57 | gsf2 <- get_split_files(sf2) 58 | expect_equal(sapply(gsf2, readLines, n = 1), rep(readLines(tf, n = 1), 6), 59 | check.attributes = FALSE) 60 | 61 | loaded_df <- Reduce(rbind, lapply(gsf2, read.csv)) 62 | expect_equal(names(loaded_df), c("speed", "dist")) 63 | expect_equal(nrow(loaded_df), 50) 64 | 65 | # Content is the same 66 | first_part <- readLines(gsf2[1]) 67 | other_parts <- unlist(lapply(gsf2[-1], function(f) readLines(f)[-1])) 68 | expect_identical(c(first_part, other_parts), readLines(tf)) 69 | }) 70 | 71 | ################################################################################ 72 | 73 | test_that("'split_file()' works with a repeated header (special cases)", { 74 | 75 | strings <- c("", "", " ", sapply(10^(seq(0, 4, by = 0.2)), function(i) { 76 | paste(as.matrix(iris)[sample(nrow(iris), i, TRUE), ], collapse = " ") 77 | })) 78 | for (every in c(1, 2, 4, 12, 24, 25)) { 79 | writeLines(sample(strings, replace = TRUE), tmp <- tempfile()) 80 | # Infos are correct 81 | infos <- split_file(tmp, every, tmp2 <- tempfile(), repeat_header = TRUE) 82 | expect_identical(infos[["name_in"]], normalizePath(tmp)) 83 | expect_identical(infos[["prefix_out"]], path.expand(tmp2)) 84 | expect_identical(infos[["repeat_header"]], TRUE) 85 | nlines_all_without_header <- infos[["nlines_all"]] - infos[["nfiles"]] 86 | expect_equal(nlines_all_without_header + 1, 24) 87 | expect_equal(ceiling((nlines_all_without_header + 1) / infos[["nlines_part"]]), 88 | infos[["nfiles"]]) 89 | # New files all exist 90 | files <- get_split_files(infos) 91 | expect_true(all(file.exists(files))) 92 | # Same first line for each file 93 | expect_equal(sapply(files, readLines, n = 1), 94 | rep(readLines(tmp, n = 1), infos[["nfiles"]]), 95 | check.attributes = FALSE) 96 | # Content is the same 97 | first_part <- readLines(files[1]) 98 | other_parts <- unlist(lapply(files[-1], function(f) readLines(f)[-1])) 99 | expect_identical(c(first_part, other_parts), readLines(tmp)) 100 | } 101 | }) 102 | 103 | ################################################################################ 104 | -------------------------------------------------------------------------------- /tmp-save/nlines.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define BUFSIZE (1024 * 1024) 4 | 5 | 6 | //' Count number of lines 7 | //' 8 | //' @param filename Path to the file. 
9 | //' 10 | //' @export 11 | //' 12 | // [[Rcpp::export]] 13 | double nlines1(std::string filename) { 14 | 15 | FILE *fp_in = fopen(filename.c_str(), "rb"); 16 | // setvbuf(fp_in, NULL, _IOLBF, BUFSIZE); 17 | 18 | size_t size = 100; 19 | size_t last = size - 2; 20 | 21 | char *line = new char[size]; 22 | char *temp= NULL; 23 | size_t c = 0; 24 | bool not_eol; 25 | 26 | while (fgets(line, size, fp_in) != NULL) { 27 | 28 | if (strlen(line) > last) { 29 | 30 | not_eol = (line[last] != '\n'); 31 | 32 | size *= 2; 33 | temp = new char[size]; 34 | delete [] line; 35 | line = temp; 36 | last = size - 2; 37 | 38 | if (not_eol) continue; 39 | } 40 | 41 | // End of line 42 | c++; 43 | } 44 | 45 | fclose(fp_in); 46 | 47 | return c; 48 | } 49 | 50 | #include 51 | #include 52 | using namespace std; 53 | 54 | int FileRead(istream& is, char* buff) { 55 | is.read(buff, BUFSIZE); 56 | return is.gcount(); 57 | } 58 | 59 | // [[Rcpp::export]] 60 | double nlines2(const char * filename) { 61 | 62 | ifstream ifs(filename, ios::in | ios::binary); 63 | 64 | char *buff = new char[BUFSIZE]; 65 | 66 | size_t nlines = 0; 67 | while (int cc = FileRead(ifs, buff)) { 68 | nlines += std::count(buff, buff + cc, '\n'); 69 | } 70 | 71 | delete [] buff; 72 | 73 | return nlines; 74 | } 75 | 76 | // [[Rcpp::export]] 77 | double nlines3(const char * filename) { 78 | 79 | FILE *fp = fopen(filename, "r"); 80 | 81 | size_t nlines = 0; 82 | 83 | char c = 'a'; 84 | while (c != EOF) { 85 | c = getc(fp); 86 | if (c == '\n') nlines++; 87 | } 88 | 89 | fclose(fp); 90 | 91 | return nlines; 92 | } 93 | 94 | // [[Rcpp::export]] 95 | double nlines4(std::string filename, int buff_size = 1024) { 96 | 97 | FILE *fp_in = fopen(filename.c_str(), "rb"); 98 | // setvbuf(fp_in, NULL, _IOFBF, BUFSIZE); 99 | 100 | char *buff = new char[buff_size]; 101 | // int buff_size_minus_one = buff_size - 1; 102 | size_t nlines = 0; 103 | 104 | while (feof(fp_in) == 0) { 105 | if (fgets(buff, buff_size, fp_in) == NULL) 106 | Rcpp::Rcout << "Error?" 
<< std::endl; 107 | 108 | // Rcpp::Rcout << " : "<< strlen(buff) << 109 | // " => " << (buff[strlen(buff) - 1] == '\n') << std::endl; 110 | 111 | if ((buff[strlen(buff) - 1] == '\n')) nlines++; 112 | } 113 | 114 | fclose(fp_in); 115 | 116 | return nlines; 117 | } 118 | 119 | // [[Rcpp::export]] 120 | double nlines5(std::string filename, int buff_size = 1024) { 121 | 122 | FILE *input_file = fopen(filename.c_str(), "rb"); 123 | char buffer[buff_size + 1]; 124 | size_t line_count = 0; 125 | 126 | while (!feof(input_file)) 127 | { 128 | size_t chars_read = fread(buffer, 1, buff_size, input_file); 129 | for (unsigned int i = 0; i < chars_read; ++i) 130 | { 131 | if (buffer[i] == '\n') 132 | { 133 | ++line_count; 134 | } 135 | } 136 | } 137 | 138 | fclose(input_file); 139 | 140 | return line_count; 141 | } 142 | 143 | // [[Rcpp::export]] 144 | double nlines6(std::string filename) { 145 | 146 | size_t newlines = 0; 147 | char buf[BUFSIZE]; 148 | size_t BUFSIZE_M1 = BUFSIZE - 1; 149 | size_t BUFSIZE_M2 = BUFSIZE - 2; 150 | FILE* file = fopen(filename.c_str(), "rb"); 151 | 152 | while (fgets(buf, BUFSIZE, file)) { 153 | if (strlen(buf) != BUFSIZE_M1 || buf[BUFSIZE_M2] != '\n') 154 | newlines++; 155 | } 156 | 157 | return newlines; 158 | } 159 | 160 | 161 | 162 | #include 163 | #include 164 | #include 165 | #include 166 | 167 | // [[Rcpp::export]] 168 | double nlines7(std::string filename) { 169 | 170 | int fd = open(filename.c_str(), O_RDONLY, 0); 171 | 172 | char *buff = new char[BUFSIZE]; 173 | size_t nlines = 0; 174 | 175 | while (int len = read(fd, buff, BUFSIZE)) { 176 | 177 | if (len == -1) { 178 | (void)close(fd); 179 | break; 180 | } 181 | 182 | for (int i = 0; i < len; i++) 183 | if (buff[i] == '\n') nlines++; 184 | } 185 | 186 | (void)close(fd); 187 | 188 | return nlines; 189 | } 190 | 191 | /*** R 192 | cars2 <- cars[rep(1:50, 20e2), rep(1:2, 100)] 193 | # cars2 <- cars[rep(1:50, 5), rep(1:2, 30e3)] 194 | bigreadr::fwrite2(cars2, "tmp-data/cars.csv") 195 | for (i in 2:10) bigreadr::fwrite2(cars2, "tmp-data/cars.csv", append = TRUE) 196 | 197 | system.time(print(nlines7("tmp-data/cars.csv"))) 198 | system.time(print(nlines1("tmp-data/cars.csv"))) 199 | system.time(system("wc -l tmp-data/cars.csv")) 200 | system.time(print(bigreadr::nlines("tmp-data/cars.csv"))) 201 | 202 | 203 | # microbenchmark::microbenchmark( 204 | # nlines1("tmp-data/cars.csv"), # 1000 205 | # # nlines2("tmp-data/cars.csv"), # 1500 206 | # # nlines3("tmp-data/cars.csv"), # 33500 207 | # # nlines4("tmp-data/cars.csv", 1024), # 1050 208 | # # nlines4("tmp-data/cars.csv", 1024 * 1024), # 1100 209 | # # nlines5("tmp-data/cars.csv", 1024), # 1050 210 | # # nlines5("tmp-data/cars.csv", 1024 * 1024), # 1100 211 | # nlines6("tmp-data/cars.csv"), # 1050 212 | # # nlines5("tmp-data/cars.csv", 1024 * 1024 * 64), # 1100 213 | # # nlines_mmap("tmp-data/cars.csv"), # 1900 214 | # # bigreadr::nlines("tmp-data/cars.csv"), # 3400 215 | # system("wc -l tmp-data/cars.csv"), # 400 216 | # # system("grep -c '\n' tmp-data/cars.csv"), # 400 217 | # times = 5 218 | # ) 219 | #### 5M x 200 #### 220 | # Unit: milliseconds 221 | # expr min lq mean median 222 | # nlines("tmp-data/cars.csv") 2092.0324 2098.8990 2138.311 2101.8745 223 | # bigreadr::nlines("tmp-data/cars.csv") 6746.9176 6762.7296 6868.384 6799.3394 224 | # system("wc -l tmp-data/cars.csv") 853.2787 856.6954 863.299 862.6793 225 | # uq max neval 226 | # 2113.3909 2448.5013 10 227 | # 6816.9416 7438.5126 10 228 | # 867.3886 883.3312 10 229 | 230 | #### 5K x 200K #### 231 | # Unit: 
milliseconds 232 | # expr min lq mean median 233 | # nlines("tmp-data/cars.csv") 1852.4570 1858.6921 2429.795 1934.0913 234 | # bigreadr::nlines("tmp-data/cars.csv") 6557.9264 6621.6394 6982.951 6836.6807 235 | # system("wc -l tmp-data/cars.csv") 798.7292 845.8318 1426.601 864.2086 236 | # uq max neval 237 | # 2312.193 5831.689 10 238 | # 7211.877 7922.534 10 239 | # 1092.094 5640.510 10 240 | 241 | val <- try(system(paste("wc -l", "tmp-data/cars.csv"), intern = TRUE, 242 | ignore.stderr = TRUE), silent = TRUE) 243 | val <- `if`(class(val) == "try-error", nlines1("tmp-data/cars.csv"), 244 | as.numeric(strsplit(val, " ")[[1]][1])) 245 | */ 246 | -------------------------------------------------------------------------------- /tmp-tests/bench-acc.R: -------------------------------------------------------------------------------- 1 | 2 | library(data.table) 3 | iris_dt <- as.data.table(iris) 4 | microbenchmark::microbenchmark( 5 | iris[, 1:3], 6 | iris[1:3], 7 | iris_dt[, 1:3], 8 | iris[, 3, drop = FALSE], 9 | iris[3], 10 | iris_dt[, 3, drop = FALSE] 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /tmp-tests/bench-rbind.R: -------------------------------------------------------------------------------- 1 | mtcars <- datasets::mtcars 2 | mtcars <- mtcars[rep(1:32, 1000), rep(1:11, 10)] 3 | mtcars_dt <- data.table::as.data.table(mtcars) 4 | 5 | list_mtcars <- rep(list(mtcars), 10) 6 | list_mtcars_dt <- rep(list(mtcars_dt), 10) 7 | 8 | rbind_df <- function(list_df) { 9 | list_df_merged <- lapply(seq_along(list_df[[1]]), function(k) { 10 | unlist(lapply(list_df, function(l) l[[k]])) 11 | }) 12 | list_df_merged_named <- stats::setNames(list_df_merged, names(list_df[[1]])) 13 | as.data.frame(list_df_merged_named, stringsAsFactors = FALSE) 14 | } 15 | 16 | rbind_df2 <- function(list_df) { 17 | data.table::rbindlist(list_df) 18 | } 19 | 20 | microbenchmark::microbenchmark( 21 | 22 | A1 = rbind.data.frame(mtcars), 23 | A2 = rbind.data.frame(mtcars_dt), 24 | B1 = rbind_df(list(mtcars)), 25 | B2 = rbind_df(list(mtcars_dt)), 26 | C1 = rbind_df2(list(mtcars)), 27 | C2 = rbind_df2(list(mtcars_dt)), 28 | 29 | AA1 = do.call(rbind.data.frame, list_mtcars), 30 | AA2 = do.call(rbind.data.frame, list_mtcars_dt), 31 | BB1 = rbind_df(list_mtcars), 32 | BB2 = rbind_df(list_mtcars_dt), 33 | CC1 = rbind_df2(list_mtcars), 34 | CC2 = rbind_df2(list_mtcars_dt), 35 | 36 | times = 10 37 | ) 38 | -------------------------------------------------------------------------------- /tmp-tests/bench-read.R: -------------------------------------------------------------------------------- 1 | csv <- readr::readr_example("mtcars.csv") 2 | df <- data.table::fread(csv, data.table = FALSE) 3 | 4 | ## LONG CSV 5 | csv2 <- "tmp-data/mtcars-long.csv" 6 | # data.table::fwrite(df[rep(seq_len(nrow(df)), 500000), ], csv2, 7 | # quote = FALSE, row.names = FALSE) 8 | 9 | system.time( 10 | df2 <- data.table::fread(csv2) 11 | ) # 3.5 12 | 13 | system.time( 14 | df3 <- readr::read_csv(csv2) 15 | ) # 25 16 | rm(df2, df3); gc(reset = TRUE) 17 | 18 | 19 | system.time(nlines <- fpeek::peek_count_lines(csv2)) # 1.8 20 | system.time(nlines2 <- nrow(data.table::fread(csv2, select = 1))) # 2.8 21 | 22 | tmp <- tempfile() 23 | if (Sys.info()[["sysname"]] == "Windows") { 24 | 25 | # https://sourceforge.net/projects/gnuwin32/ 26 | awk <- shortPathName("C:/Program Files (x86)/GnuWin32/bin/awk.exe") # Windows 27 | cmd <- sprintf("%s \"NR%%%d==1{x=\"\"\"%s\"\"\"++i;}{print > x}\" %s", 28 | awk, 20, gsub("\\\\", 
"\\\\\\\\", tmp), normalizePath(csv)) 29 | 30 | } else { 31 | 32 | cmd <- sprintf("awk 'NR%%%d==1{x=\"%s\"++i;}{print > x}' %s", 33 | tmp, 20, normalizePath(csv)) 34 | 35 | } 36 | system(cmd) 37 | readLines(paste0(tmp, 1), 1) 38 | 39 | cmd <- sprintf("%s \"NR%%%d==1{x=\"\"\"%s\"\"\"++i;}{print > x}\" %s", 40 | awk, 20000, gsub("\\\\", "\\\\\\\\", tmp), normalizePath(csv2)) 41 | system.time(system(cmd)) # 1.4 42 | # readLines(paste0(tmp, 1)) 43 | 44 | 45 | ## LARGE CSV 46 | csv3 <- "tmp-data/mtcars-wide.csv" 47 | data.table::fwrite(df[rep(seq_len(nrow(df)), 500), rep(seq_len(ncol(df)), 1000)], csv3, 48 | quote = FALSE, row.names = FALSE) 49 | 50 | system.time( 51 | df2 <- data.table::fread(csv3, data.table = FALSE) 52 | ) # 0.06 -> 0.65 -> 9.8 53 | system.time( 54 | nlines <- nrow(data.table::fread(csv3, select = 1)) 55 | ) # 0.1 -> 0.45 -> 4.5 56 | system.time(nlines2 <- fpeek::peek_count_lines(csv3)) 57 | 58 | # system.time( 59 | # df3 <- readr::read_csv(csv3) 60 | # ) # 6 61 | 62 | cmd <- sprintf("%s \"NR%%%d==1{x=\"\"\"%s\"\"\"++i;}{print > x}\" %s", 63 | awk, 2, gsub("\\\\", "\\\\\\\\", tmp), normalizePath(csv3)) 64 | system.time(system(cmd)) # 1.4 65 | # readLines(paste0(tmp, 1)) 66 | 67 | -------------------------------------------------------------------------------- /tmp-tests/bench-read2.R: -------------------------------------------------------------------------------- 1 | # https://sourceforge.net/projects/gnuwin32/files/coreutils/5.3.0/coreutils-5.3.0.exe/download 2 | 3 | csv <- readr::readr_example("mtcars.csv") 4 | # split <- shortPathName("C:\\Program Files (x86)\\GnuWin32/bin/split.exe") 5 | split <- "split" 6 | 7 | system(sprintf("%s --version", split)) == 0 8 | # system(sprintf("%s -l 5 %s", split, csv)) 9 | 10 | ## LONG CSV 11 | df <- data.table::fread(csv, data.table = FALSE) 12 | csv2 <- tempfile(fileext = ".csv") 13 | data.table::fwrite(df[rep(seq_len(nrow(df)), 500000), ], csv2, 14 | quote = FALSE, row.names = FALSE) 15 | file.size(csv2) 16 | 17 | # system.time(system(sprintf("find /c /v \"aabbccdd\" %s", csv2))) 18 | 19 | system.time(data.table::fread(csv2, nThread = 1)) ## 2.2 20 | system.time(data.table::fread(csv2, nThread = 2)) ## 1.5 21 | system.time(data.table::fread(csv2, nThread = 4)) ## 1 22 | system.time(data.table::fread(csv2, nThread = 7)) ## 0.7 23 | 24 | tmp <- tempfile() 25 | system.time(system(sprintf("%s -l 200000 %s %s", split, csv2, tmp))) ## 12 sec 26 | system.time(fpeek::peek_count_lines(csv2)) ## 3 sec 27 | system.time(nrow(data.table::fread(csv2, select = 1))) 28 | 29 | files <- list.files(dirname(tmp), basename(tmp), full.names = TRUE) 30 | df1 <- data.table::fread(files[1], data.table = FALSE) 31 | data.table::fread(tail(files, 1), col.names = names(df1), data.table = FALSE) 32 | 33 | scan(csv, "", sep = ",", nlines = 1, skip = 0) 34 | 35 | 36 | df <- mtcars 37 | df2 <- unname(mtcars) 38 | 39 | sapply(df, data.table::address) 40 | sapply(df2, data.table::address) 41 | 42 | 43 | microbenchmark::microbenchmark( 44 | as.matrix(unname(mtcars), rownames.force = FALSE), 45 | as.matrix(mtcars) 46 | ) 47 | -------------------------------------------------------------------------------- /tmp-tests/bench-read3.R: -------------------------------------------------------------------------------- 1 | 2 | ## LONG CSV 3 | csv2 <- "tmp-data/mtcars-long.csv" 4 | # data.table::fwrite(df[rep(seq_len(nrow(df)), 500000), ], csv2, 5 | # quote = FALSE, row.names = FALSE) 6 | 7 | library(bigreadr) 8 | if (Sys.info()[["sysname"]] == "Windows") { 9 | 
options(bigreadr.split = "C:\\Program Files (x86)\\GnuWin32/bin/split.exe") 10 | } 11 | 12 | system.time( 13 | test <- split_file(csv2) 14 | ) 15 | 16 | rm(test2); gc(reset = TRUE) 17 | system.time( 18 | test2 <- big_fread(csv2, every_x_mb = 100) 19 | ) 20 | gc() # + 2 GB 21 | 22 | rm(test2); gc(reset = TRUE) 23 | system.time( 24 | test2 <- data.table::fread(csv2) 25 | ) 26 | gc() # + 1 GB 27 | 28 | # system.time(test <- split_file(csv2, every_x_mb = 1000)) 29 | # system.time(test <- split_file(csv2, every_x_mb = 10)) 30 | system.time(tmp <- lapply(test, function(f) data.table::fread(f, data.table = FALSE))) 31 | 32 | system.time(tmp2 <- do.call(my_rbind, tmp)) 33 | 34 | system.time( 35 | test2 <- big_fread(csv2, every_x_mb = 100) 36 | ) 37 | system.time( 38 | test3 <- data.table::fread(csv2) 39 | ) 40 | 41 | 42 | tmp <- tempfile() 43 | system.time( 44 | status <- system(sprintf("%s -C %dm %s %s", "split", 100, csv2, tmp)) 45 | ) 46 | file_parts <- list.files(dirname(tmp), basename(tmp), full.names = TRUE) 47 | 48 | dt1 <- data.table::fread(file_parts[1]) 49 | 50 | system.time(df2 <- data.table::fread(csv2, data.table = FALSE)) 51 | system.time(df3 <- bigreadr::big_fread( 52 | csv2, .transform = identity 53 | )) 54 | -------------------------------------------------------------------------------- /tmp-tests/bench-read4.R: -------------------------------------------------------------------------------- 1 | 2 | ## LONG CSV 3 | csv2 <- "tmp-data/mtcars-long.csv" 4 | 5 | Rcpp::sourceCpp('tmp-tests/test-setvbuf.cpp') 6 | 7 | # system.time(test <- test_setvbuf(csv2, 10)) 8 | system.time(test <- test_setvbuf2(csv2)) 9 | system.time(test2 <- fpeek::peek_count_lines(csv2)) 10 | 11 | csv2.2 <- sub("\\.csv$", "2.csv", csv2) 12 | system.time(test <- test_setvbuf3(csv2, csv2.2)) 13 | 14 | # df1 <- data.table::fread(csv2) 15 | # df2 <- data.table::fread(csv2.2) 16 | # identical(df1, df2) 17 | # 18 | # system.time(file.copy(csv2, sub("\\.csv$", "3.csv", csv2))) # 1.5 sec 19 | -------------------------------------------------------------------------------- /tmp-tests/bench-read5.R: -------------------------------------------------------------------------------- 1 | 2 | library(bigreadr) 3 | if (Sys.info()[["sysname"]] == "Windows") { 4 | options(bigreadr.split = "C:\\Program Files (x86)\\GnuWin32/bin/split.exe") 5 | } 6 | 7 | 8 | ## LONG CSV 9 | csv2 <- "tmp-data/mtcars-long.csv" 10 | # csv <- readr::readr_example("mtcars.csv") 11 | # df <- data.table::fread(csv, data.table = FALSE) 12 | # data.table::fwrite(df[rep(seq_len(nrow(df)), 500000), ], csv2, 13 | # quote = FALSE, row.names = FALSE) 14 | 15 | nlines(csv2) 16 | system.time( 17 | test <- split_file(csv2) 18 | ) 19 | # Windows: 4.6 / 8.2 / 8.9 20 | # Linux: 1.5 / 1.8 / 1.4 21 | # Linux2: 1.4 / 1.3 / 1.1 / 1.3 22 | 23 | Rcpp::sourceCpp('tmp-tests/test-setvbuf5.cpp') 24 | tmp <- tempfile() 25 | system.time( 26 | test2 <- test_setvbuf6(csv2, tmp, 1e6) 27 | ) 28 | # Windows: 15 / 4.8 / 5.0 / 4.4 29 | # Linux: 5.4 / 3.3 / 3.6 / 3.5 / 2.8 30 | # Linux2: 1.3 / 1.8 / 1.8 / 1.7 31 | as.integer(test2) 32 | list.files(dirname(tmp), basename(tmp)) 33 | 34 | 35 | 36 | ## LARGE CSV 37 | csv3 <- "tmp-data/mtcars-wide.csv" 38 | # data.table::fwrite(df[rep(seq_len(nrow(df)), 50), rep(seq_len(ncol(df)), 10000)], 39 | # csv3, quote = FALSE, row.names = FALSE) 40 | 41 | nlines(csv3) 42 | system.time( 43 | test <- split_file(csv3) 44 | ) 45 | # Windows: 4.3 / 3.9 / 9.6 46 | # Linux: 3.2 / 1.4 / 3.7 47 | # Linux2: 1.4 / 1.2 / 1.1 48 | 49 | 
Rcpp::sourceCpp('tmp-tests/test-setvbuf5.cpp') 50 | tmp <- tempfile() 51 | system.time( 52 | test2 <- test_setvbuf6(csv3, tmp, 100) 53 | ) 54 | # Windows: 14. / 5.0 / 4.6 55 | # Linux: 1.7 / 1.7 / 6.5 56 | # Linux2: 0.4 / 1.1 / 1.2 / 1.2 57 | as.integer(test2) 58 | list.files(dirname(tmp), basename(tmp)) 59 | -------------------------------------------------------------------------------- /tmp-tests/bench-read6.R: -------------------------------------------------------------------------------- 1 | library(bigreadr) 2 | 3 | long <- FALSE 4 | if (long) { 5 | csv2 <- "tmp-data/mtcars-long.csv" 6 | block <- 1e6 7 | M <- 11 8 | block2 <- 3 9 | } else { 10 | csv2 <- "tmp-data/mtcars-wide.csv" 11 | block <- 1e3 12 | M <- 11e3 13 | block2 <- 3 14 | } 15 | 16 | 17 | library(bigstatsr) 18 | (n1 <- bigreadr::nlines(csv2)) 19 | 20 | # debugonce(big_read) 21 | # tmp <- gc(reset = TRUE) 22 | # system.time( 23 | # test <- big_read(csv2, header = TRUE, sep = ",", 24 | # nlines = n1, confirmed = TRUE, 25 | # nlines.block = block, type = "double") 26 | # ) # 38 sec // 912 sec 27 | # gc() - tmp 28 | 29 | tmp <- gc(reset = TRUE) 30 | system.time({ 31 | X <- FBM(n1 - 1, M) 32 | offset <- 0 33 | test2 <- big_fread1(csv2, block, .transform = function(df) { 34 | ind <- rows_along(df) 35 | X[offset + ind, ] <- as.matrix(df) 36 | offset <<- offset + length(ind) 37 | NULL 38 | }, .combine = c) 39 | }) # 16 sec // 122 sec 40 | gc() - tmp 41 | 42 | # all.equal(dim(test$FBM), dim(X)) 43 | # all.equal(test$FBM[, 1], X[, 1]) 44 | # all.equal(test$FBM[, 11], X[, 11]) 45 | 46 | tmp <- gc(reset = TRUE) 47 | system.time({ 48 | X2 <- FBM(n1 - 1, M) 49 | offset <- 0 50 | test3 <- big_fread2(csv2, block2, .transform = function(df) { 51 | print(offset) 52 | ind <- cols_along(df) 53 | X2[, offset + ind] <- as.matrix(df) 54 | offset <<- offset + length(ind) 55 | NULL 56 | }, .combine = c) 57 | }) # 16 sec // 122 sec 58 | gc() - tmp 59 | 60 | all.equal(dim(X2), dim(X)) 61 | all.equal(X2[, 1], X[, 1]) 62 | all.equal(X2[, 11], X[, 11]) 63 | all.equal(X2[, M], X[, M]) 64 | 65 | -------------------------------------------------------------------------------- /tmp-tests/bench-read7.R: -------------------------------------------------------------------------------- 1 | csv <- "tmp-data/mtcars-long.csv" 2 | csv2 <- "tmp-data/mtcars-wide.csv" 3 | 4 | ## System command 'cut' is super slow on my Windows. 
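## The timings below compare two ways of reading a subset of columns:
## piping the file through 'cut' and letting fread() parse the command output,
## versus using fread()'s built-in select = argument, on both the long and the
## wide CSV.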
5 | 6 | tmp <- gc(reset = TRUE) 7 | system.time( 8 | test2 <- data.table::fread(sprintf("cut -f1-5 -s -d',' %s", csv)) 9 | ) 10 | gc() - tmp 11 | 12 | tmp <- gc(reset = TRUE) 13 | system.time( 14 | test2 <- data.table::fread(sprintf("cut -f1-50000 -s -d',' %s", csv2)) 15 | ) 16 | gc() - tmp 17 | 18 | 19 | tmp <- gc(reset = TRUE) 20 | system.time( 21 | test2 <- data.table::fread(csv, select = 1:5) 22 | ) 23 | gc() - tmp 24 | 25 | tmp <- gc(reset = TRUE) 26 | system.time( 27 | test2 <- data.table::fread(csv2, select = 1:50000) 28 | ) 29 | gc() - tmp 30 | 31 | 32 | tmp <- gc(reset = TRUE) 33 | system.time( 34 | test2 <- data.table::fread(csv2, select = 1:10000) 35 | ) 36 | gc() - tmp 37 | 38 | tryCatch(data.table::fread(file = csv, nrows = 0, skip = 1), 39 | error = function(e) NULL) 40 | dt <- data.table::fread(file = csv, select = c(5, 1, 3), verbose = TRUE) 41 | names(dt) 42 | names(mtcars)[c(5, 1, 3)] 43 | dt2 <- `[.data.frame`(dt, names(mtcars)[c(5, 1, 3)]) 44 | dt2[1] 45 | class(dt2) 46 | 47 | library(data.table) 48 | fwrite(iris, tmp <- tempfile()) 49 | debugonce(fread) 50 | data.table::fread(file = tmp, select = c(5, 1, 3), skip = 0) 51 | data.table::fread(file = tmp, select = c(5, 1, 3), skip = 1) 52 | 53 | system.time(first_line <- fread(csv2, nrows = 1)) 54 | system.time(zero_line <- fread(csv2, nrows = 0)) 55 | system.time(first_line <- fread(csv2, nrows = 1, skip = 1)) 56 | 57 | # system.time( 58 | # df4 <- limma::read.columns(csv, names(mtcars)[1:4], sep = ",") 59 | # ) # 32 sec 60 | -------------------------------------------------------------------------------- /tmp-tests/has-header.R: -------------------------------------------------------------------------------- 1 | part1 <- fread2(file_parts[1], skip = skip, ...) 2 | first_line <- scan(file, "", skip = skip, nlines = 1, sep = "\n", quiet = TRUE) 3 | match_names <- sapply(names(part1), regexpr, text = first_line, fixed = TRUE) 4 | has_header <- all( diff(match_names) > 0 ) 5 | -------------------------------------------------------------------------------- /tmp-tests/split.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define BUFLEN (64 * 1024) 4 | 5 | // [[Rcpp::export]] 6 | NumericVector test_setvbuf7(std::string filename, 7 | std::string filename2, 8 | int every_nlines, 9 | Environment parts_) { 10 | 11 | XPtr xptr = parts_["address"]; 12 | BMAcc parts(xptr); 13 | 14 | FILE *fp_in = fopen(filename.c_str(), "rb"), *fp_out; 15 | setvbuf(fp_in, NULL, _IOLBF, BUFLEN); 16 | 17 | const char *fn_out = filename2.c_str(); 18 | char name_out[strlen(fn_out) + 20]; 19 | 20 | size_t line_size; 21 | size_t size = 100; 22 | size_t last = size - 2; 23 | 24 | char *line = new char[size]; 25 | char *temp; 26 | 27 | bool not_eol, not_eof = true; 28 | int i, k = 0, c = 0; 29 | 30 | 31 | while (not_eof) { 32 | 33 | // Open file number 'k' 34 | sprintf(name_out, "%s%d.txt", fn_out, ++k); 35 | fp_out = fopen(name_out, "wb"); 36 | setvbuf(fp_out, NULL, _IOFBF, BUFLEN); 37 | 38 | // Fill it with 'every_nlines' lines 39 | i = 0; 40 | while (i < every_nlines) { 41 | 42 | if (fgets(line, size, fp_in) == NULL) { 43 | not_eof = false; 44 | break; 45 | } 46 | 47 | line_size = strlen(line); 48 | 49 | fputs(line, fp_out); 50 | 51 | if (line_size > last) { 52 | 53 | not_eol = (line[last] != '\n'); 54 | 55 | fflush(fp_out); 56 | size *= 2; 57 | temp = new char[size]; 58 | delete [] line; 59 | line = temp; 60 | last = size - 2; 61 | 62 | if (not_eol) continue; 63 | } 64 | 65 | // End of line 66 | 
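      // (i counts only completed lines of the current part: a truncated read of
      //  an over-long line takes the `continue` above and is not counted until
      //  its final chunk, ending in '\n' or at end-of-file, has been written)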
i++; 67 | 68 | } 69 | 70 | c += i; 71 | 72 | // Close file number 'k' 73 | fflush(fp_out); 74 | fclose(fp_out); 75 | parts(k - 1, 0) = 1; // OK to porcess 76 | Rcout << k << std::endl; 77 | 78 | } 79 | 80 | fclose(fp_in); 81 | 82 | return NumericVector::create(_["K"] = k, _["every"] = every_nlines, _["N"] = c); 83 | } 84 | 85 | -------------------------------------------------------------------------------- /tmp-tests/test-file2string.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | // [[Rcpp::export]] 7 | std::string file2string(std::string fn) { 8 | 9 | std::string str, strTotal; 10 | ifstream in; 11 | in.open(fn.c_str()); 12 | getline(in, str); 13 | while ( in ) { 14 | Rcpp::Rcout << strTotal.max_size() << std::endl; 15 | strTotal += str + '\n'; 16 | getline(in, str); 17 | } 18 | 19 | return strTotal; 20 | } 21 | 22 | // [[Rcpp::export]] 23 | std::string file2string2(std::string fn) { 24 | 25 | std::ifstream ifs(fn.c_str()); 26 | std::string content( (std::istreambuf_iterator(ifs) ), 27 | (std::istreambuf_iterator() ) ); 28 | 29 | return content; 30 | } 31 | 32 | 33 | /*** R 34 | test <- file2string("text-write.txt") 35 | writeLines(test) 36 | test2 <- file2string2("text-write.txt") 37 | writeLines(test2) 38 | csv2 <- "tmp-data/mtcars-long.csv" 39 | # system.time(test3 <- file2string2(csv2)) 40 | */ 41 | -------------------------------------------------------------------------------- /tmp-tests/test-mmap-nlines.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(rmio)]] 2 | // [[Rcpp::plugins(cpp11)]] 3 | #include 4 | #include // for std::error_code 5 | #include 6 | 7 | using std::size_t; 8 | 9 | 10 | // [[Rcpp::export]] 11 | double nlines_mmap(std::string path) { 12 | 13 | // Memory-map the file 14 | std::error_code error; 15 | mio::ummap_source ro_ummap; 16 | ro_ummap.map(path, error); 17 | if (error) Rcpp::stop("Error when mapping file:\n %s.\n", error.message()); 18 | 19 | int nlines = std::count_if(ro_ummap.begin(), ro_ummap.end(), 20 | [](unsigned char x) { return x == '\n'; }); 21 | 22 | size_t nbytes = ro_ummap.size(); 23 | // size_t nlines = 0; 24 | // for (size_t k = 0; k < nbytes; k++) { 25 | // if (ro_ummap[k] == '\n') nlines++; 26 | // } 27 | 28 | if (ro_ummap[nbytes - 1] != '\n') nlines++; 29 | 30 | return nlines; 31 | } 32 | 33 | // [[Rcpp::export]] 34 | double nlines_mmap2(std::string path) { 35 | 36 | // Memory-map the file 37 | std::error_code error; 38 | mio::ummap_source ro_ummap; 39 | ro_ummap.map(path, error); 40 | if (error) Rcpp::stop("Error when mapping file:\n %s.\n", error.message()); 41 | 42 | size_t nbytes = ro_ummap.size(); 43 | size_t nlines = 0; 44 | for (size_t k = 0; k < (nbytes - 4); k += 4) { 45 | nlines += ((ro_ummap[k] == '\n') + (ro_ummap[k + 1] == '\n')) + 46 | ((ro_ummap[k + 2] == '\n') + (ro_ummap[k + 3] == '\n')); 47 | } 48 | 49 | // TODO: add the test and test that more than 4 bytes 50 | 51 | if (ro_ummap[nbytes - 1] != '\n') nlines++; 52 | 53 | return nlines; 54 | } 55 | 56 | /*** R 57 | nlines_mmap("../tmp-data/cars.csv.bk") 58 | nlines_mmap2("../tmp-data/cars.csv.bk") 59 | */ 60 | -------------------------------------------------------------------------------- /tmp-tests/test-parallel.R: -------------------------------------------------------------------------------- 1 | library(bigreadr) 2 | library(bigstatsr) 3 | library(foreach) 4 | 5 | ## Need to handle 'skip' 6 | csv2 <- 
"tmp-data/mtcars-long.csv" 7 | n <- nlines(csv2) 8 | K <- 20 9 | every_lines <- ceiling(n / 20) 10 | 11 | Rcpp::sourceCpp('tmp-tests/test-setvbuf6.cpp') 12 | tmp <- tempfile() 13 | parts <- FBM(K, 1, init = 0, type = "integer") 14 | system.time( 15 | test <- test_setvbuf7(csv2, tmp, every_nlines = every_lines, parts) 16 | ) 17 | as.integer(test) 18 | files <- paste0(tmp, 1:K, ".txt") 19 | file.exists(files) 20 | 21 | system.time({ 22 | res2 <- foreach(ic = 1:K) %do% { 23 | while (parts[ic] == 0) Sys.sleep(TIME) 24 | bigreadr:::fread2(files[ic], nThread = 8) 25 | } 26 | }) # 0.9 / 1 (8) -> 2.4 (1) 27 | ## Either all or only 1 28 | 29 | 30 | -------------------------------------------------------------------------------- /tmp-tests/test-parallel2.R: -------------------------------------------------------------------------------- 1 | library(bigreadr) 2 | library(bigstatsr) 3 | library(foreach) 4 | 5 | ## Need to handle 'skip' 6 | csv2 <- "tmp-data/mtcars-long.csv" 7 | n <- nlines(csv2) 8 | K <- 20 9 | every_lines <- ceiling(n / 20) 10 | 11 | parallel <- TRUE 12 | if (!parallel) { 13 | registerDoSEQ() 14 | } else { 15 | cl <- parallel::makeCluster(2) 16 | doParallel::registerDoParallel(cl) 17 | # on.exit(parallel::stopCluster(cl), add = TRUE) 18 | } 19 | 20 | TIME <- 1 / (10 * K) 21 | parts <- FBM(K, 1, init = 0, type = "integer") 22 | tmp <- tempfile() 23 | files <- paste0(tmp, 1:K, ".txt") 24 | system.time({ 25 | res <- foreach(job = 1:2) %dopar% { 26 | 27 | if (job == 1) { 28 | print(1) 29 | system.time( 30 | test <- bigreadr:::test_setvbuf7(csv2, tmp, every_nlines = every_lines, parts) 31 | ) 32 | # NULL 33 | } else { 34 | print(2) 35 | system.time({ 36 | lapply(seq_along(files), function(k) { 37 | while (parts[k] == 0) Sys.sleep(TIME) 38 | bigreadr:::fread2(files[k]) 39 | }) 40 | }) 41 | } 42 | } 43 | }) 44 | parallel::stopCluster(cl) 45 | res 46 | # res <- do.call(bigreadr::my_rbind, res[[2]]) 47 | 48 | 49 | #### PROBLEM: fread reading (second job) is slowing down first job #### 50 | 51 | 52 | system.time({ 53 | lapply(seq_along(files), function(k) { 54 | while (parts[k] == 0) Sys.sleep(TIME) 55 | bigreadr:::fread2(files[k], nThread = 8) 56 | }) 57 | }) 58 | -------------------------------------------------------------------------------- /tmp-tests/test-setvbuf.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define BUFLEN (64 * 1024) 10 | 11 | // [[Rcpp::export]] 12 | int test_setvbuf(std::string filename, int size = 100) { 13 | 14 | FILE *fp = fopen(filename.c_str(), "r"); 15 | 16 | unsigned sizem1 = size - 1; 17 | int last = size - 2; 18 | 19 | char line[size]; 20 | // char *id; 21 | // char *token; 22 | char *buf = (char*)malloc(BUFLEN); 23 | int c = 0; 24 | 25 | setvbuf ( fp , buf , _IOLBF, BUFLEN ); 26 | while (fgets(line, size, fp) != NULL) { 27 | // Rcout << strlen(line) << std::endl; 28 | if (strlen(line) < sizem1) { 29 | c++; 30 | } else { 31 | // Rcout << (line[last] == '\n') << std::endl; 32 | if (line[last] == '\n') c++; 33 | } 34 | // id = strtok(line, "\t"); 35 | // token = strtok(NULL, "\t"); 36 | // 37 | // char *fnout = malloc(strlen(id)+5); 38 | // fnout = strcat(fnout, id); 39 | // fnout = strcat(fnout, ".seq"); 40 | // 41 | // fpout = fopen(fnout, "w"); 42 | // setvbuf ( fpout , NULL , _IONBF , 0 ); 43 | // fprintf(fpout, "%s", token); 44 | // fclose(fpout); 45 | } 46 | 47 | fclose(fp); 48 | 49 | return c; 50 | 51 | } 52 | 53 | 
// [[Rcpp::export]] 54 | int test_setvbuf2(std::string filename, int size = 100) { 55 | 56 | FILE *fp = fopen(filename.c_str(), "r"); 57 | 58 | unsigned sizem1 = size - 1; 59 | int last = size - 2; 60 | 61 | char * line = new char[size]; 62 | char * temp; 63 | // char *id; 64 | // char *token; 65 | // char *buf = (char*)malloc(BUFLEN); 66 | int c = 0; 67 | 68 | setvbuf ( fp , NULL , _IOLBF, BUFLEN ); 69 | while (fgets(line, size, fp) != NULL) { 70 | // Rcout << strlen(line) << std::endl; 71 | if (strlen(line) < sizem1) { 72 | c++; 73 | } else { 74 | // Rcout << (line[last] == '\n') << std::endl; 75 | if (line[last] == '\n') c++; 76 | size *= 2; 77 | temp = new char[size]; 78 | delete [] line; 79 | line = temp; 80 | sizem1 = size - 1; 81 | last = size - 2; 82 | } 83 | // id = strtok(line, "\t"); 84 | // token = strtok(NULL, "\t"); 85 | // 86 | // char *fnout = malloc(strlen(id)+5); 87 | // fnout = strcat(fnout, id); 88 | // fnout = strcat(fnout, ".seq"); 89 | // 90 | // fpout = fopen(fnout, "w"); 91 | // setvbuf ( fpout , NULL , _IONBF , 0 ); 92 | // fprintf(fpout, "%s", token); 93 | // fclose(fpout); 94 | } 95 | 96 | fclose(fp); 97 | 98 | return c; 99 | 100 | } 101 | 102 | /*** R 103 | test_setvbuf("text-write.txt") 104 | test_setvbuf2("text-write.txt") 105 | */ 106 | -------------------------------------------------------------------------------- /tmp-tests/test-setvbuf2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define BUFLEN (64 * 1024) 10 | 11 | // [[Rcpp::export]] 12 | int test_setvbuf3(std::string filename, 13 | std::string filename2, 14 | int size = 100) { 15 | 16 | FILE *fp_in = fopen(filename.c_str(), "rb"); 17 | FILE *fp_out = fopen(filename2.c_str(), "wb"); 18 | 19 | unsigned sizem1 = size - 1; 20 | int last = size - 2; 21 | 22 | char * line = new char[size]; 23 | char * temp; 24 | // char *id; 25 | // char *token; 26 | // char *buf = (char*)malloc(BUFLEN); 27 | int c = 0; 28 | 29 | setvbuf ( fp_in , NULL , _IOLBF, BUFLEN ); 30 | setvbuf ( fp_out , NULL , _IOFBF, BUFLEN ); 31 | 32 | 33 | while (fgets(line, size, fp_in) != NULL) { 34 | 35 | fputs(line, fp_out); 36 | 37 | // Rcout << strlen(line) << std::endl; 38 | if (strlen(line) < sizem1) { 39 | c++; 40 | // if (c % 1000 == 1) fflush(fp_out); 41 | } else { 42 | // Rcout << (line[last] == '\n') << std::endl; 43 | if (line[last] == '\n') c++; 44 | size *= 2; 45 | temp = new char[size]; 46 | delete [] line; 47 | line = temp; 48 | sizem1 = size - 1; 49 | last = size - 2; 50 | } 51 | 52 | // id = strtok(line, "\t"); 53 | // token = strtok(NULL, "\t"); 54 | // 55 | // char *fnout = malloc(strlen(id)+5); 56 | // fnout = strcat(fnout, id); 57 | // fnout = strcat(fnout, ".seq"); 58 | // 59 | // fpout = fopen(fnout, "w"); 60 | // setvbuf ( fpout , NULL , _IONBF , 0 ); 61 | // fprintf(fpout, "%s", token); 62 | // fclose(fpout); 63 | } 64 | 65 | fclose(fp_in); 66 | fflush(fp_out); 67 | fclose(fp_out); 68 | 69 | return c; 70 | } 71 | 72 | /*** R 73 | test_setvbuf3("text-write.txt", "text-write2.txt") 74 | */ 75 | -------------------------------------------------------------------------------- /tmp-tests/test-setvbuf3.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define BUFLEN (64 * 1024) 4 | 5 | // [[Rcpp::export]] 6 | int test_setvbuf4(std::string filename, std::string filename2) { 7 | 8 | FILE *fp_in = fopen(filename.c_str(), "rb"); 
9 | FILE *fp_out = fopen(filename2.c_str(), "wb"); 10 | 11 | size_t line_size; 12 | size_t size = 100; 13 | size_t last = size - 2; 14 | 15 | char *line = new char[size]; 16 | char *temp; 17 | int c = 0; 18 | bool not_eol; 19 | 20 | setvbuf(fp_in, NULL, _IOLBF, BUFLEN); 21 | setvbuf(fp_out, NULL, _IOFBF, BUFLEN); 22 | 23 | while (fgets(line, size, fp_in) != NULL) { 24 | 25 | line_size = strlen(line); 26 | 27 | fputs(line, fp_out); 28 | 29 | if (line_size > last) { 30 | 31 | not_eol = (line[last] != '\n'); 32 | 33 | size *= 2; 34 | temp = new char[size]; 35 | delete [] line; 36 | line = temp; 37 | last = size - 2; 38 | 39 | if (not_eol) continue; 40 | } 41 | 42 | // End of line 43 | c++; 44 | 45 | } 46 | 47 | fclose(fp_in); 48 | fflush(fp_out); 49 | fclose(fp_out); 50 | 51 | return c; 52 | } 53 | 54 | /*** R 55 | test_setvbuf4("text-write.txt", "text-write2.txt") 56 | */ 57 | -------------------------------------------------------------------------------- /tmp-tests/test-setvbuf4.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #define BUFLEN (64 * 1024) 5 | 6 | // [[Rcpp::export]] 7 | int test_setvbuf5(std::string filename, std::string filename2) { 8 | 9 | FILE *fp_in = fopen(filename.c_str(), "rb"), *fp_out; 10 | setvbuf(fp_in, NULL, _IOLBF, BUFLEN); 11 | 12 | const char *fn_out = filename2.c_str(); 13 | char name_out[strlen(fn_out) + 20]; 14 | 15 | size_t line_size; 16 | size_t size = 100; 17 | size_t last = size - 2; 18 | 19 | char *line = new char[size]; 20 | char *temp; 21 | int c = 0; 22 | bool not_eol; 23 | 24 | sprintf(name_out, "%s%d.txt", fn_out, c); 25 | fp_out = fopen(name_out, "wb"); 26 | setvbuf(fp_out, NULL, _IOFBF, BUFLEN); 27 | 28 | while (fgets(line, size, fp_in) != NULL) { 29 | 30 | line_size = strlen(line); 31 | 32 | fputs(line, fp_out); 33 | 34 | if (line_size > last) { 35 | 36 | not_eol = (line[last] != '\n'); 37 | 38 | fflush(fp_out); 39 | size *= 2; 40 | temp = new char[size]; 41 | delete [] line; 42 | line = temp; 43 | last = size - 2; 44 | 45 | if (not_eol) continue; 46 | } 47 | 48 | // End of line 49 | c++; 50 | fflush(fp_out); 51 | fclose(fp_out); 52 | sprintf(name_out, "%s%d.txt", fn_out, c); 53 | fp_out = fopen(name_out, "wb"); 54 | setvbuf(fp_out, NULL, _IOFBF, BUFLEN); 55 | 56 | } 57 | 58 | fflush(fp_out); 59 | fclose(fp_out); // last one has nothing inside 60 | fclose(fp_in); 61 | 62 | return c; 63 | } 64 | 65 | /*** R 66 | test_setvbuf5("text-write.txt", "tmp/text-write-part") 67 | readLines("text-write.txt")[[6]] 68 | readLines("tmp/text-write-part5.txt") 69 | */ 70 | -------------------------------------------------------------------------------- /tmp-tests/test-setvbuf5.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #define BUFLEN (64 * 1024) 5 | 6 | // [[Rcpp::export]] 7 | NumericVector test_setvbuf6(std::string filename, 8 | std::string filename2, 9 | int every_nlines) { 10 | 11 | FILE *fp_in = fopen(filename.c_str(), "rb"), *fp_out; 12 | setvbuf(fp_in, NULL, _IOLBF, BUFLEN); 13 | 14 | const char *fn_out = filename2.c_str(); 15 | char name_out[strlen(fn_out) + 20]; 16 | 17 | size_t line_size; 18 | size_t size = 100; 19 | size_t last = size - 2; 20 | 21 | char *line = new char[size]; 22 | char *temp; 23 | 24 | bool not_eol, not_eof = true; 25 | int i, k = 0, c = 0; 26 | 27 | 28 | while (not_eof) { 29 | 30 | // Open file number 'k' 31 | sprintf(name_out, "%s%d.txt", fn_out, ++k); 32 | 
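    // part 'k' is written to '<filename2><k>.txt', with k starting at 1
    // (e.g. "tmp2/text-write-part4.txt", checked in the R chunk at the end of this file)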
fp_out = fopen(name_out, "wb"); 33 | setvbuf(fp_out, NULL, _IOFBF, BUFLEN); 34 | 35 | // Fill it with 'every_nlines' lines 36 | i = 0; 37 | while (i < every_nlines) { 38 | 39 | if (fgets(line, size, fp_in) == NULL) { 40 | not_eof = false; 41 | break; 42 | } 43 | 44 | line_size = strlen(line); 45 | 46 | fputs(line, fp_out); 47 | 48 | if (line_size > last) { 49 | 50 | not_eol = (line[last] != '\n'); 51 | 52 | fflush(fp_out); 53 | size *= 2; 54 | temp = new char[size]; 55 | delete [] line; 56 | line = temp; 57 | last = size - 2; 58 | 59 | if (not_eol) continue; 60 | } 61 | 62 | // End of line 63 | i++; 64 | 65 | } 66 | 67 | c += i; 68 | 69 | // Close file number 'k' 70 | fflush(fp_out); 71 | fclose(fp_out); 72 | 73 | } 74 | 75 | fclose(fp_in); 76 | 77 | return NumericVector::create(_["K"] = k, _["every"] = every_nlines, _["N"] = c); 78 | } 79 | 80 | /***R 81 | test_setvbuf6("text-write.txt", "tmp2/text-write-part", 2) 82 | readLines("text-write.txt")[[7]] 83 | readLines("tmp2/text-write-part4.txt") 84 | */ 85 | -------------------------------------------------------------------------------- /tmp-tests/test-setvbuf6.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(BH, bigstatsr)]] 2 | #include 3 | 4 | #define BUFLEN (64 * 1024) 5 | 6 | // [[Rcpp::export]] 7 | NumericVector test_setvbuf7(std::string filename, 8 | std::string filename2, 9 | int every_nlines, 10 | Environment parts_) { 11 | 12 | XPtr xptr = parts_["address"]; 13 | BMAcc parts(xptr); 14 | 15 | FILE *fp_in = fopen(filename.c_str(), "rb"), *fp_out; 16 | setvbuf(fp_in, NULL, _IOLBF, BUFLEN); 17 | 18 | const char *fn_out = filename2.c_str(); 19 | char name_out[strlen(fn_out) + 20]; 20 | 21 | size_t line_size; 22 | size_t size = 100; 23 | size_t last = size - 2; 24 | 25 | char *line = new char[size]; 26 | char *temp; 27 | 28 | bool not_eol, not_eof = true; 29 | int i, k = 0, c = 0; 30 | 31 | 32 | while (not_eof) { 33 | 34 | // Open file number 'k' 35 | sprintf(name_out, "%s%d.txt", fn_out, ++k); 36 | fp_out = fopen(name_out, "wb"); 37 | setvbuf(fp_out, NULL, _IOFBF, BUFLEN); 38 | 39 | // Fill it with 'every_nlines' lines 40 | i = 0; 41 | while (i < every_nlines) { 42 | 43 | if (fgets(line, size, fp_in) == NULL) { 44 | not_eof = false; 45 | break; 46 | } 47 | 48 | line_size = strlen(line); 49 | 50 | fputs(line, fp_out); 51 | 52 | if (line_size > last) { 53 | 54 | not_eol = (line[last] != '\n'); 55 | 56 | fflush(fp_out); 57 | size *= 2; 58 | temp = new char[size]; 59 | delete [] line; 60 | line = temp; 61 | last = size - 2; 62 | 63 | if (not_eol) continue; 64 | } 65 | 66 | // End of line 67 | i++; 68 | 69 | } 70 | 71 | c += i; 72 | 73 | // Close file number 'k' 74 | fflush(fp_out); 75 | fclose(fp_out); 76 | parts(k - 1, 0) = 1; 77 | Rcout << k << std::endl; 78 | 79 | } 80 | 81 | fclose(fp_in); 82 | 83 | return NumericVector::create(_["K"] = k, _["every"] = every_nlines, _["N"] = c); 84 | } 85 | 86 | -------------------------------------------------------------------------------- /tmp-tests/test-string.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #define BUFLEN (64 * 1024) 5 | 6 | // [[Rcpp::export]] 7 | void test_string(std::string filename) { 8 | 9 | const char *fn = filename.c_str(); 10 | char name_out[strlen(fn) + 20]; 11 | 12 | for (int k = 1; k < 10; k++) { 13 | sprintf(name_out, "%s%d.txt", fn, k); 14 | Rcout << filename << std::endl; 15 | Rcout << name_out << std::endl; 16 | } 17 | } 
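// (quick check that a buffer of strlen(fn) + 20 bytes leaves enough room for
//  the "%d.txt" suffix that the split helpers append to the temporary-file prefix)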
18 | 19 | /*** R 20 | test_string(tempfile()) 21 | */ 22 | -------------------------------------------------------------------------------- /tmp-tests/text-write.txt: -------------------------------------------------------------------------------- 1 | a 199 23.45 2 | b 1e+8 3 | c 23339999 4 | errrrr 5 | dde 6 | mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmeeeeeeeeeeeeeeeeeeeeeeeelllllllllllllllllllllllllddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggggggggggggggggaaacvdgbfetgdfghmethdpfa 7 | mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmeeeeeeeeeeeeeeeeeeeeeeeelllllllllllllllllllllllllddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggggggggggggggggaaacvdgbfetgdfghmethdpf 8 | -------------------------------------------------------------------------------- /tmp-tests/text-write2.txt: -------------------------------------------------------------------------------- 1 | a 199 23.45 2 | b 1e+8 3 | c 23339999 4 | errrrr 5 | dde 6 | mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmeeeeeeeeeeeeeeeeeeeeeeeelllllllllllllllllllllllllddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggggggggggggggggaaacvdgbfetgdfghmethdpfa 7 | mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmeeeeeeeeeeeeeeeeeeeeeeeelllllllllllllllllllllllllddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeggggggggggggggggggggggaaacvdgbfetgdfghmethdpf 8 | -------------------------------------------------------------------------------- /vignettes/csv2sqlite.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Convert a CSV to SQLite by parts" 3 | author: "Florian Privé" 4 | date: "August 26, 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, fig.align = "center", eval = FALSE) 10 | ``` 11 | 12 | ## How 13 | 14 | You can easily use this package {bigreadr} to convert a CSV to an SQLite database without loading the whole CSV in memory. 15 | 16 | You can use the following function: 17 | 18 | ```{r} 19 | csv2sqlite <- function(csv, 20 | every_nlines, 21 | table_name, 22 | dbname = sub("\\.csv$", ".sqlite", csv), 23 | ...) { 24 | 25 | # Prepare reading 26 | con <- RSQLite::dbConnect(RSQLite::SQLite(), dbname) 27 | init <- TRUE 28 | fill_sqlite <- function(df) { 29 | 30 | if (init) { 31 | RSQLite::dbCreateTable(con, table_name, df) 32 | init <<- FALSE 33 | } 34 | 35 | RSQLite::dbAppendTable(con, table_name, df) 36 | NULL 37 | } 38 | 39 | # Read and fill by parts 40 | bigreadr::big_fread1(csv, every_nlines, 41 | .transform = fill_sqlite, 42 | .combine = unlist, 43 | ... = ...) 44 | 45 | # Returns 46 | con 47 | } 48 | ``` 49 | 50 | Function `bigreadr::big_fread1()` first splits the CSV in smaller CSV files, then it reads these CSV files as data frames and transform them, and finally combine the results. 51 | 52 | Here, the transformation is just appending the data frame to the SQLite database (and creating this DB the first time). Moreover, you don't want to return anything (`NULL`). 
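The same split / transform / combine pattern works for other tasks that do not fit in memory. As a further illustration, here is a minimal sketch (assuming a hypothetical `big.csv` with a `year` column) that keeps only a subset of rows while reading by parts:

```{r}
# Read 'big.csv' one million lines at a time, keep only the recent rows of
# each chunk, and stack the filtered chunks into a single data frame
recent <- bigreadr::big_fread1(
  "big.csv", every_nlines = 1e6,
  .transform = function(df) df[df$year > 2000, , drop = FALSE],
  .combine = bigreadr::rbind_df
)
```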
53 | 54 | ## Use case 55 | 56 | For example, with this function, I was able to convert a [9 GB CSV file](https://www.data.gouv.fr/fr/datasets/base-sirene-des-entreprises-et-de-leurs-etablissements-siren-siret/) to SQLite in 40 minutes, using less than 2 GB of memory (here, `csv` is the path to the downloaded CSV file): 57 | 58 | ```{r} 59 | con <- csv2sqlite(csv, every_nlines = 1e6, table_name = "sirene", 60 |                   encoding = "Latin-1") 61 | ``` 62 | 63 | --------------------------------------------------------------------------------
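Once the conversion has finished, the returned connection can be used to sanity-check the import and to query the table without ever loading it entirely in memory. A short sketch, assuming the `con` and the `"sirene"` table created just above:

```{r}
# Number of imported rows, counted by SQLite itself
RSQLite::dbGetQuery(con, "SELECT COUNT(*) AS n_rows FROM sirene")

# Inspect only the first few rows
RSQLite::dbGetQuery(con, "SELECT * FROM sirene LIMIT 5")

# Close the connection when done
RSQLite::dbDisconnect(con)
```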