├── test ├── REQUIRE ├── test_files │ ├── test_empty_file.csv │ ├── transposed_1row.csv │ ├── transposed_emtpy.csv │ ├── test_one_row_of_data.cscv │ ├── test_one_row_of_data.csv │ ├── dash_as_null.csv │ ├── plus_as_null.csv │ ├── comma_decimal.csv │ ├── transposed_noheader.csv │ ├── int8_overflow.csv │ ├── test_single_column.csv │ ├── test_mac_line_endings.csv │ ├── transposed.csv │ ├── transposed_extra_newline.csv │ ├── test_basic.csv │ ├── test_no_header.csv │ ├── test_basic_pipe.csv │ ├── test_dates.csv │ ├── test_tab_null_empty.txt │ ├── test_simple_quoted.csv │ ├── test_tab_null_string.txt │ ├── test_crlf_line_endings.csv │ ├── test_excel_date_formats.csv │ ├── test_float_in_int_column.csv │ ├── test_floats.csv │ ├── test_newline_line_endings.csv │ ├── test_null_only_column.csv │ ├── test_utf8.csv │ ├── test_missing_value.csv │ ├── test_windows.csv │ ├── .DS_Store │ ├── test_header_on_row_4.csv │ ├── test_quoted_delim_and_newline.csv │ ├── test_utf8_with_BOM.csv │ ├── bools.csv │ ├── test_empty_file_newlines.csv │ ├── test_missing_value_NULL.csv │ ├── test_quoted_numbers.csv │ ├── test_basic.csv.gz │ ├── test_datetimes.csv │ ├── test_header_range.csv │ ├── test_mixed_date_formats.csv │ ├── test_2_footer_rows.csv │ ├── test_utf16.csv │ ├── test_utf16_be.csv │ ├── test_utf16_le.csv │ ├── census.txt │ ├── stocks.csv │ ├── double_quote_quotechar_and_escapechar.csv │ ├── baseball.csv │ └── attenu.csv ├── sink.jl ├── validate.jl ├── runtests.jl ├── datastreams.jl ├── multistream.jl ├── io.jl └── source.jl ├── benchmark ├── REQUIRE ├── .tune.jld ├── .results │ └── 449f7c62ce415c3ecf5adddf115b52a8bff6c6a6.jld ├── randoms_small.csv └── benchmarks.jl ├── docs ├── .documenter.enc ├── make.jl ├── build │ ├── assets │ │ ├── Documenter.css │ │ └── mathjaxhelper.js │ └── index.md ├── src │ └── index.md └── mkdocs.yml ├── .gitignore ├── REQUIRE ├── .travis.yml ├── LICENSE.md ├── appveyor.yml ├── README.md └── src ├── validate.jl ├── Sink.jl ├── CSV.jl ├── float.jl ├── parsefields.jl ├── io.jl ├── TransposedSource.jl └── Source.jl /test/REQUIRE: -------------------------------------------------------------------------------- 1 | DecFP 2 | -------------------------------------------------------------------------------- /benchmark/REQUIRE: -------------------------------------------------------------------------------- 1 | @unix DecFP 2 | -------------------------------------------------------------------------------- /test/test_files/test_empty_file.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_files/transposed_1row.csv: -------------------------------------------------------------------------------- 1 | col1,1 -------------------------------------------------------------------------------- /test/test_files/transposed_emtpy.csv: -------------------------------------------------------------------------------- 1 | col1 -------------------------------------------------------------------------------- /test/test_files/test_one_row_of_data.cscv: -------------------------------------------------------------------------------- 1 | 1,2,3 -------------------------------------------------------------------------------- /test/test_files/test_one_row_of_data.csv: -------------------------------------------------------------------------------- 1 | 1,2,3 -------------------------------------------------------------------------------- /test/test_files/dash_as_null.csv: 
-------------------------------------------------------------------------------- 1 | x,y 2 | 1,2 3 | -,4 -------------------------------------------------------------------------------- /test/test_files/plus_as_null.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 1,2 3 | +,4 -------------------------------------------------------------------------------- /test/test_files/comma_decimal.csv: -------------------------------------------------------------------------------- 1 | x;y 2 | 3,14;1 3 | 1,0;1 -------------------------------------------------------------------------------- /test/test_files/transposed_noheader.csv: -------------------------------------------------------------------------------- 1 | 1,2 2 | 3,4 3 | 5,6 -------------------------------------------------------------------------------- /test/test_files/int8_overflow.csv: -------------------------------------------------------------------------------- 1 | col1 2 | 1 3 | 2 4 | 3 5 | 129 -------------------------------------------------------------------------------- /test/test_files/test_single_column.csv: -------------------------------------------------------------------------------- 1 | col1 2 | 1 3 | 2 4 | 3 -------------------------------------------------------------------------------- /test/test_files/test_mac_line_endings.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 1,2,3 4,5,6 7,8,9 -------------------------------------------------------------------------------- /test/test_files/transposed.csv: -------------------------------------------------------------------------------- 1 | col1,1,2,3 2 | col2,4,5,6 3 | col3,7,8,9 -------------------------------------------------------------------------------- /test/test_files/transposed_extra_newline.csv: -------------------------------------------------------------------------------- 1 | col1,1,2 2 | col2,3,4 3 | -------------------------------------------------------------------------------- /test/test_files/test_basic.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1,2,3 3 | 4,5,6 4 | 7,8,9 -------------------------------------------------------------------------------- /test/test_files/test_no_header.csv: -------------------------------------------------------------------------------- 1 | 1.0,2.0,3.0 2 | 4.0,5.0,6.0 3 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /test/test_files/test_basic_pipe.csv: -------------------------------------------------------------------------------- 1 | col1|col2|col3 2 | 1|2|3 3 | 4|5|6 4 | 7|8|9 -------------------------------------------------------------------------------- /test/test_files/test_dates.csv: -------------------------------------------------------------------------------- 1 | col1 2 | 2015-01-01 3 | 2015-01-02 4 | 2015-01-03 -------------------------------------------------------------------------------- /test/test_files/test_tab_null_empty.txt: -------------------------------------------------------------------------------- 1 | A B C D 2 | 1 2000 x 100 3 | 2 y 200 4 | -------------------------------------------------------------------------------- /test/test_files/test_simple_quoted.csv: -------------------------------------------------------------------------------- 1 | col1,col2 2 | "quoted field 1","quoted field 2" -------------------------------------------------------------------------------- 
/test/test_files/test_tab_null_string.txt: -------------------------------------------------------------------------------- 1 | A B C D 2 | 1 2000 x 100 3 | 2 NULL y 200 4 | -------------------------------------------------------------------------------- /benchmark/.tune.jld: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wikunia/CSV.jl/master/benchmark/.tune.jld -------------------------------------------------------------------------------- /docs/.documenter.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wikunia/CSV.jl/master/docs/.documenter.enc -------------------------------------------------------------------------------- /test/test_files/test_crlf_line_endings.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1,2,3 3 | 4,5,6 4 | 7,8,9 -------------------------------------------------------------------------------- /test/test_files/test_excel_date_formats.csv: -------------------------------------------------------------------------------- 1 | col1 2 | 01/01/2015 3 | 01/02/2015 4 | 01/03/2015 -------------------------------------------------------------------------------- /test/test_files/test_float_in_int_column.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1,2,3 3 | 4,5.4,6 4 | 7,8,9 -------------------------------------------------------------------------------- /test/test_files/test_floats.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1.0,2.0,3.0 3 | 4.0,5.0,6.0 4 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /test/test_files/test_newline_line_endings.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1,2,3 3 | 4,5,6 4 | 7,8,9 -------------------------------------------------------------------------------- /test/test_files/test_null_only_column.csv: -------------------------------------------------------------------------------- 1 | col1,col2 2 | 123,NA 3 | abc,NA 4 | 123abc,NA 5 | -------------------------------------------------------------------------------- /test/test_files/test_utf8.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1.0,2.0,3.0 3 | 4.0,5.0,6.0 4 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | docs/build/ 3 | docs/site/ 4 | *.jl.cov 5 | *.jl.*.cov 6 | *.jl.mem 7 | -------------------------------------------------------------------------------- /test/test_files/test_missing_value.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1.0,2.0,3.0 3 | 4.0,,6.0 4 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /test/test_files/test_windows.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1.0,2.0,3.0 3 | 4.0,5.0,6.0 4 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /test/test_files/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Wikunia/CSV.jl/master/test/test_files/.DS_Store -------------------------------------------------------------------------------- /test/test_files/test_header_on_row_4.csv: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | col1,col2,col3 5 | 1,2,3 6 | 4,5,6 7 | 7,8,9 -------------------------------------------------------------------------------- /test/test_files/test_quoted_delim_and_newline.csv: -------------------------------------------------------------------------------- 1 | col1,col2 2 | "quoted ,field 1","quoted 3 | field 2" -------------------------------------------------------------------------------- /test/test_files/test_utf8_with_BOM.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1.0,2.0,3.0 3 | 4.0,5.0,6.0 4 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /test/test_files/bools.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | true,false,1 3 | false,true,2 4 | true,true,3 5 | false,false,4 -------------------------------------------------------------------------------- /test/test_files/test_empty_file_newlines.csv: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /test/test_files/test_missing_value_NULL.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1.0,2.0,3.0 3 | 4.0,NULL,6.0 4 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /test/test_files/test_quoted_numbers.csv: -------------------------------------------------------------------------------- 1 | col1,col2,"col3" 2 | 123,"1",1 3 | abc,42,42 4 | 123abc,"12",12 5 | -------------------------------------------------------------------------------- /test/test_files/test_basic.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wikunia/CSV.jl/master/test/test_files/test_basic.csv.gz -------------------------------------------------------------------------------- /test/test_files/test_datetimes.csv: -------------------------------------------------------------------------------- 1 | col1 2 | 2015-01-01 00:00:00 3 | 2015-01-02 00:00:01 4 | 2015-01-03 00:12:00.001 -------------------------------------------------------------------------------- /test/test_files/test_header_range.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | sub1,sub2,sub3 3 | part1,part2,part3 4 | 1,2,3 5 | 4,5,6 6 | 7,8,9 -------------------------------------------------------------------------------- /test/test_files/test_mixed_date_formats.csv: -------------------------------------------------------------------------------- 1 | col1 2 | 01/01/2015 3 | 01/02/2015 4 | 01/03/2015 5 | 2015-01-02 6 | 2015-01-03 -------------------------------------------------------------------------------- /test/test_files/test_2_footer_rows.csv: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | col1,col2,col3 5 | 1,2,3 6 | 4,5,6 7 | 7,8,9 8 | 9 | # extra row at bottom -------------------------------------------------------------------------------- /REQUIRE: 
-------------------------------------------------------------------------------- 1 | julia 0.6 2 | DataStreams 0.3.0 3 | DataFrames 0.11.0 4 | WeakRefStrings 0.4.0 5 | CategoricalArrays 0.3.0 6 | Compat 0.41.0 -------------------------------------------------------------------------------- /test/test_files/test_utf16.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1.0,2.0,3.0 3 | 4.0,5.0,6.0 4 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /test/test_files/test_utf16_be.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1.0,2.0,3.0 3 | 4.0,5.0,6.0 4 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /test/test_files/test_utf16_le.csv: -------------------------------------------------------------------------------- 1 | col1,col2,col3 2 | 1.0,2.0,3.0 3 | 4.0,5.0,6.0 4 | 7.0,8.0,9.0 -------------------------------------------------------------------------------- /benchmark/.results/449f7c62ce415c3ecf5adddf115b52a8bff6c6a6.jld: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wikunia/CSV.jl/master/benchmark/.results/449f7c62ce415c3ecf5adddf115b52a8bff6c6a6.jld -------------------------------------------------------------------------------- /test/sink.jl: -------------------------------------------------------------------------------- 1 | @testset "Write to IOBuffer" begin 2 | csv_string = chomp(read(joinpath(dir, "test_basic.csv"), String)) 3 | df = CSV.read(IOBuffer(csv_string)) 4 | io = IOBuffer() 5 | CSV.write(io, df) 6 | written = chomp(String(take!(io))) 7 | @test written == csv_string 8 | end 9 | -------------------------------------------------------------------------------- /docs/make.jl: -------------------------------------------------------------------------------- 1 | using Documenter, CSV 2 | 3 | makedocs( 4 | modules = [CSV], 5 | format = :html, 6 | sitename = "CSV.jl", 7 | pages = ["Home" => "index.md"] 8 | ) 9 | 10 | deploydocs( 11 | repo = "github.com/JuliaData/CSV.jl.git", 12 | target = "build", 13 | deps = nothing, 14 | make = nothing, 15 | julia = "0.6", 16 | osname = "linux" 17 | ) 18 | -------------------------------------------------------------------------------- /docs/build/assets/Documenter.css: -------------------------------------------------------------------------------- 1 | div.wy-menu-vertical ul.current li.toctree-l3 a { 2 | font-weight: bold; 3 | } 4 | 5 | a.documenter-source { 6 | float: right; 7 | } 8 | 9 | .documenter-methodtable pre { 10 | margin-left: 0px; 11 | margin-right: 0px; 12 | margin-top: 0px; 13 | padding: 0px; 14 | } 15 | 16 | .documenter-methodtable pre.documenter-inline { 17 | display: inline; 18 | } 19 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: julia 2 | 3 | sudo: false 4 | 5 | os: 6 | - linux 7 | - osx 8 | 9 | julia: 10 | - 0.6 11 | - nightly 12 | 13 | notifications: 14 | email: false 15 | 16 | after_success: 17 | - julia -e 'cd(Pkg.dir("CSV")); Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())' 18 | - julia -e 'Pkg.add("Documenter")' 19 | - julia -e 'cd(Pkg.dir("CSV")); include(joinpath("docs", "make.jl"))' 20 | -------------------------------------------------------------------------------- 
/docs/src/index.md: -------------------------------------------------------------------------------- 1 | # CSV.jl Documentation 2 | 3 | CSV.jl is built to be a fast and flexible pure-Julia library for handling delimited text files. 4 | 5 | ```@contents 6 | ``` 7 | 8 | ## High-level interface 9 | 10 | ```@docs 11 | CSV.read 12 | CSV.validate 13 | CSV.write 14 | ``` 15 | 16 | ## Lower-level utilities 17 | 18 | ```@docs 19 | CSV.Source 20 | CSV.Sink 21 | CSV.Options 22 | CSV.parsefield 23 | CSV.readline 24 | CSV.readsplitline 25 | CSV.countlines 26 | ``` 27 | -------------------------------------------------------------------------------- /test/validate.jl: -------------------------------------------------------------------------------- 1 | io = IOBuffer("""A,B,C 2 | 1,1,10 3 | 6,1""") 4 | 5 | @test_throws CSV.ExpectedMoreColumnsError CSV.validate(io) 6 | 7 | io = IOBuffer("""A;B;C 8 | 1,1,10 9 | 2,0,16""") 10 | @test_throws CSV.TooManyColumnsError CSV.validate(io) 11 | 12 | io = IOBuffer("""A;B;C 13 | 1,1,10 14 | 2,0,16""") 15 | @test_throws CSV.ExpectedMoreColumnsError CSV.validate(io; delim=';') 16 | 17 | io = IOBuffer("""a b c d e 18 | 1 2 3 4 5 19 | 1 2 3 4 5 20 | 1 2 3 4 5""") 21 | @test_throws CSV.TooManyColumnsError CSV.validate(io; delim=' ') -------------------------------------------------------------------------------- /docs/build/assets/mathjaxhelper.js: -------------------------------------------------------------------------------- 1 | MathJax.Hub.Config({ 2 | "tex2jax": { 3 | inlineMath: [['$','$'], ['\\(','\\)']], 4 | processEscapes: true 5 | } 6 | }); 7 | MathJax.Hub.Config({ 8 | config: ["MMLorHTML.js"], 9 | jax: [ 10 | "input/TeX", 11 | "output/HTML-CSS", 12 | "output/NativeMML" 13 | ], 14 | extensions: [ 15 | "MathMenu.js", 16 | "MathZoom.js", 17 | "TeX/AMSmath.js", 18 | "TeX/AMSsymbols.js", 19 | "TeX/autobold.js", 20 | "TeX/autoload-all.js" 21 | ] 22 | }); 23 | MathJax.Hub.Config({ 24 | TeX: { equationNumbers: { autoNumber: "AMS" } } 25 | }); 26 | -------------------------------------------------------------------------------- /test/runtests.jl: -------------------------------------------------------------------------------- 1 | using Missings 2 | using CSV 3 | using DataStreams, WeakRefStrings, CategoricalArrays 4 | using DataFrames 5 | @static if VERSION < v"0.7.0-DEV.2005" 6 | using Base.Test 7 | else 8 | using Test 9 | end 10 | if VERSION < v"0.7.0-DEV.2575" 11 | using Base.Dates 12 | else 13 | using Dates 14 | end 15 | 16 | const dir = joinpath(dirname(@__FILE__),"test_files/") 17 | # dir = joinpath(Pkg.dir("CSV"), "test/test_files") 18 | 19 | @testset "CSV" begin 20 | 21 | include("parsefields.jl") 22 | include("io.jl") 23 | 24 | include("source.jl") 25 | include("sink.jl") 26 | include("multistream.jl") 27 | include("validate.jl") 28 | 29 | end 30 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: CSV.jl 2 | repo_url: https://github.com/JuliaData/CSV.jl 3 | site_description: Utility library for working with CSV and other delimited files in the Julia programming language 4 | site_author: Jacob Quinn 5 | 6 | theme: material 7 | 8 | extra: 9 | palette: 10 | primary: 'indigo' 11 | accent: 'blue' 12 | 13 | extra_css: 14 | - assets/Documenter.css 15 | 16 | extra_javascript: 17 | - https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML 18 | - assets/mathjaxhelper.js 19 | 20 | markdown_extensions: 21 | - 
extra 22 | - tables 23 | - fenced_code 24 | - mdx_math 25 | 26 | docs_dir: 'build' 27 | 28 | pages: 29 | - Home: index.md 30 | -------------------------------------------------------------------------------- /test/test_files/census.txt: -------------------------------------------------------------------------------- 1 | GEOID POP10 HU10 ALAND AWATER ALAND_SQMI AWATER_SQMI INTPTLAT INTPTLONG 2 | 00601 18570 7744 166659789 799296 64.348 0.309 18.180555 -66.749961 3 | 00602 41520 18073 79288158 4446273 30.613 1.717 18.362268 -67.176130 4 | 00603 54689 25653 81880442 183425 31.614 0.071 18.455183 -67.119887 -------------------------------------------------------------------------------- /test/test_files/stocks.csv: -------------------------------------------------------------------------------- 1 | Stock Name,Company Name 2 | AXP,American Express Co 3 | BA,Boeing Co 4 | CAT,Caterpillar Inc 5 | CSC, Cisco Systems Inc 6 | CVX,Chevron Corp 7 | DD,Dupont E I De Nemours & Co 8 | DIS,Walt Disney Co 9 | GE,General Electric Co 10 | GS,Goldman Sachs Group Inc 11 | HD,Home Depot Inc 12 | IBM,International Business Machines Co... 13 | INTC,Intel Corp 14 | JNJ,Johnson & Johnson 15 | JPM,JPMorgan Chase and Co 16 | KO,The Coca-Cola Co 17 | MCD,McDonald's Corp 18 | MMM,3M Co 19 | MRK,Merck & Co Inc 20 | MSFT,Microsoft Corp 21 | NKE,Nike Inc 22 | PFE,Pfizer Inc 23 | PG,Procter & Gamble Co 24 | T,AT&T Inc 25 | TRV,Travelers Companies Inc 26 | UNH,UnitedHealth Group Inc 27 | UTX,United Technologies Corp 28 | V,Visa Inc 29 | VZ,Verizon Communications Inc 30 | WMT,Wal-Mart Stores Inc 31 | XOM,Exxon Mobil Corp -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The CSV.jl package is licensed under the MIT "Expat" License: 2 | 3 | > Copyright (c) 2015: Jacob Quinn. 4 | > 5 | > Permission is hereby granted, free of charge, to any person obtaining 6 | > a copy of this software and associated documentation files (the 7 | > "Software"), to deal in the Software without restriction, including 8 | > without limitation the rights to use, copy, modify, merge, publish, 9 | > distribute, sublicense, and/or sell copies of the Software, and to 10 | > permit persons to whom the Software is furnished to do so, subject to 11 | > the following conditions: 12 | > 13 | > The above copyright notice and this permission notice shall be 14 | > included in all copies or substantial portions of the Software. 15 | > 16 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | > EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | > MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | > IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | > CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | > TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | > SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /test/datastreams.jl: -------------------------------------------------------------------------------- 1 | reload("Nulls"); reload("WeakRefStrings"); reload("DataStreams"); reload("CSV"); reload("DataStreamsIntegrationTests") 2 | # NamedTuples 3 | FILE = joinpath(DataStreamsIntegrationTests.DSTESTDIR, "randoms_small.csv") 4 | DF = CSV.read(FILE) 5 | DF2 = CSV.read(FILE) 6 | dfsource = DataStreamsIntegrationTests.Tester("NamedTuple", x->x, false, NamedTuple, (DF,), DataStreamsIntegrationTests.scalartransforms, DataStreamsIntegrationTests.vectortransforms, x->x, x->nothing) 7 | dfsink = DataStreamsIntegrationTests.Tester("NamedTuple", x->x, false, NamedTuple, (DF2,), DataStreamsIntegrationTests.scalartransforms, DataStreamsIntegrationTests.vectortransforms, x->x, x->nothing) 8 | 9 | # CSV 10 | FILE2 = joinpath(DataStreamsIntegrationTests.DSTESTDIR, "randoms2_small.csv") 11 | csvsource = DataStreamsIntegrationTests.Tester("CSV.Source", CSV.read, true, CSV.Source, (FILE,), DataStreamsIntegrationTests.scalartransforms, DataStreamsIntegrationTests.vectortransforms, x->x, x->nothing) 12 | csvsink = DataStreamsIntegrationTests.Tester("CSV.Sink", CSV.write, true, CSV.Sink, (FILE2,), DataStreamsIntegrationTests.scalartransforms, DataStreamsIntegrationTests.vectortransforms, x->CSV.read(FILE2; use_mmap=false), x->rm(FILE2)) 13 | 14 | DataStreamsIntegrationTests.teststream([dfsource, csvsource], [dfsink]; rows=99) 15 | -------------------------------------------------------------------------------- /test/multistream.jl: -------------------------------------------------------------------------------- 1 | # Previous versions assumed that nb_available could accurately check for an empty CSV, but 2 | # this doesn't work reliably for streams because nb_available only checks buffered bytes 3 | # (see issue #77). This test verifies that even when nb_available would return 0 on a stream 4 | # the full stream is still read. 
5 | 6 | mutable struct MultiStream{S<:IO} <: IO 7 | streams::Array{S} 8 | index::Int 9 | end 10 | 11 | function MultiStream(streams::AbstractArray{S}) where {S <: IO} 12 | MultiStream(streams, 1) 13 | end 14 | 15 | function refill(s::MultiStream) 16 | while eof(s.streams[s.index]) && s.index < length(s.streams) 17 | close(s.streams[s.index]) 18 | s.index += 1 19 | end 20 | end 21 | 22 | function Base.close(s::MultiStream) 23 | for i in s.index:length(s.streams) 24 | close(s.streams[i]) 25 | end 26 | s.index = length(s.streams) 27 | nothing 28 | end 29 | 30 | function Base.eof(s::MultiStream) 31 | eof(s.streams[s.index]) && s.index == length(s.streams) 32 | end 33 | 34 | function Base.read(s::MultiStream, ::Type{UInt8}) 35 | refill(s) 36 | read(s.streams[s.index], UInt8)::UInt8 37 | end 38 | 39 | function Base.nb_available(s::MultiStream) 40 | nb_available(s.streams[s.index]) 41 | end 42 | 43 | stream = MultiStream( 44 | [IOBuffer(""), IOBuffer("a,b,c\n1,2,3\n"), IOBuffer(""), IOBuffer("4,5,6")] 45 | ) 46 | 47 | @test nb_available(stream) == 0 48 | @test CSV.read(stream) == CSV.read(IOBuffer("a,b,c\n1,2,3\n4,5,6")) 49 | -------------------------------------------------------------------------------- /test/test_files/double_quote_quotechar_and_escapechar.csv: -------------------------------------------------------------------------------- 1 | APINo,FileNo,CurrentWellName,LeaseName,OriginalWellName 2 | 3.3101E+13,1,BLUM 1,BLUM,PIONEER OIL & GAS #1 3 | 3.3001E+13,2,DAVIS WELL 1,DAVIS WELL,DAVIS WELL #1 4 | 3.3009E+13,3,GREAT NORTH. O AND G PIPELINE CO. 1,GREAT NORTH. O AND G PIPELINE CO.,GREAT NORTHERN OIL & GAS PIPELINE #1 5 | 3.3043E+13,4,ROBINSON PATD LAND 1,ROBINSON PATD LAND,ROBINSON PAT'D LAND #1 6 | 3.3031E+13,5,GLENFIELD OIL COMPANY 1,GLENFIELD OIL COMPANY,GLENFIELD OIL COMPANY #1 7 | 3.3023E+13,6,NORTHWEST OIL CO. 1,NORTHWEST OIL CO.,#1 8 | 3.3055E+13,7,OIL SYNDICATE 1,OIL SYNDICATE,H. HANSON OIL SYNDICATE #1 9 | 3.3043E+13,8,ARMSTRONG 1,ARMSTRONG,ARMSTRONG #1 10 | 3.3075E+13,9,GEHRINGER 1,GEHRINGER,GEHRINGER #1 11 | 3.3101E+13,10,PETROLEUM CO. 1,PETROLEUM CO.,VELVA PETROLEUM CO. #1 12 | 3.3047E+13,11,BURNSTAD 1,BURNSTAD,BURNSTAD #1 13 | 3.3105E+13,12,OIL COMPANY 1,OIL COMPANY,BIG VIKING #1 14 | 3.3105E+13,13,NELS KAMP 1,NELS KAMP,NELS KAMP #1 15 | 3.3059E+13,14,EXPLORATION-NORTH DAKOTA 1,EXPLORATION-NORTH DAKOTA,EXPLORATION-NORTH DAKOTA #1 16 | 3.3065E+13,15,WACHTER 16-18,WACHTER,E. L. SEMLING #1 17 | 3.3029E+13,16,FRANKLIN INVESTMENT CO. 1,FRANKLIN INVESTMENT CO.,FRANKLIN INVESTMENT CO. #1 18 | 3.3077E+13,17,RUDDY BROS 1,RUDDY BROS,RUDDY BROS #1 19 | 3.3101E+13,18,J. H. KLINE 1,J. H. KLINE,J. H. KLINE #1 20 | 3.3015E+13,19,STRATIGRAPHIC TEST 1,STRATIGRAPHIC TEST,STRATIGRAPHIC TEST #1 21 | 3.3071E+13,20,AANSTAD STRATIGRAPHIC TEST 1,AANSTAD STRATIGRAPHIC TEST,AANSTAD STRATIGRAPHIC TEST #1 22 | 3.3057E+13,21,FRITZ LEUTZ 1,FRITZ LEUTZ,FRITZ LEUTZ #1 23 | 3.3055E+13,22,VAUGHN HANSON 1,VAUGHN HANSON,VAUGHN HANSON #1 24 | 3.3029E+13,23,J. J. WEBER 1,J. J. WEBER,J. J. 
WEBER #1 25 | 3.3043E+13,24,NORTH DAKOTA STATE A 1,NORTH DAKOTA STATE A,"NORTH DAKOTA STATE ""A"" #1" 26 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | matrix: 3 | - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" 4 | - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" 5 | - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" 6 | - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" 7 | 8 | branches: 9 | only: 10 | - master 11 | - /release-.*/ 12 | 13 | notifications: 14 | - provider: Email 15 | on_build_success: false 16 | on_build_failure: false 17 | on_build_status_changed: false 18 | 19 | install: 20 | - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12" 21 | # if there's a newer build queued for the same PR, cancel this one 22 | - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod ` 23 | https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | ` 24 | Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { ` 25 | throw "There are newer queued builds for this pull request, failing early." } 26 | # Download most recent Julia Windows binary 27 | - ps: (new-object net.webclient).DownloadFile( 28 | $env:JULIA_URL, 29 | "C:\projects\julia-binary.exe") 30 | # Run installer silently, output to C:\projects\julia 31 | - C:\projects\julia-binary.exe /S /D=C:\projects\julia 32 | 33 | build_script: 34 | # Need to convert from shallow to complete for Pkg.clone to work 35 | - IF EXIST .git\shallow (git fetch --unshallow) 36 | - C:\projects\julia\bin\julia -e "versioninfo(); 37 | Pkg.clone(pwd(), \"CSV\"); Pkg.build(\"CSV\")" 38 | 39 | test_script: 40 | - C:\projects\julia\bin\julia -e "Pkg.test(\"CSV\")" 41 | -------------------------------------------------------------------------------- /test/test_files/baseball.csv: -------------------------------------------------------------------------------- 1 | Rk,Year,Age,Tm,Lg,,W,L,W-L%,G,Finish,Wpost,Lpost,W-L%post, 2 | 1,1978,37,Atlanta Braves,NL,,69,93,.426,162,6,0,0,, 3 | 2,1979,38,Atlanta Braves,NL,,66,94,.413,160,6,0,0,, 4 | 3,1980,39,Atlanta Braves,NL,,81,80,.503,161,4,0,0,, 5 | 4,1981,40,Atlanta Braves,NL,,25,29,.463,55,4,0,0,,First half of season 6 | 5,1981,40,Atlanta Braves,NL,,25,27,.481,52,5,0,0,,Second half of season 7 | ,,,,,,,,,,,,,, 8 | 6,1982,41,Toronto Blue Jays,AL,,78,84,.481,162,6,0,0,, 9 | 7,1983,42,Toronto Blue Jays,AL,,89,73,.549,162,4,0,0,, 10 | 8,1984,43,Toronto Blue Jays,AL,,89,73,.549,163,2,0,0,, 11 | 9,1985,44,Toronto Blue Jays,AL,,99,62,.615,161,1,3,4,.429, 12 | ,,,,,,,,,,,,,, 13 | 10,1990,49,Atlanta Braves,NL,2nd of 2,40,57,.412,97,6,0,0,, 14 | 11,1991,50,Atlanta Braves,NL,,94,68,.580,162,1,7,7,.500,NL Pennant 15 | 12,1992,51,Atlanta Braves,NL,,98,64,.605,162,1,6,7,.462,NL Pennant 16 | 13,1993,52,Atlanta Braves,NL,,104,58,.642,162,1,2,4,.333, 17 | 14,1994,53,Atlanta Braves,NL,,68,46,.596,114,2,0,0,, 18 | 15,1995,54,Atlanta Braves,NL,,90,54,.625,144,1,11,3,.786,WS Champs 19 | 16,1996,55,Atlanta Braves,NL,,96,66,.593,162,1,9,7,.562,NL Pennant 20 | 17,1997,56,Atlanta Braves,NL,,101,61,.623,162,1,5,4,.556, 21 | 
18,1998,57,Atlanta Braves,NL,,106,56,.654,162,1,5,4,.556, 22 | 19,1999,58,Atlanta Braves,NL,,103,59,.636,162,1,7,7,.500,NL Pennant 23 | 20,2000,59,Atlanta Braves,NL,,95,67,.586,162,1,0,3,.000, 24 | 21,2001,60,Atlanta Braves,NL,,88,74,.543,162,1,4,4,.500, 25 | 22,2002,61,Atlanta Braves,NL,,101,59,.631,161,1,2,3,.400, 26 | 23,2003,62,Atlanta Braves,NL,,101,61,.623,162,1,2,3,.400, 27 | 24,2004,63,Atlanta Braves,NL,,96,66,.593,162,1,2,3,.400, 28 | 25,2005,64,Atlanta Braves,NL,,90,72,.556,162,1,1,3,.250, 29 | 26,2006,65,Atlanta Braves,NL,,79,83,.488,162,3,0,0,, 30 | 27,2007,66,Atlanta Braves,NL,,84,78,.519,162,3,0,0,, 31 | 28,2008,67,Atlanta Braves,NL,,72,90,.444,162,4,0,0,, 32 | 29,2009,68,Atlanta Braves,NL,,86,76,.531,162,3,0,0,, 33 | 30,2010,69,Atlanta Braves,NL,,91,71,.562,162,2,1,3,.250, 34 | ,,,Toronto Blue Jays,,4 years,355,292,.549,648,3.3,3,4,.429, 35 | ,,,Atlanta Braves,,25 years,2149,1709,.557,3860,2.4,64,65,.496,5 Pennants and 1 World Series Title 36 | ,,,,,29 years,2504,2001,.556,4508,2.5,67,69,.493,5 Pennants and 1 World Series Title -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # CSV 3 | 4 | *A fast, flexible delimited file reader/writer for Julia.* 5 | 6 | | **Documentation** | **PackageEvaluator** | **Build Status** | 7 | |:-------------------------------------------------------------------------------:|:---------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------:| 8 | | [![][docs-stable-img]][docs-stable-url] [![][docs-latest-img]][docs-latest-url] | [![][pkg-0.6-img]][pkg-0.6-url] | [![][travis-img]][travis-url] [![][appveyor-img]][appveyor-url] [![][codecov-img]][codecov-url] | 9 | 10 | 11 | ## Installation 12 | 13 | The package is registered in `METADATA.jl` and so can be installed with `Pkg.add`. 14 | 15 | ```julia 16 | julia> Pkg.add("CSV") 17 | ``` 18 | 19 | ## Documentation 20 | 21 | - [**STABLE**][docs-stable-url] — **most recently tagged version of the documentation.** 22 | - [**LATEST**][docs-latest-url] — *in-development version of the documentation.* 23 | 24 | ## Project Status 25 | 26 | The package is tested against Julia `0.6` and nightly on Linux, OS X, and Windows. 27 | 28 | ## Contributing and Questions 29 | 30 | Contributions are very welcome, as are feature requests and suggestions. Please open an 31 | [issue][issues-url] if you encounter any problems or would just like to ask a question. 
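
## Usage

A minimal usage sketch; the file names below are placeholders, and the keyword arguments are the ones documented on `CSV.read`/`CSV.write` in this package:

```julia
using CSV

# read a delimited file into a DataFrame (the default sink)
df = CSV.read("data.csv")

# write it back out, this time tab-delimited, with missing values written as "NA"
CSV.write("data_out.csv", df; delim='\t', null="NA")
```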
32 | 33 | 34 | 35 | [docs-latest-img]: https://img.shields.io/badge/docs-latest-blue.svg 36 | [docs-latest-url]: https://JuliaData.github.io/CSV.jl/latest 37 | 38 | [docs-stable-img]: https://img.shields.io/badge/docs-stable-blue.svg 39 | [docs-stable-url]: https://JuliaData.github.io/CSV.jl/stable 40 | 41 | [travis-img]: https://travis-ci.org/JuliaData/CSV.jl.svg?branch=master 42 | [travis-url]: https://travis-ci.org/JuliaData/CSV.jl 43 | 44 | [appveyor-img]: https://ci.appveyor.com/api/projects/status/wcm124d03d2ey5o2?svg=true 45 | [appveyor-url]: https://ci.appveyor.com/project/quinnj/csv-jl-groth 46 | 47 | [codecov-img]: https://codecov.io/gh/JuliaData/CSV.jl/branch/master/graph/badge.svg 48 | [codecov-url]: https://codecov.io/gh/JuliaData/CSV.jl 49 | 50 | [issues-url]: https://github.com/JuliaData/CSV.jl/issues 51 | 52 | [pkg-0.6-img]: http://pkg.julialang.org/badges/CSV_0.6.svg 53 | [pkg-0.6-url]: http://pkg.julialang.org/?pkg=CSV 54 | -------------------------------------------------------------------------------- /src/validate.jl: -------------------------------------------------------------------------------- 1 | # ensure each cell's value is valid for the column's detected type 2 | # ensure each row has exactly as many values as expected from detection 3 | 4 | text(state::P) = state[] == Delimiter ? "delimiter" : state[] == Newline ? "newline" : "end-of-file (EOF)" 5 | 6 | struct ExpectedMoreColumnsError <: Exception 7 | msg::String 8 | end 9 | 10 | struct TooManyColumnsError <: Exception 11 | msg::String 12 | end 13 | 14 | """ 15 | `CSV.validate(fullpath::Union{AbstractString,IO}, sink::Type{T}=DataFrame, args...; kwargs...)` => `typeof(sink)` 16 | 17 | `CSV.validate(fullpath::Union{AbstractString,IO}, sink::Data.Sink; kwargs...)` => `Data.Sink` 18 | 19 | Takes the same positional & keyword arguments as [`CSV.read`](@ref), but provides detailed information as to why reading a CSV file failed. Useful for cases where reading fails and it's not clear whether it's due to a row having too many columns, a value of the wrong type, or some other malformation. 20 | 21 | """ 22 | function validate end 23 | 24 | function validate(s::CSV.Source) 25 | sch = Data.schema(s) # size, header, types 26 | rows, cols = size(sch) 27 | types = Data.types(sch) 28 | state = P() 29 | for row = 1:rows 30 | rowstr = "" 31 | for col = 1:cols 32 | v = CSV.parsefield(s.io, types[col], s.options, row, col, state) 33 | rowstr *= "$(col == 1 ? "" : Char(s.options.delim))$v" 34 | if col < cols 35 | if state[] != Delimiter 36 | throw(ExpectedMoreColumnsError("row=$row, col=$col: expected $cols columns, parsed $col, but parsing encountered unexpected $(text(state)); parsed row: '$rowstr'")) 37 | end 38 | else 39 | if state[] != Newline && state[] != EOF 40 | throw(TooManyColumnsError("row=$row, col=$col: expected $cols columns then a newline or EOF, but parsing encountered another $(text(state)): '$(Char(s.options.delim))'; parsed row: '$rowstr'")) 41 | end 42 | end 43 | end 44 | end 45 | end 46 | 47 | function validate(fullpath::Union{AbstractString,IO}, sink::Type=DataFrame, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}(), kwargs...) 48 | validate(Source(fullpath; kwargs...)) 49 | return 50 | end 51 | 52 | function validate(fullpath::Union{AbstractString,IO}, sink::T; append::Bool=false, transforms::Dict=Dict{Int,Function}(), kwargs...)
where {T} 53 | validate(Source(fullpath; kwargs...)) 54 | return 55 | end -------------------------------------------------------------------------------- /test/io.jl: -------------------------------------------------------------------------------- 1 | # `CSV.readline(io::IO, q='"', e='\\', buf::IOBuffer=IOBuffer())` => `String` 2 | str = "field1,field2,\"quoted \\\"field with \n embedded newline\",field3" 3 | io = IOBuffer(str) 4 | @test CSV.readline(io) == str 5 | io = IOBuffer(str * "\n" * str * "\r\n" * str) 6 | @test CSV.readline(io) == str * "\n" 7 | @test CSV.readline(io) == str * "\r\n" 8 | @test CSV.readline(io) == str 9 | 10 | # `CSV.readline(source::CSV.Source)` => `String` 11 | strsource = CSV.Source(IOBuffer(str); header=["col1","col2","col3","col4"]) 12 | @test CSV.readline(strsource) == str 13 | 14 | # `CSV.readsplitline(io, d=',', q='"', e='\\', buf::IOBuffer=IOBuffer())` => `Vector{String}` 15 | spl = [CSV.RawField("field1", false), 16 | CSV.RawField("field2", false), 17 | CSV.RawField("quoted \\\"field with \n embedded newline", true), 18 | CSV.RawField("field3", false)] 19 | io = IOBuffer(str) 20 | @test CSV.readsplitline(io) == spl 21 | io = IOBuffer(str * "\n" * str * "\r\n" * str) 22 | @test CSV.readsplitline(io) == spl 23 | @test CSV.readsplitline(io) == spl 24 | @test CSV.readsplitline(io) == spl 25 | 26 | @testset "empty fields" begin 27 | str2 = "field1,,\"\",field3," 28 | spl2 = [CSV.RawField("field1", false), 29 | CSV.RawField("", false), 30 | CSV.RawField("", true), 31 | CSV.RawField("field3", false), 32 | CSV.RawField("", false)] 33 | ioo = IOBuffer(str2) 34 | @test CSV.readsplitline(ioo) == spl2 35 | end 36 | 37 | # `CSV.readsplitline(source::CSV.Source)` => `Vector{String}` 38 | strsource = CSV.Source(IOBuffer(str); header=["col1","col2","col3","col4"]) 39 | @test CSV.readsplitline(strsource) == spl 40 | 41 | # `CSV.countlines(io::IO, quotechar, escapechar)` => `Int` 42 | @test CSV.countlines(IOBuffer(str)) == 1 43 | @test CSV.countlines(IOBuffer(str * "\n" * str)) == 2 44 | 45 | # `CSV.countlines(source::CSV.Source)` => `Int` 46 | intsource = CSV.Source(IOBuffer(str); header=["col1","col2","col3","col4"]) 47 | @test CSV.countlines(intsource) == 1 48 | 49 | @testset "misformatted CSV lines" begin 50 | @testset "missing quote" begin 51 | str1 = "field1,field2,\"quoted \\\"field with \n embedded newline,field3" 52 | io2 = IOBuffer(str1) 53 | @test_throws CSV.ParsingException CSV.readsplitline(io2) 54 | end 55 | 56 | @testset "misplaced quote" begin 57 | str1 = "fi\"eld1\",field2,\"quoted \\\"field with \n embedded newline\",field3" 58 | io2 = IOBuffer(str1) 59 | @test_throws CSV.ParsingException CSV.readsplitline(io2) 60 | 61 | str2 = "field1,field2,\"quoted \\\"field with \n\"\" embedded newline\",field3" 62 | io2 = IOBuffer(str2) 63 | @test_throws CSV.ParsingException CSV.readsplitline(io2) 64 | 65 | str3 = "\"field\"1,field2,\"quoted \\\"field with \n embedded newline\",field3" 66 | io2 = IOBuffer(str3) 67 | @test_throws CSV.ParsingException CSV.readsplitline(io2) 68 | end 69 | end 70 | -------------------------------------------------------------------------------- /test/test_files/attenu.csv: -------------------------------------------------------------------------------- 1 | "Event","Mag","Station","Dist","Accel" 2 | 1,7.0,"117",12.0,0.359 3 | 2,7.4,"1083",148.0,0.014 4 | 2,7.4,"1095",42.0,0.196 5 | 2,7.4,"283",85.0,0.135 6 | 2,7.4,"135",107.0,0.062 7 | 2,7.4,"475",109.0,0.054 8 | 2,7.4,"113",156.0,0.014 9 | 2,7.4,"1008",224.0,0.018 10 | 
2,7.4,"1028",293.0,0.01 11 | 2,7.4,"2001",359.0,0.004 12 | 2,7.4,"117",370.0,0.004 13 | 3,5.3,"1117",8.0,0.127 14 | 4,6.1,"1438",16.1,0.411 15 | 4,6.1,"1083",63.6,0.018 16 | 4,6.1,"1013",6.6,0.509 17 | 4,6.1,"1014",9.3,0.467 18 | 4,6.1,"1015",13.0,0.279 19 | 4,6.1,"1016",17.3,0.072 20 | 4,6.1,"1095",105.0,0.012 21 | 4,6.1,"1011",112.0,0.006 22 | 4,6.1,"1028",123.0,0.003 23 | 5,6.6,"270",105.0,0.018 24 | 5,6.6,"280",122.0,0.048 25 | 5,6.6,"116",141.0,0.011 26 | 5,6.6,"266",200.0,0.007 27 | 5,6.6,"117",45.0,0.142 28 | 5,6.6,"113",130.0,0.031 29 | 5,6.6,"112",147.0,0.006 30 | 5,6.6,"130",187.0,0.01 31 | 5,6.6,"475",197.0,0.01 32 | 5,6.6,"269",203.0,0.006 33 | 5,6.6,"135",211.0,0.013 34 | 6,5.6,"1093",62.0,0.005 35 | 7,5.7,"1093",62.0,0.003 36 | 8,5.3,"111",19.0,0.086 37 | 8,5.3,"116",21.0,0.179 38 | 8,5.3,"290",13.0,0.205 39 | 8,5.3,"112",22.0,0.073 40 | 8,5.3,"113",29.0,0.045 41 | 9,6.6,"128",17.0,0.374 42 | 9,6.6,"126",19.6,0.2 43 | 9,6.6,"127",20.2,0.147 44 | 9,6.6,"141",21.1,0.188 45 | 9,6.6,"266",21.9,0.204 46 | 9,6.6,"110",24.2,0.335 47 | 9,6.6,"1027",66.0,0.057 48 | 9,6.6,"111",87.0,0.021 49 | 9,6.6,"125",23.4,0.152 50 | 9,6.6,"135",24.6,0.217 51 | 9,6.6,"475",25.7,0.114 52 | 9,6.6,"262",28.6,0.15 53 | 9,6.6,"269",37.4,0.148 54 | 9,6.6,"1052",46.7,0.112 55 | 9,6.6,"411",56.9,0.043 56 | 9,6.6,"290",60.7,0.057 57 | 9,6.6,"130",61.4,0.03 58 | 9,6.6,"272",62.0,0.027 59 | 9,6.6,"1096",64.0,0.028 60 | 9,6.6,"1102",82.0,0.034 61 | 9,6.6,"112",88.0,0.03 62 | 9,6.6,"113",91.0,0.039 63 | 10,5.3,"1028",31.0,0.03 64 | 11,7.7,"2714",45.0,0.11 65 | 11,7.7,"2708",145.0,0.01 66 | 11,7.7,"2715",300.0,0.01 67 | 12,6.2,"3501",5.0,0.39 68 | 13,5.6,"655",50.0,0.031 69 | 13,5.6,"272",16.0,0.13 70 | 14,5.2,"1032",17.0,0.011 71 | 14,5.2,"1377",8.0,0.12 72 | 14,5.2,"1028",10.0,0.17 73 | 14,5.2,"1250",10.0,0.14 74 | 15,6.0,"1051",8.0,0.11 75 | 15,6.0,"1293",32.0,0.04 76 | 15,6.0,"1291",30.0,0.07 77 | 15,6.0,"1292",31.0,0.08 78 | 16,5.1,"283",2.9,0.21 79 | 16,5.1,"885",3.2,0.39 80 | 16,5.1,"NA",7.6,0.28 81 | 17,7.6,"2734",25.4,0.16 82 | 17,7.6,"NA",32.9,0.064 83 | 17,7.6,"2728",92.2,0.09 84 | 18,5.8,"1413",1.2,0.42 85 | 18,5.8,"1445",1.6,0.23 86 | 18,5.8,"1408",9.1,0.13 87 | 18,5.8,"1411",3.7,0.26 88 | 18,5.8,"1410",5.3,0.27 89 | 18,5.8,"1409",7.4,0.26 90 | 18,5.8,"1377",17.9,0.11 91 | 18,5.8,"1492",19.2,0.12 92 | 18,5.8,"1251",23.4,0.038 93 | 18,5.8,"1422",30.0,0.044 94 | 18,5.8,"1376",38.9,0.046 95 | 19,6.5,"NA",23.5,0.17 96 | 19,6.5,"286",26.0,0.21 97 | 19,6.5,"NA",0.5,0.32 98 | 19,6.5,"5028",0.6,0.52 99 | 19,6.5,"942",1.3,0.72 100 | 19,6.5,"NA",1.4,0.32 101 | 19,6.5,"5054",2.6,0.81 102 | 19,6.5,"958",3.8,0.64 103 | 19,6.5,"952",4.0,0.56 104 | 19,6.5,"5165",5.1,0.51 105 | 19,6.5,"117",6.2,0.4 106 | 19,6.5,"955",6.8,0.61 107 | 19,6.5,"5055",7.5,0.26 108 | 19,6.5,"NA",7.6,0.24 109 | 19,6.5,"NA",8.4,0.46 110 | 19,6.5,"5060",8.5,0.22 111 | 19,6.5,"412",8.5,0.23 112 | 19,6.5,"5053",10.6,0.28 113 | 19,6.5,"5058",12.6,0.38 114 | 19,6.5,"5057",12.7,0.27 115 | 19,6.5,"NA",12.9,0.31 116 | 19,6.5,"5051",14.0,0.2 117 | 19,6.5,"NA",15.0,0.11 118 | 19,6.5,"5115",16.0,0.43 119 | 19,6.5,"NA",17.7,0.27 120 | 19,6.5,"931",18.0,0.15 121 | 19,6.5,"5056",22.0,0.15 122 | 19,6.5,"5059",22.0,0.15 123 | 19,6.5,"5061",23.0,0.13 124 | 19,6.5,"NA",23.2,0.19 125 | 19,6.5,"5062",29.0,0.13 126 | 19,6.5,"5052",32.0,0.066 127 | 19,6.5,"NA",32.7,0.35 128 | 19,6.5,"724",36.0,0.1 129 | 19,6.5,"NA",43.5,0.16 130 | 19,6.5,"5066",49.0,0.14 131 | 19,6.5,"5050",60.0,0.049 132 | 19,6.5,"2316",64.0,0.034 133 | 20,5.0,"5055",7.5,0.264 134 | 
20,5.0,"942",8.8,0.263 135 | 20,5.0,"5028",8.9,0.23 136 | 20,5.0,"5165",9.4,0.147 137 | 20,5.0,"952",9.7,0.286 138 | 20,5.0,"958",9.7,0.157 139 | 20,5.0,"955",10.5,0.237 140 | 20,5.0,"117",10.5,0.133 141 | 20,5.0,"412",12.0,0.055 142 | 20,5.0,"5053",12.2,0.097 143 | 20,5.0,"5054",12.8,0.129 144 | 20,5.0,"5058",14.6,0.192 145 | 20,5.0,"5057",14.9,0.147 146 | 20,5.0,"5115",17.6,0.154 147 | 20,5.0,"5056",23.9,0.06 148 | 20,5.0,"5060",25.0,0.057 149 | 21,5.8,"1030",10.8,0.12 150 | 21,5.8,"1418",15.7,0.154 151 | 21,5.8,"1383",16.7,0.052 152 | 21,5.8,"1308",20.8,0.045 153 | 21,5.8,"1298",28.5,0.086 154 | 21,5.8,"1299",33.1,0.056 155 | 21,5.8,"1219",40.3,0.065 156 | 22,5.5,"NA",4.0,0.259 157 | 22,5.5,"NA",10.1,0.267 158 | 22,5.5,"1030",11.1,0.071 159 | 22,5.5,"1418",17.7,0.275 160 | 22,5.5,"1383",22.5,0.058 161 | 22,5.5,"NA",26.5,0.026 162 | 22,5.5,"1299",29.0,0.039 163 | 22,5.5,"1308",30.9,0.112 164 | 22,5.5,"1219",37.8,0.065 165 | 22,5.5,"1456",48.3,0.026 166 | 23,5.3,"5045",5.8,0.123 167 | 23,5.3,"5044",12.0,0.133 168 | 23,5.3,"5160",12.1,0.073 169 | 23,5.3,"5043",20.5,0.097 170 | 23,5.3,"5047",20.5,0.096 171 | 23,5.3,"c168",25.3,0.23 172 | 23,5.3,"5068",35.9,0.082 173 | 23,5.3,"c118",36.1,0.11 174 | 23,5.3,"5042",36.3,0.11 175 | 23,5.3,"5067",38.5,0.094 176 | 23,5.3,"5049",41.4,0.04 177 | 23,5.3,"c204",43.6,0.05 178 | 23,5.3,"5070",44.4,0.022 179 | 23,5.3,"c266",46.1,0.07 180 | 23,5.3,"c203",47.1,0.08 181 | 23,5.3,"5069",47.7,0.033 182 | 23,5.3,"5073",49.2,0.017 183 | 23,5.3,"5072",53.1,0.022 184 | -------------------------------------------------------------------------------- /benchmark/randoms_small.csv: -------------------------------------------------------------------------------- 1 | id,firstname,lastname,salary,hourlyrate,hiredate,lastclockin 2 | 1,Lawrence,Powell,87216.81,26.47,2002-04-09,2002-01-17T21:32:00 3 | 2,Benjamin,Chavez,57043.38,39.44,2011-07-07,2000-09-25T06:36:00 4 | 3,Wayne,Burke,46134.09,33.8,2016-02-19,2002-09-13T08:28:00 5 | 4,Sean,Richards,45046.21,15.64,2000-11-24,2011-07-10T11:24:00 6 | 5,Charles,Long,30555.6,17.67,2002-01-05,2003-02-11T11:43:00 7 | 6,Linda,Rose,88894.06,34.6,2008-05-15,2016-01-21T06:32:00 8 | 7,Steve,Gardner,32414.46,36.39,2006-03-21,2004-01-12T12:36:00 9 | 8,Jacqueline,Roberts,54839.54,26.27,, 10 | 9,Tammy,Reynolds,62300.64,37.67,2000-06-09,2006-12-30T09:48:00 11 | 10,Nicholas,Ramos,57661.69,21.37,2002-09-20,2016-04-07T14:07:00 12 | 11,Irene,King,55565.61,13.88,2006-04-14,2015-03-19T15:01:00 13 | 12,Gary,Banks,57620.06,15.68,, 14 | 13,David,Knight,49729.65,10.39,2002-08-21,2005-06-29T11:14:00 15 | 14,Jennifer,Collins,86834,10.18,2007-06-06,2001-09-17T11:47:00 16 | 15,Gary,Vasquez,47974.45,24.52,2010-06-07,2014-08-30T02:41:00 17 | 16,Theresa,Mason,67476.24,41.47,2000-09-25,2015-11-07T01:23:00 18 | 17,Carl,Williams,71048.06,29.67,2008-11-12,2009-09-06T20:21:00 19 | 18,Judy,Howard,53110.54,42.1,2015-04-29,2011-05-14T14:38:00 20 | 19,Jane,Harris,52664.59,16.48,2004-11-28,2000-10-17T14:18:00 21 | 20,Paula,Hall,77300.12,13.46,2004-02-09,2006-08-19T23:35:00 22 | 21,Jennifer,Pierce,98176.06,28.4,2012-04-30,2014-05-16T04:50:00 23 | 22,Wanda,Graham,,,2010-10-06,2013-01-22T10:06:00 24 | 23,Donna,Stevens,52731.24,35.64,2008-02-18,2004-04-09T23:03:00 25 | 24,Shawn,Olson,71341.2,28.74,2001-01-03,2014-01-08T20:25:00 26 | 25,Larry,Mills,94185.21,17.98,2008-11-07,2000-10-16T21:14:00 27 | 26,Diane,Dean,48035.26,14.52,2005-08-13,2013-07-15T05:35:00 28 | 27,Gerald,Reynolds,68902.06,14.02,2006-01-05,2013-03-25T21:58:00 29 | 
28,Angela,Reyes,68431.89,29.15,2016-02-18,2007-06-02T21:37:00 30 | 29,Terry,Williamson,82284.42,11.32,2010-02-13,2009-04-28T10:16:00 31 | 30,Thomas,Peterson,32202.5,44.81,2011-04-21,2002-11-01T05:09:00 32 | 31,Gary,Ryan,47322.49,38.28,2009-11-04,2014-03-18T16:40:00 33 | 32,Kelly,Wright,77681.48,40.62,2008-10-17,2008-09-13T04:35:00 34 | 33,Jerry,Mcdonald,48634.67,40.64,2012-12-03,2009-04-04T13:34:00 35 | 34,Sara,Williamson,93591.36,29.12,2013-09-05,2009-06-15T14:10:00 36 | 35,Tammy,Richards,64163.05,27.97,2011-02-15,2004-09-19T04:44:00 37 | 36,Willie,Wilson,55013.5,23.13,2000-07-07,2016-02-04T13:32:00 38 | 37,Howard,Moore,68933.52,29.74,2009-06-18,2012-03-05T18:46:00 39 | 38,Harry,Powell,88803.14,43.68,2014-09-04,2013-01-28T23:06:00 40 | 39,Sarah,Larson,59087.65,15.33,2002-03-14,2003-06-09T10:49:00 41 | 40,Clarence,Fernandez,,,2002-06-15,2003-02-27T20:50:00 42 | 41,Frank,Oliver,46920.82,27.12,2010-06-30,2008-07-25T03:32:00 43 | 42,Helen,Diaz,79150.2,44.47,2003-11-19,2009-05-03T11:00:00 44 | 43,Carl,Payne,47331.2,28.55,2003-09-19,2004-06-10T16:44:00 45 | 44,Julie,Gibson,62976,12.89,2014-04-13,2010-10-26T12:46:00 46 | 45,Matthew,Vasquez,56466.09,28.87,2010-12-15,2006-08-04T04:15:00 47 | 46,Roger,Frazier,76210.95,17.7,2009-04-19,2010-10-26T10:16:00 48 | 47,Terry,Bailey,71592.53,30.66,2011-04-08,2004-02-10T16:05:00 49 | 48,Charles,Harris,73087.71,27.97,2008-09-15,2011-09-02T06:12:00 50 | 49,Carlos,Washington,96820.05,26.53,2004-08-11,2009-11-04T14:43:00 51 | 50,Arthur,Martinez,77322.05,43.76,2004-10-20,2003-02-28T00:55:00 52 | 51,Aaron,Ramos,39447.66,23.22,2001-09-25,2004-06-19T14:13:00 53 | 52,Diane,Wagner,58776.97,15.87,2009-01-28,2015-07-24T12:09:00 54 | 53,Patrick,Harvey,62550.45,27.79,2015-10-16,2014-01-17T01:46:00 55 | 54,Mildred,Kelly,56818.96,19.68,2010-05-05,2001-12-15T22:01:00 56 | 55,Christina,Wagner,92207.21,37.24,2006-03-15,2011-07-25T18:08:00 57 | 56,Walter,Jacobs,91261.51,41.24,2005-02-09,2012-12-16T08:25:00 58 | 57,Edward,Roberts,89979.1,30.01,2004-03-07,2013-01-17T09:02:00 59 | 58,Eric,Price,66234.11,16.89,2009-03-06,2009-07-18T18:53:00 60 | 59,Arthur,Mills,80119.32,20.54,2008-12-03,2010-10-12T12:11:00 61 | 60,Frank,Crawford,55120.2,20.61,2007-05-19,2013-11-28T04:05:00 62 | 61,Denise,Reynolds,41558.91,31,2003-12-12,2010-06-04T18:38:00 63 | 62,Christopher,Harvey,80935.62,43.37,2006-05-31,2002-10-17T03:34:00 64 | 63,Philip,Miller,34367.91,32.27,2003-11-21,2016-02-01T19:24:00 65 | 64,Todd,Boyd,41129.51,18.46,2007-03-28,2001-10-01T20:52:00 66 | 65,Brandon,Gibson,74951.17,13.34,2008-07-05,2001-02-07T10:34:00 67 | 66,Stephen,Simpson,82619.36,25.09,2003-06-10,2009-03-03T06:10:00 68 | 67,Matthew,Ramirez,69707.47,19.48,2008-10-28,2015-11-17T05:09:00 69 | 68,Christine,Peters,50284.83,31.02,2004-09-15,2006-02-16T00:42:00 70 | 69,Amanda,Butler,91480.48,38.19,2003-11-27,2012-01-04T08:38:00 71 | 70,Julia,Graham,92477.08,41.81,2014-05-13,2006-05-28T21:28:00 72 | 71,Gregory,Rice,91185.29,37.58,2012-03-24,2007-10-17T12:11:00 73 | 72,Louis,Crawford,52832.8,42.93,2005-05-31,2006-03-19T06:07:00 74 | 73,Brandon,Watson,45723.17,41.92,2012-10-02,2015-11-13T07:19:00 75 | 74,Philip,Shaw,69490.63,23.92,, 76 | 75,Harold,Gonzales,79224.07,36.25,2012-12-05,2010-10-05T01:09:00 77 | 76,James,Webb,33969.44,30.36,2015-05-08,2010-04-10T19:12:00 78 | 77,Joshua,Mccoy,45820.89,20.4,2004-08-28,2012-01-25T12:32:00 79 | 78,Russell,Clark,74467.95,31,2002-10-26,2013-05-29T18:09:00 80 | 79,Jerry,Greene,46403.2,13.31,2002-06-08,2009-05-03T10:00:00 81 | 80,Theresa,Barnes,57409.47,24.39,2007-01-09,2015-04-23T09:49:00 82 | 
81,Sara,Graham,70440.8,29.56,2010-01-19,2008-04-08T06:55:00 83 | 82,Phillip,Johnson,91284.91,30.45,2008-01-11,2005-01-10T21:16:00 84 | 83,Patricia,Evans,75978.21,31.9,2014-04-27,2013-06-27T16:08:00 85 | 84,Donna,Crawford,62172.48,13.32,2016-01-10,2005-03-20T09:10:00 86 | 85,Kenneth,Webb,69379,37.31,2011-07-06,2005-09-10T18:28:00 87 | 86,Clarence,Jackson,94199.35,25.46,2003-10-27,2008-12-19T03:45:00 88 | 87,Robin,Armstrong,35735.08,39.43,, 89 | 88,Andrew,Baker,31860.73,10.51,2015-03-20,2000-05-01T00:01:00 90 | 89,Cheryl,Webb,47783.33,31.8,2012-12-07,2008-11-29T17:51:00 91 | 90,Wanda,Hill,53157.17,44.11,2005-04-02,2006-11-18T05:07:00 92 | 91,Lisa,Sullivan,76040.59,33.44,2006-05-08,2002-07-18T06:24:00 93 | 92,Terry,Dean,60309.32,41.71,2008-01-02,2007-09-29T12:09:00 94 | 93,Frank,Hamilton,63715.31,11.99,2006-11-14,2006-05-07T21:24:00 95 | 94,Mark,Jones,45956,42.76,2009-12-16,2015-05-16T05:16:00 96 | 95,Steve,Johnson,34983.95,25.33,2002-12-29,2011-08-19T21:06:00 97 | 96,Joan,Graham,77844.68,28.74,2007-01-01,2007-02-15T15:10:00 98 | 97,Diana,Cunningham,33346.63,42.61,2013-11-05,2014-06-07T16:14:00 99 | 98,Annie,Hunt,67398.43,23.99,2004-01-28,2002-12-13T08:38:00 100 | 70000,Craig,Robertson,,,2008-06-23,2005-04-18T07:02:00 -------------------------------------------------------------------------------- /src/Sink.jl: -------------------------------------------------------------------------------- 1 | function Sink(fullpath::Union{AbstractString, IO}; 2 | delim::Char=',', 3 | quotechar::Char='"', 4 | escapechar::Char='\\', 5 | null::AbstractString="", 6 | dateformat=nothing, 7 | header::Bool=true, 8 | colnames::Vector{String}=String[], 9 | append::Bool=false, 10 | quotefields::Bool=false) 11 | delim = delim % UInt8; quotechar = quotechar % UInt8; escapechar = escapechar % UInt8 12 | dateformat = isa(dateformat, AbstractString) ? Dates.DateFormat(dateformat) : dateformat 13 | io = IOBuffer() 14 | options = CSV.Options(delim=delim, quotechar=quotechar, escapechar=escapechar, null=null, dateformat=dateformat) 15 | !append && header && !isempty(colnames) && writeheaders(io, colnames, options, Val{quotefields}) 16 | return Sink(options, io, fullpath, position(io), !append && header && !isempty(colnames), colnames, length(colnames), append, Val{quotefields}) 17 | end 18 | 19 | quoted(::Type{Val{true}}, val, q, e, d) = string(q, replace(val, q, string(e, q)), q) 20 | quoted(::Type{Val{false}}, val, q, e, d) = (q in val || d in val) ? string(q, replace(val, q, string(e, q)), q) : val 21 | 22 | function writeheaders(io::IOBuffer, h::Vector{String}, options, quotefields) 23 | cols = length(h) 24 | q = options.quotechar; e = options.escapechar; d = options.delim 25 | for col = 1:cols 26 | Base.write(io, quoted(quotefields, h[col], q, e, d), ifelse(col == cols, NEWLINE, options.delim)) 27 | end 28 | return nothing 29 | end 30 | 31 | # DataStreams interface 32 | Data.streamtypes(::Type{CSV.Sink}) = [Data.Field] 33 | Data.weakrefstrings(::Type{CSV.Sink}) = true 34 | 35 | # Constructors 36 | function Sink(sch::Data.Schema, T, append, file::Union{AbstractString, IO}; reference::Vector{UInt8}=UInt8[], kwargs...) 37 | sink = Sink(file; append=append, colnames=Data.header(sch), kwargs...) 
38 | return sink 39 | end 40 | 41 | function Sink(sink, sch::Data.Schema, T, append; reference::Vector{UInt8}=UInt8[]) 42 | sink.append = append 43 | sink.cols = size(sch, 2) 44 | !sink.header && !append && writeheaders(sink.io, Data.header(sch), sink.options, sink.quotefields) 45 | return sink 46 | end 47 | 48 | Data.streamto!(sink::Sink, ::Type{Data.Field}, val, row, col::Int) = Base.write(sink.io, string(val), ifelse(col == sink.cols, NEWLINE, sink.options.delim)) 49 | function Data.streamto!(sink::Sink, ::Type{Data.Field}, val::AbstractString, row, col::Int) 50 | q = Char(sink.options.quotechar); e = Char(sink.options.escapechar); d = sink.options.delim 51 | Base.write(sink.io, quoted(sink.quotefields, string(val), q, e, d), ifelse(col == sink.cols, NEWLINE, d)) 52 | return nothing 53 | end 54 | 55 | defaultformat(::Date) = Dates.ISODateFormat 56 | defaultformat(::DateTime) = Dates.ISODateTimeFormat 57 | 58 | function Data.streamto!(sink::Sink, ::Type{Data.Field}, val::Dates.TimeType, row, col::Int) 59 | v = Dates.format(val, sink.options.dateformat === nothing ? defaultformat(val) : sink.options.dateformat) 60 | Base.write(sink.io, v, ifelse(col == sink.cols, NEWLINE, sink.options.delim)) 61 | return nothing 62 | end 63 | 64 | const EMPTY_UINT8_ARRAY = UInt8[] 65 | function Data.streamto!(sink::Sink, ::Type{Data.Field}, val::Missing, row, col::Int) 66 | Base.write(sink.io, sink.options.nullcheck ? sink.options.null : EMPTY_UINT8_ARRAY, ifelse(col == sink.cols, NEWLINE, sink.options.delim)) 67 | return nothing 68 | end 69 | 70 | function Data.close!(sink::CSV.Sink) 71 | io = isa(sink.fullpath, AbstractString) ? open(sink.fullpath, sink.append ? "a" : "w") : sink.fullpath 72 | Base.write(io, take!(sink.io)) 73 | isa(sink.fullpath, AbstractString) && close(io) 74 | return sink 75 | end 76 | 77 | """ 78 | `CSV.write(file_or_io::Union{AbstractString,IO}, source::Type{T}, args...; kwargs...)` => `CSV.Sink` 79 | 80 | `CSV.write(file_or_io::Union{AbstractString,IO}, source::Data.Source; kwargs...)` => `CSV.Sink` 81 | 82 | 83 | Write a `Data.Source` out to a `file_or_io`. 84 | 85 | Positional Arguments: 86 | 87 | * `file_or_io`; can be a file name (string) or other `IO` instance 88 | * `source` can be the *type* of `Data.Source`, plus any required `args...`, or an already constructed `Data.Source` can be passed in directly (2nd method) 89 | 90 | Keyword Arguments: 91 | 92 | * `delim::Union{Char,UInt8}`; how fields in the file will be delimited; default is `UInt8(',')` 93 | * `quotechar::Union{Char,UInt8}`; the character that indicates a quoted field that may contain the `delim` or newlines; default is `UInt8('"')` 94 | * `escapechar::Union{Char,UInt8}`; the character that escapes a `quotechar` in a quoted field; default is `UInt8('\\')` 95 | * `null::String`; the ASCII string that indicates how NULL values will be represented in the dataset; default is the empty string `""` 96 | * `dateformat`; how dates/datetimes will be represented in the dataset; default is ISO-8601 `yyyy-mm-ddTHH:MM:SS.s` 97 | * `header::Bool`; whether to write out the column names from `source` 98 | * `colnames::Vector{String}`; a vector of string column names to be used when writing the header row 99 | * `append::Bool`; start writing data at the end of `io`; by default, `io` will be reset to the beginning or overwritten before writing 100 | * `transforms::Dict{Union{String,Int},Function}`; a Dict of transforms to apply to values as they are parsed. Note that a column can be specified by either number or column name.
101 | 102 | A few example invocations include: 103 | ```julia 104 | # write out a DataFrame `df` to a file named "out.csv" with all defaults, including comma as delimiter 105 | CSV.write("out.csv", df) 106 | 107 | # write out a DataFrame, this time as a tab-delimited file 108 | CSV.write("out.csv", df; delim='\t') 109 | 110 | # write out a DataFrame, with null values represented by the string "NA" 111 | CSV.write("out.csv", df; null="NA") 112 | 113 | # write out a "header-less" file, with actual data starting on row 1 114 | CSV.write("out.csv", df; header=false) 115 | 116 | # write out a DataFrame `df` twice to a file; the resulting file will have twice the number of rows as the DataFrame 117 | # note the usage of the keyword argument `append=true` in the 2nd call 118 | CSV.write("out.csv", df) 119 | CSV.write("out.csv", df; append=true) 120 | 121 | # write a DataFrame out to an IOBuffer instead of a file 122 | io = IOBuffer() 123 | CSV.write(io, df) 124 | 125 | # write the result of an SQLite query out to a comma-delimited file 126 | db = SQLite.DB() 127 | sqlite_source = SQLite.Source(db, "select * from sqlite_table") 128 | CSV.write("sqlite_table.csv", sqlite_source) 129 | ``` 130 | """ 131 | function write end 132 | 133 | function write(file::Union{AbstractString, IO}, ::Type{T}, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}(), kwargs...) where {T} 134 | sink = Data.stream!(T(args...), CSV.Sink, file; append=append, transforms=transforms, kwargs...) 135 | return Data.close!(sink) 136 | end 137 | function write(file::Union{AbstractString, IO}, source; append::Bool=false, transforms::Dict=Dict{Int,Function}(), kwargs...) 138 | sink = Data.stream!(source, CSV.Sink, file; append=append, transforms=transforms, kwargs...) 139 | return Data.close!(sink) 140 | end 141 | 142 | write(sink::Sink, ::Type{T}, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}()) where {T} = (sink = Data.stream!(T(args...), sink; append=append, transforms=transforms); return Data.close!(sink)) 143 | write(sink::Sink, source; append::Bool=false, transforms::Dict=Dict{Int,Function}()) = (sink = Data.stream!(source, sink; append=append, transforms=transforms); return Data.close!(sink)) 144 | -------------------------------------------------------------------------------- /src/CSV.jl: -------------------------------------------------------------------------------- 1 | __precompile__(true) 2 | module CSV 3 | 4 | using DataStreams, WeakRefStrings, Missings, CategoricalArrays, DataFrames 5 | 6 | using Compat, Compat.Mmap, Compat.Dates 7 | 8 | struct ParsingException <: Exception 9 | msg::String 10 | end 11 | 12 | const RETURN = UInt8('\r') 13 | const NEWLINE = UInt8('\n') 14 | const COMMA = UInt8(',') 15 | const QUOTE = UInt8('"') 16 | const ESCAPE = UInt8('\\') 17 | const PERIOD = UInt8('.') 18 | const SPACE = UInt8(' ') 19 | const TAB = UInt8('\t') 20 | const MINUS = UInt8('-') 21 | const PLUS = UInt8('+') 22 | const NEG_ONE = UInt8('0')-UInt8(1) 23 | const ZERO = UInt8('0') 24 | const TEN = UInt8('9')+UInt8(1) 25 | Base.isascii(c::UInt8) = c < 0x80 26 | 27 | readbyte(from::IO) = Base.read(from, UInt8) 28 | peekbyte(from::IO) = Base.peek(from) 29 | 30 | @inline function readbyte(from::IOBuffer) 31 | @inbounds byte = from.data[from.ptr] 32 | from.ptr = from.ptr + 1 33 | return byte 34 | end 35 | 36 | @inline function peekbyte(from::IOBuffer) 37 | @inbounds byte = from.data[from.ptr] 38 | return byte 39 | end 40 | 41 | substitute(::Type{Union{T, Missing}}, ::Type{T1}) where {T, T1} = Union{T1,
Missing} 42 | substitute(::Type{T}, ::Type{T1}) where {T, T1} = T1 43 | substitute(::Type{Missing}, ::Type{T1}) where {T1} = Missing 44 | 45 | """ 46 | Represents the various configuration settings for delimited text file parsing. 47 | 48 | Keyword Arguments: 49 | 50 | * `delim::Union{Char,UInt8}`: how fields in the file are delimited; default `','` 51 | * `quotechar::Union{Char,UInt8}`: the character that indicates a quoted field that may contain the `delim` or newlines; default `'"'` 52 | * `escapechar::Union{Char,UInt8}`: the character that escapes a `quotechar` in a quoted field; default `'\\'` 53 | * `null::String`: indicates how NULL values are represented in the dataset; default `""` 54 | * `dateformat::Union{AbstractString,Dates.DateFormat}`: how dates/datetimes are represented in the dataset; default `Base.Dates.ISODateTimeFormat` 55 | * `decimal::Union{Char,UInt8}`: character to recognize as the decimal point in a float number, e.g. `3.14` or `3,14`; default `'.'` 56 | * `truestring`: string to represent `true::Bool` values in a csv file; default `"true"`. Note that `truestring` and `falsestring` cannot start with the same character. 57 | * `falsestring`: string to represent `false::Bool` values in a csv file; default `"false"` 58 | """ 59 | struct Options{D} 60 | delim::UInt8 61 | quotechar::UInt8 62 | escapechar::UInt8 63 | null::Vector{UInt8} 64 | nullcheck::Bool 65 | dateformat::D 66 | decimal::UInt8 67 | truestring::Vector{UInt8} 68 | falsestring::Vector{UInt8} 69 | # non-public for now 70 | datarow::Int 71 | rows::Int 72 | header::Union{Integer,UnitRange{Int},Vector} 73 | types 74 | end 75 | 76 | Options(;delim=COMMA, quotechar=QUOTE, escapechar=ESCAPE, null="", dateformat=nothing, decimal=PERIOD, truestring="true", falsestring="false", datarow=-1, rows=0, header=1, types=Type[]) = 77 | Options(delim%UInt8, quotechar%UInt8, escapechar%UInt8, 78 | map(UInt8, collect(ascii(String(null)))), null != "", isa(dateformat, AbstractString) ? Dates.DateFormat(dateformat) : dateformat, 79 | decimal%UInt8, map(UInt8, collect(truestring)), map(UInt8, collect(falsestring)), datarow, rows, header, types) 80 | function Base.show(io::IO,op::Options) 81 | println(io, " CSV.Options:") 82 | println(io, " delim: '", Char(op.delim), "'") 83 | println(io, " quotechar: '", Char(op.quotechar), "'") 84 | print(io, " escapechar: '"); escape_string(io, string(Char(op.escapechar)), "\\"); println(io, "'") 85 | print(io, " null: \""); escape_string(io, isempty(op.null) ? "" : String(collect(op.null)), "\\"); println(io, "\"") 86 | println(io, " dateformat: ", op.dateformat) 87 | println(io, " decimal: '", Char(op.decimal), "'") 88 | println(io, " truestring: '$(String(op.truestring))'") 89 | print(io, " falsestring: '$(String(op.falsestring))'") 90 | end 91 | 92 | """ 93 | A type that satisfies the `Data.Source` interface in the `DataStreams.jl` package. 94 | 95 | A `CSV.Source` can be manually constructed in order to be re-used multiple times. 96 | 97 | `CSV.Source(file_or_io; kwargs...) => CSV.Source` 98 | 99 | Note that a filename string can be provided or any `IO` type. 
For the full list of supported 100 | keyword arguments, see the docs for [`CSV.read`](@ref) or type `?CSV.read` at the REPL. 101 | 102 | An example of re-using a `CSV.Source` is: 103 | ```julia 104 | # manually construct a `CSV.Source` once, then stream its data to both a DataFrame 105 | # and SQLite table `sqlite_table` in the SQLite database `db` 106 | # note the use of `CSV.reset!` to ensure the `source` can be streamed from again 107 | source = CSV.Source(file) 108 | df1 = CSV.read(source, DataFrame) 109 | CSV.reset!(source) 110 | sq1 = CSV.read(source, SQLite.Sink, db, "sqlite_table") 111 | ``` 112 | """ 113 | mutable struct Source{I, D} <: Data.Source 114 | schema::Data.Schema 115 | options::Options{D} 116 | io::I 117 | fullpath::String 118 | datapos::Int # the position in the IOBuffer where the rows of data begin 119 | end 120 | 121 | function Base.show(io::IO, f::Source) 122 | println(io, "CSV.Source: ", f.fullpath) 123 | println(io, f.options) 124 | show(io, f.schema) 125 | end 126 | 127 | """ 128 | `CSV.TransposedSource(file_or_io; kwargs...) => CSV.TransposedSource` 129 | 130 | Type that in all respects is identical to a `CSV.Source`, except that when reading a csv file, 131 | the data will be parsed "transposed", i.e. rows become columns and columns become rows. 132 | This is a convenience when the csv data happens to be transposed, and it can also yield significant 133 | performance gains, since the resulting data set can be more consistently typed. 134 | 135 | Typical usage involves calling `CSV.read(file; transpose=true)`. 136 | """ 137 | mutable struct TransposedSource{I, D} <: Data.Source 138 | schema::Data.Schema 139 | options::Options{D} 140 | io::I 141 | fullpath::String 142 | datapos::Int # the position in the IOBuffer where the rows of data begin 143 | columnpositions::Vector{Int} 144 | end 145 | 146 | """ 147 | A type that satisfies the `Data.Sink` interface in the `DataStreams.jl` package. 148 | 149 | A `CSV.Sink` can be manually constructed in order to be re-used multiple times. 150 | 151 | `CSV.Sink(file_or_io; kwargs...) => CSV.Sink` 152 | 153 | Note that a filename string can be provided or any `IO` type.
For the full list of supported 154 | keyword arguments, see the docs for [`CSV.write`](@ref) or type `?CSV.write` at the REPL. 155 | 156 | An example of re-using a `CSV.Sink` is: 157 | ```julia 158 | # manually construct a `CSV.Sink` once, then write two different 159 | # `Data.Source`s (here, two DataFrames) out to the same file 160 | # note the use of `append=true` so the 2nd write doesn't overwrite the 1st 161 | sink = CSV.Sink("out.csv") 162 | CSV.write(sink, df1) 163 | # re-use the sink, appending to the same file 164 | CSV.write(sink, df2; append=true) 165 | ``` 166 | """ 167 | mutable struct Sink{D, B} <: Data.Sink 168 | options::Options{D} 169 | io::IOBuffer 170 | fullpath::Union{String, IO} 171 | datapos::Int # the position in the IOBuffer where the rows of data begin 172 | header::Bool 173 | colnames::Vector{String} 174 | cols::Int 175 | append::Bool 176 | quotefields::B 177 | end 178 | 179 | include("parsefields.jl") 180 | include("float.jl") 181 | include("io.jl") 182 | include("TransposedSource.jl") 183 | include("Source.jl") 184 | include("Sink.jl") 185 | include("validate.jl") 186 | 187 | end # module 188 | -------------------------------------------------------------------------------- /benchmark/benchmarks.jl: -------------------------------------------------------------------------------- 1 | using PkgBenchmark, CSV, WeakRefStrings 2 | if !is_windows() 3 | using DecFP 4 | end 5 | 6 | prep(io::IO, ::Type{Int64}) = write(io, "10") 7 | prep(io::IO, ::Type{Float64}) = write(io, "10.0") 8 | prep(io::IO, ::Type{WeakRefString{UInt8}}) = write(io, "hey there sailor") 9 | prep(io::IO, ::Type{String}) = write(io, "hey there sailor") 10 | prep(io::IO, ::Type{Date}) = write(io, "2016-09-28") 11 | prep(io::IO, ::Type{DateTime}) = write(io, "2016-09-28T03:21:00") 12 | if !is_windows() 13 | prep(io::IO, ::Type{Dec64}) = write(io, "10.0") 14 | end 15 | 16 | function prep{T}(::Type{IOBuffer}, ::Type{T}) 17 | io = IOBuffer() 18 | prep(io, T) 19 | seekstart(io) 20 | return io, ()->return 21 | end 22 | function prep{T}(::Type{IOStream}, ::Type{T}) 23 | t = tempname() 24 | io = open(t, "w") 25 | prep(io, T) 26 | close(io) 27 | io = open(t, "r") 28 | return io, ()->rm(t) 29 | end 30 | 31 | TYPES = !is_windows() ?
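# NOTE: Dec64 comes from the DecFP package, which the `!is_windows()` guards above only load on non-Windows platforms, so it is only benchmarked there: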
(Int, Float64, WeakRefString{UInt8}, String, Date, DateTime, Dec64) : (Int, Float64, WeakRefString{UInt8}, String, Date, DateTime) 32 | 33 | @benchgroup "CSV" begin 34 | @benchgroup "CSV.parsefield" begin 35 | opts = CSV.Options() 36 | row = col = 1 37 | state = Ref{CSV.ParsingState}(CSV.None) 38 | for I in (IOBuffer, IOStream) 39 | for T in TYPES 40 | io, f = prep(I, T) 41 | @bench "$I - $T" CSV.parsefield($io, $T, opts, row, col, state) 42 | f() 43 | end 44 | end 45 | end 46 | FILE = joinpath(dirname(@__FILE__), "randoms_small.csv") 47 | 48 | @benchgroup "CSV.read" begin 49 | @bench "CSV.read" CSV.read(FILE) 50 | end 51 | 52 | @benchgroup "CSV.write" begin 53 | df = CSV.read(FILE) 54 | t = tempname() 55 | @bench "CSV.write" CSV.write(t, df) 56 | end 57 | 58 | end 59 | 60 | 61 | 62 | 63 | # generate single column files w/ 1M rows for each type 64 | using WeakRefStrings 65 | 66 | val = "hey" 67 | for i in (1001, 100.1, WeakRefString{UInt8}(pointer(val), 3, 0), Date(2008, 1, 3), DateTime(2008, 3, 4)) 68 | open("/Users/jacobquinn/Downloads/randoms_$(typeof(i)).csv", "w") do f 69 | for j = 1:1_000_000 70 | write(f, string(i)) 71 | write(f, "\n") 72 | end 73 | end 74 | end 75 | 76 | using CSV, TextParse 77 | for T in (Int, Float64, WeakRefStrings.WeakRefString{UInt8}, Date, DateTime) 78 | println("comparing for T = $T...") 79 | # T == WeakRefStrings.WeakRefString{UInt8} && continue 80 | @time CSV.read("/Users/jacobquinn/Downloads/randoms_$(T).csv"; nullable=true); 81 | # @time TextParse.csvread("/Users/jacobquinn/Downloads/randoms_$T.csv"); 82 | end 83 | 84 | for T in (Int, Float64, WeakRefStrings.WeakRefString{UInt8}, Date, DateTime) 85 | println("comparing for T = $T...") 86 | # T == WeakRefStrings.WeakRefString{UInt8} && continue 87 | # @time CSV.read("/Users/jacobquinn/Downloads/randoms_$(T).csv"; nullable=true); 88 | @time TextParse.csvread("/Users/jacobquinn/Downloads/randoms_$T.csv"); 89 | end 90 | 91 | @time CSV.read("/Users/jacobquinn/Downloads/yellow_tripdata_2015-01.csv"); 92 | @time TextParse.csvread("/Users/jacobquinn/Downloads/yellow_tripdata_2015-01.csv"); 93 | 94 | for T in ('Int64', 'Float64', 'WeakRefString{UInt8}', 'Date', 'DateTime'): 95 | start = time.time() 96 | delim = ',' 97 | table = pandas.read_csv("/Users/jacobquinn/Downloads/randoms_" + T + ".csv", delimiter=delim) 98 | end = time.time() 99 | print(end - start) 100 | 101 | 102 | start = time.time() 103 | delim = ',' 104 | table = pandas.read_csv("/Users/jacobquinn/Downloads/yellow_tripdata_2015-01.csv", delimiter=delim) 105 | end = time.time() 106 | print(end - start) 107 | @time df = CSV.read("/Users/jacobquinn/Downloads/file.txt"; delim=' '); 108 | @time TextParse.csvread("/Users/jacobquinn/Downloads/randoms_$(T).csv") 109 | # julia> for T in (Int, Float64, WeakRefStrings.WeakRefString{UInt8}, Date, DateTime) 110 | # println("comparing for T = $T...") 111 | # @time CSV.read("/Users/jacobquinn/Downloads/randoms_$(T).csv"); 112 | # @time TextParse.csvread("/Users/jacobquinn/Downloads/randoms_$(T).csv"); 113 | # end 114 | # comparing for T = Int64... 115 | # pre-allocating DataFrame w/ rows = 999999 116 | # 0.043684 seconds (1.00 M allocations: 22.929 MiB, 31.61% gc time) 117 | # 0.045556 seconds (460 allocations: 15.575 MiB, 3.20% gc time) 118 | # comparing for T = Float64... 119 | # pre-allocating DataFrame w/ rows = 999999 120 | # 0.080026 seconds (1.00 M allocations: 22.974 MiB, 23.80% gc time) 121 | # 0.082530 seconds (457 allocations: 16.528 MiB) 122 | # comparing for T = WeakRefString{UInt8}... 
123 | # pre-allocating DataFrame w/ rows = 999999 124 | # 0.058446 seconds (1.89 k allocations: 22.986 MiB, 8.53% gc time) 125 | # 0.069034 seconds (595 allocations: 5.188 MiB) 126 | # comparing for T = Date... 127 | # pre-allocating DataFrame w/ rows = 999999 128 | # 0.125229 seconds (2.00 M allocations: 53.504 MiB, 20.94% gc time) 129 | # 0.120472 seconds (1.00 M allocations: 51.846 MiB, 6.73% gc time) 130 | # comparing for T = DateTime... 131 | # pre-allocating DataFrame w/ rows = 999999 132 | # 0.175855 seconds (2.00 M allocations: 53.504 MiB, 23.30% gc time) 133 | # 0.187619 seconds (1.00 M allocations: 60.516 MiB, 4.40% gc time) 134 | 135 | 136 | T = Int64 137 | @time source = CSV.Source("/Users/jacobquinn/Downloads/randoms_$(T).csv";) 138 | @time source = CSV.Source("/Users/jacobquinn/Downloads/randoms_small.csv"; nullable=true) 139 | @time source = CSV.Source("/Users/jacobquinn/Downloads/randoms_small.csv"; nullable=false) 140 | # source.schema = DataStreams.Data.Schema(DataStreams.Data.header(source.schema), (Int, String, String, Float64, Float64, Date, DateTime), 9) 141 | # @time df = CSV.read(source, NamedTuple); 142 | sink = Si = NamedTuple 143 | transforms = Dict{Int,Function}(1=>x->x-1) 144 | append = false 145 | args = kwargs = () 146 | source_schema = DataStreams.Data.schema(source) 147 | sink_schema, transforms2 = DataStreams.Data.transform(source_schema, transforms, true); 148 | sinkstreamtype = DataStreams.Data.Field 149 | sink = Si(sink_schema, sinkstreamtype, append, args...; kwargs...); 150 | columns = [] 151 | filter = x->true 152 | @code_warntype DataStreams.Data.stream!(source, sinkstreamtype, sink, source_schema, sink_schema, transforms2, filter, columns) 153 | @time DataStreams.Data.stream!(source, sinkstreamtype, sink, source_schema, sink_schema, transforms2, filter, columns) 154 | 155 | function testt(t) 156 | a = getfield(t, 1) 157 | b = getfield(t, 2) 158 | c = getfield(t, 3) 159 | d = getfield(t, 4) 160 | e = getfield(t, 5) 161 | f = getfield(t, 6) 162 | g = getfield(t, 7) 163 | return (a, b, c, d, e, f, g) 164 | end 165 | @code_warntype testt((i1=(?Int)[], i2=(?String)[], i3=(?String)[], i4=(?Float64)[], i5=(?Float64)[], i6=(?Date)[], i7=(?DateTime)[])) 166 | 167 | @code_llvm DataStreams.Data.stream!(source, sinkstreamtype, sink, source_schema, sink_schema, transforms2, filter, columns) 168 | @time DataStreams.Data.stream!(source, sinkstreamtype, sink, source_schema, sink_schema, transforms2, filter, columns) 169 | 170 | @code_warntype @time CSV.parsefield(IOBuffer(), ?Int, CSV.Options(), 0, 0, CSV.STATE) 171 | 172 | t = Vector{Int}(1000000) 173 | 174 | # having CSV.parsefield(io, T) where T !>: Null decreases allocations by 1.00M 175 | # inlining CSV.parsefield also dropped allocations 176 | # making CSV.Options not have a type parameter also sped things up 177 | # 178 | 179 | using BenchmarkTools 180 | 181 | g(x) = x < 5 ? 
x : -1 182 | A = [i for i = 1:10] 183 | function get_then_set(A) 184 | @simd for i = 1:10 185 | @inbounds A[i] = g(i) 186 | end 187 | return A 188 | end 189 | @code_warntype g(1) 190 | @code_warntype get_then_set(A) 191 | @benchmark get_then_set(A) # 20ns 192 | 193 | @inline g3(x) = g2(x) 194 | @inline function g2(x) 195 | if x < 20 196 | return x * 20 197 | end 198 | 199 | if x < 15 200 | return nothing 201 | end 202 | 203 | if x < 12 204 | return 2x 205 | end 206 | 207 | if x * 20 / 4 % 2 == 0 208 | return 1 209 | end 210 | 211 | if x < 0 212 | return nothing 213 | end 214 | return nothing 215 | end 216 | 217 | A = Union{Int, Void}[i for i = 1:10] 218 | @inline function get_then_set2(A) 219 | @simd for i = 1:10 220 | # Base.arrayset(A, g2(i), i) 221 | val = g3(i) 222 | if val isa Void 223 | @inbounds A[i] = val#::Union{Int, Void} 224 | else 225 | @inbounds A[i] = val#::Union{Int, Void} 226 | end 227 | end 228 | return A 229 | end 230 | function run_lots(N) 231 | A = Union{Int, Void}[i for i = 1:10] 232 | for i = 1:N 233 | get_then_set2(A) 234 | end 235 | return 236 | end 237 | 238 | @code_warntype g2(1) 239 | @code_warntype get_then_set2(A) 240 | @code_llvm get_then_set2(A) 241 | @benchmark get_then_set2(A) # 155ns 242 | 243 | 244 | g4(x::Int) = 1 245 | g4(x::Void) = 0 246 | 247 | A = [i for i = 1:10] 248 | function get_sum(A) 249 | s = 0 250 | for a in A 251 | s += g4(a) 252 | end 253 | return s 254 | end 255 | @code_warntype get_sum(A) 256 | @code_llvm get_sum(A) 257 | @benchmark get_sum(A) # 24ns 258 | 259 | A = Union{Int, Void}[i for i = 1:10] 260 | A[[3, 5, 7]] = nothing 261 | function get_sum2(A) 262 | s = 0 263 | for a in A 264 | s += g4(a) 265 | end 266 | return s 267 | end 268 | @code_warntype get_sum2(A) 269 | @code_llvm get_sum(A) 270 | @benchmark get_sum2(A) # 100ns 271 | 272 | 273 | function getstatic{T}(t::T) 274 | return t[1] 275 | end 276 | 277 | 278 | -------------------------------------------------------------------------------- /src/float.jl: -------------------------------------------------------------------------------- 1 | const EXPONENTS = [ 2 | 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 3 | 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 4 | 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 5 | 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 6 | 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 7 | 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 8 | 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, 9 | 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, 10 | 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, 11 | 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, 12 | 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, 13 | 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, 14 | 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 15 | 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 16 | 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 17 | 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, 18 | 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169, 19 | 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, 20 | 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, 21 | 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199, 22 | 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 
1e209, 23 | 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 24 | 1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 25 | 1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 26 | 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, 27 | 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259, 28 | 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, 29 | 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, 30 | 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289, 31 | 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299, 32 | 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308, 33 | ] 34 | 35 | pow10(exp) = (@inbounds v = EXPONENTS[exp+1]; return v) 36 | 37 | maxexponent(::Type{Int16}) = 4 38 | maxexponent(::Type{Int32}) = 38 39 | maxexponent(::Type{Int64}) = 308 40 | 41 | minexponent(::Type{Int16}) = -5 42 | minexponent(::Type{Int32}) = -38 43 | minexponent(::Type{Int64}) = -308 44 | 45 | inttype(::Type{Float16}) = Int16 46 | inttype(::Type{Float32}) = Int32 47 | inttype(::Type{Float64}) = Int64 48 | 49 | const BIGN = UInt8('N') 50 | const LITTLEN = UInt8('n') 51 | const BIGA = UInt8('A') 52 | const LITTLEA = UInt8('a') 53 | const BIGI = UInt8('I') 54 | const LITTLEI = UInt8('i') 55 | const BIGF = UInt8('F') 56 | const LITTLEF = UInt8('f') 57 | const BIGT = UInt8('T') 58 | const LITTLET = UInt8('t') 59 | const BIGY = UInt8('Y') 60 | const LITTLEY = UInt8('y') 61 | const BIGE = UInt8('E') 62 | const LITTLEE = UInt8('e') 63 | 64 | ParsingException(::Type{T}, exp::Signed, row, col) where {T} = ParsingException("error parsing a `$T` value on column $col, row $row; exponent out of range: $exp") 65 | 66 | function scale(exp, v::T, frac, row, col) where T 67 | if exp >= 0 68 | max_exp = maxexponent(T) 69 | exp > max_exp && throw(ParsingException(T, exp, row, col)) 70 | if exp > 15 71 | return Float64(Base.TwicePrecision{Float64}(v) * Base.TwicePrecision{Float64}(pow10(exp))) 72 | else 73 | return v * pow10(exp) 74 | end 75 | else 76 | min_exp = minexponent(T) 77 | if exp < min_exp 78 | -exp + min_exp > -min_exp && throw(ParsingException(T, exp, row, col)) 79 | return Float64(Base.TwicePrecision{Float64}(v) / Base.TwicePrecision{Float64}(pow10(-exp + min_exp))) 80 | else 81 | if exp > 15 82 | return Float64(Base.TwicePrecision{Float64}(v) / Base.TwicePrecision{Float64}(pow10(-exp))) 83 | else 84 | return v / pow10(-exp) 85 | end 86 | end 87 | end 88 | end 89 | 90 | function parsefield(io::IO, ::Type{T}, opt::CSV.Options, row, col, state, ifnull::Function) where {T <: Union{Float16, Float32, Float64}} 91 | mark(io) 92 | @checknullstart() 93 | negative = false 94 | if b == MINUS # check for leading '-' or '+' 95 | c = peekbyte(io) 96 | if (NEG_ONE < c < TEN) || c == opt.decimal 97 | negative = true 98 | b = readbyte(io) 99 | end 100 | elseif b == PLUS 101 | c = peekbyte(io) 102 | if (NEG_ONE < c < TEN) || c == opt.decimal 103 | b = readbyte(io) 104 | end 105 | end 106 | # float digit parsing 107 | iT = inttype(T) 108 | v = zero(iT) 109 | parseddigits = false 110 | while NEG_ONE < b < TEN 111 | parseddigits = true 112 | # process digits 113 | v *= iT(10) 114 | v += iT(b - ZERO) 115 | eof(io) && (state[] = EOF; result = T(v); @goto done) 116 | b = readbyte(io) 117 | end 118 | # if we didn't get any digits, check for NaN/Inf or leading dot 119 | if !parseddigits 120 | if b == LITTLEN || b == BIGN 121 | eof(io) && @goto checknullend 122
| b = readbyte(io) 123 | (!(b == LITTLEA || b == BIGA) || eof(io)) && (reset(io); b = readbyte(io); @goto checknullend) 124 | b = readbyte(io) 125 | !(b == LITTLEN || b == BIGN) && (reset(io); b = readbyte(io); @goto checknullend) 126 | result = T(NaN) 127 | eof(io) && (state[] = EOF; @goto done) 128 | b = readbyte(io) 129 | @goto checkdone 130 | elseif b == LITTLEI || b == BIGI 131 | eof(io) && @goto checknullend 132 | b = readbyte(io) 133 | (!(b == LITTLEN || b == BIGN) || eof(io)) && (reset(io); b = readbyte(io); @goto checknullend) 134 | b = readbyte(io) 135 | !(b == LITTLEF || b == BIGF) && (reset(io); b = readbyte(io); @goto checknullend) 136 | result = T(Inf) 137 | eof(io) && (state[] = EOF; @goto done) 138 | b = readbyte(io) 139 | if b == LITTLEI || b == BIGI 140 | # read the rest of INFINITY 141 | eof(io) && (state[] = EOF; @goto done) 142 | b = readbyte(io) 143 | b == LITTLEN || b == BIGN || @goto checkdone 144 | eof(io) && (state[] = EOF; @goto done) 145 | b = readbyte(io) 146 | b == LITTLEI || b == BIGI || @goto checkdone 147 | eof(io) && (state[] = EOF; @goto done) 148 | b = readbyte(io) 149 | b == LITTLET || b == BIGT || @goto checkdone 150 | eof(io) && (state[] = EOF; @goto done) 151 | b = readbyte(io) 152 | b == LITTLEY || b == BIGY || @goto checkdone 153 | eof(io) && (state[] = EOF; @goto done) 154 | b = readbyte(io) 155 | end 156 | @goto checkdone 157 | elseif b == opt.decimal 158 | # keep parsing fractional part below 159 | else 160 | @goto checknullend 161 | end 162 | end 163 | # parse fractional part 164 | frac = 0 165 | result = T(v) 166 | if b == opt.decimal 167 | eof(io) && (state[] = EOF; parseddigits ? @goto(done) : @goto(error)) 168 | b = readbyte(io) 169 | elseif b == LITTLEE || b == BIGE 170 | @goto parseexp 171 | else 172 | @goto checkdone 173 | end 174 | 175 | while NEG_ONE < b < TEN 176 | frac += 1 177 | # process digits 178 | v *= iT(10) 179 | v += iT(b - ZERO) 180 | eof(io) && (state[] = EOF; result = scale(-frac, v, 0, row, col); @goto done) 181 | b = readbyte(io) 182 | end 183 | # parse potential exp 184 | if b == LITTLEE || b == BIGE 185 | @label parseexp 186 | eof(io) && (state[] = EOF; result = scale(-frac, v, 0, row, col); @goto done) 187 | b = readbyte(io) 188 | exp = zero(iT) 189 | negativeexp = false 190 | if b == MINUS 191 | negativeexp = true 192 | b = readbyte(io) 193 | elseif b == PLUS 194 | b = readbyte(io) 195 | end 196 | parseddigits = false 197 | while NEG_ONE < b < TEN 198 | parseddigits = true 199 | # process digits 200 | exp *= iT(10) 201 | exp += iT(b - ZERO) 202 | eof(io) && (state[] = EOF; result = scale(ifelse(negativeexp, -exp, exp) - frac, v, frac, row, col); @goto done) 203 | b = readbyte(io) 204 | end 205 | result = parseddigits ? 
scale(ifelse(negativeexp, -exp, exp) - frac, v, frac, row, col) : scale(-frac, v, 0, row, col) 206 | else 207 | result = scale(-frac, v, 0, row, col) 208 | end 209 | 210 | @label checkdone 211 | @checkdone(done) 212 | @goto checknullend 213 | 214 | @label checknullend 215 | @checknullend() 216 | @goto error 217 | 218 | @label done 219 | return T(ifelse(negative, -result, result)) 220 | 221 | @label null 222 | return ifnull(row, col) 223 | 224 | @label error 225 | throw(ParsingException(T, b, row, col)) 226 | end 227 | 228 | 229 | # (-73.99378204345703, -73.99378204345702) 230 | # (-73.95227813720703, -73.95227813720702) 231 | # (-73.98616027832031, -73.98616027832033) 232 | # (-74.00163269042969, -74.00163269042967) 233 | # (-73.96940612792969, -73.9694061279297) 234 | # (-73.96797943115234, -73.96797943115236) 235 | # (-73.95426940917969, -73.95426940917967) 236 | # (-73.97286224365234, -73.97286224365236) 237 | # (-73.99149322509766, -73.99149322509767) 238 | # (-73.97639465332031, -73.97639465332033) 239 | # (-73.97297668457031, -73.97297668457033) 240 | # (-73.98991394042969, -73.98991394042967) 241 | # (-73.98424530029297, -73.98424530029298) 242 | # (-73.97872161865234, -73.97872161865236) 243 | # (-73.99348449707031, -73.99348449707033) 244 | # (-73.96598815917969, -73.96598815917967) 245 | # (-73.98743438720703, -73.98743438720702) -------------------------------------------------------------------------------- /src/parsefields.jl: -------------------------------------------------------------------------------- 1 | @enum ParsingState None Delimiter EOF Newline 2 | const P = Ref{ParsingState} 3 | 4 | # at start of field: check if eof, remove leading whitespace, check if empty field 5 | # returns `true` if result of initial parsing is a null field 6 | # also return `b` which is last byte read 7 | macro checknullstart() 8 | return esc(quote 9 | state[] = None 10 | eof(io) && (state[] = EOF; @goto null) 11 | b = readbyte(io) 12 | while b != opt.delim && (b == CSV.SPACE || b == CSV.TAB || b == opt.quotechar) 13 | eof(io) && (state[] = EOF; @goto null) 14 | b = readbyte(io) 15 | end 16 | if b == opt.delim 17 | state[] = Delimiter 18 | @goto null 19 | elseif b == NEWLINE 20 | state[] = Newline 21 | @goto null 22 | elseif b == RETURN 23 | state[] = Newline 24 | !eof(io) && peekbyte(io) == NEWLINE && readbyte(io) 25 | @goto null 26 | end 27 | end) 28 | end 29 | 30 | # check if we've successfully finished parsing a field by whether 31 | # we've encountered a delimiter or newline break or reached eof 32 | macro checkdone(label) 33 | return esc(quote 34 | b == opt.quotechar && !eof(io) && (b = readbyte(io)) 35 | if b == opt.delim 36 | state[] = Delimiter 37 | @goto $label 38 | elseif b == NEWLINE 39 | state[] = Newline 40 | @goto $label 41 | elseif b == RETURN 42 | state[] = Newline 43 | !eof(io) && peekbyte(io) == NEWLINE && readbyte(io) 44 | @goto $label 45 | elseif b == opt.quotechar && eof(io) 46 | state[] = EOF 47 | @goto $label 48 | elseif b == CSV.SPACE || b == CSV.TAB 49 | # trailing whitespace 50 | while !eof(io) && (b == CSV.SPACE || b == CSV.TAB) 51 | b = readbyte(io) 52 | end 53 | if b == opt.delim 54 | state[] = Delimiter 55 | @goto $label 56 | elseif b == NEWLINE 57 | state[] = Newline 58 | @goto $label 59 | elseif b == RETURN 60 | state[] = Newline 61 | !eof(io) && peekbyte(io) == NEWLINE && readbyte(io) 62 | @goto $label 63 | elseif eof(io) 64 | state[] = EOF 65 | @goto $label 66 | end 67 | end 68 | end) 69 | end 70 | 71 | ParsingException(::Type{T}, b, row, col) where {T} = 
CSV.ParsingException("error parsing a `$T` value on column $col, row $row; encountered '$(Char(b))'") 72 | 73 | # as a last-ditch effort, after we've tried parsing the correct type, 74 | # we check if the field is equal to a custom null type 75 | # otherwise we give up and throw an error 76 | macro checknullend() 77 | return esc(quote 78 | !opt.nullcheck && @goto error 79 | i = 1 80 | while true 81 | b == opt.null[i] || @goto error 82 | (i == length(opt.null) || eof(io)) && break 83 | b = readbyte(io) 84 | i += 1 85 | end 86 | if !eof(io) 87 | b = readbyte(io) 88 | @checkdone(null) 89 | end 90 | state[] = EOF 91 | @goto null 92 | end) 93 | end 94 | """ 95 | `CSV.parsefield(io::IO, ::Type{T}, opt::CSV.Options=CSV.Options(), row=0, col=0)` => `Union{T, Missing}` 96 | `CSV.parsefield(s::CSV.Source, ::Type{T}, row=0, col=0)` => `Union{T, Missing}` 97 | 98 | `io` is an `IO` type that is positioned at the first byte/character of a delimited-file field (i.e. a single cell). 99 | Leading whitespace is ignored for Integer and Float types. 100 | Specialized methods exist for Integer, Float, String, Date, and DateTime. 101 | For other types `T`, a generic fallback requires `parse(T, str::String)` to be defined. 102 | The field value may also be wrapped in `opt.quotechar`; two consecutive `opt.quotechar`s result in a null field. 103 | `opt.null` is also checked if there is a custom value provided (i.e. "NA", "\\N", etc.) 104 | For numeric fields, if the field is non-null and non-digit characters are encountered at any point before a delimiter or newline, an error is thrown. 105 | 106 | The second method of `CSV.parsefield` operates on a `CSV.Source` directly, allowing for easy usage when writing custom parsing routines. 107 | Do note, however, that the `row` and `col` arguments are for error-reporting purposes only. A `CSV.Source` maintains internal state with 108 | regard to the underlying data buffer and can **only** parse fields sequentially. This means that `CSV.parsefield` needs to be called somewhat like: 109 | 110 | ```julia 111 | source = CSV.Source(file) 112 | 113 | types = Data.types(source) 114 | 115 | for col = 1:length(types) 116 | println(coalesce(CSV.parsefield(source, types[col]), "\"\"")) 117 | end 118 | ``` 119 | """ 120 | function parsefield end 121 | 122 | const NULLTHROW = (row, col)->throw(Missings.MissingException("encountered a missing value for a non-null column type on row = $row, col = $col")) 123 | const NULLRETURN = (row, col)->missing 124 | 125 | parsefield(source::CSV.Source, ::Type{T}, row=0, col=0, state::P=P()) where {T} = CSV.parsefield(source.io, T, source.options, row, col, state, T !== Missing ? NULLTHROW : NULLRETURN) 126 | parsefield(source::CSV.Source, ::Type{Union{T, Missing}}, row=0, col=0, state::P=P()) where {T} = CSV.parsefield(source.io, T, source.options, row, col, state, NULLRETURN) 127 | parsefield(source::CSV.Source, ::Type{Missing}, row=0, col=0, state::P=P()) = CSV.parsefield(source.io, WeakRefString{UInt8}, source.options, row, col, state, NULLRETURN) 128 | 129 | @inline parsefield(io::IO, ::Type{T}, opt::CSV.Options=CSV.Options(), row=0, col=0, state::P=P()) where {T} = parsefield(io, T, opt, row, col, state, T !== Missing ?
NULLTHROW : NULLRETURN) 130 | @inline parsefield(io::IO, ::Type{Union{T, Missing}}, opt::CSV.Options=CSV.Options(), row=0, col=0, state::P=P()) where {T} = parsefield(io, T, opt, row, col, state, NULLRETURN) 131 | @inline parsefield(io::IO, ::Type{Missing}, opt::CSV.Options=CSV.Options(), row=0, col=0, state::P=P()) = parsefield(io, WeakRefString{UInt8}, opt, row, col, state, NULLRETURN) 132 | 133 | function parsefield(io::IO, ::Type{T}, opt::CSV.Options, row, col, state, ifnull::Function) where {T <: Integer} 134 | @checknullstart() 135 | v = zero(T) 136 | negative = false 137 | if b == MINUS # check for leading '-' or '+' 138 | c = peekbyte(io) 139 | if NEG_ONE < c < TEN 140 | negative = true 141 | b = readbyte(io) 142 | end 143 | elseif b == PLUS 144 | c = peekbyte(io) 145 | if NEG_ONE < c < TEN 146 | b = readbyte(io) 147 | end 148 | end 149 | while NEG_ONE < b < TEN 150 | # process digits 151 | v, ov_mul = Base.mul_with_overflow(v, T(10)) 152 | v, ov_add = Base.add_with_overflow(v, T(b - ZERO)) 153 | (ov_mul | ov_add) && throw(OverflowError("overflow parsing $T, parsed $v")) 154 | eof(io) && (state[] = EOF; @goto done) 155 | b = readbyte(io) 156 | end 157 | @checkdone(done) 158 | @checknullend() 159 | 160 | @label done 161 | return ifelse(negative, -v, v) 162 | 163 | @label null 164 | return ifnull(row, col) 165 | 166 | @label error 167 | throw(ParsingException(T, b, row, col)) 168 | end 169 | 170 | const BUF = IOBuffer() 171 | 172 | getptr(io::IO) = C_NULL 173 | getptr(io::IOBuffer) = pointer(io.data, io.ptr) 174 | incr(io::IO, b) = Base.write(BUF, b) 175 | incr(io::IOBuffer, b) = 1 176 | 177 | make(io::IOBuffer, ::Type{WeakRefString{UInt8}}, ptr, len) = WeakRefString(Ptr{UInt8}(ptr), len) 178 | make(io::IOBuffer, ::Type{String}, ptr, len) = unsafe_string(ptr, len) 179 | make(io::IO, ::Type{WeakRefString{UInt8}}, ptr, len) = String(take!(BUF)) 180 | make(io::IO, ::Type{String}, ptr, len) = String(take!(BUF)) 181 | 182 | function parsefield(io::IO, T::Type{<:AbstractString}, opt::CSV.Options, row, col, state, ifnull::Function) 183 | eof(io) && (state[] = EOF; @goto null) 184 | ptr = getptr(io) 185 | len = 0 186 | nullcheck = opt.nullcheck # if null is "", then we don't need to byte match it 187 | n = opt.null 188 | q = opt.quotechar 189 | e = opt.escapechar 190 | d = opt.delim 191 | nulllen = length(n) 192 | @inbounds while !eof(io) 193 | b = readbyte(io) 194 | if b == q 195 | ptr += 1 196 | while !eof(io) 197 | b = readbyte(io) 198 | if b == e 199 | if eof(io) 200 | break 201 | elseif e == q && peekbyte(io) != q 202 | break 203 | end 204 | len += incr(io, b) 205 | b = readbyte(io) 206 | elseif b == q 207 | break 208 | end 209 | (nullcheck && len+1 <= nulllen && b == n[len+1]) || (nullcheck = false) 210 | len += incr(io, b) 211 | end 212 | elseif b == d 213 | state[] = Delimiter 214 | break 215 | elseif b == NEWLINE 216 | state[] = Newline 217 | break 218 | elseif b == RETURN 219 | state[] = Newline 220 | !eof(io) && peekbyte(io) == NEWLINE && readbyte(io) 221 | break 222 | else 223 | (nullcheck && len+1 <= nulllen && b == n[len+1]) || (nullcheck = false) 224 | len += incr(io, b) 225 | end 226 | end 227 | eof(io) && (state[] = EOF) 228 | (len == 0 || nullcheck) && @goto null 229 | return make(io, T, ptr, len) 230 | 231 | @label null 232 | take!(BUF) 233 | return ifnull(row, col) 234 | end 235 | 236 | function parsefield(io::IO, ::Type{Date}, opt::CSV.Options, row, col, state, ifnull::Function) 237 | v = parsefield(io, String, opt, row, col, state, ifnull) 238 | return v isa Missing ? 
ifnull(row, col) : Date(v, opt.dateformat) 239 | end 240 | function parsefield(io::IO, ::Type{DateTime}, opt::CSV.Options, row, col, state, ifnull::Function) 241 | v = parsefield(io, String, opt, row, col, state, ifnull) 242 | return v isa Missing ? ifnull(row, col) : DateTime(v, opt.dateformat) 243 | end 244 | 245 | function parsefield(io::IO, ::Type{Char}, opt::CSV.Options, row, col, state, ifnull::Function) 246 | @checknullstart() 247 | c = b 248 | eof(io) && (state[] = EOF; @goto done) 249 | opt.nullcheck && b == opt.null[1] && @goto null 250 | b = readbyte(io) 251 | @checkdone(done) 252 | @checknullend() 253 | 254 | @label done 255 | return Char(c) 256 | 257 | @label null 258 | return ifnull(row, col) 259 | 260 | @label error 261 | throw(ParsingException(Char, b, row, col)) 262 | end 263 | 264 | function parsefield(io::IO, ::Type{Bool}, opt::CSV.Options, row, col, state, ifnull::Function) 265 | @checknullstart() 266 | truestring = opt.truestring 267 | falsestring = opt.falsestring 268 | i = 1 269 | if b == truestring[i] 270 | v = true 271 | while true 272 | if eof(io) 273 | if i == length(truestring) 274 | state[] = EOF 275 | @goto done 276 | end 277 | @goto error 278 | end 279 | b = readbyte(io) 280 | i += 1 281 | i > length(truestring) && break 282 | b == truestring[i] || @goto error 283 | end 284 | @checkdone(done) 285 | elseif b == falsestring[i] 286 | v = false 287 | while true 288 | if eof(io) 289 | if i == length(falsestring) 290 | state[] = EOF 291 | @goto done 292 | end 293 | @goto error 294 | end 295 | b = readbyte(io) 296 | i += 1 297 | i > length(falsestring) && break 298 | b == falsestring[i] || @goto error 299 | end 300 | @checkdone(done) 301 | end 302 | @checknullend() 303 | 304 | @label done 305 | return v 306 | 307 | @label null 308 | return ifnull(row, col) 309 | 310 | @label error 311 | throw(ParsingException(Bool, b, row, col)) 312 | end 313 | 314 | function parsefield(io::IO, ::Type{<:Union{CategoricalValue, CategoricalString}}, opt::CSV.Options, row, col, state, ifnull::Function) 315 | v = parsefield(io, WeakRefString{UInt8}, opt, row, col, state, ifnull) 316 | return v isa Missing ? ifnull(row, col) : v 317 | end 318 | 319 | # Generic fallback 320 | function parsefield(io::IO, T, opt::CSV.Options, row, col, state, ifnull::Function) 321 | v = parsefield(io, String, opt, row, col, state, ifnull) 322 | ismissing(v) && return ifnull(row, col) 323 | T === Missing && throw(ParsingException("encountered non-null value for a null-only column on row = $row, col = $col: '$v'")) 324 | return parse(T, v) 325 | end 326 | -------------------------------------------------------------------------------- /src/io.jl: -------------------------------------------------------------------------------- 1 | """ 2 | CSV.readline(io::IO, q='"', e='\\', buf::IOBuffer=IOBuffer()) => String 3 | CSV.readline(source::CSV.Source) => String 4 | 5 | Read a single line from `io` (any `IO` type) or a `CSV.Source` as a `String` object. 6 | This function mirrors `Base.readline` except that the newlines within quoted 7 | fields are ignored (e.g. value1, value2, \"value3 with \n embedded newlines\"). 8 | Uses `buf::IOBuffer` for intermediate IO operations, if specified. 
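For example, a small sketch of the quoted-newline behavior (the field's embedded newline is kept, and each returned line includes its trailing newline):

```julia
io = IOBuffer("a,\"b1\nb2\",c\nx,y,z\n")
CSV.readline(io) # returns the whole first record; the quoted field keeps its embedded newline
CSV.readline(io) # returns "x,y,z" plus its trailing newline
```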
9 | """ 10 | function readline end 11 | 12 | function readline(io::IO, q::UInt8, e::UInt8, buf::IOBuffer=IOBuffer()) 13 | while !eof(io) 14 | b = readbyte(io) 15 | Base.write(buf, b) 16 | if b == q 17 | while !eof(io) 18 | b = readbyte(io) 19 | Base.write(buf, b) 20 | if b == e 21 | if eof(io) 22 | break 23 | elseif e == q && peekbyte(io) != q 24 | break 25 | end 26 | b = readbyte(io) 27 | Base.write(buf, b) 28 | elseif b == q 29 | break 30 | end 31 | end 32 | elseif b == NEWLINE 33 | break 34 | elseif b == RETURN 35 | !eof(io) && peekbyte(io) == NEWLINE && Base.write(buf, readbyte(io)) 36 | break 37 | end 38 | end 39 | return String(take!(buf)) 40 | end 41 | readline(io::IO, q='"', e='\\', buf::IOBuffer=IOBuffer()) = readline(io, UInt8(q), UInt8(e), buf) 42 | readline(source::CSV.Source) = readline(source.io, source.options.quotechar, source.options.escapechar) 43 | 44 | # contents of a single CSV table field as returned by readsplitline!() 45 | struct RawField 46 | value::String # unparsed contents 47 | isquoted::Bool # whether the field value was quoted or not 48 | end 49 | 50 | Base.:(==)(a::RawField, b::RawField) = (a.isquoted == b.isquoted) && (a.value == b.value) 51 | 52 | """ 53 | CSV.readsplitline!(vals::Vector{RawField}, io, d=',', q='"', e='\\', buf::IOBuffer=IOBuffer()) 54 | CSV.readsplitline!(vals::Vector{RawField}, source::CSV.Source) 55 | 56 | Read a single, delimited line from `io` (any `IO` type) or a `CSV.Source` as a `Vector{RawField}` and 57 | store the values in `vals`. 58 | Delimited fields are separated by `d`, quoted by `q` and escaped by `e` ASCII characters. 59 | The contents of `vals` are replaced. 60 | Uses `buf::IOBuffer` for intermediate IO operations, if specified. 61 | """ 62 | function readsplitline! end 63 | 64 | @enum ReadSplitLineState RSL_IN_FIELD RSL_IN_QUOTE RSL_AFTER_QUOTE RSL_AFTER_DELIM RSL_AFTER_NEWLINE 65 | 66 | function readsplitline!(vals::Vector{RawField}, io::IO, d::UInt8, q::UInt8, e::UInt8, buf::IOBuffer=IOBuffer()) 67 | empty!(vals) 68 | state = RSL_AFTER_DELIM 69 | push_buf_to_vals!() = push!(vals, RawField(String(take!(buf)), state==RSL_AFTER_QUOTE)) 70 | while !eof(io) 71 | b = readbyte(io) 72 | if state == RSL_IN_QUOTE # in the quoted string 73 | if b == e # the escape character, read the next after it 74 | Base.write(buf, b) 75 | @assert !eof(io) 76 | if e == q && peekbyte(io) != q 77 | state = RSL_AFTER_QUOTE 78 | break 79 | end 80 | b = readbyte(io) 81 | Base.write(buf, b) 82 | elseif b == q # end the quoted string 83 | state = RSL_AFTER_QUOTE 84 | else 85 | Base.write(buf, b) 86 | end 87 | elseif b == d # delimiter 88 | if state == RSL_AFTER_DELIM # empty field 89 | push!(vals, RawField("", false)) 90 | else 91 | push_buf_to_vals!() 92 | end 93 | state = RSL_AFTER_DELIM 94 | elseif b == q # start of quote 95 | if state == RSL_AFTER_DELIM 96 | state = RSL_IN_QUOTE 97 | else 98 | throw(ParsingException("Unexpected start of quote ($q), use \"$e$q\" to type \"$q\"")) 99 | end 100 | elseif b == NEWLINE 101 | push_buf_to_vals!() # add the last field 102 | state = RSL_AFTER_NEWLINE 103 | break 104 | elseif b == RETURN 105 | !eof(io) && peekbyte(io) == NEWLINE && readbyte(io) 106 | push_buf_to_vals!() # add the last field 107 | state = RSL_AFTER_NEWLINE 108 | break 109 | else 110 | if state == RSL_AFTER_QUOTE 111 | throw(ParsingException("Unexpected character ($b) after the end of quote ($q)")) 112 | elseif b == e # the escape character, read the next after it 113 | Base.write(buf, b) 114 | @assert !eof(io) 115 | b = readbyte(io) 116 | end
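# ordinary character (or one just unescaped above): buffer it and note that we are now inside an unquoted field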
117 | Base.write(buf, b) 118 | state = RSL_IN_FIELD 119 | end 120 | end 121 | if state == RSL_IN_QUOTE 122 | @assert eof(io) 123 | throw(ParsingException("EOF while trying to read the closing quote")) 124 | elseif state == RSL_IN_FIELD || state == RSL_AFTER_DELIM # file ended without the newline, store the current buf 125 | eof(io) 126 | push_buf_to_vals!() 127 | end 128 | return vals 129 | end 130 | readsplitline!(vals::Vector{RawField}, io::IO, d=',', q='"', e='\\', buf::IOBuffer=IOBuffer()) = readsplitline!(vals, io, UInt8(d), UInt8(q), UInt8(e), buf) 131 | readsplitline!(vals::Vector{RawField}, source::CSV.Source) = readsplitline!(vals, source.io, source.options.delim, source.options.quotechar, source.options.escapechar) 132 | 133 | readsplitline(io::IO, d=',', q='"', e='\\', buf::IOBuffer=IOBuffer()) = 134 | readsplitline!(Vector{RawField}(), io, d, q, e, buf) 135 | readsplitline(args...) = readsplitline!(Vector{RawField}(), args...) 136 | 137 | """ 138 | CSV.countlines(io::IO, quotechar, escapechar) => Int 139 | CSV.countlines(source::CSV.Source) => Int 140 | 141 | Count the number of lines in a file, accounting for potentially embedded newlines in quoted fields. 142 | """ 143 | function countlines(io::IO, q::UInt8, e::UInt8) 144 | nl = 1 145 | b = 0x00 146 | while !eof(io) 147 | b = readbyte(io) 148 | if b == q 149 | while !eof(io) 150 | b = readbyte(io) 151 | if b == e 152 | if eof(io) 153 | break 154 | elseif e == q && peekbyte(io) != q 155 | break 156 | end 157 | b = readbyte(io) 158 | elseif b == q 159 | break 160 | end 161 | end 162 | elseif b == CSV.NEWLINE 163 | nl += 1 164 | elseif b == CSV.RETURN 165 | nl += 1 166 | !eof(io) && peekbyte(io) == CSV.NEWLINE && readbyte(io) 167 | end 168 | end 169 | return nl - (b == CSV.NEWLINE || b == CSV.RETURN) 170 | end 171 | countlines(io::IO, q='"', e='\\') = countlines(io, UInt8(q), UInt8(e)) 172 | countlines(source::CSV.Source) = countlines(source.io, source.options.quotechar, source.options.escapechar) 173 | 174 | function skipto!(f::IO, cur, dest, q, e) 175 | cur >= dest && return 176 | for _ = 1:(dest-cur) 177 | CSV.readline(f,q,e) 178 | end 179 | return 180 | end 181 | 182 | # try to infer the type of the value in `val`. The precedence of type checking is `Int` => `Float64` => `Date` => `DateTime` => `String` 183 | timetype(df::Dates.DateFormat) = any(typeof(T) in (Dates.DatePart{'H'}, Dates.DatePart{'M'}, Dates.DatePart{'S'}, Dates.DatePart{'s'}) for T in df.tokens) ? 
DateTime : Date 184 | 185 | # column types start out as Any, but we get rid of them as soon as possible 186 | promote_type2(T::Type{<:Any}, ::Type{Any}) = T 187 | promote_type2(::Type{Any}, T::Type{<:Any}) = T 188 | # same types 189 | promote_type2(::Type{T}, ::Type{T}) where {T} = T 190 | # if we come across a Missing field, turn that column type into a Union{T, Missing} 191 | promote_type2(T::Type{<:Any}, ::Type{Missing}) = Union{T, Missing} 192 | promote_type2(::Type{Missing}, T::Type{<:Any}) = Union{T, Missing} 193 | # these definitions allow Union{Int, Missing} to promote to Union{Float64, Missing} 194 | promote_type2(::Type{Union{T, Missing}}, ::Type{S}) where {T, S} = Union{promote_type2(T, S), Missing} 195 | promote_type2(::Type{S}, ::Type{Union{T, Missing}}) where {T, S} = Union{promote_type2(T, S), Missing} 196 | promote_type2(::Type{Union{T, Missing}}, ::Type{Union{S, Missing}}) where {T, S} = Union{promote_type2(T, S), Missing} 197 | promote_type2(::Type{Union{WeakRefString{UInt8}, Missing}}, ::Type{WeakRefString{UInt8}}) = Union{WeakRefString{UInt8}, Missing} 198 | promote_type2(::Type{WeakRefString{UInt8}}, ::Type{Union{WeakRefString{UInt8}, Missing}}) = Union{WeakRefString{UInt8}, Missing} 199 | # basic promote type definitions from Base 200 | promote_type2(::Type{Int}, ::Type{Float64}) = Float64 201 | promote_type2(::Type{Float64}, ::Type{Int}) = Float64 202 | promote_type2(::Type{Date}, ::Type{DateTime}) = DateTime 203 | promote_type2(::Type{DateTime}, ::Type{Date}) = DateTime 204 | # for cases when our current type can't widen, just promote to WeakRefString 205 | promote_type2(::Type{<:Real}, ::Type{<:Dates.TimeType}) = WeakRefString{UInt8} 206 | promote_type2(::Type{<:Dates.TimeType}, ::Type{<:Real}) = WeakRefString{UInt8} 207 | promote_type2(::Type{T}, ::Type{WeakRefString{UInt8}}) where T = WeakRefString{UInt8} 208 | promote_type2(::Type{Union{T, Missing}}, ::Type{WeakRefString{UInt8}}) where T = Union{WeakRefString{UInt8}, Missing} 209 | promote_type2(::Type{WeakRefString{UInt8}}, ::Type{T}) where T = WeakRefString{UInt8} 210 | promote_type2(::Type{WeakRefString{UInt8}}, ::Type{Union{T, Missing}}) where T = Union{WeakRefString{UInt8}, Missing} 211 | # avoid ambiguity 212 | promote_type2(::Type{Any}, ::Type{WeakRefString{UInt8}}) = WeakRefString{UInt8} 213 | promote_type2(::Type{WeakRefString{UInt8}}, ::Type{Any}) = WeakRefString{UInt8} 214 | promote_type2(::Type{WeakRefString{UInt8}}, ::Type{WeakRefString{UInt8}}) = WeakRefString{UInt8} 215 | promote_type2(::Type{WeakRefString{UInt8}}, ::Type{Missing}) = Union{WeakRefString{UInt8}, Missing} 216 | promote_type2(::Type{Missing}, ::Type{WeakRefString{UInt8}}) = Union{WeakRefString{UInt8}, Missing} 217 | promote_type2(::Type{Any}, ::Type{Missing}) = Missing 218 | promote_type2(::Type{Missing}, ::Type{Missing}) = Missing 219 | 220 | function detecttype(io, opt::CSV.Options{D}, prevT, levels) where {D} 221 | pos = position(io) 222 | # update levels 223 | try 224 | lev = CSV.parsefield(io, Union{WeakRefString{UInt8}, Missing}, opt) 225 | ismissing(lev) || (levels[lev] = get!(levels, lev, 0) + 1) 226 | end 227 | if Int <: prevT || prevT == Missing 228 | try 229 | seek(io, pos) 230 | v1 = CSV.parsefield(io, Union{Int, Missing}, opt) 231 | # print("...parsed = '$v1'...") 232 | return v1 isa Missing ? 
Missing : Int 233 | end 234 | end 235 | if Float64 <: prevT || Int <: prevT || prevT == Missing 236 | try 237 | seek(io, pos) 238 | v2 = CSV.parsefield(io, Union{Float64, Missing}, opt) 239 | # print("...parsed = '$v2'...") 240 | return v2 isa Missing ? Missing : Float64 241 | end 242 | end 243 | if Date <: prevT || DateTime <: prevT || prevT == Missing 244 | if D == Nothing 245 | # try to auto-detect TimeType 246 | try 247 | seek(io, pos) 248 | v3 = CSV.parsefield(io, Union{String, Missing}, opt) 249 | # print("...parsed = '$v3'...") 250 | return v3 isa Missing ? Missing : (Date(v3, Dates.ISODateFormat); Date) 251 | end 252 | try 253 | seek(io, pos) 254 | v4 = CSV.parsefield(io, Union{String, Missing}, opt) 255 | # print("...parsed = '$v4'...") 256 | return v4 isa Missing ? Missing : (DateTime(v4, Dates.ISODateTimeFormat); DateTime) 257 | end 258 | else 259 | # use user-provided dateformat 260 | try 261 | seek(io, pos) 262 | T = timetype(opt.dateformat) 263 | v5 = CSV.parsefield(io, Union{T, Missing}, opt) 264 | return v5 isa Missing ? Missing : T 265 | end 266 | end 267 | end 268 | if Bool <: prevT || prevT == Missing 269 | try 270 | seek(io, pos) 271 | v6 = CSV.parsefield(io, Union{Bool, Missing}, opt) 272 | return v6 isa Missing ? Missing : Bool 273 | end 274 | end 275 | try 276 | seek(io, pos) 277 | v7 = CSV.parsefield(io, Union{WeakRefString{UInt8}, Missing}, opt) 278 | # print("...parsed = '$v7'...") 279 | return v7 isa Missing ? Missing : WeakRefString{UInt8} 280 | end 281 | return Missing 282 | end 283 | -------------------------------------------------------------------------------- /src/TransposedSource.jl: -------------------------------------------------------------------------------- 1 | # independent constructor 2 | function TransposedSource(fullpath::Union{AbstractString,IO}; 3 | 4 | delim=COMMA, 5 | quotechar=QUOTE, 6 | escapechar=ESCAPE, 7 | null::AbstractString="", 8 | 9 | header::Union{Integer, UnitRange{Int}, Vector}=1, # header can be a row number, range of rows, or actual string vector 10 | datarow::Int=-1, # by default, data starts immediately after header or start of file 11 | types=Type[], 12 | nullable::Union{Bool, Missing}=missing, 13 | dateformat=missing, 14 | decimal=PERIOD, 15 | truestring="true", 16 | falsestring="false", 17 | categorical::Bool=true, 18 | weakrefstrings::Bool=true, 19 | 20 | footerskip::Int=0, 21 | rows_for_type_detect::Int=100, 22 | rows::Int=0, 23 | use_mmap::Bool=true) 24 | # make sure character args are UInt8 25 | isascii(delim) || throw(ArgumentError("non-ASCII characters not supported for delim argument: $delim")) 26 | isascii(quotechar) || throw(ArgumentError("non-ASCII characters not supported for quotechar argument: $quotechar")) 27 | isascii(escapechar) || throw(ArgumentError("non-ASCII characters not supported for escapechar argument: $escapechar")) 28 | return CSV.TransposedSource(fullpath=fullpath, 29 | options=CSV.Options(delim=typeof(delim) <: String ? UInt8(first(delim)) : (delim % UInt8), 30 | quotechar=typeof(quotechar) <: String ? UInt8(first(quotechar)) : (quotechar % UInt8), 31 | escapechar=typeof(escapechar) <: String ? 
UInt8(first(escapechar)) : (escapechar % UInt8), 32 | null=null, dateformat=dateformat, decimal=decimal, truestring=truestring, falsestring=falsestring), 33 | header=header, datarow=datarow, types=types, nullable=nullable, categorical=categorical, footerskip=footerskip, 34 | rows_for_type_detect=rows_for_type_detect, rows=rows, use_mmap=use_mmap) 35 | end 36 | 37 | function TransposedSource(;fullpath::Union{AbstractString,IO}="", 38 | options::CSV.Options{D}=CSV.Options(), 39 | 40 | header::Union{Integer,UnitRange{Int},Vector}=1, # header can be a row number, range of rows, or actual string vector 41 | datarow::Int=-1, # by default, data starts immediately after header or start of file 42 | types=Type[], 43 | nullable::Union{Bool, Missing}=missing, 44 | categorical::Bool=true, 45 | weakrefstrings::Bool=true, 46 | 47 | footerskip::Int=0, 48 | rows_for_type_detect::Int=100, 49 | rows::Int=0, 50 | use_mmap::Bool=true) where {D} 51 | # argument checks 52 | isa(fullpath, AbstractString) && (isfile(fullpath) || throw(ArgumentError("\"$fullpath\" is not a valid file"))) 53 | header = (isa(header, Integer) && header == 1 && datarow == 1) ? -1 : header 54 | isa(header, Integer) && datarow != -1 && (datarow > header || throw(ArgumentError("data row ($datarow) must come after header row ($header)"))) 55 | 56 | # open the file for property detection 57 | if isa(fullpath, IOBuffer) 58 | source = fullpath 59 | fs = nb_available(fullpath) 60 | fullpath = "" 61 | elseif isa(fullpath, IO) 62 | source = IOBuffer(Base.read(fullpath)) 63 | fs = nb_available(fullpath) 64 | fullpath = isdefined(fullpath, :name) ? fullpath.name : "__IO__" 65 | else 66 | source = open(fullpath, "r") do f 67 | IOBuffer(use_mmap ? Mmap.mmap(f) : Base.read(f)) 68 | end 69 | fs = filesize(fullpath) 70 | end 71 | options.datarow != -1 && (datarow = options.datarow) 72 | options.rows != 0 && (rows = options.rows) 73 | options.header != 1 && (header = options.header) 74 | !isempty(options.types) && (types = options.types) 75 | startpos = position(source) 76 | # BOM character detection 77 | if fs > 0 && peekbyte(source) == 0xef 78 | readbyte(source) 79 | readbyte(source) == 0xbb || seek(source, startpos) 80 | readbyte(source) == 0xbf || seek(source, startpos) 81 | end 82 | datarow = datarow == -1 ? (isa(header, Vector) ? 0 : last(header)) + 1 : datarow # by default, data starts on line after header 83 | 84 | if isa(header, Integer) && header > 0 85 | # skip to header column to read column names 86 | row = 1 87 | while row < header 88 | while !eof(source) 89 | b = readbyte(source) 90 | b == options.delim && break 91 | end 92 | row += 1 93 | end 94 | # source now at start of 1st header cell 95 | columnnames = [strip(parsefield(source, String, options, 1, row))] 96 | columnpositions = [position(source)] 97 | datapos = position(source) 98 | rows = 0 99 | b = eof(source) ? 
0x00 : peekbyte(source) 100 | while !eof(source) && b != NEWLINE && b != RETURN 101 | b = readbyte(source) 102 | rows += ifelse(b == options.delim, 1, 0) 103 | rows += ifelse(b == NEWLINE, 1, 0) 104 | rows += ifelse(b == RETURN, 1, 0) 105 | rows += ifelse(eof(source), 1, 0) 106 | end 107 | # we're now done w/ column 1, if EOF we're done, otherwise, parse column 2's column name 108 | cols = 1 109 | while !eof(source) 110 | # skip to header column to read column names 111 | row = 1 112 | while row < header 113 | while !eof(source) 114 | b = readbyte(source) 115 | b == options.delim && break 116 | end 117 | row += 1 118 | end 119 | cols += 1 120 | push!(columnnames, strip(parsefield(source, String, options, cols, row))) 121 | push!(columnpositions, position(source)) 122 | b = eof(source) ? 0x00 : peekbyte(source) 123 | while !eof(source) && b != NEWLINE && b != RETURN 124 | b = readbyte(source) 125 | end 126 | end 127 | seek(source, datapos) 128 | elseif isa(header, AbstractRange) 129 | # column names span several columns 130 | throw(ArgumentError("not implemented for transposed csv files")) 131 | elseif fs == 0 132 | # empty file, use column names if provided 133 | datapos = position(source) 134 | columnnames = header 135 | cols = length(columnnames) 136 | else 137 | # column names provided explicitly or should be generated, they don't exist in data 138 | # skip to header column to read column names 139 | row = 1 140 | while row < datarow 141 | while !eof(source) 142 | b = readbyte(source) 143 | b == options.delim && break 144 | end 145 | row += 1 146 | end 147 | # source now at start of 1st header cell 148 | columnnames = [isa(header, Integer) || isempty(header) ? "Column1" : header[1]] 149 | columnpositions = [position(source)] 150 | datapos = position(source) 151 | rows = 0 152 | b = peekbyte(source) 153 | while !eof(source) && b != NEWLINE && b != RETURN 154 | b = readbyte(source) 155 | rows += ifelse(b == options.delim, 1, 0) 156 | rows += ifelse(b == NEWLINE, 1, 0) 157 | rows += ifelse(b == RETURN, 1, 0) 158 | rows += ifelse(eof(source), 1, 0) 159 | end 160 | # we're now done w/ column 1, if EOF we're done, otherwise, parse column 2's column name 161 | cols = 1 162 | while !eof(source) 163 | # skip to datarow column 164 | row = 1 165 | while row < datarow 166 | while !eof(source) 167 | b = readbyte(source) 168 | b == options.delim && break 169 | end 170 | row += 1 171 | end 172 | cols += 1 173 | push!(columnnames, isa(header, Integer) || isempty(header) ? "Column$cols" : header[cols]) 174 | push!(columnpositions, position(source)) 175 | b = peekbyte(source) 176 | while !eof(source) && b != NEWLINE && b != RETURN 177 | b = readbyte(source) 178 | end 179 | end 180 | seek(source, datapos) 181 | end 182 | rows = rows - footerskip # rows now equals the actual number of rows in the dataset 183 | startingcolumnpositions = deepcopy(columnpositions) 184 | # Detect column types 185 | cols = length(columnnames) 186 | if isa(types, Vector) && length(types) == cols 187 | columntypes = types 188 | elseif isa(types, Dict) || isempty(types) 189 | columntypes = fill!(Vector{Type}(uninitialized, cols), Any) 190 | levels = [Dict{WeakRefString{UInt8}, Int}() for _ = 1:cols] 191 | lineschecked = 0 192 | while !eof(source) && lineschecked < min(rows < 0 ?
rows_for_type_detect : rows, rows_for_type_detect) 193 | lineschecked += 1 194 | # println("type detecting on row = $lineschecked...") 195 | for i = 1:cols 196 | # print("\tdetecting col = $i...") 197 | seek(source, columnpositions[i]) 198 | typ = CSV.detecttype(source, options, columntypes[i], levels[i])::Type 199 | columnpositions[i] = position(source) 200 | # print(typ) 201 | columntypes[i] = CSV.promote_type2(columntypes[i], typ) 202 | # println("...promoting to: ", columntypes[i]) 203 | end 204 | end 205 | if options.dateformat === nothing && any(x->Missings.T(x) <: Dates.TimeType, columntypes) 206 | # auto-detected TimeType 207 | options = Options(delim=options.delim, quotechar=options.quotechar, escapechar=options.escapechar, 208 | null=options.null, dateformat=Dates.ISODateTimeFormat, decimal=options.decimal, 209 | datarow=options.datarow, rows=options.rows, header=options.header, types=options.types) 210 | end 211 | if categorical 212 | for i = 1:cols 213 | T = columntypes[i] 214 | if length(levels[i]) / sum(values(levels[i])) < .67 && T !== Missing && Missings.T(T) <: WeakRefString 215 | columntypes[i] = substitute(T, CategoricalArrays.catvaluetype(Missings.T(T), UInt32)) 216 | end 217 | end 218 | end 219 | else 220 | throw(ArgumentError("$cols columns detected, but the `types` argument has $(length(types)) entries")) 221 | end 222 | 223 | if isa(types, Dict{Int, <:Any}) 224 | for (col, typ) in types 225 | columntypes[col] = typ 226 | end 227 | elseif isa(types, Dict{String, <:Any}) 228 | for (col, typ) in types 229 | c = findfirst(x->x == col, columnnames) 230 | columntypes[c] = typ 231 | end 232 | end 233 | if !weakrefstrings 234 | columntypes = [(T !== Missing && Missings.T(T) <: WeakRefString) ? substitute(T, String) : T for T in columntypes] 235 | end 236 | if !ismissing(nullable) 237 | if nullable # allow missing values in all columns 238 | for i = 1:cols 239 | T = columntypes[i] 240 | columntypes[i] = Union{Missings.T(T), Missing} 241 | end 242 | else # disallow missing values in all columns 243 | for i = 1:cols 244 | T = columntypes[i] 245 | columntypes[i] = Missings.T(T) 246 | end 247 | end 248 | end 249 | seek(source, datapos) 250 | sch = Data.Schema(columntypes, columnnames, ifelse(rows < 0, missing, rows)) 251 | return TransposedSource(sch, options, source, String(fullpath), datapos, startingcolumnpositions) 252 | end 253 | 254 | # construct a new TransposedSource from a Sink 255 | TransposedSource(s::CSV.Sink) = CSV.TransposedSource(fullpath=s.fullpath, options=s.options) 256 | 257 | # Data.Source interface 258 | "reset a `CSV.TransposedSource` to its beginning to be ready to parse data from again" 259 | Data.reset!(s::CSV.TransposedSource) = (seek(s.io, s.datapos); return nothing) 260 | Data.schema(source::CSV.TransposedSource) = source.schema 261 | Data.accesspattern(::Type{<:CSV.TransposedSource}) = Data.Sequential 262 | @inline Data.isdone(io::CSV.TransposedSource, row, col, rows, cols) = eof(io.io) || (!ismissing(rows) && row > rows) 263 | @inline Data.isdone(io::TransposedSource, row, col) = Data.isdone(io, row, col, size(io.schema)...)
264 | Data.streamtype(::Type{<:CSV.TransposedSource}, ::Type{Data.Field}) = true 265 | @inline function Data.streamfrom(source::CSV.TransposedSource, ::Type{Data.Field}, ::Type{T}, row, col::Int) where {T} 266 | seek(source.io, source.columnpositions[col]) 267 | v = CSV.parsefield(source.io, T, source.options, row, col) 268 | source.columnpositions[col] = position(source.io) 269 | return v 270 | end 271 | Data.reference(source::CSV.TransposedSource) = source.io.data 272 | -------------------------------------------------------------------------------- /src/Source.jl: -------------------------------------------------------------------------------- 1 | # independent constructor 2 | function Source(fullpath::Union{AbstractString,IO}; 3 | 4 | delim=COMMA, 5 | quotechar=QUOTE, 6 | escapechar=ESCAPE, 7 | null::AbstractString="", 8 | 9 | header::Union{Integer, UnitRange{Int}, Vector}=1, # header can be a row number, range of rows, or actual string vector 10 | datarow::Int=-1, # by default, data starts immediately after header or start of file 11 | types=Type[], 12 | nullable::Union{Bool, Missing}=missing, 13 | dateformat=nothing, 14 | decimal=PERIOD, 15 | truestring="true", 16 | falsestring="false", 17 | categorical::Bool=true, 18 | weakrefstrings::Bool=true, 19 | 20 | footerskip::Int=0, 21 | rows_for_type_detect::Int=100, 22 | rows::Int=0, 23 | use_mmap::Bool=true) 24 | # make sure character args are UInt8 25 | isascii(delim) || throw(ArgumentError("non-ASCII characters not supported for delim argument: $delim")) 26 | isascii(quotechar) || throw(ArgumentError("non-ASCII characters not supported for quotechar argument: $quotechar")) 27 | isascii(escapechar) || throw(ArgumentError("non-ASCII characters not supported for escapechar argument: $escapechar")) 28 | return CSV.Source(fullpath=fullpath, 29 | options=CSV.Options(delim=typeof(delim) <: String ? UInt8(first(delim)) : (delim % UInt8), 30 | quotechar=typeof(quotechar) <: String ? UInt8(first(quotechar)) : (quotechar % UInt8), 31 | escapechar=typeof(escapechar) <: String ? UInt8(first(escapechar)) : (escapechar % UInt8), 32 | null=null, dateformat=dateformat, decimal=decimal, truestring=truestring, falsestring=falsestring), 33 | header=header, datarow=datarow, types=types, nullable=nullable, categorical=categorical, weakrefstrings=weakrefstrings, footerskip=footerskip, 34 | rows_for_type_detect=rows_for_type_detect, rows=rows, use_mmap=use_mmap) 35 | end 36 | 37 | function Source(;fullpath::Union{AbstractString,IO}="", 38 | options::CSV.Options{D}=CSV.Options(), 39 | 40 | header::Union{Integer,UnitRange{Int},Vector}=1, # header can be a row number, range of rows, or actual string vector 41 | datarow::Int=-1, # by default, data starts immediately after header or start of file 42 | types=Type[], 43 | nullable::Union{Bool, Missing}=missing, 44 | categorical::Bool=true, 45 | weakrefstrings::Bool=true, 46 | 47 | footerskip::Int=0, 48 | rows_for_type_detect::Int=100, 49 | rows::Int=0, 50 | use_mmap::Bool=true) where {D} 51 | # argument checks 52 | isa(fullpath, AbstractString) && (isfile(fullpath) || throw(ArgumentError("\"$fullpath\" is not a valid file"))) 53 | header = (isa(header, Integer) && header == 1 && datarow == 1) ? 
-1 : header 54 | isa(header, Integer) && datarow != -1 && (datarow > header || throw(ArgumentError("data row ($datarow) must come after header row ($header)"))) 55 | 56 | # open the file for property detection 57 | if isa(fullpath, IOBuffer) 58 | source = fullpath 59 | fs = nb_available(fullpath) 60 | fullpath = "" 61 | elseif isa(fullpath, IO) 62 | source = IOBuffer(Base.read(fullpath)) 63 | fs = nb_available(source) 64 | fullpath = isdefined(fullpath, :name) ? fullpath.name : "__IO__" 65 | else 66 | source = open(fullpath, "r") do f 67 | IOBuffer(use_mmap ? Mmap.mmap(f) : Base.read(f)) 68 | end 69 | fs = filesize(fullpath) 70 | end 71 | options.datarow != -1 && (datarow = options.datarow) 72 | options.rows != 0 && (rows = options.rows) 73 | options.header != 1 && (header = options.header) 74 | !isempty(options.types) && (types = options.types) 75 | startpos = position(source) 76 | rows = rows == 0 ? CSV.countlines(source, options.quotechar, options.escapechar) : rows 77 | seek(source, startpos) 78 | # BOM character detection 79 | if fs > 0 && peekbyte(source) == 0xef 80 | readbyte(source) 81 | readbyte(source) == 0xbb || seek(source, startpos) 82 | readbyte(source) == 0xbf || seek(source, startpos) 83 | end 84 | datarow = datarow == -1 ? (isa(header, Vector) ? 0 : last(header)) + 1 : datarow # by default, data starts on line after header 85 | rows = fs == 0 ? -1 : max(-1, rows - datarow + 1 - footerskip) # rows now equals the actual number of rows in the dataset 86 | 87 | # figure out # of columns and header, either an Integer, AbstractRange, or Vector{String} 88 | # also ensure that `f` is positioned at the start of data 89 | row_vals = Vector{RawField}() 90 | if isa(header, Integer) 91 | # default header = 1 92 | if header <= 0 93 | CSV.skipto!(source,1,datarow,options.quotechar,options.escapechar) 94 | datapos = position(source) 95 | CSV.readsplitline!(row_vals, source,options.delim,options.quotechar,options.escapechar) 96 | seek(source, datapos) 97 | columnnames = ["Column$i" for i = eachindex(row_vals)] 98 | else 99 | CSV.skipto!(source,1,header,options.quotechar,options.escapechar) 100 | columnnames = [strip(x.value) for x in CSV.readsplitline!(row_vals, source,options.delim,options.quotechar,options.escapechar)] 101 | datarow != header+1 && CSV.skipto!(source,header+1,datarow,options.quotechar,options.escapechar) 102 | datapos = position(source) 103 | end 104 | elseif isa(header, AbstractRange) 105 | CSV.skipto!(source,1,first(header),options.quotechar,options.escapechar) 106 | columnnames = [x.value for x in readsplitline!(row_vals,source,options.delim,options.quotechar,options.escapechar)] 107 | for row = first(header):(last(header)-1) 108 | for (i,c) in enumerate([x.value for x in readsplitline!(row_vals,source,options.delim,options.quotechar,options.escapechar)]) 109 | columnnames[i] *= "_" * c 110 | end 111 | end 112 | datarow != last(header)+1 && CSV.skipto!(source,last(header)+1,datarow,options.quotechar,options.escapechar) 113 | datapos = position(source) 114 | elseif fs == 0 115 | datapos = position(source) 116 | columnnames = header 117 | cols = length(columnnames) 118 | else 119 | CSV.skipto!(source,1,datarow,options.quotechar,options.escapechar) 120 | datapos = position(source) 121 | readsplitline!(row_vals,source,options.delim,options.quotechar,options.escapechar) 122 | seek(source,datapos) 123 | if isempty(header) 124 | columnnames = ["Column$i" for i in eachindex(row_vals)] 125 | else 126 | length(header) == length(row_vals) || throw(ArgumentError("The length of 
provided header ($(length(header))) doesn't match the number of columns at row $datarow ($(length(row_vals)))")) 127 | columnnames = header 128 | end 129 | end 130 | 131 | # Detect column types 132 | cols = length(columnnames) 133 | if isa(types, Vector) && length(types) == cols 134 | # types might be a Vector{DataType}, which would be a problem if Union types are needed 135 | columntypes = convert(Vector{Type}, types) 136 | elseif isa(types, Dict) || isempty(types) 137 | columntypes = fill!(Vector{Type}(uninitialized, cols), Any) 138 | levels = [Dict{WeakRefString{UInt8}, Int}() for _ = 1:cols] 139 | lineschecked = 0 140 | while !eof(source) && lineschecked < min(rows < 0 ? rows_for_type_detect : rows, rows_for_type_detect) 141 | lineschecked += 1 142 | # println("type detecting on row = $lineschecked...") 143 | for i = 1:cols 144 | # print("\tdetecting col = $i...") 145 | typ = CSV.detecttype(source, options, columntypes[i], levels[i])::Type 146 | # print(typ) 147 | columntypes[i] = CSV.promote_type2(columntypes[i], typ) 148 | # println("...promoting to: ", columntypes[i]) 149 | end 150 | end 151 | if options.dateformat === nothing && any(x->Missings.T(x) <: Dates.TimeType, columntypes) 152 | # auto-detected TimeType 153 | options = Options(delim=options.delim, quotechar=options.quotechar, escapechar=options.escapechar, 154 | null=options.null, dateformat=Dates.ISODateTimeFormat, decimal=options.decimal, 155 | datarow=options.datarow, rows=options.rows, header=options.header, types=options.types) 156 | end 157 | if categorical 158 | for i = 1:cols 159 | T = columntypes[i] 160 | if length(levels[i]) / sum(values(levels[i])) < .67 && T !== Missing && Missings.T(T) <: WeakRefString 161 | columntypes[i] = substitute(T, CategoricalArrays.catvaluetype(Missings.T(T), UInt32)) 162 | end 163 | end 164 | end 165 | else 166 | throw(ArgumentError("$cols columns detected, but the `types` argument has $(length(types)) entries")) 167 | end 168 | if isa(types, Dict{Int, <:Any}) 169 | for (col, typ) in types 170 | columntypes[col] = typ 171 | end 172 | elseif isa(types, Dict{String, <:Any}) 173 | for (col, typ) in types 174 | c = findfirst(x->x == col, columnnames) 175 | columntypes[c] = typ 176 | end 177 | end 178 | if !weakrefstrings 179 | columntypes = [(T !== Missing && Missings.T(T) <: WeakRefString) ?
substitute(T, String) : T for T in columntypes] 180 | end 181 | if !ismissing(nullable) 182 | if nullable # allow missing values in all columns 183 | for i = 1:cols 184 | T = columntypes[i] 185 | columntypes[i] = Union{Missings.T(T), Missing} 186 | end 187 | else # disallow missing values in all columns 188 | for i = 1:cols 189 | T = columntypes[i] 190 | columntypes[i] = Missings.T(T) 191 | end 192 | end 193 | end 194 | seek(source, datapos) 195 | sch = Data.Schema(columntypes, columnnames, ifelse(rows < 0, missing, rows)) 196 | return Source(sch, options, source, String(fullpath), datapos) 197 | end 198 | 199 | # construct a new Source from a Sink 200 | Source(s::CSV.Sink) = CSV.Source(fullpath=s.fullpath, options=s.options) 201 | 202 | # Data.Source interface 203 | "reset a `CSV.Source` to its beginning to be ready to parse data from again" 204 | Data.reset!(s::CSV.Source) = (seek(s.io, s.datapos); return nothing) 205 | Data.schema(source::CSV.Source) = source.schema 206 | Data.accesspattern(::Type{<:CSV.Source}) = Data.Sequential 207 | @inline Data.isdone(io::CSV.Source, row, col, rows, cols) = eof(io.io) || (!ismissing(rows) && row > rows) 208 | @inline Data.isdone(io::Source, row, col) = Data.isdone(io, row, col, size(io.schema)...) 209 | Data.streamtype(::Type{<:CSV.Source}, ::Type{Data.Field}) = true 210 | @inline Data.streamfrom(source::CSV.Source, ::Type{Data.Field}, ::Type{T}, row, col::Int) where {T} = CSV.parsefield(source.io, T, source.options, row, col) 211 | Data.reference(source::CSV.Source) = source.io.data 212 | 213 | """ 214 | `CSV.read(fullpath::Union{AbstractString,IO}, sink::Type{T}=DataFrame, args...; kwargs...)` => `typeof(sink)` 215 | 216 | `CSV.read(fullpath::Union{AbstractString,IO}, sink::Data.Sink; kwargs...)` => `Data.Sink` 217 | 218 | 219 | parses a delimited file into a Julia structure (a DataFrame by default, but any valid `Data.Sink` may be requested). 220 | 221 | Minimal error reporting happens with `CSV.read` for performance reasons; for problematic csv files, try [`CSV.validate`](@ref), which takes the exact same arguments as `CSV.read` and provides much more information on why reading the file failed. 222 | 223 | Positional arguments: 224 | 225 | * `fullpath`; can be a file name (string) or other `IO` instance 226 | * `sink::Type{T}`; `DataFrame` by default, but may also be other `Data.Sink` types that support streaming via the `Data.Field` interface; note that the method argument can be the *type* of `Data.Sink`, plus any required arguments the sink may need (`args...`); 227 | alternatively, an already constructed `sink` may be passed (2nd method above) 228 | 229 | Keyword Arguments: 230 | 231 | * `delim::Union{Char,UInt8}`: how fields in the file are delimited; default `','` 232 | * `quotechar::Union{Char,UInt8}`: the character that indicates a quoted field that may contain the `delim` or newlines; default `'"'` 233 | * `escapechar::Union{Char,UInt8}`: the character that escapes a `quotechar` in a quoted field; default `'\\'` 234 | * `null::String`: indicates how NULL values are represented in the dataset; default `""` 235 | * `dateformat::Union{AbstractString,Dates.DateFormat}`: how dates/datetimes are represented in the dataset; default `Base.Dates.ISODateTimeFormat` 236 | * `decimal::Union{Char,UInt8}`: character to recognize as the decimal point in a float number, e.g. `3.14` or `3,14`; default `'.'` 237 | * `truestring`: string to represent `true::Bool` values in a csv file; default `"true"`.
Note that `truestring` and `falsestring` cannot start with the same character. 238 | * `falsestring`: string to represent `false::Bool` values in a csv file; default `"false"` 239 | * `header`: column names can be provided manually as a complete Vector{String}, or as an Int/AbstractRange which indicates the row/rows that contain the column names 240 | * `datarow::Int`: specifies the row on which the actual data starts in the file; by default, the data is expected on the next row after the header row(s); for a file without column names (header), specify `datarow=1` 241 | * `types`: column types can be provided manually as a complete Vector{Type}, or in a Dict to reference individual columns by name or number 242 | * `nullable::Union{Bool, Missing}`: indicates whether columns may contain missing values; `missing` by default, meaning nullability is inferred during type detection. If set to `false` and missing values are encountered, a `Data.NullException` will be thrown 243 | * `footerskip::Int`: indicates the number of rows to skip at the end of the file 244 | * `rows_for_type_detect::Int=100`: indicates how many rows should be read to infer the types of columns 245 | * `rows::Int`: indicates the total number of rows to read from the file; by default the file is pre-parsed to count the # of rows; `-1` can be passed to skip a full-file scan, but the `Data.Sink` must be set up to account for a potentially unknown # of rows 246 | * `use_mmap::Bool=true`: whether the underlying file will be mmapped or not while parsing; note that on Windows machines, the underlying file will not be "deletable" until Julia GC has run (can be run manually via `gc()`) due to the use of a finalizer when reading the file. 247 | * `append::Bool=false`: if the `sink` argument provided is an existing table, `append=true` will append the source's data to the existing data instead of doing a full replace 248 | * `transforms::Dict{Union{String,Int},Function}`: a Dict of transforms to apply to values as they are parsed. Note that a column can be specified by either number or column name. 249 | * `transpose::Bool=false`: when reading the underlying csv data, rows are treated as columns and columns as rows; the resulting dataset will be the "transpose" of the actual csv data. 250 | * `categorical::Bool=true`: read string columns as a `CategoricalArray` ([ref](https://github.com/JuliaData/CategoricalArrays.jl)), as long as the % of unique values seen during type detection is less than 67%. This will dramatically reduce memory use in cases where the number of unique values is small. 251 | * `weakrefstrings::Bool=true`: whether to use the [`WeakRefStrings`](https://github.com/quinnj/WeakRefStrings.jl) package to speed up file parsing; can only be `true` for `Sink` objects that support `WeakRefStringArray` columns. Note that `WeakRefStringArray` still returns regular `String` elements. 252 | 253 | Example usage: 254 | ``` 255 | julia> dt = CSV.read("bids.csv") 256 | 7656334×9 DataFrames.DataFrame 257 | │ Row │ bid_id │ bidder_id │ auction │ merchandise │ device │ 258 | ├─────────┼─────────┼─────────────────────────────────────────┼─────────┼──────────────────┼─────────────┤ 259 | │ 1 │ 0 │ "8dac2b259fd1c6d1120e519fb1ac14fbqvax8" │ "ewmzr" │ "jewelry" │ "phone0" │ 260 | │ 2 │ 1 │ "668d393e858e8126275433046bbd35c6tywop" │ "aeqok" │ "furniture" │ "phone1" │ 261 | │ 3 │ 2 │ "aa5f360084278b35d746fa6af3a7a1a5ra3xe" │ "wa00e" │ "home goods" │ "phone2" │ 262 | ...
263 | ``` 264 | 265 | Other example invocations may include: 266 | ```julia 267 | # read in a tab-delimited file 268 | CSV.read(file; delim='\t') 269 | 270 | # read in a comma-delimited file with null values represented as '\\N', such as a MySQL export 271 | CSV.read(file; null="\\N") 272 | 273 | # read a csv file that happens to have column names in the first column, and grouped data in rows instead of columns 274 | CSV.read(file; transpose=true) 275 | 276 | # manually provided column names; must match # of columns of data in file 277 | # this assumes there is no header row in the file itself, so data parsing will start at the very beginning of the file 278 | CSV.read(file; header=["col1", "col2", "col3"]) 279 | 280 | # manually provided column names, even though the file itself has column names on the first row 281 | # `datarow` is specified to ensure data parsing occurs at correct location 282 | CSV.read(file; header=["col1", "col2", "col3"], datarow=2) 283 | 284 | # types provided manually; as a vector, must match length of columns in actual data 285 | CSV.read(file; types=[Int, Int, Float64]) 286 | 287 | # types provided manually; as a Dict, can specify columns by # or column name 288 | CSV.read(file; types=Dict(3=>Float64, 6=>String)) 289 | CSV.read(file; types=Dict("col3"=>Float64, "col6"=>String)) 290 | 291 | # manually provided # of rows; if known beforehand, this will improve parsing speed 292 | # this is also a way to limit the # of rows to be read in a file if only a sample is needed 293 | CSV.read(file; rows=10000) 294 | 295 | # for data files, `file` and `file2`, with the same structure, read both into a single DataFrame 296 | # note that `df` is used as a 2nd argument in the 2nd call to `CSV.read` and the keyword argument 297 | # `append=true` is passed 298 | df = CSV.read(file) 299 | df = CSV.read(file2, df; append=true) 300 | 301 | # manually construct a `CSV.Source` once, then stream its data to both a DataFrame 302 | # and SQLite table `sqlite_table` in the SQLite database `db` 303 | # note the use of `CSV.reset!` to ensure the `source` can be streamed from again 304 | source = CSV.Source(file) 305 | df1 = CSV.read(source, DataFrame) 306 | CSV.reset!(source) 307 | db = SQLite.DB() 308 | sq1 = CSV.read(source, SQLite.Sink, db, "sqlite_table") 309 | ``` 310 | """ 311 | function read end 312 | 313 | function read(fullpath::Union{AbstractString,IO}, sink::Type=DataFrame, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}(), transpose::Bool=false, kwargs...) 314 | source = transpose ? TransposedSource(fullpath; kwargs...) : Source(fullpath; kwargs...) 315 | sink = Data.stream!(source, sink, args...; append=append, transforms=transforms) 316 | return Data.close!(sink) 317 | end 318 | 319 | function read(fullpath::Union{AbstractString,IO}, sink::T; append::Bool=false, transforms::Dict=Dict{Int,Function}(), transpose::Bool=false, kwargs...) where {T} 320 | source = transpose ? TransposedSource(fullpath; kwargs...) : Source(fullpath; kwargs...) 
321 | sink = Data.stream!(source, sink; append=append, transforms=transforms) 322 | return Data.close!(sink) 323 | end 324 | 325 | read(source::CSV.Source, sink=DataFrame, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}()) = (sink = Data.stream!(source, sink, args...; append=append, transforms=transforms); return Data.close!(sink)) 326 | read(source::CSV.Source, sink::T; append::Bool=false, transforms::Dict=Dict{Int,Function}()) where {T} = (sink = Data.stream!(source, sink; append=append, transforms=transforms); return Data.close!(sink)) 327 | read(source::CSV.TransposedSource, sink=DataFrame, args...; append::Bool=false, transforms::Dict=Dict{Int,Function}()) = (sink = Data.stream!(source, sink, args...; append=append, transforms=transforms); return Data.close!(sink)) 328 | read(source::CSV.TransposedSource, sink::T; append::Bool=false, transforms::Dict=Dict{Int,Function}()) where {T} = (sink = Data.stream!(source, sink; append=append, transforms=transforms); return Data.close!(sink)) 329 | -------------------------------------------------------------------------------- /test/source.jl: -------------------------------------------------------------------------------- 1 | @testset "Basic CSV.Source" begin 2 | 3 | #test on non-existent file 4 | @test_throws ArgumentError CSV.Source(""); 5 | 6 | #test where datarow > headerrow 7 | @test_throws ArgumentError CSV.Source(joinpath(dir, "test_no_header.csv");datarow=1,header=2); 8 | 9 | #test various encodings 10 | f = CSV.Source(joinpath(dir, "test_utf8_with_BOM.csv")) 11 | @test Data.header(Data.schema(f)) == ["col1","col2","col3"] 12 | 13 | f = CSV.Source(joinpath(dir, "test_utf8.csv")) 14 | sch = Data.schema(f) 15 | @test f.options.delim == UInt8(',') 16 | @test size(sch, 2) == 3 17 | @test size(sch, 1) == 3 18 | @test Data.header(sch) == ["col1","col2","col3"] 19 | @test Data.types(sch) == (Float64,Float64,Float64) 20 | ds = CSV.read(f) 21 | @test ds[1][1] == 1.0 22 | @test ds[1][2] == 4.0 23 | @test ds[1][3] == 7.0 24 | @test ds[2][1] == 2.0 25 | sch2 = Data.schema(ds) 26 | @test Data.header(sch2) == Data.header(sch) 27 | @test Data.types(sch) == (Float64,Float64,Float64) 28 | 29 | f = CSV.Source(joinpath(dir, "test_utf8.csv")) 30 | si = CSV.write(joinpath(dir, "new_test_utf8.csv"), f) 31 | so = CSV.Source(si) 32 | @test so.options.delim == UInt8(',') 33 | @test size(Data.schema(so), 2) == 3 34 | @test size(Data.schema(so), 1) == 3 35 | @test Data.header(Data.schema(so)) == ["col1","col2","col3"] 36 | @test Data.types(Data.schema(so)) == (Float64,Float64,Float64) 37 | ds = CSV.read(so) 38 | @test ds[1][1] == 1.0 39 | @test ds[1][2] == 4.0 40 | @test ds[1][3] == 7.0 41 | @test ds[2][1] == 2.0 42 | @test Data.types(Data.schema(f)) == Data.types(Data.schema(so)) == Data.types(Data.schema(ds)) 43 | f = si = so = ds = nothing; gc(); gc() 44 | try 45 | rm(joinpath(dir, "new_test_utf8.csv")) 46 | end 47 | 48 | # f = CSV.Source(joinpath(dir, "test_utf16_be.csv")) 49 | # f = CSV.Source(joinpath(dir, "test_utf16_le.csv")) 50 | # f = CSV.Source(joinpath(dir, "test_utf16.csv")) 51 | f = CSV.Source(joinpath(dir, "test_windows.csv")) 52 | @test size(Data.schema(f), 2) == 3 53 | @test size(Data.schema(f), 1) == 3 54 | 55 | #test one column file 56 | f = CSV.Source(joinpath(dir, "test_single_column.csv")) 57 | @test f.options.delim == UInt8(',') 58 | @test f.options.quotechar == UInt8('"') 59 | @test f.options.escapechar == UInt8('\\') 60 | @test size(Data.schema(f), 2) == 1 61 | @test size(Data.schema(f), 1) == 3 62 | @test 
Data.header(Data.schema(f)) == ["col1"] 63 | @test Data.types(Data.schema(f)) == (Int,) 64 | ds = CSV.read(f) 65 | @test ds[1][1] == 1 66 | @test ds[1][2] == 2 67 | @test ds[1][3] == 3 68 | 69 | #test empty file 70 | f = CSV.Source(joinpath(dir, "test_empty_file.csv")) 71 | @test ismissing(size(Data.schema(f), 1)) 72 | 73 | #test file with just newlines 74 | f = CSV.Source(joinpath(dir, "test_empty_file_newlines.csv")) 75 | @test size(Data.schema(f), 2) == 1 76 | @test size(Data.schema(f), 1) == 9 77 | @test Data.header(Data.schema(f)) == [""] 78 | @test Data.types(Data.schema(f)) == (Missing,) 79 | 80 | #test with various quotechars, escapechars 81 | f = CSV.Source(joinpath(dir, "test_simple_quoted.csv")) 82 | @test size(Data.schema(f), 2) == 2 83 | @test size(Data.schema(f), 1) == 1 84 | ds = CSV.read(f) 85 | @test String(ds[1][1]) == "quoted field 1" 86 | @test String(ds[2][1]) == "quoted field 2" 87 | f = CSV.Source(joinpath(dir, "test_quoted_delim_and_newline.csv")) 88 | @test size(Data.schema(f), 2) == 2 89 | @test size(Data.schema(f), 1) == 1 90 | 91 | f = CSV.Source(joinpath(dir, "test_quoted_numbers.csv"); categorical=false) 92 | @test size(Data.schema(f), 2) == 3 93 | @test size(Data.schema(f), 1) == 3 94 | ds = CSV.read(f) 95 | @test Data.types(Data.schema(f)) == (WeakRefString{UInt8}, Int, Int) 96 | 97 | #test various newlines 98 | f = CSV.Source(joinpath(dir, "test_crlf_line_endings.csv")) 99 | @test Data.header(Data.schema(f)) == ["col1","col2","col3"] 100 | @test size(Data.schema(f), 2) == 3 101 | @test Data.types(Data.schema(f)) == (Int,Int,Int) 102 | ds = CSV.read(f) 103 | @test ds[1][1] == 1 104 | f = CSV.Source(joinpath(dir, "test_newline_line_endings.csv")) 105 | @test Data.header(Data.schema(f)) == ["col1","col2","col3"] 106 | @test size(Data.schema(f), 2) == 3 107 | @test Data.types(Data.schema(f)) == (Int,Int,Int) 108 | f = CSV.Source(joinpath(dir, "test_mac_line_endings.csv")) 109 | @test Data.header(Data.schema(f)) == ["col1","col2","col3"] 110 | @test size(Data.schema(f), 2) == 3 111 | @test Data.types(Data.schema(f)) == (Int,Int,Int) 112 | 113 | end # testset 114 | 115 | @testset "CSV.Source keyword arguments" begin 116 | 117 | #test headerrow, datarow, footerskips 118 | f = CSV.Source(joinpath(dir, "test_no_header.csv"); header=0, datarow=1) 119 | @test Data.header(Data.schema(f)) == ["Column1","Column2","Column3"] 120 | @test Data.types(Data.schema(f)) == (Float64,Float64,Float64) 121 | @test size(Data.schema(f), 2) == 3 122 | @test size(Data.schema(f), 1) == 3 123 | f = CSV.Source(joinpath(dir, "test_2_footer_rows.csv"); header=4, datarow=5, footerskip=2) 124 | @test Data.header(Data.schema(f)) == ["col1","col2","col3"] 125 | @test Data.types(Data.schema(f)) == (Int,Int,Int) 126 | @test size(Data.schema(f), 2) == 3 127 | @test size(Data.schema(f), 1) == 3 128 | 129 | #test dates, dateformats 130 | f = CSV.Source(joinpath(dir, "test_dates.csv"); types=[Date], dateformat="yyyy-mm-dd") 131 | @test size(Data.schema(f), 2) == 1 132 | @test size(Data.schema(f), 1) == 3 133 | @test Data.types(Data.schema(f)) == (Date,) 134 | ds = CSV.read(f) 135 | @test ds[1][1] == Date(2015,1,1) 136 | @test ds[1][2] == Date(2015,1,2) 137 | @test ds[1][3] == Date(2015,1,3) 138 | f = CSV.Source(joinpath(dir, "test_excel_date_formats.csv"); dateformat="mm/dd/yy") 139 | @test size(Data.schema(f), 2) == 1 140 | @test size(Data.schema(f), 1) == 3 141 | @test Data.types(Data.schema(f)) == (Date,) 142 | f = CSV.Source(joinpath(dir, "test_excel_date_formats.csv"); types=[Date], 
dateformat="mm/dd/yy") 143 | @test size(Data.schema(f), 2) == 1 144 | @test size(Data.schema(f), 1) == 3 145 | @test Data.types(Data.schema(f)) == (Date,) 146 | f = CSV.Source(joinpath(dir, "test_datetimes.csv"); dateformat="yyyy-mm-dd HH:MM:SS.s") 147 | @test size(Data.schema(f), 2) == 1 148 | @test size(Data.schema(f), 1) == 3 149 | @test Data.types(Data.schema(f)) == (DateTime,) 150 | ds = CSV.read(f) 151 | @test ds[1][1] == DateTime(2015,1,1) 152 | @test ds[1][2] == DateTime(2015,1,2,0,0,1) 153 | @test ds[1][3] == DateTime(2015,1,3,0,12,0,1) 154 | 155 | #test bad types 156 | f = CSV.Source(joinpath(dir, "test_float_in_int_column.csv"); types=[Int,Int,Int]) 157 | @test_throws CSV.ParsingException CSV.read(f) 158 | 159 | #test missing values 160 | f = CSV.Source(joinpath(dir, "test_missing_value_NULL.csv"); categorical=false) 161 | @test size(Data.schema(f), 2) == 3 162 | @test size(Data.schema(f), 1) == 3 163 | @test Data.types(Data.schema(f)) == (Float64,WeakRefString{UInt8},Float64) 164 | ds = CSV.read(f) 165 | @test ds[1][1] == 1.0 166 | @test string(ds[2][1]) == "2.0" 167 | @test string(ds[2][2]) == "NULL" 168 | f = CSV.Source(joinpath(dir, "test_missing_value_NULL.csv"); null="NULL") 169 | @test size(Data.schema(f), 2) == 3 170 | @test size(Data.schema(f), 1) == 3 171 | @test String(f.options.null) == "NULL" 172 | @test Data.types(Data.schema(f)) == (Float64,Union{Float64, Missing},Float64) 173 | ds = CSV.read(f) 174 | @test ds[1][1] == 1.0 175 | @test ds[2][1] == 2.0 176 | @test ismissing(ds[2][2]) 177 | 178 | # uses default missing value "" 179 | f = CSV.Source(joinpath(dir, "test_missing_value.csv")) 180 | @test size(Data.schema(f), 2) == 3 181 | @test size(Data.schema(f), 1) == 3 182 | @test Data.types(Data.schema(f)) == (Float64,Union{Float64, Missing},Float64) 183 | 184 | f = CSV.Source(joinpath(dir, "test_header_range.csv");header=1:3) 185 | @test size(Data.schema(f), 2) == 3 186 | @test size(Data.schema(f), 1) == 3 187 | @test Data.header(Data.schema(f)) == ["col1_sub1_part1","col2_sub2_part2","col3_sub3_part3"] 188 | ds = CSV.read(f) 189 | 190 | f = CSV.Source(joinpath(dir, "test_header_range.csv");header=["col1_sub1_part1","col2_sub2_part2","col3_sub3_part3"],datarow=4) 191 | @test size(Data.schema(f), 2) == 3 192 | @test size(Data.schema(f), 1) == 3 193 | @test Data.header(Data.schema(f)) == ["col1_sub1_part1","col2_sub2_part2","col3_sub3_part3"] 194 | ds = CSV.read(f) 195 | 196 | f = CSV.Source(joinpath(dir, "test_basic.csv");types=Dict(2=>Float64)) 197 | @test size(Data.schema(f), 2) == 3 198 | @test size(Data.schema(f), 1) == 3 199 | @test Data.types(Data.schema(f)) == (Int,Float64,Int) 200 | ds = CSV.read(f) 201 | 202 | f = CSV.Source(joinpath(dir, "test_basic_pipe.csv");delim='|') 203 | @test size(Data.schema(f), 2) == 3 204 | @test size(Data.schema(f), 1) == 3 205 | @test Data.types(Data.schema(f)) == (Int,Int,Int) 206 | @test f.options.delim == UInt8('|') 207 | ds = CSV.read(f) 208 | 209 | f = CSV.Source(joinpath(dir, "test_basic_pipe.csv");delim='|',footerskip=1) 210 | @test size(Data.schema(f), 2) == 3 211 | @test size(Data.schema(f), 1) == 2 212 | @test Data.types(Data.schema(f)) == (Int,Int,Int) 213 | @test f.options.delim == UInt8('|') 214 | ds = CSV.read(f) 215 | @show f 216 | 217 | t = tempname() 218 | f = CSV.Sink(t) 219 | @show f 220 | 221 | f = CSV.Source(joinpath(dir, "test_missing_value_NULL.csv")) 222 | types = Data.types(Data.schema(f)) 223 | @test CSV.parsefield(f, types[1]) == 1.0 224 | @test CSV.parsefield(f, types[2]) == "2.0" 225 | @test 
CSV.parsefield(f, types[3]) == 3.0 226 | 227 | t = tempname() 228 | f = open(t, "w+") 229 | Base.write(f, String(read(joinpath(dir, "test_missing_value_NULL.csv")))) 230 | seekstart(f) 231 | source = CSV.Source(f; header=[], datarow=2, nullable=false) 232 | df = CSV.read(source) 233 | @test Data.header(Data.schema(df)) == ["Column1", "Column2", "Column3"] 234 | 235 | Data.reset!(source) 236 | df2 = CSV.read(source) 237 | @test df == df2 238 | 239 | @test_throws ArgumentError CSV.Source(f; types = [Int, Int, Int, Int]) 240 | close(f) 241 | f = source = nothing; gc(); gc() 242 | try 243 | rm(t) 244 | end 245 | 246 | # test tab-delimited nulls 247 | d = CSV.read(joinpath(dir, "test_tab_null_empty.txt"); delim='\t') 248 | @test ismissing(d[2][2]) 249 | 250 | d = CSV.read(joinpath(dir, "test_tab_null_string.txt"); delim='\t', null="NULL") 251 | @test ismissing(d[2][2]) 252 | 253 | # read a write protected file 254 | let fn = tempname() 255 | open(fn, "w") do f 256 | write(f, "Julia") 257 | end 258 | chmod(fn, 0o444) 259 | CSV.read(fn) 260 | gc(); gc() 261 | try 262 | rm(fn) 263 | end 264 | end 265 | 266 | # CSV with header and no data is treated the same as an empty buffer with header supplied 267 | df1 = CSV.read(IOBuffer("a,b,c")) 268 | df2 = CSV.read(IOBuffer(""); header=["a", "b", "c"]) 269 | @test size(Data.schema(df1)) == (0, 3) 270 | @test size(Data.schema(df2)) == (0, 3) 271 | @test df1 == df2 272 | 273 | # Adding transforms to CSV with header but no data returns empty frame as expected 274 | # (previously the lack of a ::String dispatch in the transform function caused an error) 275 | transforms = Dict{Int, Function}(2 => x::Integer -> "b$x") 276 | df1 = CSV.read(IOBuffer("a,b,c\n1,2,3\n4,5,6"); nullable=false, transforms=transforms) 277 | df2 = CSV.read(IOBuffer("a,b,c\n1,b2,3\n4,b5,6"); nullable=false) 278 | @test size(Data.schema(df1)) == (2, 3) 279 | @test size(Data.schema(df2)) == (2, 3) 280 | @test df1 == df2 281 | df3 = CSV.read(IOBuffer("a,b,c"); nullable=false, transforms=transforms) 282 | df4 = CSV.read(IOBuffer("a,b,c"); nullable=false) 283 | @test size(Data.schema(df3)) == (0, 3) 284 | @test size(Data.schema(df4)) == (0, 3) 285 | @test df3 == df4 286 | 287 | let fn = tempname() 288 | df = CSV.read(IOBuffer("a,b,c\n1,2,3\n4,5,6"), CSV.Sink(fn); nullable=false, transforms=transforms) 289 | @test String(read(fn)) == "a,b,c\n1,b2,3\n4,b5,6\n" 290 | try 291 | rm(fn) 292 | end 293 | end 294 | 295 | let fn = tempname() 296 | df = CSV.read(IOBuffer("a,b,c"), CSV.Sink(fn); nullable=false, transforms=transforms) 297 | @test String(read(fn)) == "a,b,c\n" 298 | try 299 | rm(fn) 300 | end 301 | end 302 | 303 | source = IOBuffer("col1,col2,col3") # empty dataset 304 | df = CSV.read(source; transforms=Dict(2 => floor)) 305 | @test size(Data.schema(df)) == (0, 3) 306 | @test Data.types(Data.schema(df)) == (Any, Any, Any) 307 | 308 | # Integer overflow; #100 309 | @test_throws OverflowError CSV.read(joinpath(dir, "int8_overflow.csv"); types=[Int8]) 310 | 311 | # dash as null; #92 312 | df = CSV.read(joinpath(dir, "dash_as_null.csv"); null="-") 313 | @test ismissing(df[1][2]) 314 | 315 | df = CSV.read(joinpath(dir, "plus_as_null.csv"); null="+") 316 | @test ismissing(df[1][2]) 317 | 318 | # #83 319 | df = CSV.read(joinpath(dir, "comma_decimal.csv"); delim=';', decimal=',') 320 | @test df[1][1] === 3.14 321 | @test df[1][2] === 1.0 322 | @test df[2][1] === 1 323 | @test df[2][2] === 1 324 | 325 | # #86 326 | df = CSV.read(joinpath(dir, "double_quote_quotechar_and_escapechar.csv"); 
escapechar='"') 327 | @test size(df) == (24, 5) 328 | @test df[5][24] == "NORTH DAKOTA STATE \"\"A\"\" #1" 329 | 330 | # #84 331 | df = CSV.read(joinpath(dir, "census.txt"); delim='\t') 332 | @test eltype(df[9]) == Float64 333 | @test size(df) == (3, 9) 334 | 335 | # #79 336 | df = CSV.read(joinpath(dir, "bools.csv")) 337 | @test eltype(df[1]) == Bool 338 | @test df[1] == [true, false, true, false] 339 | @test df[2] == [false, true, true, false] 340 | @test df[3] == [1, 2, 3, 4] 341 | 342 | # #64 343 | df = CSV.read(joinpath(dir, "attenu.csv"), null="NA", types=Dict(3=>Union{Missing, String})) 344 | @test size(df) == (182, 5) 345 | 346 | f = CSV.Source(joinpath(dir, "test_null_only_column.csv"), categorical=false, null="NA") 347 | @test size(Data.schema(f)) == (3, 2) 348 | ds = CSV.read(f) 349 | @test Data.types(Data.schema(f)) == (WeakRefString{UInt8}, Missing) 350 | @test all(ismissing, ds[2]) 351 | 352 | # #107 353 | df = CSV.read(IOBuffer("1,a,i\n2,b,ii\n3,c,iii"); datarow=1) 354 | @test size(df) == (3, 3) 355 | 356 | # #115 (Int -> Union{Int, Missing} -> Union{WeakRefString, Missing} promotion) 357 | df = CSV.read(joinpath(dir, "attenu.csv"), null="NA", rows_for_type_detect=200) 358 | @test size(df) == (182, 5) 359 | @test Data.types(Data.schema(df)) == (Int, Float64, Union{Missings.Missing, String}, Float64, Float64) 360 | 361 | # #137 362 | tbl = DataFrame(a=[11,22], dt=[Date(2017,12,7), Date(2017,12,14)]) 363 | tbl[:dttm] = DateTime.(tbl[:dt]) 364 | CSV.write("test.tsv", tbl; delim='\t') 365 | df = CSV.read("test.tsv"; delim='\t') 366 | @test Data.types(Data.schema(df)) == (Int, Date, DateTime) 367 | df = nothing; gc(); gc() 368 | try 369 | rm("test.tsv") 370 | end 371 | 372 | end # testset 373 | 374 | @testset "CSV.Source various files" begin 375 | 376 | #other various files found around the internet 377 | f = CSV.Source(joinpath(dir, "baseball.csv"); rows_for_type_detect=35) 378 | @test size(Data.schema(f), 2) == 15 379 | @test size(Data.schema(f), 1) == 35 380 | @test Data.header(Data.schema(f)) == ["Rk","Year","Age","Tm","Lg","","W","L","W-L%","G","Finish","Wpost","Lpost","W-L%post",""] 381 | @test Data.types(Data.schema(f)) == (Union{Int, Missing},Union{Int, Missing},Union{Int, Missing},Union{CategoricalString{UInt32}, Missing},Union{CategoricalString{UInt32}, Missing},Union{WeakRefString{UInt8}, Missing},Union{Int, Missing},Union{Int, Missing},Union{Float64, Missing},Union{Int, Missing},Union{Float64, Missing},Union{Int, Missing},Union{Int, Missing},Union{Float64, Missing},Union{CategoricalString{UInt32}, Missing}) 382 | ds = CSV.read(f) 383 | 384 | f = CSV.Source(joinpath(dir, "FL_insurance_sample.csv"); types=Dict(10=>Float64,12=>Float64)) 385 | @test size(Data.schema(f), 2) == 18 386 | @test size(Data.schema(f), 1) == 36634 387 | @test Data.header(Data.schema(f)) == ["policyID","statecode","county","eq_site_limit","hu_site_limit","fl_site_limit","fr_site_limit","tiv_2011","tiv_2012","eq_site_deductible","hu_site_deductible","fl_site_deductible","fr_site_deductible","point_latitude","point_longitude","line","construction","point_granularity"] 388 | @test Data.types(Data.schema(f)) == (Int,CategoricalString{UInt32},CategoricalString{UInt32},Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Int,Float64,Float64,CategoricalString{UInt32},CategoricalString{UInt32},Int) 389 | ds = CSV.read(f) 390 | 391 | f = CSV.Source(joinpath(dir, "FL_insurance_sample.csv");types=Dict("eq_site_deductible"=>Float64,"fl_site_deductible"=>Float64)) 392 | @test 
size(Data.schema(f), 2) == 18 393 | @test size(Data.schema(f), 1) == 36634 394 | @test Data.header(Data.schema(f)) == ["policyID","statecode","county","eq_site_limit","hu_site_limit","fl_site_limit","fr_site_limit","tiv_2011","tiv_2012","eq_site_deductible","hu_site_deductible","fl_site_deductible","fr_site_deductible","point_latitude","point_longitude","line","construction","point_granularity"] 395 | @test Data.types(Data.schema(f)) == (Int,CategoricalString{UInt32},CategoricalString{UInt32},Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Int,Float64,Float64,CategoricalString{UInt32},CategoricalString{UInt32},Int) 396 | ds = CSV.read(f) 397 | 398 | f = CSV.Source(joinpath(dir, "SacramentocrimeJanuary2006.csv")) 399 | @test size(Data.schema(f), 2) == 9 400 | @test size(Data.schema(f), 1) == 7584 401 | @test Data.header(Data.schema(f)) == ["cdatetime","address","district","beat","grid","crimedescr","ucr_ncic_code","latitude","longitude"] 402 | @test Data.types(Data.schema(f)) == (CategoricalString{UInt32},WeakRefString{UInt8},Int,CategoricalString{UInt32},Int,CategoricalString{UInt32},Int,Float64,Float64) 403 | ds = CSV.read(f) 404 | 405 | f = CSV.Source(joinpath(dir, "Sacramentorealestatetransactions.csv")) 406 | @test size(Data.schema(f), 2) == 12 407 | @test size(Data.schema(f), 1) == 985 408 | @test Data.header(Data.schema(f)) == ["street","city","zip","state","beds","baths","sq__ft","type","sale_date","price","latitude","longitude"] 409 | @test Data.types(Data.schema(f)) == (WeakRefString{UInt8},CategoricalString{UInt32},Int,CategoricalString{UInt32},Int,Int,Int,CategoricalString{UInt32},CategoricalString{UInt32},Int,Float64,Float64) 410 | ds = CSV.read(f) 411 | 412 | f = CSV.Source(joinpath(dir, "SalesJan2009.csv"); types=Dict(3=>WeakRefString{UInt8},7=>Union{WeakRefString{UInt8}, Missing})) 413 | @test size(Data.schema(f), 2) == 12 414 | @test size(Data.schema(f), 1) == 998 415 | @test Data.header(Data.schema(f)) == ["Transaction_date","Product","Price","Payment_Type","Name","City","State","Country","Account_Created","Last_Login","Latitude","Longitude"] 416 | @test Data.types(Data.schema(f)) == (WeakRefString{UInt8},CategoricalString{UInt32},WeakRefString{UInt8},CategoricalString{UInt32},WeakRefString{UInt8},WeakRefString{UInt8},Union{WeakRefString{UInt8}, Missing},CategoricalString{UInt32},WeakRefString{UInt8},WeakRefString{UInt8},Float64,Float64) 417 | ds = CSV.read(f) 418 | 419 | f = CSV.Source(joinpath(dir, "stocks.csv")) 420 | @test size(Data.schema(f), 2) == 2 421 | @test size(Data.schema(f), 1) == 30 422 | @test Data.header(Data.schema(f)) == ["Stock Name","Company Name"] 423 | @test Data.types(Data.schema(f)) == (WeakRefString{UInt8},WeakRefString{UInt8}) 424 | ds = CSV.read(f) 425 | 426 | f = CSV.Source(joinpath(dir, "TechCrunchcontinentalUSA.csv"); types=Dict(4=>Union{WeakRefString{UInt8}, Missing},5=>Union{WeakRefString{UInt8}, Missing})) 427 | @test size(Data.schema(f), 2) == 10 428 | @test size(Data.schema(f), 1) == 1460 429 | @test Data.header(Data.schema(f)) == ["permalink","company","numEmps","category","city","state","fundedDate","raisedAmt","raisedCurrency","round"] 430 | @test Data.types(Data.schema(f)) == (CategoricalString{UInt32},CategoricalString{UInt32},Union{Int, Missing},Union{WeakRefString{UInt8}, Missing},Union{WeakRefString{UInt8}, Missing},CategoricalString{UInt32},CategoricalString{UInt32},Int,CategoricalString{UInt32},CategoricalString{UInt32}) 431 | ds = CSV.read(f) 432 | 433 | f = CSV.Source(joinpath(dir, "Fielding.csv"); 
nullable=true, types=Dict("GS"=>Int,"InnOuts"=>Int,"WP"=>Int,"SB"=>Int,"CS"=>Int,"ZR"=>Int)) 434 | @test size(Data.schema(f), 2) == 18 435 | @test size(Data.schema(f), 1) == 167938 436 | @test Data.header(Data.schema(f)) == ["playerID","yearID","stint","teamID","lgID","POS","G","GS","InnOuts","PO","A","E","DP","PB","WP","SB","CS","ZR"] 437 | @test Data.types(Data.schema(f)) == (Union{CategoricalString{UInt32}, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{CategoricalString{UInt32}, Missing}, Union{CategoricalString{UInt32}, Missing}, Union{CategoricalString{UInt32}, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}, Union{Int, Missing}) 438 | ds = CSV.read(f) 439 | 440 | f = CSV.Source(joinpath(dir, "latest (1).csv"); header=0, null="\\N", types=Dict(13=>Union{Float64, Missing},17=>Union{Int, Missing},18=>Union{Float64, Missing},20=>Union{Float64, Missing})) 441 | @test size(Data.schema(f), 2) == 25 442 | @test size(Data.schema(f), 1) == 1000 443 | @test Data.header(Data.schema(f)) == ["Column$i" for i = 1:size(Data.schema(f), 2)] 444 | @test Data.types(Data.schema(f)) == (CategoricalString{UInt32}, CategoricalString{UInt32}, Int, Int, CategoricalString{UInt32}, Int, CategoricalString{UInt32}, Int, Date, Date, Int, CategoricalString{UInt32}, Union{Float64, Missing}, Union{Float64, Missing}, Union{Float64, Missing}, Union{Float64, Missing}, Union{Int, Missing}, Union{Float64, Missing}, Float64, Union{Float64, Missing}, Union{Float64, Missing}, Union{Int, Missing}, Float64, Union{Float64, Missing}, Union{Float64, Missing}) 445 | ds = CSV.read(f) 446 | 447 | f = CSV.Source(joinpath(dir, "pandas_zeros.csv")) 448 | @test size(Data.schema(f), 2) == 50 449 | @test size(Data.schema(f), 1) == 100000 450 | @test Data.header(Data.schema(f)) == [string(i) for i = 0:49] 451 | @test Data.types(Data.schema(f)) == (repmat([Int],50)...,) 452 | @time ds = CSV.read(f) 453 | 454 | end # testset 455 | 456 | @testset "CSV.TransposedSource" begin 457 | 458 | # CSV.TransposedSource 459 | df = CSV.read(joinpath(dir, "transposed.csv"); transpose=true) 460 | @test size(df) == (3, 3) 461 | @test Data.header(Data.schema(df)) == ["col1", "col2", "col3"] 462 | @test df[1][1] == 1 463 | @test df[1][2] == 2 464 | @test df[1][3] == 3 465 | 466 | df = CSV.read(joinpath(dir, "transposed_1row.csv"); transpose=true) 467 | @test size(df) == (1, 1) 468 | 469 | df = CSV.read(joinpath(dir, "transposed_emtpy.csv"); transpose=true) 470 | @test size(df) == (0, 1) 471 | 472 | df = CSV.read(joinpath(dir, "transposed_extra_newline.csv"); transpose=true) 473 | @test size(df) == (2, 2) 474 | 475 | df = CSV.read(joinpath(dir, "transposed_noheader.csv"); transpose=true, header=0) 476 | @test size(df) == (2, 3) 477 | @test Data.header(Data.schema(df)) == ["Column1", "Column2", "Column3"] 478 | 479 | df = CSV.read(joinpath(dir, "transposed_noheader.csv"); transpose=true, header=["c1", "c2", "c3"]) 480 | @test size(df) == (2, 3) 481 | @test Data.header(Data.schema(df)) == ["c1", "c2", "c3"] 482 | 483 | end # testset 484 | 485 | 486 | -------------------------------------------------------------------------------- /docs/build/index.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # CSV.jl Documentation 5 | 6 | - [CSV.jl Documentation](index.md#CSV.jl-Documentation-1) 7 | - [High-level 
interface](index.md#High-level-interface-1) 8 | - [Lower-level utilities](index.md#Lower-level-utilities-1) 9 | 10 | 11 | 12 | 13 | ## High-level interface 14 | 15 | # 16 | **`CSV.read`** — *Function*. 17 | 18 | 19 | 20 | `CSV.read(fullpath::Union{AbstractString,IO}, sink=DataFrame)` => `typeof(sink)` 21 | 22 | parses a delimited file into a Julia structure (a DataFrame by default, but any `Data.Sink` may be given). 23 | 24 | Positional arguments: 25 | 26 | * `fullpath`; can be a file name (string) or other `IO` instance 27 | * `sink`; a `DataFrame` by default, but may also be other `Data.Sink` types that support the `AbstractTable` interface 28 | 29 | Keyword Arguments: 30 | 31 | * `delim::Union{Char,UInt8}`; how fields in the file are delimited 32 | * `quotechar::Union{Char,UInt8}`; the character that indicates a quoted field that may contain the `delim` or newlines 33 | * `escapechar::Union{Char,UInt8}`; the character that escapes a `quotechar` in a quoted field 34 | * `null::String`; an ascii string that indicates how NULL values are represented in the dataset 35 | * `header`; column names can be provided manually as a complete Vector{String}, or as an Int/Range which indicates the row/rows that contain the column names 36 | * `datarow::Int`; specifies the row on which the actual data starts in the file; by default, the data is expected on the next row after the header row(s) 37 | * `types`; column types can be provided manually as a complete Vector{DataType}, or in a Dict to reference a column by name or number 38 | * `dateformat::Union{AbstractString,Dates.DateFormat}`; how all dates/datetimes are represented in the dataset 39 | * `footerskip::Int`; indicates the number of rows to skip at the end of the file 40 | * `rows_for_type_detect::Int=100`; indicates how many rows should be read to infer the types of columns 41 | * `rows::Int`; indicates the total number of rows to read from the file; by default the file is pre-parsed to count the # of rows 42 | * `use_mmap::Bool=true`; whether the underlying file will be mmapped or not while parsing 43 | 44 | Note that by default, "string" or text columns will be parsed as the `WeakRefString` type. This is a custom type that only stores a pointer to the actual byte data + the number of bytes. To convert a `WeakRefString` to a standard Julia `String`, just call `string(::WeakRefString)`; this also works on an entire column via `string(::NullableVector{WeakRefString})`. Oftentimes, however, it can be convenient to work with `WeakRefStrings` depending on the ultimate use, such as transferring the data directly to another system and avoiding all the intermediate byte copying. 45 | 46 | Example usage: 47 | 48 | ``` 49 | julia> dt = CSV.read("bids.csv") 50 | 7656334×9 DataFrames.DataFrame 51 | │ Row │ bid_id │ bidder_id │ auction │ merchandise │ device │ 52 | ├─────────┼─────────┼─────────────────────────────────────────┼─────────┼──────────────────┼─────────────┤ 53 | │ 1 │ 0 │ "8dac2b259fd1c6d1120e519fb1ac14fbqvax8" │ "ewmzr" │ "jewelry" │ "phone0" │ 54 | │ 2 │ 1 │ "668d393e858e8126275433046bbd35c6tywop" │ "aeqok" │ "furniture" │ "phone1" │ 55 | │ 3 │ 2 │ "aa5f360084278b35d746fa6af3a7a1a5ra3xe" │ "wa00e" │ "home goods" │ "phone2" │ 56 | ... 57 | ``` 58 | 59 | 60 | source
61 | 62 | Methods 63 | 64 | 79 | 80 | # 81 | **`CSV.write`** — *Function*. 82 | 83 | 84 | 85 | write a `source::Data.Source` out to a `CSV.Sink` 86 | 87 | * `io::Union{String,IO}`; a filename (String) or `IO` type to write the `source` to 88 | * `source`; a `Data.Source` type 89 | * `delim::Union{Char,UInt8}`; how fields in the file will be delimited 90 | * `quotechar::Union{Char,UInt8}`; the character that indicates a quoted field that may contain the `delim` or newlines 91 | * `escapechar::Union{Char,UInt8}`; the character that escapes a `quotechar` in a quoted field 92 | * `null::String`; the ascii string that indicates how NULL values will be represented in the dataset 93 | * `dateformat`; how dates/datetimes will be represented in the dataset 94 | * `quotefields::Bool`; whether all fields should be quoted or not 95 | * `header::Bool`; whether to write out the column names from `source` 96 | * `append::Bool`; start writing data at the end of `io`; by default, `io` will be reset to its beginning before writing 97 | 98 | 99 | source
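A minimal usage sketch (the `DataFrame` contents and file names here are illustrative, not part of the package):

```julia
using CSV, DataFrames

df = DataFrame(a = [1, 2, 3], b = ["x", "y", "z"])

# write out comma-delimited with a header row (the defaults)
CSV.write("out.csv", df)

# write tab-delimited instead, appending to any existing data in the file
CSV.write("out.tsv", df; delim='\t', append=true)

# a CSV.Source is itself a Data.Source, so it can be streamed straight to a new file
CSV.write("copy.csv", CSV.Source("out.csv"))
```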
100 | 101 | Methods 102 | 103 | 113 | 114 | 115 | 116 | 117 | ## Lower-level utilities 118 | 119 | # 120 | **`CSV.Source`** — *Type*. 121 | 122 | 123 | 124 | constructs a `CSV.Source` file ready to start parsing data from 125 | 126 | implements the `Data.Source` interface for providing convenient `Data.stream!` methods for various `Data.Sink` types 127 | 128 | 129 | source
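A sketch of the typical lifecycle, mirroring the package's own tests (`data.csv` is an assumed input file):

```julia
using CSV, DataStreams, DataFrames

src = CSV.Source("data.csv")   # detects header, column types, etc. up front
sch = Data.schema(src)
Data.header(sch)               # column names
Data.types(sch)                # detected column types

df = CSV.read(src)             # stream the parsed rows into a DataFrame
Data.reset!(src)               # rewind to the start of the data...
df2 = CSV.read(src)            # ...so the same source can be streamed again
```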
130 | 131 | Methods 132 | 133 | 161 | 162 | _Hiding 1 method defined outside of this package._ 163 | 164 | # 165 | **`CSV.Sink`** — *Type*. 166 | 167 | 168 | 169 | constructs a `CSV.Sink` file ready to start writing data to 170 | 171 | implements the `Data.Sink` interface for providing convenient `Data.stream!` methods for various `Data.Source` types 172 | 173 | 174 | source
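A short sketch (the input file name is an assumed example); `CSV.read` accepts an already-constructed sink as its second argument:

```julia
using CSV

# a Sink wraps a file (or IO) that parsed data will be streamed into
snk = CSV.Sink(tempname())

# stream a csv file through a Source and into the Sink, i.e. copy/rewrite it
CSV.read("data.csv", snk)
```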
175 | 176 | Methods 177 | 178 | 214 | 215 | _Hiding 1 method defined outside of this package._ 216 | 217 | # 218 | **`CSV.Options`** — *Type*. 219 | 220 | 221 | 222 | Represents the various configuration settings for csv file parsing. 223 | 224 | * `delim`::Union{Char,UInt8} = how fields in the file are delimited 225 | * `quotechar`::Union{Char,UInt8} = the character that indicates a quoted field that may contain the `delim` or newlines 226 | * `escapechar`::Union{Char,UInt8} = the character that escapes a `quotechar` in a quoted field 227 | * `null`::String = indicates how NULL values are represented in the dataset 228 | * `dateformat`::Union{AbstractString,Dates.DateFormat} = how dates/datetimes are represented in the dataset 229 | 230 | 231 | source
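A sketch of building an `Options` by hand and passing it to the keyword-only `Source` constructor, as `Source.jl` does internally; note that the character settings are stored as raw bytes (`UInt8`), and the file name is illustrative:

```julia
using CSV

opts = CSV.Options(delim=UInt8('\t'), quotechar=UInt8('"'),
                   escapechar=UInt8('\\'), null="NA")

# the keyword-only constructor accepts a pre-built Options
src = CSV.Source(fullpath="data.tsv", options=opts)
```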
232 | 233 | Methods 234 | 235 | 272 | 273 | _Hiding 1 method defined outside of this package._ 274 | 275 | # 276 | **`CSV.parsefield`** — *Function*. 277 | 278 | 279 | 280 | `io` is an `IO` type that is positioned at the first byte/character of a delimited-file field (i.e. a single cell). Leading whitespace is ignored for Integer and Float types. Returns a `Tuple{T,Bool}` with the value and a bool saying whether the field contains a null value or not. Specialized methods exist for Integer, Float, String, Date, and DateTime; for other types `T`, a generic fallback requires `zero(T)` and `parse(T, str::String)` to be defined. A field is null if the next delimiter or newline is encountered before any other characters; the field value may also be wrapped in `opt.quotechar`, and two consecutive `opt.quotechar` result in a null field. `opt.null` is also checked if a custom value is provided (e.g. "NA", "\N", etc.). For numeric fields, if the field is non-null and non-digit characters are encountered at any point before a delimiter or newline, an error is thrown. 281 | 282 | 283 | source
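A sketch in the style of the package's tests, which call `parsefield` with a `CSV.Source` positioned at the first data cell (the file and its column types are assumed):

```julia
using CSV

src = CSV.Source("data.csv")   # positioned at the first data cell after construction
# parse the first row's cells one at a time, in file order;
# the requested type must match what each cell actually contains
a = CSV.parsefield(src, Int)
b = CSV.parsefield(src, Float64)
c = CSV.parsefield(src, String)
```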
284 | 285 | Methods 286 | 575 | 576 | # 577 | **`CSV.readline`** — *Method*. 578 | 579 | 580 | 581 | read a single line from `io` (any `IO` type) as a string, accounting for potentially embedded newlines in quoted fields (e.g. value1, value2, "value3 with embedded newlines"). Can optionally provide a `buf::IOBuffer` type for buffer reuse 582 | 583 | 584 | source
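A sketch under an assumed signature, with the quote and escape characters passed as raw bytes, mirroring how `CSV.countlines(source, options.quotechar, options.escapechar)` is called in `Source.jl`:

```julia
using CSV

io = IOBuffer("a,b,\"c with an\nembedded newline\"\n1,2,3\n")

# assumed signature: readline(io, quotechar, escapechar)
line = CSV.readline(io, UInt8('"'), UInt8('\\'))   # returns the whole logical first line
```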
585 | 586 | # 587 | **`CSV.readsplitline`** — *Function*. 588 | 589 | 590 | 591 | read a single line from `io` (any `IO` type) as a `Vector{String}` with elements being delimited fields. Can optionally provide a `buf::IOBuffer` type for buffer reuse 592 | 593 | 594 | source
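A sketch under an assumed signature: `Source.jl` calls the mutating variant as `readsplitline!(vals, io, delim, quotechar, escapechar)`, so the non-mutating form is assumed here to take the same trailing arguments:

```julia
using CSV

io = IOBuffer("col1,col2,\"col,3\"\n1,2,3\n")

# assumed signature: readsplitline(io, delim, quotechar, escapechar)
fields = CSV.readsplitline(io, UInt8(','), UInt8('"'), UInt8('\\'))
# the quoted "col,3" comes back as a single field despite the embedded delimiter
```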
595 | 596 | Methods 597 | 598 | 640 | 641 | # 642 | **`CSV.countlines`** — *Method*. 643 | 644 | 645 | 646 | count the number of lines in a file, accounting for potentially embedded newlines in quoted fields 647 | 648 | 649 | source
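A minimal sketch; the call form matches the internal use in `Source.jl` (`CSV.countlines(source, options.quotechar, options.escapechar)`):

```julia
using CSV

io = IOBuffer("a,b\n1,\"two\nlines\"\n3,4\n")

# the newline inside the quoted field is not counted as a line break
n = CSV.countlines(io, UInt8('"'), UInt8('\\'))
```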
650 | 651 | --------------------------------------------------------------------------------