├── CODE_OF_CONDUCT.md
├── LICENSE.md
├── README.md
├── benchmarks
    ├── Gemfile
    ├── README.md
    ├── benchmark.rb
    ├── datasets
    │   ├── README.md
    │   ├── finance
    │   │   ├── AAPL.csv
    │   │   ├── AMZN.csv
    │   │   ├── FB.csv
    │   │   ├── GOOGL.csv
    │   │   ├── MSFT.csv
    │   │   └── o
    │   │   │   ├── MSFT.json.csv
    │   │   │   ├── MSFT.tab
    │   │   │   └── MSFT.txt
    │   └── weather
    │   │   ├── Hobo_15minute_2017.csv
    │   │   └── o
    │   │       ├── Hobo_15minute_2017.json.csv
    │   │       └── Hobo_15minute_2017.txt
    ├── helper.rb
    ├── io
    │   ├── README.md
    │   ├── benchmark.rb
    │   ├── buffer.rb
    │   ├── buffer_line.rb
    │   ├── buffer_line_pos.rb
    │   ├── buffer_line_scanner.rb
    │   ├── buffer_num.rb
    │   ├── helper.rb
    │   ├── parser.rb
    │   ├── parser_nobuf.rb
    │   ├── parser_num.rb
    │   ├── parser_scanner.rb
    │   ├── read.rb
    │   └── test
    │   │   ├── test_io.rb
    │   │   ├── test_sample.rb
    │   │   └── test_scanner.rb
    ├── split.rb
    └── test
    │   ├── test_numeric.rb
    │   └── test_read.rb
├── csv11
    ├── .gitignore
    ├── HISTORY.md
    ├── Manifest.txt
    ├── README.md
    ├── Rakefile
    ├── lib
    │   ├── csv11.rb
    │   └── csv11
    │   │   └── version.rb
    └── test
    │   ├── helper.rb
    │   ├── test_split.rb
    │   └── test_version.rb
├── csvhuman
    ├── .gitignore
    ├── ATTRIBUTES.md
    ├── HISTORY.md
    ├── Manifest.txt
    ├── NOTES.md
    ├── README.md
    ├── Rakefile
    ├── TAGS.md
    ├── config
    │   ├── attributes.csv
    │   ├── langs.csv
    │   ├── tags.csv
    │   ├── types.csv
    │   └── versions.csv
    ├── lib
    │   ├── csvhuman.rb
    │   └── csvhuman
    │   │   ├── base.rb
    │   │   ├── column.rb
    │   │   ├── converter.rb
    │   │   ├── doc
    │   │       ├── helper.rb
    │   │       └── schema.rb
    │   │   ├── reader.rb
    │   │   ├── tag.rb
    │   │   └── version.rb
    ├── scripts
    │   ├── mkattributes.rb
    │   ├── mktags.rb
    │   ├── pages
    │   │   ├── attributes.txt
    │   │   └── tags.txt
    │   └── txt2csv.rb
    └── test
    │   ├── data
    │       ├── airports.csv
    │       ├── ebola.csv
    │       ├── hdx
    │       │   ├── ebola_treatment_centres.csv
    │       │   ├── phl_haima_houses_damaged.csv
    │       │   └── zika_cases.csv
    │       ├── sample1.csv
    │       ├── sample2.csv
    │       ├── sample3.csv
    │       ├── sample4.csv
    │       ├── test.csv
    │       └── unhcr.csv
    │   ├── helper.rb
    │   ├── test_doc.rb
    │   ├── test_hdx.rb
    │   ├── test_header_converter.rb
    │   ├── test_misc.rb
    │   ├── test_reader.rb
    │   ├── test_samples.rb
    │   ├── test_tags.rb
    │   ├── test_type_converters.rb
    │   └── test_type_mappings.rb
├── csvjson
    ├── .gitignore
    ├── HISTORY.md
    ├── Manifest.txt
    ├── README.md
    ├── Rakefile
    ├── datasets
    │   ├── hello.json.csv
    │   └── hello11.json.csv
    ├── lib
    │   ├── csvjson.rb
    │   └── csvjson
    │   │   ├── parser.rb
    │   │   └── version.rb
    └── test
    │   ├── helper.rb
    │   ├── test_parser.rb
    │   └── test_parser_misc.rb
├── csvpack
    ├── .gitignore
    ├── HISTORY.md
    ├── Manifest.txt
    ├── NOTES.md
    ├── README.md
    ├── Rakefile
    ├── getting-started-samples
    │   └── start.rb
    ├── lib
    │   ├── csvpack.rb
    │   └── csvpack
    │   │   ├── downloader.rb
    │   │   ├── pack.rb
    │   │   └── version.rb
    └── test
    │   ├── helper.rb
    │   ├── pack
    │       └── beer
    │       │   ├── data.csv
    │       │   └── datapackage.json
    │   ├── test_companies.rb
    │   ├── test_countries.rb
    │   ├── test_downloader.rb
    │   └── test_import.rb
├── csvreader
    ├── .gitignore
    ├── ALTERNATIVES.md
    ├── CHANGELOG.md
    ├── DIALECTS.md
    ├── ERRORS.md
    ├── Manifest.txt
    ├── NOTES.md
    ├── README.md
    ├── Rakefile
    ├── datasets
    │   ├── beer.csv
    │   ├── beer11.csv
    │   ├── cars11.csv
    │   ├── cities11.csv
    │   ├── customers11.csv
    │   ├── iris.attrib.csv
    │   ├── iris11.csv
    │   ├── lcc.attrib.csv
    │   ├── shakespeare.csv
    │   └── test.csv
    ├── lib
    │   ├── csvreader.rb
    │   └── csvreader
    │   │   ├── base.rb
    │   │   ├── buffer.rb
    │   │   ├── builder.rb
    │   │   ├── converter.rb
    │   │   ├── parser.rb
    │   │   ├── parser_fixed.rb
    │   │   ├── parser_json.rb
    │   │   ├── parser_std.rb
    │   │   ├── parser_strict.rb
    │   │   ├── parser_tab.rb
    │   │   ├── parser_table.rb
    │   │   ├── parser_yaml.rb
    │   │   ├── reader.rb
    │   │   ├── reader_hash.rb
    │   │   └── version.rb
    └── test
    │   ├── helper.rb
    │   ├── test_buffer.rb
    │   ├── test_converter.rb
    │   ├── test_parser.rb
    │   ├── test_parser_autofix.rb
    │   ├── test_parser_directive.rb
    │   ├── test_parser_fixed.rb
    │   ├── test_parser_formats.rb
    │   ├── test_parser_java.rb
    │   ├── test_parser_meta.rb
    │   ├── test_parser_null.rb
    │   ├── test_parser_numeric.rb
    │   ├── test_parser_quotes.rb
    │   ├── test_parser_strict.rb
    │   ├── test_parser_tab.rb
    │   ├── test_parser_table.rb
    │   ├── test_reader.rb
    │   ├── test_reader_converters.rb
    │   ├── test_reader_hash.rb
    │   ├── test_reader_hash_converters.rb
    │   └── test_samples.rb
├── csvrecord
    ├── .gitignore
    ├── HISTORY.md
    ├── Manifest.txt
    ├── NOTES.md
    ├── README.md
    ├── Rakefile
    ├── lib
    │   ├── csvrecord.rb
    │   └── csvrecord
    │   │   ├── base.rb
    │   │   └── version.rb
    └── test
    │   ├── data
    │       ├── beer.csv
    │       └── beer11.csv
    │   ├── helper.rb
    │   ├── test_record.rb
    │   ├── test_record_auto.rb
    │   └── test_version.rb
├── csvutils
    ├── .gitignore
    ├── HISTORY.md
    ├── Manifest.txt
    ├── NOTES.md
    ├── README.md
    ├── Rakefile
    ├── bin
    │   ├── csvcut
    │   ├── csvhead
    │   ├── csvheader
    │   ├── csvsplit
    │   └── csvstat
    ├── datasets
    │   ├── at-austria
    │   │   └── AUT.csv
    │   ├── de-deutschland
    │   │   └── bundesliga.csv
    │   └── eng-england
    │   │   └── 2017-18
    │   │       └── E0.csv
    ├── getting-started-samples
    │   ├── AUT.csv
    │   ├── ENG.csv
    │   ├── start.rb
    │   └── start.sh
    ├── lib
    │   ├── csvutils.rb
    │   └── csvutils
    │   │   ├── commands
    │   │       ├── cut.rb
    │   │       ├── head.rb
    │   │       ├── header.rb
    │   │       ├── split.rb
    │   │       └── stat.rb
    │   │   ├── cut.rb
    │   │   ├── head.rb
    │   │   ├── header.rb
    │   │   ├── split.rb
    │   │   ├── stat.rb
    │   │   ├── test.rb
    │   │   ├── utils.rb
    │   │   └── version.rb
    └── test
    │   ├── helper.rb
    │   ├── test_cut.rb
    │   ├── test_head.rb
    │   ├── test_header.rb
    │   ├── test_misc.rb
    │   ├── test_split.rb
    │   └── test_version.rb
├── csvyaml
    ├── .gitignore
    ├── HISTORY.md
    ├── Manifest.txt
    ├── README.md
    ├── Rakefile
    ├── datasets
    │   ├── hello.yaml.csv
    │   └── hello11.yaml.csv
    ├── lib
    │   ├── csvyaml.rb
    │   └── csvyaml
    │   │   ├── parser.rb
    │   │   └── version.rb
    └── test
    │   ├── helper.rb
    │   ├── test_parser.rb
    │   └── test_parser_misc.rb
├── docs
    ├── README.md
    ├── csv-array-hash-struct.md
    ├── csv-formats.md
    ├── csv-libraries.md
    ├── csv-numerics.md
    ├── csv-parser.md
    ├── csv-quotes.md
    ├── csv-types.md
    ├── csv_stdlib_human.rb
    ├── csv_stdlib_test.rb
    ├── smarter-csv.md
    ├── sorry-sorry-sorry.md
    └── why-the-csv-stdlib-is-broken.md
└── tabreader
    ├── .gitignore
    ├── CHANGELOG.md
    ├── Manifest.txt
    ├── NOTES.md
    ├── README.md
    ├── Rakefile
    ├── datasets
        ├── empty.tab
        └── test.tab
    ├── lib
        ├── tabreader.rb
        └── tabreader
        │   ├── reader.rb
        │   ├── reader_hash.rb
        │   └── version.rb
    └── test
        ├── helper.rb
        ├── test_reader.rb
        └── test_reader_hash.rb


/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | Be A Mensch. 
2 | 
3 | 
4 | ## Attribution
5 | 
6 | This Code of Conduct is adapted from the Choose-A-Conduct org, version 1.1, available at <https://github.com/chooseaconduct>.
7 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Comma-Separated Values (CSV) Tabular Data Readers Incl. CSV <3 JSON, CSV <3 YAML And More
 2 | 
 3 | ---
 4 | 
 5 | NOTE:  I (Gerald Bauer) am happy to work on a better (alternate) csv library for ruby, BUT unfortunately for now I have first to figure out how to survive and earn some money. unfortunately, ruby is no longer (never was really) in demand over here (in austria). 
 6 | thus, if you want to see progress - please ping me (gerald.bauer @ gmail.com) and your support is more than welcome.
 7 | 
 8 | ---
 9 | 
10 | 
11 | 
12 | 
13 | Gem Family
14 | 
15 | [**csvreader**](csvreader) - read tabular data in the comma-separated values (csv) format the right way (uses best practices out-of-the-box with zero-configuration)
16 | 
17 | [csvjson](csvjson) - read tabular data in the CSV <3 JSON format, that is, comma-separated values CSV (line-by-line) records with javascript object notation (JSON) encoding rules
18 | 
19 | [csvyaml](csvyaml) - read tabular data in the CSV <3 YAML format, that is, comma-separated values (CSV) line-by-line records with yaml ain't markup language (YAML) encoding rules
20 | 
21 | [csvhuman](csvhuman) - read tabular data in the CSV Humanitarian eXchange Language (HXL)  format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules
22 | 
23 | [tabreader](tabreader) - read in tabular datafiles in text in the tabular (TAB) format
24 | 
25 | [csvpack](csvpack) - tools 'n' scripts for working with tabular data packages using comma-separated values (CSV) datafiles in text with meta info (that is, schema, datatypes, ..) in datapackage.json; download, read into and query CSV datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more
26 | 
27 | 
28 | 
29 | <!-- break -->
30 | 
31 | Typed Structs & More
32 | 
33 | [csvrecord](csvrecord) - read in comma-separated values (csv) records with typed structs / schemas
34 | 
35 | 
36 | <!-- break -->
37 | 
38 | (Command Line) Tools & More
39 | 
40 | [csvutils](csvutils) - tools 'n' scripts for working with comma-separated values (csv) datafiles - the world's most popular tabular data interchange format in text
41 | 
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/benchmarks/Gemfile:
--------------------------------------------------------------------------------
1 | 
2 | gem 'csvreader'
3 | 
4 | 
5 | gem 'hippie_csv'
6 | gem 'wtf_csv'
7 | gem 'lenientcsv'
8 | 


--------------------------------------------------------------------------------
/benchmarks/benchmark.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | require 'benchmark'
 5 | 
 6 | require_relative 'helper'
 7 | 
 8 | 
 9 | 
10 | ## "raw" string reader benchmark - no type inference and data conversion
11 | 
12 | n = 1000
13 | # n = 2
14 | 
15 | 
16 | Benchmark.bm(15) do |x|
17 |   x.report( 'std:' )            { n.times do CSV.read( "#{data_dir}/finance/MSFT.csv" ); end }
18 | 
19 |   x.report( 'split:' )          { n.times do read_csv( "#{data_dir}/finance/MSFT.csv" ); end }
20 |   x.report( 'split(tab):' )     { n.times do read_tab( "#{data_dir}/finance/o/MSFT.tab" ); end }
21 |   x.report( 'split(table)*:' )  { n.times do read_table( "#{data_dir}/finance/o/MSFT.tab" ); end }
22 |   x.report( 'split(table):' )   { n.times do read_table( "#{data_dir}/finance/o/MSFT.txt" ); end }
23 | 
24 |   x.report( 'reader:' )         { n.times do CsvReader.read( "#{data_dir}/finance/MSFT.csv" ); end }
25 |   x.report( 'reader(tab):' )    { n.times do CsvReader.tab.read( "#{data_dir}/finance/o/MSFT.tab" ); end }
26 |   x.report( 'reader(table)*:' ) { n.times do CsvReader.table.read( "#{data_dir}/finance/o/MSFT.tab" ); end }
27 |   x.report( 'reader(table):' )  { n.times do CsvReader.table.read( "#{data_dir}/finance/o/MSFT.txt" ); end }
28 |   x.report( 'reader(json):' )   { n.times do CsvReader.json.read( "#{data_dir}/finance/o/MSFT.json.csv" ); end }
29 |   x.report( 'reader(yaml):' )   { n.times do CsvReader.yaml.read( "#{data_dir}/finance/MSFT.csv" ); end }
30 | 
31 |   x.report( 'hippie:' )         { n.times do HippieCSV.read( "#{data_dir}/finance/MSFT.csv" ); end }
32 |   x.report( 'wtf:' )            { n.times do WtfCSV.scan( "#{data_dir}/finance/MSFT.csv" );end }
33 |   x.report( 'lenient:' )        { n.times do LenientCSV.read( "#{data_dir}/finance/MSFT.csv" ); end }
34 | end
35 | 
36 | 
37 | 
38 | ## numerics reader benchmark - all records numeric (limited type inference and data conversion)
39 | 
n = 100
# n=2

## note: the std csv library takes its options as keyword arguments;
##   a positional options hash raises an ArgumentError with ruby 3.0+
##   (the keyword form works with ruby 2.x too).
##
## note: the table reader gets the whitespace-separated (.txt) variant of the
##   dataset - the same file the split(table) run uses - and NOT the
##   comma-separated (.csv) original.
Benchmark.bm(15) do |x|
  x.report( 'std:' ) { n.times do CSV.read( "#{data_dir}/weather/Hobo_15minute_2017.csv", converters: :all ); end }

  x.report( 'split:' )           { n.times do read_faster_csv( "#{data_dir}/weather/Hobo_15minute_2017.csv", { converter: ->(v) { Float(v) rescue v } }); end }
  x.report( 'split(table):' )    { n.times do read_faster_csv( "#{data_dir}/weather/o/Hobo_15minute_2017.txt", { sep: /[ \t]+/, converter: ->(v) { Float(v) rescue v }}); end }

  x.report( 'reader:' )          { n.times do CsvReader.read( "#{data_dir}/weather/Hobo_15minute_2017.csv", { :converters => :all }); end }
  x.report( 'reader(table):' )   { n.times do CsvReader.table.read( "#{data_dir}/weather/o/Hobo_15minute_2017.txt", { :converters => :all }); end }
  x.report( 'reader(numeric):' ) { n.times do CsvReader.numeric.read( "#{data_dir}/weather/Hobo_15minute_2017.csv" ); end }
  x.report( 'reader(json):' )    { n.times do CsvReader.json.read( "#{data_dir}/weather/o/Hobo_15minute_2017.json.csv" ); end }
  x.report( 'reader(yaml):' )    { n.times do CsvReader.yaml.read( "#{data_dir}/weather/Hobo_15minute_2017.csv" ); end }
end
55 | 


--------------------------------------------------------------------------------
/benchmarks/datasets/README.md:
--------------------------------------------------------------------------------
 1 | # Sample Datasets
 2 | 
 3 | 
 4 | ## Finance
 5 | 
 6 | Source: Yahoo! Finance <<https://finance.yahoo.com>> - Historical Stock Price Data (Available for Download as CSV).
 7 | Steps:
 8 | 
 9 | - Search for stock ticker symbol
10 | - Click on Historical Data
11 | - Select Time Period (e.g. YTD - Year to Date) and Frequency (Daily)
12 | - Click Download Data
13 | 
14 | Examples:
15 | - [GOOGL](https://finance.yahoo.com/quote/GOOGL/history) - Alphabet Inc. (Google) - Class A Shares
16 | - [AAPL](https://finance.yahoo.com/quote/AAPL/history) - Apple Inc.
17 | - [FB](https://finance.yahoo.com/quote/FB/history) - Facebook Inc.
18 | - [AMZN](https://finance.yahoo.com/quote/AMZN/history) - Amazon Inc.
19 | - [MSFT](https://finance.yahoo.com/quote/MSFT/history) - Microsoft Corp.
20 | 
21 | 
22 | 
23 | 
24 | ## Weather
25 | 
26 | Source: Weather Station of the University of Waterloo, Ontario, Canada <<http://weather.uwaterloo.ca>>
27 |        
28 | Datasets (Available for Download as CSV) in Archive:
29 | - <<http://weather.uwaterloo.ca/download/Hobo_15minutedata_2017.csv>>
30 | 
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/benchmarks/helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'pp'
 4 | 
 5 | 
 6 | require 'csv'
 7 | require 'csvreader'
 8 | 
 9 | 
10 | require_relative 'split'
11 | 
12 | require 'hippie_csv'
13 | require 'wtf_csv'
14 | require 'lenient_csv'
15 | 
16 | 
17 | 
18 | def data_dir
19 |   './datasets'
20 | end
21 | 
22 | 
23 | 
24 | class LenientCSV
25 |   def self.read( path )
26 |     txt = File.open( path, 'r:bom|utf-8' ) { |f| f.read }
27 |     csv = new( txt )
28 |     csv.to_a
29 |   end
30 | end
31 | 


--------------------------------------------------------------------------------
/benchmarks/io/benchmark.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | require 'benchmark'
 5 | 
 6 | 
 7 | 
 8 | require_relative 'helper'
 9 | 
10 | 
11 | n = 100
12 | # n = 10_000
13 | 
14 | Benchmark.bm(26) do |x|
15 |   x.report( 'line (each_line):'           ) { n.times do readline_sample; end }
16 |   x.report( 'line (each_line+chomp!):'    ) { n.times do readline_inplace_sample; end }
17 |   x.report( 'line (each_line+scanner):'   ) { n.times do readline_scanner_sample; end }
18 |   x.report( 'line (each_line+each_char):' ) { n.times do readchar_sample; end }
19 | 
20 |   x.report( 'line (parse+getch):'         ) { n.times do parse1_sample; end }
21 |   x.report( 'line (parse+gets+slice):'    ) { n.times do parse2_sample; end }
22 |   x.report( 'line (parse+gets+pos):'      ) { n.times do parse3_sample; end }
23 |   x.report( 'line (parse+nobuf):'         ) { n.times do parse4_sample; end }
24 |   x.report( 'line (parse+getch+num):'     ) { n.times do parse5_sample; end }
25 | 
26 |   x.report( 'line (parse+gets+scanner):'  ) { n.times do parse_scanner_sample; end }
27 |   x.report( 'line (parse+gets+scanner*):' ) { n.times do parse_scanner_scanner_sample; end }
28 | end
29 | 
30 | 
31 | ##                                   user     system      total        real
32 | ## line (each_line):            5.375000   6.141000  11.516000 ( 11.522474)
33 | ## line (each_line+chomp!):     4.375000   6.109000  10.484000 ( 10.496063)
34 | ## line (each_line+scanner):   13.984000   5.656000  19.640000 ( 19.644859)
35 | ## line (each_line+each_char): 43.188000   8.141000  51.329000 ( 51.325110)
36 | ## line (parse+getch):        116.921000   7.312000 124.233000 (124.293261)
37 | ## line (parse+gets+slice):   188.032000   8.500000 196.532000 (196.711467)
38 | ## line (parse+gets+pos):     141.375000  13.485000 154.860000 (154.922206)
39 | ## line (parse+nobuf):         63.718000   7.047000  70.765000 ( 70.774960)
40 | ## line (parse+getch+num):    127.750000   8.156000 135.906000 (136.168328)
41 | ## line (parse+gets+scanner): 127.875000   8.140000 136.015000 (136.358474)
42 | ## line (parse+gets+scanner*): 26.516000   7.375000  33.891000 ( 33.912854)
43 | 


--------------------------------------------------------------------------------
/benchmarks/io/buffer.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | class Buffer
 4 |   def initialize( data )
 5 |     # create the IO object we will read from
 6 |     @input = data
 7 |     @buf   = [] ## last (buffer) chars (used for peek)
 8 |   end
 9 | 
10 |   def eof?()   @buf.size == 0 && @input.eof?;  end
11 | 
12 |   def getc
13 |     if @buf.size > 0
14 |       @buf.shift  ## get first char from buffer
15 |     else
16 |       @input.getc
17 |     end
18 |   end # method getc
19 | 
20 |   def peek
21 |     if @buf.size == 0 && @input.eof?
22 |       ## puts "peek - hitting eof!!!"
23 |       return  "\0"   ## return NUL char (0) for now
24 |     end
25 | 
26 |     if @buf.size == 0
27 |         c = @input.getc
28 |         @buf.push( c )
29 |         ## puts "peek - fill buffer >#{c}< (#{c.ord})"
30 |     end
31 | 
32 |     @buf[0]    ## @buf.first
33 |   end # method peek
34 | end # class Buffer
35 | 


--------------------------------------------------------------------------------
/benchmarks/io/buffer_line.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class BufferLine
 5 |   def initialize( data )
 6 |     # create the IO object we will read from
 7 |     @input = data
 8 |     @buf   = ""  ## last (buffer) chars (used for peek)
 9 |   end
10 | 
11 |   def eof?()   @buf.empty? && @input.eof?;  end
12 | 
13 |   def getc
14 |     if @buf.empty?
15 |       @buf = @input.gets
16 |     end
17 | 
18 |     ## todo: check - if works for multi-byte chars??
19 |     @buf.slice!(0)  ## get first char from buffer
20 |   end # method getc
21 | 
22 | 
23 |   def peek
24 |     if @buf.empty? && @input.eof?
25 |       ## puts "peek - hitting eof!!!"
26 |       return  "\0"   ## return NUL char (0) for now
27 |     end
28 | 
29 |     if @buf.empty?
30 |         @buf = @input.gets
31 |         ## puts "peek - fill buffer >#{c}< (#{c.ord})"
32 |     end
33 | 
34 |     ## todo: check - if works for multi-byte chars??
35 |     @buf[0]    ## @buf.first
36 |   end # method peek
37 | end # class Buffer
38 | 


--------------------------------------------------------------------------------
/benchmarks/io/buffer_line_pos.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class BufferLinePos
 5 |   def initialize( data )
 6 |     # create the IO object we will read from
 7 |     @input = data
 8 |     @buf   = "" ## last (buffer) chars (used for peek)
 9 |     @pos    = 0
10 |     @length = 0
11 |   end
12 | 
13 |   def empty?
14 |      @length == 0 || @pos >= @length
15 |   end
16 | 
17 |   def eof?()  @input.eof? && empty?; end
18 | 
19 |   def getc
20 |     if empty?
21 |       @buf    = @input.gets
22 |       @length = @buf.length
23 |       @pos    = 0
24 |     end
25 | 
26 |     ## todo: check - if works for multi-byte chars??
27 |     c = @buf[@pos]
28 |     @pos += 1
29 |     c
30 |   end # method getc
31 | 
32 | 
33 |   def peek
34 |     if empty?
35 | 
36 |       if @input.eof?
37 |         ## puts "peek - hitting eof!!!"
38 |         return  "\0"   ## return NUL char (0) for now
39 |       else
40 |         @buf    = @input.gets
41 |         @length = @buf.length
42 |         @pos    = 0
43 |         ## puts "peek - fill buffer >#{c}< (#{c.ord})"
44 |       end
45 |     end
46 | 
47 |     ## todo: check - if works for multi-byte chars??
48 |     @buf[@pos]    ## @buf.first
49 |   end # method peek
50 | end # class BufferLinePos
51 | 


--------------------------------------------------------------------------------
/benchmarks/io/buffer_line_scanner.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class BufferLineScanner
 5 |   def initialize( data )
 6 |     # create the IO object we will read from
 7 |     @input = data
 8 |     @buf   = StringScanner.new("")  ## last (buffer) chars (used for peek)
 9 |   end
10 | 
11 |   def eof?()   @buf.eos? && @input.eof?;  end
12 | 
13 |   def getc
14 |     if @buf.eos?
15 |       @buf.string = @input.gets     ## was: StringScanner.new( @input.gets )
16 |     end
17 | 
18 |     ## todo: check - if works for multi-byte chars??
19 |     @buf.getch
20 |   end # method getc
21 | 
22 | 
23 |   def skip( pattern )       @buf.skip( pattern ); end
24 |   def scan( pattern )       @buf.scan( pattern ); end
25 |   def scan_until( pattern ) @buf.scan_until( pattern ); end
26 | 
27 | 
28 | 
29 |   def peek
30 |     if @buf.eos? && @input.eof?
31 |       ## puts "peek - hitting eof!!!"
32 |       return  "\0"   ## return NUL char (0) for now
33 |     end
34 | 
35 |     if @buf.eos?
36 |        @buf.string = @input.gets    ## was: StringScanner.new( @input.gets )
37 |     end
38 | 
39 |     ## todo: check - if works for multi-byte chars??
40 |     @buf.peek(1)
41 |   end # method peek
42 | end # class BufferLineScanner
43 | 


--------------------------------------------------------------------------------
/benchmarks/io/buffer_num.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class BufferNum
 5 |   def initialize( data )
 6 |     # create the IO object we will read from
 7 |     @input = data
 8 |     @buf   = [] ## last (buffer) chars (used for peek)
 9 |   end
10 | 
11 |   def eof?()   @buf.size == 0 && @input.eof?;  end
12 | 
13 | 
14 |   def getc
15 |     if @buf.size > 0
16 |       @buf.shift.chr    ## get first char from buffer (convert back to char/string from ord number/integer)
17 |     else
18 |       @input.getc
19 |     end
20 |   end # method getc
21 | 
22 |   ## note: peek always returns an integer
23 |   def peek
24 |     if @buf.size == 0
25 | 
26 |       return 0    if @input.eof?
27 | 
28 |       c = @input.getc
29 |       @buf.push( c.ord )
30 |       ## puts "peek - fill buffer >#{c}< (#{c.ord})"
31 |    end
32 | 
33 |     @buf[0]    ## @buf.first
34 |   end # method peek
35 | end # class Buffer
36 | 


--------------------------------------------------------------------------------
/benchmarks/io/helper.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | require 'pp'
  5 | require 'strscan'
  6 | 
  7 | 
  8 | 
  9 | require_relative 'read'
 10 | 
 11 | require_relative 'buffer'
 12 | require_relative 'buffer_line'
 13 | require_relative 'buffer_line_pos'
 14 | require_relative 'buffer_line_scanner'
 15 | require_relative 'buffer_num'
 16 | 
 17 | require_relative 'parser'
 18 | require_relative 'parser_nobuf'
 19 | require_relative 'parser_num'
 20 | require_relative 'parser_scanner'
 21 | 
 22 | 
 23 | 
 24 | def data_dir
 25 |   './datasets'
 26 | end
 27 | 
 28 | 
 29 | 
 30 | def readline_sample
 31 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 32 |      readline( f )
 33 |   end
 34 | end
 35 | 
 36 | def readline_inplace_sample
 37 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 38 |      readline_inplace( f )
 39 |   end
 40 | end
 41 | 
 42 | def readline_scanner_sample
 43 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 44 |      readline_scanner( f )
 45 |   end
 46 | end
 47 | 
 48 | 
 49 | def readchar_sample
 50 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 51 |      readchar( f )
 52 |   end
 53 | end
 54 | 
 55 | 
 56 | def parse1_sample
 57 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 58 |      Parser.parse( Buffer.new( f ) )
 59 |   end
 60 | end
 61 | 
 62 | def parse2_sample
 63 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 64 |      Parser.parse( BufferLine.new( f ) )
 65 |   end
 66 | end
 67 | 
 68 | def parse3_sample
 69 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 70 |      Parser.parse( BufferLinePos.new( f ) )
 71 |   end
 72 | end
 73 | 
 74 | def parse4_sample
 75 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 76 |      ParserNobuf.parse( f )
 77 |   end
 78 | end
 79 | 
 80 | def parse5_sample
 81 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 82 |      ParserNum.parse( BufferNum.new( f ) )
 83 |   end
 84 | end
 85 | 
 86 | 
 87 | def parse_scanner_sample
 88 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 89 |      Parser.parse( BufferLineScanner.new( f ) )
 90 |   end
 91 | end
 92 | 
 93 | def parse_scanner_scanner_sample
 94 |   File.open( "#{data_dir}/finance/MSFT.csv", 'r:utf-8' ) do |f|
 95 |      ParserScanner.parse( BufferLineScanner.new( f ) )
 96 |   end
 97 | end
 98 | 
 99 | 
100 | ## pp read_sample
101 | ## pp getch_sample
102 | ## pp getch2_sample
103 | ## pp getch3_sample
104 | ## pp parse4_sample
105 | ## pp parse_scanner_sample
106 | ## pp parse_scanner_scanner_sample
107 | 


--------------------------------------------------------------------------------
/benchmarks/io/parser.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | class Parser
 4 | 
 5 | LF	          = "\n"     ##   \n == ASCII 0x0A (hex) 10 (dec) = LF (Newline/line feed)
 6 | CR	          = "\r"     ##   \r == ASCII 0x0D (hex) 13 (dec) = CR (Carriage return)
 7 | 
 8 | def self.parse( input )
 9 |   recs = []
10 |   loop do
11 |     break if input.eof?
12 | 
13 |     ## non-blanl line
14 |     line = ""
15 | 
16 |     c = input.peek
17 |     if c==LF || c==CR || input.eof?
18 |       ## blank line
19 |       recs << line
20 |       skip_newline( input )
21 |     else
22 |       loop do
23 |         line << input.getc
24 |         c = input.peek
25 |         break if c==LF || c==CR || input.eof?
26 |       end
27 |       recs << line
28 |       skip_newline( input )
29 |     end
30 |   end
31 |   recs
32 | end
33 | 
34 | def self.skip_newline( input )    ## note: singular (strict) version
35 |   return if input.eof?
36 | 
37 |   ## only skip CR LF or LF or CR
38 |   if input.peek == CR
39 |     input.getc ## eat-up
40 |     input.getc  if input.peek == LF
41 |   elsif input.peek == LF
42 |     input.getc ## eat-up
43 |   else
44 |     # do nothing
45 |   end
46 | end
47 | 
48 | end # class Parser
49 | 


--------------------------------------------------------------------------------
/benchmarks/io/parser_nobuf.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | class ParserNobuf
 4 | 
 5 | LF	          = "\n"     ##   \n == ASCII 0x0A (hex) 10 (dec) = LF (Newline/line feed)
 6 | CR	          = "\r"     ##   \r == ASCII 0x0D (hex) 13 (dec) = CR (Carriage return)
 7 | 
 8 | def self.parse( input )
 9 |   recs = []
10 |   if input.eof?
11 |   else
12 |     c = input.getc
13 |     loop do
14 |       ## non-blanl line
15 |       line = ""
16 | 
17 |       if c==LF || c==CR || input.eof?
18 |         ## blank line
19 |         recs << line
20 |         break if input.eof?
21 |         c = skip_newline( c, input )
22 |       else
23 |         loop do
24 |           line << c
25 |           c = input.getc
26 |           break if c==LF || c==CR || input.eof?
27 |         end
28 |         recs << line
29 |         break if input.eof?
30 |         c = skip_newline( c, input )
31 |       end
32 |     end
33 |   end
34 |   recs
35 | end
36 | 
37 | 
38 | def self.skip_newline( c, input )
39 |   return c if input.eof?
40 | 
41 |   ## only skip CR LF or LF or CR
42 |   if c == CR
43 |     c = input.getc
44 |     c = input.getc  if c == LF
45 |     c
46 |   elsif c == LF
47 |     c = input.getc ## eat-up
48 |     c
49 |   else
50 |     # do nothing
51 |     c
52 |   end
53 | end
54 | 
55 | end # class ParserNobuf
56 | 


--------------------------------------------------------------------------------
/benchmarks/io/parser_num.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class ParserNum
 5 | 
 6 | LF	          = "\n".ord   ##   \n == ASCII 0x0A (hex) 10 (dec) = LF (Newline/line feed)
 7 | CR	          = "\r".ord   ##   \r == ASCII 0x0D (hex) 13 (dec) = CR (Carriage return)
 8 | 
 9 | def self.parse( input )
10 |   recs = []
11 |   loop do
12 |     break if input.eof?
13 | 
14 |     ## non-blanl line
15 |     line = ""
16 | 
17 |     c = input.peek
18 |     if c==LF || c==CR || input.eof?
19 |       ## blank line
20 |       recs << line
21 |       skip_newline( input )
22 |     else
23 |       loop do
24 |         line << input.getc
25 |         c = input.peek
26 |         break if c==LF || c==CR || input.eof?
27 |       end
28 |       recs << line
29 |       skip_newline( input )
30 |     end
31 |   end
32 |   recs
33 | end
34 | 
35 | def self.skip_newline( input )    ## note: singular (strict) version
36 |   return if input.eof?
37 | 
38 |   ## only skip CR LF or LF or CR
39 |   if input.peek == CR
40 |     input.getc ## eat-up
41 |     input.getc  if input.peek == LF
42 |   elsif input.peek == LF
43 |     input.getc ## eat-up
44 |   else
45 |     # do nothing
46 |   end
47 | end
48 | 
49 | end # class ParserNum
50 | 


--------------------------------------------------------------------------------
/benchmarks/io/parser_scanner.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | class ParserScanner
 4 | 
 5 | 
 6 | NOT_COMMA_OR_NEWLINE_RX = /[^,\n\r]*/
 7 | 
 8 | NEWLINE_RX =  /\r?\n/
 9 | 
10 | 
11 | LF	  = "\n"     ##   \n == ASCII 0x0A (hex) 10 (dec) = LF (Newline/line feed)
12 | CR	  = "\r"     ##   \r == ASCII 0x0D (hex) 13 (dec) = CR (Carriage return)
13 | COMMA = ","
14 | 
15 | def self.parse_record( input )
16 |   values = []
17 | 
18 |   loop do
19 |     value = input.scan( NOT_COMMA_OR_NEWLINE_RX )
20 |     values << value
21 | 
22 |     if input.eof?
23 |       break
24 |     elsif (c=input.peek; c==LF || c==CR)
25 |       skip_newline( input )
26 |       break
27 |     elsif input.peek == COMMA
28 |       input.getc  ## eat-up comma
29 |     else
30 |       puts "!! error - found >#{input.peek} (#{input.peek.ord})< - FS (,) or RS (\\n) expected!!!!"
31 |       exit(1)
32 |     end
33 |   end
34 | 
35 |   values
36 | end
37 | 
38 | 
39 | def self.parse( input )
40 |   recs = []
41 |   loop do
42 |     break if input.eof?
43 | 
44 |     ## non-blanl line
45 |     line = ""
46 | 
47 |     c = input.peek
48 |     if c==LF || c==CR || input.eof?
49 |       ## blank line
50 |       recs << line
51 |       skip_newline( input )
52 |     else
53 |       ## line << input.scan_until_newline
54 |       ## recs << line
55 |       ## skip_newline( input )
56 | 
57 |       line = parse_record( input )
58 |       recs << line
59 |     end
60 |   end
61 |   recs
62 | end
63 | 
64 | def self.skip_newline( input )    ## note: singular (strict) version
65 |   return if input.eof?
66 | 
67 |   input.skip( NEWLINE_RX )
68 | end
69 | 
70 | end # class Parser
71 | 


--------------------------------------------------------------------------------
/benchmarks/io/read.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | def readline( input )
 5 |   recs = []
 6 |   input.each_line do |line|
 7 |      line = line.chomp
 8 |      recs << line.split( "," )
 9 |   end
10 |   recs
11 | end
12 | 
##
# Same as the plain line-by-line split reader but strips the line terminator
#  "inplace" (chomp!) instead of allocating a new string per line.
#
#   input - any object that yields lines via each_line (e.g. File, StringIO)
#
# Returns an array of records (arrays of strings).
#   note: a blank line results in an empty record ([]).
def readline_inplace( input )
  recs = []
  input.each_line do |line|
     line.chomp!   ## use "inplace" chomp version
     ## note: limit -1 keeps trailing empty values e.g. "a,b," => ["a","b",""];
     ##   without a limit split silently drops them
     recs << line.split( ",", -1 )
  end
  recs
end
21 | 
22 | 
23 | 
24 | 
## a value: any run of characters up to (but not including) the next comma; might be empty
NOT_COMMA_RX = /  [^,]*  /x

##
# Reads comma-separated records line by line; every line gets split
#  into values with a (single, reused) StringScanner.
#
#   input - any object that yields lines via each_line (e.g. File, StringIO)
#
# Returns an array of records (arrays of strings);
#   trailing empty values are kept and a blank line results in [""].
def readline_scanner( input )
  scanner = StringScanner.new( "" )   ## note: reuse - swap in the string for every line
  records = []

  input.each_line do |line|
     scanner.string = line.chomp

     ## first value - always present (might be empty)
     values = [scanner.scan( NOT_COMMA_RX )]
     until scanner.eos?
       scanner.getch  ## eat-up comma
       values << scanner.scan( NOT_COMMA_RX )
     end

     records << values
  end

  records
end
43 | 
44 | 
45 | 
##
# Reads comma-separated records line by line and
#  splits every line into values char by char.
#
#   input - any object that yields lines via each_line (e.g. File, StringIO)
#
# Returns an array of records (arrays of strings);
#   trailing empty values are kept and a blank line results in [""].
def readchar( input )
  input.each_line.map do |line|
     values = [""]     ## first value - always present (might stay empty)
     line.chomp.each_char do |ch|
       if ch == ","
         values << ""         ## comma closes the current value and opens the next
       else
         values.last << ch
       end
     end
     values
  end
end
65 | 


--------------------------------------------------------------------------------
/benchmarks/io/test/test_io.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | ## note: to run use:
 5 | ##  ruby ./io/test/test_io.rb
 6 | 
 7 | 
 8 | require 'minitest/autorun'
 9 | 
10 | 
11 | 
##
# Pins down how String#chomp treats the different line terminators.
#
#  note: uses Minitest (and not the old MiniTest spelling) -
#    the MiniTest constant is no longer defined by current minitest releases
class TestIo  < Minitest::Test


  def test_chomp
    ## If $/ has not been changed from the default Ruby record separator,
    ##   then chomp also removes carriage return characters
    ##   (that is it will remove \n, \r, and \r\n).

    ## nothing to remove - no line terminator at the very end
    assert_equal "line", "line".chomp
    assert_equal "line ", "line ".chomp
    assert_equal "line\r\n ", "line\r\n ".chomp

    ## only ONE trailing line terminator gets removed
    assert_equal "line", "line\r\n".chomp
    assert_equal "line", "line\r".chomp
    assert_equal "line", "line\n".chomp
    assert_equal "line\r\n", "line\r\n\r\n".chomp
    assert_equal "line\r", "line\r\r".chomp
    assert_equal "line\n", "line\n\n".chomp

    ## If $/ is an empty string, it will remove all trailing newlines from the string.
    ##   note: a lone \r does NOT count as a newline here
    assert_equal "line",     "line\r\n\r\n".chomp('')
    assert_equal "line",     "line\n\n".chomp('')
    assert_equal "line\r\r", "line\r\r".chomp('')
  end

end # class TestIo
38 | 


--------------------------------------------------------------------------------
/benchmarks/io/test/test_sample.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | ## note: to run use:
 5 | ##  ruby ./io/test/test_sample.rb
 6 | 
 7 | 
 8 | require 'minitest/autorun'
 9 | 
10 | 
11 | require_relative '../helper'
12 | 
13 | 
##
# Checks that all reader variants return the same first six records
#  for the sample dataset.
#
#  note: the *_sample readers are not defined in this file -
#    presumably they come from ../helper (todo: confirm)
#  note: uses Minitest (and not the old MiniTest spelling) -
#    the MiniTest constant is no longer defined by current minitest releases
class TestSample  < Minitest::Test

  ## expected header record plus the first five data records
  def recs
    [["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"],
     ["2018-01-02", "86.129997", "86.309998", "85.500000", "85.949997", "84.487411", "22483800"],
     ["2018-01-03", "86.059998", "86.510002", "85.970001", "86.349998", "84.880608", "26061400"],
     ["2018-01-04", "86.589996", "87.660004", "86.570000", "87.110001", "85.627678", "21912000"],
     ["2018-01-05", "87.660004", "88.410004", "87.430000", "88.190002", "86.689301", "23407100"],
     ["2018-01-08", "88.199997", "88.580002", "87.599998", "88.279999", "86.777763", "22113000"]]
  end


  def test_readline_sample
    assert_equal recs, readline_sample[0..5]
  end

  def test_readline_inplace_sample
    assert_equal recs, readline_inplace_sample[0..5]
  end

  def test_readline_scanner_sample
    assert_equal recs, readline_scanner_sample[0..5]
  end

  def test_readchar_sample
    assert_equal recs, readchar_sample[0..5]
  end


  def test_parse_scanner_scanner_sample
    assert_equal recs, parse_scanner_scanner_sample[0..5]
  end
end # class TestSample
47 | 


--------------------------------------------------------------------------------
/benchmarks/io/test/test_scanner.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | ## note: to run use:
 5 | ##  ruby ./io/test/test_scanner.rb
 6 | 
 7 | 
 8 | require 'minitest/autorun'
 9 | 
10 | 
11 | require 'strscan'
12 | 
13 | 
##
# Pins down the StringScanner behavior (scan, scan_until, skip, peek, getch)
#  with the regexes used for splitting a line into values.
#
#  note: uses Minitest (and not the old MiniTest spelling) -
#    the MiniTest constant is no longer defined by current minitest releases
class TestScanner  < Minitest::Test


  ## a value: everything up to the next comma or line break (might be empty)
  NOT_COMMA_OR_NEWLINE_RX = /[^,\n\r]*/

  def test_line
    buf = StringScanner.new( "a,b,\r\n" )

    assert_equal "a",  buf.scan( NOT_COMMA_OR_NEWLINE_RX )
    assert_equal ",",  buf.peek(1)
    assert_equal ",",  buf.getch
    assert_equal "b",  buf.scan( NOT_COMMA_OR_NEWLINE_RX )
    assert_equal ",",  buf.peek(1)
    assert_equal ",",  buf.getch
    ## trailing comma - last value is the empty string
    assert_equal "",   buf.scan( NOT_COMMA_OR_NEWLINE_RX )
    assert_equal "\r", buf.peek(1)
    ## skip returns the number of characters skipped (here: CR LF)
    assert_equal 2,    buf.skip( /\r?\n/ )
    assert buf.eos?
  end


  ## zero-width match right before a comma or line break, or at the end of the line
  COMMA_LOOKAHEAD_RX = /(?=,|\n|\r) | $ /x

  def test_line_with_lookahead
    buf = StringScanner.new( "a,b,\r\n" )

    assert_equal "a",  buf.scan_until( COMMA_LOOKAHEAD_RX )
    assert_equal ",",  buf.peek(1)
    assert_equal ",",  buf.getch
    assert_equal "b",  buf.scan_until( COMMA_LOOKAHEAD_RX )
    assert_equal ",",  buf.peek(1)
    assert_equal ",",  buf.getch
    assert_equal "",   buf.scan_until( COMMA_LOOKAHEAD_RX )
    assert_equal "\r", buf.peek(1)
    assert_equal 2,    buf.skip( /\r?\n/ )
    assert buf.eos?
  end


  ## all regexes match the empty string (again and again) on an empty buffer
  def test_empty
    buf = StringScanner.new( "" )

    assert_equal "", buf.scan_until( /$/ )
    assert_equal "", buf.scan_until( /$/ )
    assert buf.eos?

    assert_equal "", buf.scan_until( /(?=,) | $/x )
    assert_equal "", buf.scan_until( /(?=,) | $/x )
    assert buf.eos?

    assert_equal "", buf.scan( NOT_COMMA_OR_NEWLINE_RX )
    assert buf.eos?

    assert_equal "", buf.scan_until( COMMA_LOOKAHEAD_RX )
    assert buf.eos?
  end
end # class TestScanner
71 | 


--------------------------------------------------------------------------------
/benchmarks/split.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | def read_csv( path, sep: ',' )
 4 |   recs = []
 5 |   File.open( path, 'r:utf-8' ) do |f|
 6 |      f.each_line do |line|
 7 |        line   = line.chomp( '' )    ## fix: use line.chomp!  inplace - why? why not?
 8 |        values = line.split( sep )
 9 |        recs << values
10 |      end
11 |   end
12 |   recs
13 | end
14 | 
15 | 
## Reads a tab-separated file; same as read_csv with the tab ("\t") separator.
def read_tab( path )
  read_csv( path, sep: "\t" )
end
17 | 
## Reads a whitespace-separated table;
##   same as read_csv with runs of spaces and/or tabs as the separator.
##
## todo: add converter for read_table - why, why not??
##   translate interpunct back to space
##    values = values.map { |value| value.tr( '•', ' ' ) }
def read_table( path )
  read_csv( path, sep: /[ \t]+/ )
end
22 | 
23 | 
24 | 
25 | 
26 | 
##
# Reads a (utf-8) text file line by line, splits every line into values
#  and (optionally) runs every value through a converter.
#
#   path      - path of the file to read
#   sep       - separator passed on to String#split (string or regex); defaults to comma
#   converter - optional object responding to call( value ); if nil values stay strings
#
# Returns an array of records (arrays of values).
#   note: split (without limit) drops trailing empty values
def read_faster_csv( path, sep: ',', converter: nil )
  File.open( path, 'r:utf-8' ) do |file|
     file.each_line.map do |line|
       ##  note: chomp('') - if the separator is an empty string,
       ##    all trailing newlines (\n or \r\n) get removed
       values = line.chomp( '' ).split( sep )

       converter ? values.map { |v| converter.call(v) } : values
     end
  end
end
42 | 
43 | 
44 | 
## quick manual smoke test - runs only if this file gets executed directly
##   note: the dataset paths are relative to the current working directory
if __FILE__ == $0

require 'pp'

data = read_csv( './datasets/finance/MSFT.csv' )
pp data
## note: assign to data (not date) - otherwise the csv records from above get printed again
data = read_tab( './datasets/finance/o/MSFT.tab' )
pp data
data = read_table( './datasets/finance/o/MSFT.txt' )
pp data[0..2]


end
58 | 


--------------------------------------------------------------------------------
/csv11/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /spec/examples.txt
 9 | /test/tmp/
10 | /test/version_tmp/
11 | /tmp/
12 | 
13 | # Used by dotenv library to load environment variables.
14 | # .env
15 | 
16 | ## Specific to RubyMotion:
17 | .dat*
18 | .repl_history
19 | build/
20 | *.bridgesupport
21 | build-iPhoneOS/
22 | build-iPhoneSimulator/
23 | 
24 | ## Specific to RubyMotion (use of CocoaPods):
25 | #
26 | # We recommend against adding the Pods directory to your .gitignore. However
27 | # you should judge for yourself, the pros and cons are mentioned at:
28 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29 | #
30 | # vendor/Pods/
31 | 
32 | ## Documentation cache and generated files:
33 | /.yardoc/
34 | /_yardoc/
35 | /doc/
36 | /rdoc/
37 | 
38 | ## Environment normalization:
39 | /.bundle/
40 | /vendor/bundle
41 | /lib/bundler/man/
42 | 
43 | # for a library or gem, you might want to ignore these files since the code is
44 | # intended to run in multiple environments; otherwise, check them in:
45 | # Gemfile.lock
46 | # .ruby-version
47 | # .ruby-gemset
48 | 
49 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50 | .rvmrc
51 | 


--------------------------------------------------------------------------------
/csv11/HISTORY.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2017-07-02
2 | 
3 | * Everything is new. First release.
4 | 


--------------------------------------------------------------------------------
/csv11/Manifest.txt:
--------------------------------------------------------------------------------
1 | HISTORY.md
2 | Manifest.txt
3 | README.md
4 | Rakefile
5 | lib/csv11.rb
6 | lib/csv11/version.rb
7 | test/helper.rb
8 | test/test_version.rb
9 | 


--------------------------------------------------------------------------------
/csv11/README.md:
--------------------------------------------------------------------------------
 1 | # csv11
 2 | 
 3 | csv11 library / gem - read / parse comma-separated values (csv); supports csv 1.1 incl. comments, named values, multi-line records, and more
 4 | 
 5 | * home  :: [github.com/csv11/csv11](https://github.com/csv11/csv11)
 6 | * bugs  :: [github.com/csv11/csv11/issues](https://github.com/csv11/csv11/issues)
 7 | * gem   :: [rubygems.org/gems/csv11](https://rubygems.org/gems/csv11)
 8 | * rdoc  :: [rubydoc.info/gems/csv11](http://rubydoc.info/gems/csv11)
 9 | 
10 | 
11 | 
12 | 
13 | ## Usage
14 | 
15 | to be done
16 | 
17 | 
18 | ## License
19 | 
20 | ![](https://publicdomainworks.github.io/buttons/zero88x31.png)
21 | 
22 | The `csv11` scripts are dedicated to the public domain.
23 | Use it as you please with no restrictions whatsoever.
24 | 
25 | ## Questions? Comments?
26 | 
27 | Post them to the [wwwmake forum](http://groups.google.com/group/wwwmake). Thanks!
28 | 


--------------------------------------------------------------------------------
/csv11/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/csv11/version.rb'
 3 | 
 4 | Hoe.spec 'csv11' do
 5 | 
 6 |   self.version = Values::VERSION
 7 | 
 8 |   self.summary = 'csv11 - read / parse comma-separated values (csv); supports csv 1.1 incl. comments, named values, multi-line records, and more'
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/csv11/csv11']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'ruby-talk@ruby-lang.org'
15 | 
16 |   # switch extension to .markdown for GitHub formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.licenses = ['Public Domain']
21 | 
22 |   self.spec_extras = {
23 |    required_ruby_version: '>= 2.2.2'
24 |   }
25 | 
26 | end
27 | 


--------------------------------------------------------------------------------
/csv11/lib/csv11/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module Values
 4 |   MAJOR = 0    ## todo: namespace inside version or something - why? why not??
 5 |   MINOR = 0
 6 |   PATCH = 3
 7 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
 8 | 
 9 |   def self.version
10 |     VERSION
11 |   end
12 | 
13 |   def self.banner
14 |     "csv11/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
15 |   end
16 | 
17 |   def self.root
18 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
19 |   end
20 | end # module Values
21 | 


--------------------------------------------------------------------------------
/csv11/test/helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ## minitest setup
 4 | require 'minitest/autorun'
 5 | 
 6 | 
 7 | $RUBYLIBS_DEBUG = true
 8 | 
 9 | ## our own code
10 | require 'csv11'
11 | 


--------------------------------------------------------------------------------
/csv11/test/test_split.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_split.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
##
# Checks Values.split - splitting a single line into values incl.
#  named values (name: value), quoted values and reserved names (http, https).
#
#  note: uses Minitest (and not the old MiniTest spelling) -
#    the MiniTest constant is no longer defined by current minitest releases
class TestSplit < Minitest::Test

  def test_split

    ## plain values - leading and trailing spaces get stripped
    assert_equal ['a', 'b', 'c'], Values.split( 'a,b,c' )
    assert_equal ['a', 'b', 'c'], Values.split( 'a, b, c' )
    assert_equal ['a', 'b', ''], Values.split( 'a, b,' )
    assert_equal ['a', 'b'], Values.split( 'a, b' )

    ## named values result in [name,value] pairs
    assert_equal ['a', ['n','b'], ['m','c'],'d'], Values.split( 'a,n:b,m:c,d' )
    assert_equal ['a', ['n','b'], ['m','c'],'d'], Values.split( 'a, n: b, m: c, d' )

    ## check reserved names (e.g. http and https)
    assert_equal ['a', 'http://example.com', 'b'], Values.split( 'a, http://example.com, b' )
    assert_equal ['a', 'http://example.com:80', 'b'], Values.split( 'a, http://example.com:80, b' )
    assert_equal ['a', 'https://example.com', 'b'], Values.split( 'a, https://example.com, b' )
    assert_equal ['a', 'https://example.com:80', 'b'], Values.split( 'a, https://example.com:80, b' )
    assert_equal ['https://example.com'],    Values.split( 'https://example.com' )
    assert_equal ['https://example.com:80'], Values.split( 'https://example.com:80' )

    ## names with spaces or other non-word characters do NOT count as names
    assert_equal ['a', 'n n: b', 'm&m: c','d'], Values.split( 'a, n n: b, m&m: c, d' )

    ## single or double quoted values
    assert_equal [%{Hello, World!}], Values.split( %{"Hello, World!"} )
    assert_equal [%{Hello, World!}], Values.split( %{'Hello, World!'} )
    assert_equal [%{'Hello, World!'}], Values.split( %{"'Hello, World!'"} )
    assert_equal [%{"Hello, World!"}], Values.split( %{'"Hello, World!"'} )
    assert_equal [%{'Hello, World!'}, %{"Hello, World!"}], Values.split( %{"'Hello, World!'",'"Hello, World!"'} )

    assert_equal [%{The "Quoted" World}], Values.split( %{The "Quoted" World} )   ## no need to escape quotes if not first (letter) of value

    assert_equal [%{'""Hello""', Quotes}], Values.split( %{"""'""Hello""', Quotes"""} )

    ## check single-line named Values  - will IGNORE commas (not special)
    assert_equal [['open', '12h, 13h, 14, 15h']], Values.split( 'open: 12h, 13h, 14, 15h')

    ## check named value with comma escaped with quote
    assert_equal ['a', 'b,c', 'd'], Values.split( %{a,"b,c",d} )
    assert_equal ['a', 'b,c', 'd'], Values.split( %{  a , "b,c" ,  d  } )
    assert_equal ['a', ['n','b,c'], ['m','d,e'],'f,g'], Values.split( %{a,n:"b,c",m:"d,e","f,g"} )

    ## only the first colon splits name and value
    assert_equal ['a', ['n','b:c'], ['m','d:e'],'f'], Values.split( 'a, n: b:c, m:d:e, f' )
    assert_equal ['a', ['n','b:c'], ['m','d:e:f'],'g h:i:j'], Values.split( 'a,n:b:c,m:d:e:f,g h:i:j' )

    ## note: space in quotes is significant - keep? why ? why not??
    assert_equal ['a', ' b  , c ', 'd'], Values.split( %{  a , " b  , c " ,  d  } )
  end


end  # class TestSplit
60 | 


--------------------------------------------------------------------------------
/csv11/test/test_version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_version.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
##
# Smoke test - checks that the library loads and the version constant is defined.
#
#  note: uses Minitest (and not the old MiniTest spelling) -
#    the MiniTest constant is no longer defined by current minitest releases
class TestVersion < Minitest::Test


  def test_version

    puts Values::VERSION
    assert true
    ## assume everything ok if get here
  end

end # class TestVersion
22 | 


--------------------------------------------------------------------------------
/csvhuman/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /test/tmp/
 9 | /test/version_tmp/
10 | /tmp/
11 | 
12 | ## Specific to RubyMotion:
13 | .dat*
14 | .repl_history
15 | build/
16 | 
17 | ## Documentation cache and generated files:
18 | /.yardoc/
19 | /_yardoc/
20 | /doc/
21 | /rdoc/
22 | 
23 | ## Environment normalisation:
24 | /.bundle/
25 | /vendor/bundle
26 | /lib/bundler/man/
27 | 
28 | # for a library or gem, you might want to ignore these files since the code is
29 | # intended to run in multiple environments; otherwise, check them in:
30 | # Gemfile.lock
31 | # .ruby-version
32 | # .ruby-gemset
33 | 
34 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
35 | .rvmrc
36 | 


--------------------------------------------------------------------------------
/csvhuman/HISTORY.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2018-11-06
2 | 
3 | * Everything is new. First release
4 | 


--------------------------------------------------------------------------------
/csvhuman/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | config/attributes.csv
 6 | config/langs.csv
 7 | config/tags.csv
 8 | config/types.csv
 9 | config/versions.csv
10 | lib/csvhuman.rb
11 | lib/csvhuman/base.rb
12 | lib/csvhuman/column.rb
13 | lib/csvhuman/converter.rb
14 | lib/csvhuman/doc/helper.rb
15 | lib/csvhuman/doc/schema.rb
16 | lib/csvhuman/reader.rb
17 | lib/csvhuman/tag.rb
18 | lib/csvhuman/version.rb
19 | test/data/airports.csv
20 | test/data/ebola.csv
21 | test/data/hdx/ebola_treatment_centres.csv
22 | test/data/hdx/phl_haima_houses_damaged.csv
23 | test/data/hdx/zika_cases.csv
24 | test/data/sample1.csv
25 | test/data/sample2.csv
26 | test/data/sample3.csv
27 | test/data/sample4.csv
28 | test/data/test.csv
29 | test/data/unhcr.csv
30 | test/helper.rb
31 | test/test_doc.rb
32 | test/test_hdx.rb
33 | test/test_header_converter.rb
34 | test/test_misc.rb
35 | test/test_reader.rb
36 | test/test_samples.rb
37 | test/test_tags.rb
38 | test/test_type_converters.rb
39 | test/test_type_mappings.rb
40 | 


--------------------------------------------------------------------------------
/csvhuman/NOTES.md:
--------------------------------------------------------------------------------
 1 | # Notes
 2 | 
 3 | ## Todos
 4 | 
 5 | - [ ] check if `+id` is always a number type (for auto-conversion) e.g. `#event+id`
 6 | - [ ] add check for `#geo` and `+lat`, `+lon` (for auto-conversion) to floats
 7 | - [ ] add type converter for `#date` (e.g. `#date+start`, `#date+reported`, etc.) - support 2017-12-11 and 11/14/2017 for now?
 8 | - [ ] header converter for symbols - turn `+` into `_x_` or `_I_` or into `$` - why? why not? (check if `$` supported in ruby inline? - no, it's not possible)
 9 | 
10 | 
11 | ## Examples
12 | 
13 | Add more .csv examples with hxl tags, see <https://tools.humdata.org/examples/hxl/>
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | ### Use `$` in symbol for `+`
21 | 
22 | #### Ruby
23 | 
24 | ```
25 | >> s = :adm1
26 | => :adm1
27 | >> s = :adm1_x_code
28 | => :adm1_x_code
29 | >> s = :adm1$code
30 | SyntaxError: (irb):3: syntax error, unexpected tGVAR, expecting end-of-input
31 | ```
32 | 


--------------------------------------------------------------------------------
/csvhuman/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/csvhuman/version.rb'
 3 | 
 4 | Hoe.spec 'csvhuman' do
 5 | 
 6 |   self.version = CsvHuman::VERSION
 7 | 
 8 |   self.summary = "csvhuman - read tabular data in the CSV Humanitarian eXchange Language (HXL)  format, that is, comma-separated values (CSV) line-by-line records with a hashtag (meta data) line using the Humanitarian eXchange Language (HXL) rules"
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/csvreader/csvhuman']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'wwwmake@googlegroups.com'
15 | 
16 |   # switch extension to .markdown for GitHub formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.extra_deps = [
21 |      ['csvreader',  '>=1.2.1']
22 |   ]
23 | 
24 |   self.licenses = ['Public Domain']
25 | 
26 |   self.spec_extras = {
27 |     required_ruby_version: '>= 2.2.2'
28 |   }
29 | 
30 | end
31 | 


--------------------------------------------------------------------------------
/csvhuman/config/langs.csv:
--------------------------------------------------------------------------------
 1 | code, name+en, name
 2 | ar,	Arabic,
 3 | en, English, English
 4 | es, Spanish, Español
 5 | de, German, Deutsch
 6 | fa, Dari / Farsi / Persian,
 7 | fr, French,
 8 | ms,	Malay,
 9 | ps,	Pashto,
10 | ru, Russian,
11 | sw, Swahili,
12 | tl, Tagalog,
13 | uk, Ukrainian,
14 | ur, Urdu,
15 | 


--------------------------------------------------------------------------------
/csvhuman/config/types.csv:
--------------------------------------------------------------------------------
1 | type,description
2 | text,
3 | number,
4 | url,
5 | email,
6 | phone,
7 | date,
8 | 


--------------------------------------------------------------------------------
/csvhuman/config/versions.csv:
--------------------------------------------------------------------------------
1 | version, date
2 | 1.0, 2016-03-18
3 | 1.1, 2018-04-30
4 | 1.2,
5 | 


--------------------------------------------------------------------------------
/csvhuman/lib/csvhuman.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'csvreader'      ## add all "alternative" shortcut aliases
 4 | 
 5 | 
 6 | ## our own code (without "top-level" shortcuts e.g. "modular version")
 7 | require 'csvhuman/base'
 8 | 
 9 | 
10 | ####
11 | #  add some "alternative" shortcut aliases
12 | CsvHum  = CsvHuman
13 | CSV_HXL = CsvHuman
14 | CSVHXL  = CsvHuman
15 | HXL     = CsvHuman
16 | 


--------------------------------------------------------------------------------
/csvhuman/lib/csvhuman/base.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'csvreader/base'
 4 | 
 5 | 
 6 | ## our own code
 7 | require 'csvhuman/version'    # note: let version always go first
 8 | require 'csvhuman/tag'
 9 | require 'csvhuman/column'
10 | require 'csvhuman/converter'
11 | require 'csvhuman/reader'
12 | 
13 | require 'csvhuman/doc/helper.rb'
14 | require 'csvhuman/doc/schema.rb'
15 | 
16 | 
17 | # say hello
18 | puts CsvHuman.banner     if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
19 | 


--------------------------------------------------------------------------------
/csvhuman/lib/csvhuman/column.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | class CsvHuman
  5 | 
  6 | 
  7 | class Columns
  8 | 
  9 | 
 10 |   def self.build( values, header_converter )
 11 | 
 12 |     ## "clean" unify/normalize names
 13 |     keys = values.map do |value|
 14 |       if value
 15 |         if value.empty?
 16 |           nil
 17 |         else
 18 |           ## e.g. #ADM1 CODE                      => #adm1 +code
 19 |           ##      POPULATION F CHILDREN AFFECTED  => #population +affected +children +f
 20 |           tag_key = Tag.normalize( value )
 21 |           ## turn empty normalized tags (e.g. "stray" hashtag) into nil too
 22 | 
 23 |           if value.empty?
 24 |               nil
 25 |           else
 26 |               header_key =
 27 |               ##   todo/fix: pass in column index - why? why not?
 28 |               ##     pass in column index for all columns (or only tagged ones?) or both?
 29 |               ##   if header_converter.arity == 1  # straight converter
 30 |                      header_converter.call( tag_key )
 31 |               ##   else
 32 |               ##       header_converter.call( value, index )
 33 |               ##    end
 34 | 
 35 |               ## note:
 36 |               ##   return nil, "" or false to skip column
 37 |               if header_key.nil? || header_key.empty? || header_key == false   ## check again: skip empty "" columns
 38 |                 nil
 39 |               else
 40 |                 ##  note: return header_key (used for returned record/hash) AND tag_key (used for type conversion config)
 41 |                 ## lets us fold more columns into one or splat single list/array columns into many
 42 |                 [header_key,tag_key]
 43 |               end
 44 |           end
 45 |         end
 46 |       else  # keep (nil) as is
 47 |         nil
 48 |       end
 49 |     end
 50 | 
 51 | 
 52 |     counts = {}
 53 |     keys.each_with_index do |key,i|
 54 |        if key
 55 |          header_key = key[0]
 56 |          counts[header_key] ||= []
 57 |          counts[header_key] << i
 58 |        end
 59 |     end
 60 |     ## puts "counts:"
 61 |     ## pp counts
 62 | 
 63 | 
 64 |     ## create all unique tags  (used for type conversion)
 65 |     tags = {}
 66 |     keys.each do |key|
 67 |       if key
 68 |         tag_key = key[1]
 69 |         tags[tag_key] ||= Tag.parse( tag_key )  ## note: "reuse" tag for all columns if same tag key
 70 |       end
 71 |     end
 72 |     ## puts "tags:"
 73 |     ## pp tags
 74 | 
 75 | 
 76 |     cols = []
 77 |     keys.each do |key|
 78 |       if key
 79 |         header_key = key[0]
 80 |         tag_key    = key[1]
 81 | 
 82 |         count = counts[header_key]
 83 |         tag   = tags[tag_key]        ## note: "reuse" tag for all columns if same tag key
 84 | 
 85 |         if count.size > 1
 86 |           ## note: defaults to use "standard/default" tag key (as a string)
 87 |           cols << Column.new( header_key, tag, list: true )
 88 |         else
 89 |           cols << Column.new( header_key, tag )
 90 |         end
 91 |       else
 92 |         cols << Column.new
 93 |       end
 94 |     end
 95 | 
 96 |     cols
 97 |   end
 98 | end ## class Columns
 99 | 
100 | 
101 | 
102 | 
##
# A single column - holds the record key, the (optional) tag and the list flag.
class Column
   ## key - used for record (record key); note: list columns must use the same key
   attr_reader  :key, :tag


   def initialize( key=nil, tag=nil, list: false )
     @key, @tag, @list = key, tag, list
   end


   ## true if the column has a tag
   def tagged?
     !@tag.nil?
   end

   ## the list flag as passed in on creation
   def list?
     @list
   end
end  # class Column
118 | 
119 | end # class CsvHuman
120 | 


--------------------------------------------------------------------------------
/csvhuman/lib/csvhuman/doc/helper.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | class CsvHuman
  4 | module DocHelper
  5 | 
  6 | 
  7 | HASHTAG_LINE_RX= /^
  8 |                    \s*
  9 |                    \#
 10 |                    (?<name>[a-z][a-z0-9]+)
 11 |                    \s*
 12 |                    $/x
 13 | 
 14 | def match_hashtag( line )
 15 |    if (m=HASHTAG_LINE_RX.match(line))
 16 |      puts "hashtag >#{m[:name]}<"
 17 |      m
 18 |    else
 19 |      nil
 20 |    end
 21 | end
 22 | 
 23 | 
 24 | 
 25 | ## note: attrib might be one letter only (e.g.) +m,+f, etc.
 26 | ATTRIBUTE_LINE_RX= /^
 27 |                    \s*
 28 |                    \+
 29 |                    (?<name>[a-z][a-z0-9]*)
 30 |                    \s*
 31 |                    $/x
 32 | 
 33 | def match_attribute( line )
 34 |    if (m=ATTRIBUTE_LINE_RX.match(line))
 35 |      puts "attrib >#{m[:name]}<"
 36 |      m
 37 |    else
 38 |      false
 39 |    end
 40 | end
 41 | 
 42 | 
 43 | 
 44 | ##
 45 | ## e.g. 1.1. Places
 46 | ##      2.1. Sex- and-age disaggregation (SADD) attributes
 47 | 
 48 | HEADING_LINE_RX=/^
 49 |                    \s*
 50 |                    (?<level1>[1-9])
 51 |                      \.
 52 |                    (?<level2>[1-9])
 53 |                      \.
 54 |                      \s+
 55 |                     (?<title>.+?)
 56 |                      \s*
 57 |                    $/x
 58 | 
 59 | def match_heading( line )
 60 |   if (m=HEADING_LINE_RX.match(line))
 61 |     puts "heading #{m[:level1]}.#{m[:level2]}. (#{m[:level2]}) >#{m[:title]}<"
 62 |     m
 63 |   else
 64 |     false
 65 |   end
 66 | end
 67 | 
 68 | 
 69 | 
 70 | TYPE_RX = /Every value must be a (?<type>[a-z]+)./
 71 | def match_type( line )
 72 |   if (m=TYPE_RX.match(line))
 73 |     puts "type: >#{m[:type]}<"
 74 |     m
 75 |   else
 76 |     false
 77 |   end
 78 | end
 79 | 
 80 | 
 81 | 
 82 | SINCE_HXL_RX = /Since HXL (?<version>[1]\.[0-9])\.?/
 83 | def match_since_hxl( line )
 84 |   if (m=SINCE_HXL_RX.match(line))
 85 |     puts "version: >#{m[:version]}<"
 86 |     m
 87 |   else
 88 |     false
 89 |   end
 90 | end
 91 | 
 92 | 
 93 | 
 94 | def split_descr( line )
 95 |   if( m=match_since_hxl( line ))
 96 |     version = m[:version]
 97 |     ## remove "Since HXL 1.0" from text
 98 |     text    = line.gsub( SINCE_HXL_RX, '' ).strip
 99 |   else
100 |     version = '?'
101 |     text = line
102 |   end
103 |   [text,version]
104 | end
105 | 
106 | 
107 | end # module DocHelper
108 | end # class CsvHuman
109 | 


--------------------------------------------------------------------------------
/csvhuman/lib/csvhuman/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvHuman
 5 | 
 6 |   MAJOR = 1
 7 |   MINOR = 1
 8 |   PATCH = 1
 9 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
10 | 
11 | 
12 |   def self.version
13 |     VERSION
14 |   end
15 | 
16 |   def self.banner
17 |     "csvhuman/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18 |   end
19 | 
20 |   def self.root
21 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
22 |   end
23 | 
24 | end # module CsvHuman
25 | 


--------------------------------------------------------------------------------
/csvhuman/scripts/mkattributes.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | require 'csvreader'
  4 | 
  5 | 
  6 | 
  7 | def linkify_attribute( attribute )
  8 |   "[`+#{attribute}`](##{attribute})"
  9 | end
 10 | 
 11 | 
 12 | ATTRIBUTE_RX = /
 13 |                \+[a-z][a-z0-9_]*
 14 |              /x
 15 | 
 16 | HASHTAG_RX = /
 17 |                \#[a-z][a-z0-9]+
 18 |              /x
 19 | 
 20 | 
 21 | 
 22 | def linkify_hashtags( line, page: '' )
 23 |   ## note: assumes #adm1 etc. (that is, includes leading hashtag)
 24 |   line.gsub( HASHTAG_RX ) do |hashtag|
 25 |     puts "linkify hashtag >#{hashtag}<"
 26 |     "[`#{hashtag}`](#{page}#{hashtag})"
 27 |   end
 28 | end
 29 | 
 30 | def linkify_attributes( line )
 31 |   ## note: assumes +f etc. (that is, includes leading plus)
 32 |   line.gsub( ATTRIBUTE_RX ) do |attrib|
 33 |     puts "linkify attribute >#{attrib}<"
 34 |     if attrib.index( '_' )
 35 |       "`#{attrib}`"    ## note: do NOT linkify custom attributes for now (if include underscore e.g. +age12_17 etc.)
 36 |     else
 37 |       "[`#{attrib}`](##{attrib[1..-1]})"  ## note: cut-of leading + in intralink
 38 |     end
 39 |   end
 40 | end
 41 | 
 42 | 
 43 | 
 44 | def build_summary( attributes )
 45 |   pp attributes
 46 | 
 47 |   attributes_a_to_z = attributes.sort { |l,r|  l['attribute'] <=> r['attribute'] }
 48 |   pp attributes_a_to_z
 49 | 
 50 | 
 51 |   buf = ""
 52 |   buf << "# Humanitarian eXchangle Language (HXL) Attributes\n\n"
 53 | 
 54 |   attributes_a_to_z.each do |attribute|
 55 |     buf << linkify_attribute( attribute['attribute'])
 56 |     buf << "\n"
 57 |   end
 58 | 
 59 |   buf << "\n\n"
 60 | 
 61 | 
 62 | 
 63 |   last_category = nil
 64 | 
 65 |   attributes.each do |attribute|
 66 | 
 67 |     if attribute['category'] != last_category
 68 |       buf << "## #{attribute['category']}\n\n"
 69 |     end
 70 | 
 71 |     buf << "### `+#{attribute['attribute']}`\n\n"
 72 |     buf << "#{linkify_attributes(attribute['description'])}"
 73 |     buf << " "
 74 |     buf << "_Since version #{attribute['since']}_\n\n"
 75 | 
 76 |     unless attribute['tags'].empty?
 77 |       buf << "Tags: #{linkify_hashtags(attribute['tags'], page: 'TAGS.md')}\n\n"
 78 |     end
 79 | 
 80 | 
 81 |     last_category = attribute['category']
 82 |   end
 83 | 
 84 |   buf
 85 | end
 86 | 
 87 | 
 88 | 
 89 | 
 90 | ## pp Csv.read( "./config/attributes.csv" )
 91 | 
 92 | attributes = CsvHash.read( "./config/attributes.csv" )
 93 | 
 94 | buf = build_summary( attributes )
 95 | puts buf
 96 | 
 97 | File.open( "./ATTRIBUTES.md", 'w:utf-8') do |f|
 98 |   f.write( buf )
 99 | end
100 | 


--------------------------------------------------------------------------------
/csvhuman/scripts/mktags.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | require 'csvreader'
  4 | 
  5 | 
  6 | 
  7 | def linkify_tag( tag )
  8 |   "[`##{tag}`](##{tag})"
  9 | end
 10 | 
 11 | 
 12 | ATTRIBUTE_RX = /
 13 |                \+[a-z][a-z0-9_]*
 14 |              /x
 15 | 
 16 | HASHTAG_RX = /
 17 |                \#[a-z][a-z0-9]+
 18 |              /x
 19 | 
 20 | def linkify_hashtags( line )
 21 |   ## note: assumes #adm1 etc. (that is, includes leading hashtag)
 22 |   line.gsub( HASHTAG_RX ) do |hashtag|
 23 |     puts "linkify hashtag >#{hashtag}<"
 24 |     "[`#{hashtag}`](#{hashtag})"
 25 |   end
 26 | end
 27 | 
 28 | def linkify_attributes( line, page: '' )
 29 |   ## note: assumes +f etc. (that is, includes leading plus)
 30 |   line.gsub( ATTRIBUTE_RX ) do |attrib|
 31 |     puts "linkify attribute >#{attrib}<"
 32 |     if attrib.index( '_' )
 33 |       "`#{attrib}`"    ## note: do NOT linkify custom attributes for now (if include underscore e.g. +age12_17 etc.)
 34 |     else
 35 |       "[`#{attrib}`](#{page}##{attrib[1..-1]})"  ## note: cut-of leading + in intralink
 36 |     end
 37 |   end
 38 | end
 39 | 
 40 | 
 41 | 
 42 | 
 43 | def build_summary( tags )
 44 |   pp tags
 45 | 
 46 |   tags_a_to_z = tags.sort { |l,r|  l['tag'] <=> r['tag'] }
 47 |   pp tags_a_to_z
 48 | 
 49 | 
 50 |   buf = ""
 51 |   buf << "# Humanitarian eXchangle Language (HXL) Tags\n\n"
 52 | 
 53 |   tags_a_to_z.each do |tag|
 54 |     buf << linkify_tag( tag['tag'])
 55 |     buf << "\n"
 56 |   end
 57 | 
 58 |   buf << "\n\n"
 59 | 
 60 | 
 61 | 
 62 |   last_category = nil
 63 | 
 64 |   tags.each do |tag|
 65 | 
 66 |     if tag['category'] != last_category
 67 |       buf << "## #{tag['category']}\n\n"
 68 |     end
 69 | 
 70 |     buf << "### `##{tag['tag']}`\n\n"
 71 |     buf << "#{linkify_hashtags(tag['description'])}"
 72 |     buf << " "
 73 |     buf << "_Since version #{tag['since']}_\n\n"
 74 | 
 75 |     unless tag['type'].empty?
 76 |       buf << "Every value must be a **#{tag['type']}**.\n\n"
 77 |     end
 78 | 
 79 |     unless tag['attributes'].empty?
 80 |       buf << "Attributes: #{linkify_attributes(tag['attributes'], page: 'ATTRIBUTES.md')}\n\n"
 81 |     end
 82 | 
 83 | 
 84 |     last_category = tag['category']
 85 |   end
 86 | 
 87 |   buf
 88 | end
 89 | 
 90 | 
 91 | ## pp Csv.read( "./config/tags.csv" )
 92 | 
 93 | tags = CsvHash.read( "./config/tags.csv" )
 94 | 
 95 | buf = build_summary( tags )
 96 | puts buf
 97 | 
 98 | File.open( "./TAGS.md", 'w:utf-8') do |f|
 99 |   f.write( buf )
100 | end
101 | 


--------------------------------------------------------------------------------
/csvhuman/scripts/txt2csv.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'csvhuman'
 4 | 
 5 | 
 6 | def csv_row( *values )
 7 |   values.map do |value|
 8 |      if value && (value.index(",") || value.index('"'))
 9 |        ## double quotes and enclose in double qoutes
10 |        value = %Q{"#{value.gsub('"', '""')}"}
11 |      else
12 |        value
13 |      end
14 |   end
15 | end
16 | 
17 | 
## step 1: convert the attributes page (in text) to ./config/attributes.csv
attribute_recs = CsvHuman::Doc.read_attributes( "./scripts/pages/attributes.txt" )
pp attribute_recs

File.open( "./config/attributes.csv", 'w:utf-8') do |out|
  out.write "attribute,since,category,tags,description\n"    ## header line
  attribute_recs.each do |rec|
    out.write "#{csv_row(*rec).join(',')}\n"
  end
end


## step 2: convert the tags page (in text) to ./config/tags.csv
tag_recs = CsvHuman::Doc.read_tags( "./scripts/pages/tags.txt" )
pp tag_recs

File.open( "./config/tags.csv", 'w:utf-8') do |out|
  out.write "tag,type,since,category,attributes,description\n"    ## header line
  tag_recs.each do |rec|
    out.write "#{csv_row(*rec).join(',')}\n"
  end
end
42 | 


--------------------------------------------------------------------------------
/csvhuman/test/data/airports.csv:
--------------------------------------------------------------------------------
1 | id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,gps_code,iata_code,local_code,home_link,wikipedia_link,keywords,score,last_updated
2 | "#meta +id","#meta +code","#loc +airport +type","#loc +airport +name","#geo +lat","#geo +lon","#geo +elevation +ft","#region +continent +code","#country +code +iso2","#adm1 +code +iso","#loc +municipality +name","#status +scheduled","#loc +airport +code +gps","#loc +airport +code +iata","#loc +airport +code +local","#meta +url +airport","#meta +url +wikipedia","#meta +keywords","#meta +score","#date +updated"
3 | 4976,NSFA,medium_airport,"Faleolo International Airport",-13.829999923706055,-172.00799560546875,58,OC,WS,WS-AA,Apia,1,NSFA,APW,,,http://en.wikipedia.org/wiki/Faleolo_International_Airport,,1050,2009-08-31T16:22:49+00:00
4 | 35173,NSMA,small_airport,"Maota Airport",-13.742300033569336,-172.25799560546875,,OC,WS,WS-PA,Maota,1,NSMA,MXS,,,http://en.wikipedia.org/wiki/Maota_Airport,"Savaii Island",450,2009-08-31T16:13:53+00:00
5 | 31127,NSFI,small_airport,"Fagali'i Airport",-13.848699569699999,-171.740005493,131,OC,WS,WS-TU,Apia,0,NSFI,FGI,,,http://en.wikipedia.org/wiki/Fagali'i_Airport,,50,2012-11-26T12:09:24+00:00
6 | 30608,NSAU,small_airport,"Asau Airport",-13.505132,-172.627888,,OC,WS,WS-VS,Asau,1,NSAU,AAU,,,http://en.wikipedia.org/wiki/Asau_Airport,,50,2016-06-01T06:17:22+00:00
7 | 


--------------------------------------------------------------------------------
/csvhuman/test/data/sample1.csv:
--------------------------------------------------------------------------------
 1 | %%%%%%%%%%%%%%%%%%%%%
 2 | % sample from HXL tagging conventions (version 1.1)
 3 | %  see http://hxlstandard.org/standard/1_1final/tagging/
 4 | 
 5 | CAMP INFORMATION,       ,        NEEDS
 6 | LOCATION NAME,    LOCATION CODE, NUMBER AFFECTED
 7 | #loc +name,       #loc +code,    #affected
 8 | Camp A,           01000001,      2000
 9 | Camp B,           01000002,      750
10 | Camp C,           01000003,      1920
11 | 


--------------------------------------------------------------------------------
/csvhuman/test/data/sample2.csv:
--------------------------------------------------------------------------------
1 | %%%%%%%%%%%%%%%%%%%%%
2 | % sample from HXL tagging conventions (version 1.1)
3 | %  see http://hxlstandard.org/standard/1_1final/tagging/
4 | 
5 | #event+id, #affected+killed, #region,                      #meta+source+reliability, #date+reported, #geo+lat,  #geo+lon
6 | 1,         1,                Mediterranean,                Verified,                 2015-11-05,     36.891500,  27.287700
7 | 3,         1,                Central America incl. Mexico, Partially Verified,       2015-11-03,     15.956400, -93.663099
8 | 


--------------------------------------------------------------------------------
/csvhuman/test/data/sample3.csv:
--------------------------------------------------------------------------------
 1 | %%%%%%%%%%%%%%%%%%%%%
 2 | % sample from HXL tagging conventions (version 1.1)
 3 | %  see http://hxlstandard.org/standard/1_1final/tagging/
 4 | 
 5 | P-CODE 1,  P-CODE 2,  P-CODE 3
 6 | #loc+code, #loc+code, #loc+code
 7 | 020503
 8 | 060107,    060108
 9 | 173219
10 | 530012
11 | 530013,    530015,    279333
12 | 


--------------------------------------------------------------------------------
/csvhuman/test/data/sample4.csv:
--------------------------------------------------------------------------------
1 | %%%%%%%%%%%%%%%%%%%%%
2 | % sample from HXL tagging conventions (version 1.1)
3 | %  see http://hxlstandard.org/standard/1_1final/tagging/
4 | 
5 | REGION,            2008,            2009,            2010,            2011
6 | #adm1 +name,       #affected+label, #affected+label, #affected+label, #affected+label
7 | Coast District,    0,               30,              100,             250
8 | Mountain District, 15,              75,              30,              45
9 | 


--------------------------------------------------------------------------------
/csvhuman/test/data/test.csv:
--------------------------------------------------------------------------------
1 | What,,,Who,Where,For whom,
2 | Record,Sector/Cluster,Subsector,Organisation,Country,Males,Females,Subregion
3 | ,#sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
4 | 001,WASH,Subsector 1,Org 1,Country 1,100,100,Region 1
5 | 002,Health,Subsector 2,Org 2,Country 2,,,Region 2
6 | 003,Education,Subsector 3,Org 3,Country 2,250,300,Region 3
7 | 004,WASH,Subsector 4,Org 1,Country 3,80,95,Region 4
8 | 


--------------------------------------------------------------------------------
/csvhuman/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | ## minitest setup
 4 | 
 5 | require 'minitest/autorun'
 6 | 
 7 | 
 8 | ## our own code
 9 | ## require 'csvhuman/base'
10 | require 'csvhuman'
11 | 
12 | 
13 | ## add test_data_dir helper
class CsvHuman
  ## directory with the test data files, that is, <root>/test/data
  def self.test_data_dir
    [root, 'test', 'data'].join( '/' )
  end
end
19 | 


--------------------------------------------------------------------------------
/csvhuman/test/test_hdx.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_hdx.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## smoke tests - read the hdx sample files and pretty print the records
##   note: no assertions for now (the tests pass if reading does not raise)
##
## note: uses Minitest::Test - the old MiniTest (capital T) alias
##       is no longer defined by default in newer minitest versions
class TestHdxSamples < Minitest::Test


def test_ebola
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/hdx/ebola_treatment_centres.csv" )
  pp recs
end

def test_phl_haima
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/hdx/phl_haima_houses_damaged.csv" )
  pp recs
end

def test_zika_cases
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/hdx/zika_cases.csv" )
  pp recs
end

end # class TestHdxSamples
29 | 


--------------------------------------------------------------------------------
/csvhuman/test/test_header_converter.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_header_converter.rb
 6 | 
 7 | 
 8 | 
 9 | require 'helper'
10 | 
## tests for the header converters registered in CsvHuman::HEADER_CONVERTERS
##   (under the keys :none, :default and :symbol)
##
## note: uses Minitest::Test - the old MiniTest (capital T) alias
##       is no longer defined by default in newer minitest versions
class TestHeaderConverter < Minitest::Test

## helpers - run the value through the converter with the matching key
def conv_none( value )
  CsvHuman::HEADER_CONVERTERS[:none].call( value )
end

def conv_default( value )
  CsvHuman::HEADER_CONVERTERS[:default].call( value )
end

def conv_symbol( value )
  CsvHuman::HEADER_CONVERTERS[:symbol].call( value )
end



## :none - header expected back unchanged
def test_none
  assert_equal "#sector", conv_none( "#sector" )
  assert_equal "#adm1",   conv_none( "#adm1" )

  assert_equal "#sector +en", conv_none( "#sector +en" )
  assert_equal "#adm1 +code", conv_none( "#adm1 +code" )

  assert_equal "#affected +children +f",             conv_none( "#affected +children +f" )
  assert_equal "#population +affected +children +m", conv_none( "#population +affected +children +m" )
end


## :default - expected without leading hashtag and without spaces
def test_default
  assert_equal "sector", conv_default( "#sector" )
  assert_equal "adm1",   conv_default( "#adm1" )

  assert_equal "sector+en", conv_default( "#sector +en" )
  assert_equal "adm1+code", conv_default( "#adm1 +code" )

  assert_equal "affected+children+f",            conv_default( "#affected +children +f" )
  assert_equal "population+affected+children+m", conv_default( "#population +affected +children +m" )
end


## :symbol - expected as symbol with parts joined by underscore
def test_symbol
  assert_equal :sector, conv_symbol( "#sector" )
  assert_equal :adm1,   conv_symbol( "#adm1" )

  assert_equal :sector_en, conv_symbol( "#sector +en" )
  assert_equal :adm1_code, conv_symbol( "#adm1 +code" )

  assert_equal :affected_children_f,            conv_symbol( "#affected +children +f" )
  assert_equal :population_affected_children_m, conv_symbol( "#population +affected +children +m" )
end


end # class TestHeaderConverter
64 | 


--------------------------------------------------------------------------------
/csvhuman/test/test_misc.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_misc.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## smoke tests - read misc sample files and pretty print the records
##   note: no assertions for now (the tests pass if reading does not raise)
##
## note: uses Minitest::Test - the old MiniTest (capital T) alias
##       is no longer defined by default in newer minitest versions
class TestMisc < Minitest::Test


def test_airports
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/airports.csv" )
  pp recs
end

def test_unhcr
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/unhcr.csv" )
  pp recs
end

def test_ebola
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/ebola.csv" )
  pp recs
end

end # class TestMisc
29 | 


--------------------------------------------------------------------------------
/csvhuman/test/test_samples.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_samples.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## tests reading the sample files (sample1.csv to sample4.csv in the test data dir)
##   and checks the records returned by CsvHuman.read
##
## note: uses Minitest::Test - the old MiniTest (capital T) alias
##       is no longer defined by default in newer minitest versions
class TestSamples < Minitest::Test


## header (text) lines before the hashtag line; numbers expected as integers
def test_sample1
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/sample1.csv" )
  ## pp recs
  assert_equal [{"loc+name"=>"Camp A", "loc+code"=>"01000001", "affected"=>2000},
                {"loc+name"=>"Camp B", "loc+code"=>"01000002", "affected"=>750},
                {"loc+name"=>"Camp C", "loc+code"=>"01000003", "affected"=>1920}], recs
end

## note: #meta+source+reliability expected under the key meta+reliability+source
def test_sample2
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/sample2.csv" )
  ## pp recs
  assert_equal [{"event+id"=>1,
                 "affected+killed"=>1,
                 "region"=>"Mediterranean",
                 "meta+reliability+source"=>"Verified",
                 "date+reported"=>Date.new( 2015, 11, 5 ),
                 "geo+lat"=>36.8915,
                 "geo+lon"=>27.2877},
                {"event+id"=>3,
                 "affected+killed"=>1,
                 "region"=>"Central America incl. Mexico",
                 "meta+reliability+source"=>"Partially Verified",
                 "date+reported"=>Date.new( 2015, 11, 3 ),
                 "geo+lat"=>15.9564,
                 "geo+lon"=>-93.663099}], recs
end

## repeated tag (list) - values collected in an array; missing values expected as nil
def test_sample3
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/sample3.csv" )
  ## pp recs
  assert_equal [{"loc+code"=>["020503", nil, nil]},
                {"loc+code"=>["060107", "060108", nil]},
                {"loc+code"=>["173219", nil, nil]},
                {"loc+code"=>["530012", nil, nil]},
                {"loc+code"=>["530013", "530015", "279333"]}], recs
end

## repeated tag (list) next to a single-value column
def test_sample4
  recs = CsvHuman.read( "#{CsvHuman.test_data_dir}/sample4.csv" )
  ## pp recs
  assert_equal [{"adm1+name"=>"Coast District",    "affected+label"=>[0, 30, 100, 250]},
                {"adm1+name"=>"Mountain District", "affected+label"=>[15, 75, 30, 45]}], recs
end


end # class TestSamples
59 | 


--------------------------------------------------------------------------------
/csvhuman/test/test_type_converters.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_type_converters.rb
 6 | 
 7 | 
 8 | 
 9 | require 'helper'
10 | 
## tests for the type converters registered in CsvHuman::TYPE_CONVERTERS
##   (under the keys Integer, Float and Date)
##
## note: uses Minitest::Test - the old MiniTest (capital T) alias
##       is no longer defined by default in newer minitest versions
class TestTypeConverters < Minitest::Test

## helpers - run the value through the converter for the type
def conv_to_i( value )
  CsvHuman::TYPE_CONVERTERS[Integer].call( value )
end

def conv_to_f( value )
  CsvHuman::TYPE_CONVERTERS[Float].call( value )
end

def conv_to_date( value )
  CsvHuman::TYPE_CONVERTERS[Date].call( value )
end



def test_integer
  assert_equal 0,    conv_to_i( "0" )
  assert_equal 2011, conv_to_i( "2011" )
end

def test_float
  assert_equal 0.0,    conv_to_f( "0" )
  assert_equal 2011.0, conv_to_f( "2011" )
end

def test_date
  assert_equal Date.new( 2011, 12, 25 ), conv_to_date( "2011-12-25")
end


end # class TestTypeConverters
43 | 


--------------------------------------------------------------------------------
/csvhuman/test/test_type_mappings.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_type_mappings.rb
 6 | 
 7 | 
 8 | 
 9 | require 'helper'
10 | 
## tests for the tag to type mappings, that is, CsvHuman.guess_type
##   and the mappings registered in CsvHuman::TYPE_MAPPINGS (under :default and :none)
##
## note: uses Minitest::Test - the old MiniTest (capital T) alias
##       is no longer defined by default in newer minitest versions
class TestTypeMappings < Minitest::Test

## helper - splits a tag into [name, attributes]
##   using the parts returned by CsvHuman::Tag.split (first part is the name)
def split( value )
  parts = CsvHuman::Tag.split( value )

  name       = parts[0]
  attributes = parts[1..-1]   ## todo/fix: check if nil (make it empty array [] always) - why? why not?

  [name, attributes]
end


## helpers - map the tag (passed along as name and attributes) to a type
def conv_guess( value )
  CsvHuman.guess_type( *split(value) )
end

def conv_default( value )
  CsvHuman::TYPE_MAPPINGS[:default].call( *split(value) )
end

def conv_none( value )
  CsvHuman::TYPE_MAPPINGS[:none].call( *split(value) )
end



## :none - String expected for all tags
def test_none
  assert_equal String, conv_none( "#date" )
  assert_equal String, conv_none( "#date +year" )
  assert_equal String, conv_none( "#geo +lat" )
  assert_equal String, conv_none( "#geo +elevation" )
end

## guess_type and the :default mapping expected to return the same types
def test_guess_and_default
  assert_equal Date,    conv_guess( "#date" )
  assert_equal Integer, conv_guess( "#date +year" )
  assert_equal Float,   conv_guess( "#geo +lat" )
  assert_equal Float,   conv_guess( "#geo +elevation" )

  assert_equal Date,    conv_default( "#date" )
  assert_equal Integer, conv_default( "#date +year" )
  assert_equal Float,   conv_default( "#geo +lat" )
  assert_equal Float,   conv_default( "#geo +elevation" )
end


end # class TestTypeMappings
58 | 


--------------------------------------------------------------------------------
/csvjson/.gitignore:
--------------------------------------------------------------------------------
 1 | #######################
 2 | # ignore ruby rake generated folders
 3 | 
 4 | /pkg/
 5 | /doc/
 6 | 
 7 | 
 8 | ################
 9 | # ignore (top-level) datapackage folders
10 | 
11 | /pack/
12 | /.pack/
13 | 


--------------------------------------------------------------------------------
/csvjson/HISTORY.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2018-10-14
2 | 
3 | * Everything is new. First release
4 | 


--------------------------------------------------------------------------------
/csvjson/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | LICENSE.md
 3 | Manifest.txt
 4 | README.md
 5 | Rakefile
 6 | datasets/hello.json.csv
 7 | datasets/hello11.json.csv
 8 | lib/csvjson.rb
 9 | lib/csvjson/parser.rb
10 | lib/csvjson/version.rb
11 | test/helper.rb
12 | test/test_parser.rb
13 | test/test_parser_misc.rb
14 | 


--------------------------------------------------------------------------------
/csvjson/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/csvjson/version.rb'
 3 | 
 4 | Hoe.spec 'csvjson' do
 5 | 
 6 |   self.version = CsvJson::VERSION
 7 | 
 8 |   self.summary = "csvjson - read tabular data in the CSV <3 JSON format, that is, comma-separated values CSV (line-by-line) records with javascript object notation (JSON) encoding rules"
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/csvreader/csvjson']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'wwwmake@googlegroups.com'
15 | 
 16 |   # switch extension to .markdown for github formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.extra_deps = [
21 |   ]
22 | 
23 |   self.licenses = ['Public Domain']
24 | 
25 |   self.spec_extras = {
26 |     required_ruby_version: '>= 2.2.2'
27 |   }
28 | 
29 | end
30 | 


--------------------------------------------------------------------------------
/csvjson/datasets/hello.json.csv:
--------------------------------------------------------------------------------
1 | 1,"John","12 Totem Rd. Aspen",true
2 | 2,"Bob",null,false
3 | 3,"Sue","Bigsby, 345 Carnival, WA 23009",false
4 | 


--------------------------------------------------------------------------------
/csvjson/datasets/hello11.json.csv:
--------------------------------------------------------------------------------
1 | # hello world
2 | 
3 | 1, "John", "12 Totem Rd. Aspen",             true
4 | 2, "Bob",  null,                             false
5 | 3, "Sue",  "Bigsby, 345 Carnival, WA 23009", false
6 | 


--------------------------------------------------------------------------------
/csvjson/lib/csvjson.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'pp'
 4 | require 'json'
 5 | require 'logger'
 6 | 
 7 | 
 8 | ## our own code
 9 | ##   todo/check: use require_relative - why? why not?
10 | require 'csvjson/version'    # note: let version always go first
11 | require 'csvjson/parser'
12 | 
13 | 
14 | ## add some "alternative" shortcut aliases
15 | CSV_JSON = CsvJson
16 | CSVJSON  = CsvJson
17 | CSVJ     = CsvJson
18 | CsvJ     = CsvJson
19 | 
20 | 
21 | # say hello
22 | puts CsvJson.banner    if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
23 | 


--------------------------------------------------------------------------------
/csvjson/lib/csvjson/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ## note: for now CsvJson is a class!! (not a module)
 4 | 
 5 | class CsvJson
 6 | 
 7 |   MAJOR = 1
 8 |   MINOR = 0
 9 |   PATCH = 1
10 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
11 | 
12 |   def self.version
13 |     VERSION
14 |   end
15 | 
16 |   def self.banner
17 |     "csvjson/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18 |   end
19 | 
20 |   def self.root
21 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
22 |   end
23 | 
24 | end # class CsvJson
25 | 


--------------------------------------------------------------------------------
/csvjson/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | ## minitest setup
 4 | 
 5 | require 'minitest/autorun'
 6 | 
 7 | 
 8 | ## our own code
 9 | require 'csvjson'
10 | 
11 | 
12 | ## add test_data_dir helper
class CsvJson
  ## directory with the test data files, that is, <root>/datasets
  def self.test_data_dir
    [root, 'datasets'].join( '/' )
  end
end
18 | 
19 | 
20 | CsvJson.logger.level = :debug   ## turn on "global" logging
21 | 


--------------------------------------------------------------------------------
/csvjson/test/test_parser.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | ###
  4 | #  to run use
  5 | #     ruby -I ./lib -I ./test test/test_parser.rb
  6 | 
  7 | 
  8 | require 'helper'
  9 | 
 10 | class TestParser < MiniTest::Test
 11 | 
 12 | 
 13 | def parser
 14 |   CsvJson
 15 | end
 16 | 
 17 | def records   ## "standard" records for testing
 18 |   [[1, "John", "12 Totem Rd. Aspen",             true],
 19 |    [2, "Bob",  nil,                              false],
 20 |    [3, "Sue",  "Bigsby, 345 Carnival, WA 23009", false]]
 21 | end
 22 | 
 23 | 
 24 | 
 25 | def test_parse
 26 |   assert_equal records, parser.parse( <<TXT )
 27 | 1,"John","12 Totem Rd. Aspen",true
 28 | 2,"Bob",null,false
 29 | 3,"Sue","Bigsby, 345 Carnival, WA 23009",false
 30 | TXT
 31 | 
 32 |   assert_equal records, parser.parse( <<TXT )
 33 | # hello world
 34 | 
 35 | 1,"John","12 Totem Rd. Aspen",true
 36 | 2,"Bob",null,false
 37 | 3,"Sue","Bigsby, 345 Carnival, WA 23009",false
 38 | TXT
 39 | 
 40 |   assert_equal records, parser.parse( <<TXT )
 41 |   # hello world (pretty printed)
 42 | 
 43 |   1, "John", "12 Totem Rd. Aspen",             true
 44 |   2, "Bob",  null,                             false
 45 |   3, "Sue",  "Bigsby, 345 Carnival, WA 23009", false
 46 | 
 47 |   # try more comments and empty lines
 48 | 
 49 | TXT
 50 | 
 51 | 
 52 |   txt =<<TXT
 53 |   # hello world
 54 | 
 55 |   1,"John","12 Totem Rd. Aspen",true
 56 |   2,"Bob",null,false
 57 |   3,"Sue","Bigsby, 345 Carnival, WA 23009",false
 58 | TXT
 59 | 
 60 |   recs = []
 61 |   parser.parse( txt ) { |rec| recs << rec }
 62 |   assert_equal records, recs
 63 | end
 64 | 
 65 | 
 66 | def test_read
 67 |   assert_equal records, parser.read( "#{CsvJson.test_data_dir}/hello.json.csv" )
 68 |   assert_equal records, parser.read( "#{CsvJson.test_data_dir}/hello11.json.csv" )
 69 | end
 70 | 
 71 | 
 72 | def test_open
 73 |   assert_equal records, parser.open( "#{CsvJson.test_data_dir}/hello.json.csv", "r:bom|utf-8" ).read
 74 |   assert_equal records, parser.open( "#{CsvJson.test_data_dir}/hello11.json.csv", "r:bom|utf-8" ).read
 75 | end
 76 | 
 77 | 
 78 | def test_foreach
 79 |   recs = []
 80 |   parser.foreach( "#{CsvJson.test_data_dir}/hello.json.csv" ) { |rec| recs << rec }
 81 |   assert_equal records, recs
 82 | 
 83 |   recs = []
 84 |   parser.foreach( "#{CsvJson.test_data_dir}/hello11.json.csv" ) { |rec| recs << rec }
 85 |   assert_equal records, recs
 86 | end
 87 | 
 88 | 
 89 | def test_enum
 90 |   csv = CsvJson.new( <<TXT )
 91 |   # hello world
 92 | 
 93 |   1,"John","12 Totem Rd. Aspen",true
 94 |   2,"Bob",null,false
 95 |   3,"Sue","Bigsby, 345 Carnival, WA 23009",false
 96 | TXT
 97 | 
 98 |   it = csv.to_enum
 99 |   assert_equal [1, "John", "12 Totem Rd. Aspen",             true], it.next
100 |   assert_equal [2, "Bob",  nil,                              false], it.next
101 |   assert_equal [3, "Sue",  "Bigsby, 345 Carnival, WA 23009", false], it.next
102 | end
103 | 
104 | end # class TestParser
105 | 


--------------------------------------------------------------------------------
/csvjson/test/test_parser_misc.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_misc.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## Miscellaneous parser tests for the CSV <3 JSON format:
##   quoted values with commas / escaped quotes, array values and
##   all other kinds of (nested) JSON values.
##
## note: uses Minitest::Test - the legacy MiniTest alias is no longer
##       defined by default in newer minitest releases.
class TestParserMisc < Minitest::Test


## the parser (class) under test
def parser
  CsvJson
end


## commas and (backslash-)escaped quotes inside quoted values
## must NOT split or end the value
def test_quotes_and_commas
  assert_equal [
    [1, "John", "12 Totem Rd., Aspen", true],
    [2, "Bob",  nil,                   false],
    [3, "Sue", "\"Bigsby\", 345 Carnival, WA 23009", false]
    ], parser.parse( <<TXT )
  1,"John","12 Totem Rd., Aspen",true
  2,"Bob",null,false
  3,"Sue","\\"Bigsby\\", 345 Carnival, WA 23009",false
TXT
end


## json arrays (including the empty array) are valid values
def test_arrays
  assert_equal [
    [1, "directions", ["north","south","east","west"]],
    [2, "colors", ["red","green","blue"]],
    [3, "drinks", ["soda","water","tea","coffe"]],
    [4, "spells", []],
  ], parser.parse( <<TXT )
  # CSV <3 JSON with array values

  1,"directions",["north","south","east","west"]
  2,"colors",["red","green","blue"]
  3,"drinks",["soda","water","tea","coffe"]
  4,"spells",[]
TXT
end

## numbers, booleans, null, arrays, objects and strings with escapes
def test_misc
  ## note:
  ##   in the csv <3 json source text backslash needs to get doubled / escaped twice e.g.
  ##   \\"  for quotes
  ##   \\n  for newlines and so on

  assert_equal [
    ["index", "value1", "value2"],
    ["number", 1, 2],
    ["boolean", false, true],
    ["null", nil, "non null"],
    ["array of numbers", [1], [1,2]],
    ["simple object", {"a" => 1}, {"a" => 1, "b" => 2}],
    ["array with mixed objects", [1, nil,"ball"], [2,{"a" => 10, "b" => 20},"cube"]],
    ["string with quotes", "a\"b", "alert(\"Hi!\")"],
    ["string with bell&newlines","bell is \u0007","multi\nline\ntext"]
  ], parser.parse( <<TXT )
  # CSV with all kinds of values

  "index","value1","value2"
  "number",1,2
  "boolean",false,true
  "null",null,"non null"
  "array of numbers",[1],[1,2]
  "simple object",{"a": 1},{"a":1, "b":2}
  "array with mixed objects",[1,null,"ball"],[2,{"a": 10, "b": 20},"cube"]
  "string with quotes","a\\"b","alert(\\"Hi!\\")"
  "string with bell&newlines","bell is \\u0007","multi\\nline\\ntext"
TXT

end


end # class TestParserMisc
81 | 


--------------------------------------------------------------------------------
/csvpack/.gitignore:
--------------------------------------------------------------------------------
 1 | #######################
 2 | # ignore ruby rake generated folders
 3 | 
 4 | /pkg/
 5 | /doc/
 6 | 
 7 | 
 8 | ################
 9 | # ignore (top-level) datapackage folders
10 | 
11 | /pack/
12 | /.pack/
13 | 


--------------------------------------------------------------------------------
/csvpack/HISTORY.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2015-04-23
2 | 
3 | * Everything is new. First release
4 | 
5 | 


--------------------------------------------------------------------------------
/csvpack/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | lib/csvpack.rb
 6 | lib/csvpack/downloader.rb
 7 | lib/csvpack/pack.rb
 8 | lib/csvpack/version.rb
 9 | test/helper.rb
10 | test/test_companies.rb
11 | test/test_countries.rb
12 | test/test_downloader.rb
13 | test/test_import.rb
14 | 


--------------------------------------------------------------------------------
/csvpack/NOTES.md:
--------------------------------------------------------------------------------
 1 | # Notes
 2 | 
 3 | 
 4 | ## Todos
 5 | 
 6 | - [ ] add CsvPack.config !!! - e.g config.cache_dir "global" setting and others
 7 | - [ ] add loading from directory
 8 | - [ ] add loading from zip
 9 | - [ ] add csvpack command line tool / bin
10 | - [ ] add schema class for json_schema - why? why not?
11 | - [ ] add table lookup by name e.g. table['constituent']
12 | 


--------------------------------------------------------------------------------
/csvpack/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/csvpack/version.rb'
 3 | 
 4 | Hoe.spec 'csvpack' do
 5 | 
 6 |   self.version = CsvPack::VERSION
 7 | 
 8 |   self.summary = "csvpack - tools 'n' scripts for working with tabular data packages using comma-separated values (CSV) datafiles in text with meta info (that is, schema, datatypes, ..) in datapackage.json; download, read into and query CSV datafiles with your SQL database (e.g. SQLite, PostgreSQL, ...) of choice and much more"
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/csv11/csvpack']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'wwwmake@googlegroups.com'
15 | 
 16 |   # switch extension to .markdown for github formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.extra_deps = [
21 |     ['logutils',     '>=0.6.1'],
22 |     ['fetcher',      '>=0.4.5'],
23 |     ['activerecord', '>=5.0.0'],
24 |   ]
25 | 
26 |   self.licenses = ['Public Domain']
27 | 
28 |   self.spec_extras = {
29 |     required_ruby_version: '>= 2.2.2'
30 |   }
31 | 
32 | end
33 | 


--------------------------------------------------------------------------------
/csvpack/getting-started-samples/start.rb:
--------------------------------------------------------------------------------
  1 | ###
  2 | #  ruby script (data work flow) getting started sample from the csvpack readme
  3 | #    see https://github.com/csvreader/csvpack
  4 | #
  5 | 
  6 | require 'csvpack'
  7 | 
  8 | 
  9 | CsvPack.import(
 10 |   's-and-p-500-companies',
 11 |   'gdb'
 12 | )
 13 | 
 14 | 
 15 | 
 16 | 
 17 | 
 18 | pack = CsvPack::Pack.new( './pack/s-and-p-500-companies' )
 19 | 
 20 | Constituent = pack.table.ar_clazz
 21 | 
 22 | 
 23 | pp Constituent.count
 24 | # SELECT COUNT(*) FROM "constituents"
 25 | # => 496
 26 | 
 27 | 
 28 | pp Constituent.first
 29 | # SELECT  "constituents".* FROM "constituents" ORDER BY "constituents"."id" ASC LIMIT 1
 30 | # => #<Constituent:0x9f8cb78
 31 | #         id:     1,
 32 | #         symbol: "MMM",
 33 | #         name:   "3M Company",
 34 | #         sector: "Industrials">
 35 | 
 36 | 
 37 | pp Constituent.find_by!( symbol: 'MMM' )
 38 | # SELECT  "constituents".*
 39 | #         FROM "constituents"
 40 | #         WHERE "constituents"."symbol" = "MMM"
 41 | #         LIMIT 1
 42 | # => #<Constituent:0x9f8cb78
 43 | #         id:     1,
 44 | #         symbol: "MMM",
 45 | #         name:   "3M Company",
 46 | #         sector: "Industrials">
 47 | 
 48 | 
 49 | pp Constituent.find_by!( name: '3M Company' )
 50 | # SELECT  "constituents".*
 51 | #          FROM "constituents"
 52 | #          WHERE "constituents"."name" = "3M Company"
 53 | #          LIMIT 1
 54 | # => #<Constituent:0x9f8cb78
 55 | #         id:     1,
 56 | #         symbol: "MMM",
 57 | #         name:   "3M Company",
 58 | #         sector: "Industrials">
 59 | 
 60 | 
 61 | pp Constituent.where( sector: 'Industrials' ).count
 62 | # SELECT COUNT(*) FROM "constituents"
 63 | #         WHERE "constituents"."sector" = "Industrials"
 64 | # => 63
 65 | 
 66 | 
 67 | pp Constituent.where( sector: 'Industrials' ).all
 68 | # SELECT "constituents".*
 69 | #         FROM "constituents"
 70 | #         WHERE "constituents"."sector" = "Industrials"
 71 | # => [#<Constituent:0x9f8cb78
 72 | #          id:     1,
 73 | #          symbol: "MMM",
 74 | #          name:   "3M Company",
 75 | #          sector: "Industrials">,
 76 | #      #<Constituent:0xa2a4180
 77 | #          id:     8,
 78 | #          symbol: "ADT",
 79 | #          name:   "ADT Corp (The)",
 80 | #          sector: "Industrials">,...]
 81 | 
 82 | 
 83 | 
 84 | #####
 85 | #   From F.A.Q.
 86 | 
 87 | 
 88 | dl = CsvPack::Downloader.new
 89 | dl.fetch( 'language-codes' )
 90 | dl.fetch( 's-and-p-500-companies' )
 91 | dl.fetch( 'un-locode')
 92 | 
 93 | 
 94 | 
 95 | #######
 96 | #  New db connection - store to ./mine.db
 97 | 
 98 | ActiveRecord::Base.establish_connection( adapter:  'sqlite3',
 99 |                                          database: './mine.db' )
100 | 
101 | ## import 1) "auto"-magic
102 | CsvPack.import(
103 |   's-and-p-500-companies'
104 | )
105 | 
106 | ## import 2) "by hand"
107 | pack = CsvPack::Pack.new( './pack/gdb' )
108 | pack.tables.each do |table|
109 |   table.up!      # (auto-) add table  using SQL create_table via ActiveRecord migration
110 |   table.import!  # import all records using SQL inserts
111 | end
112 | 


--------------------------------------------------------------------------------
/csvpack/lib/csvpack.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | require 'pp'
 5 | require 'forwardable'
 6 | 
 7 | ### csv
 8 | require 'csv'
 9 | require 'json'
10 | require 'fileutils'
11 | 
12 | 
13 | ### downloader
14 | require 'fetcher'
15 | 
16 | ### activerecord w/ sqlite3
17 | ##  require 'active_support/all'    ## needed for String#binary? method
18 | require 'active_record'
19 | 
20 | 
21 | 
22 | # our own code
23 | 
24 | require 'csvpack/version'      ## let version always go first
25 | require 'csvpack/pack'
26 | require 'csvpack/downloader'
27 | 
module CsvPack

  ## Download and import one or more data packages by name.
  ##
  ##  All packages get downloaded first (into the downloader's default
  ##  ./pack cache folder); only then every table of every package gets
  ##  created (up!) and filled (import!) - in the order passed in.
  ##
  ##  Returns the array of the passed-in names.
  def self.import( *args )
    ## step 1: download
    downloader = Downloader.new
    args.each { |name| downloader.fetch( name ) }

    ## step 2: up 'n' import
    args.each do |name|
      tables = Pack.new( "./pack/#{name}/datapackage.json" ).tables
      tables.each do |table|
        table.up!
        table.import!
      end
    end
  end

end # module CsvPack
48 | 
49 | 
50 | 
51 | # say hello
52 | puts CsvPack.banner    if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
53 | 


--------------------------------------------------------------------------------
/csvpack/lib/csvpack/downloader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module CsvPack
 4 | 
 5 | class Downloader
 6 | 
 7 |   def initialize( cache_dir='./pack' )
 8 |     @cache_dir = cache_dir   # todo: check if folder exists now (or on demand)?
 9 |     @worker = Fetcher::Worker.new
10 |   end
11 | 
12 |   SHORTCUTS = {
13 |     ## to be done
14 |   }
15 | 
16 |   def fetch( name_or_shortcut_or_url )   ## todo/check: use (re)name to get/update/etc. why? why not??
17 | 
18 |     name = name_or_shortcut_or_url
19 | 
20 |     ##
21 |     ## e.g. try
22 |     ##   country-list
23 |     ##
24 | 
25 |     ## url_base = "http://data.okfn.org/data/core/#{name}"
26 |     ## url_base = "https://datahub.io/core/#{name}"
27 | 
28 |     ## or use "https://github.com/datasets/#{name}/raw/master"
29 |     url_base = "https://raw.githubusercontent.com/datasets/#{name}/master"
30 | 
31 | 
32 |     url = "#{url_base}/datapackage.json"
33 | 
34 |     dest_dir = "#{@cache_dir}/#{name}"
35 |     FileUtils.mkdir_p( dest_dir )
36 | 
37 |     pack_path = "#{dest_dir}/datapackage.json"   ## todo/fix: rename to meta_path - why? why not?
38 |     @worker.copy( url, pack_path )
39 | 
40 |     h = Meta.load_file( pack_path )
41 |     pp h
42 | 
43 |     ## copy resources (tables)
44 |     h.resources.each do |r|
45 |       puts "== resource:"
46 |       pp r
47 | 
48 |       res_name          = r['name']
49 |       res_relative_path = r['path']   ## fix/todo: might no contain the url - is now res_url_or_relative_path !!!!!
50 |       if res_relative_path.nil?
51 |         res_relative_path = "#{res_name}.csv"
52 |       end
53 | 
54 |       res_url       = r['url']   ## check - old package format - url NO longer used!!!!
55 |       if res_url.nil?
56 |          ## build url
57 |          res_url = "#{url_base}/#{res_relative_path}"
58 |       end
59 | 
60 |       ## todo/fix: rename - use just res_path - why? why not?
61 |       local_res_path = "#{dest_dir}/#{res_relative_path}"
62 |       puts "[debug] local_res_path: >#{local_res_path}<"
63 |       local_res_dir   = File.dirname( local_res_path )
64 |       FileUtils.mkdir_p( local_res_dir )
65 | 
66 |       @worker.copy( res_url, local_res_path )
67 |     end
68 |   end
69 | 
70 | end # class Downloader
71 | 
72 | end # module CsvPack
73 | 


--------------------------------------------------------------------------------
/csvpack/lib/csvpack/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | module CsvPack
 4 | 
 5 |   MAJOR = 0 ## todo: namespace inside version or something - why? why not??
 6 |   MINOR = 2
 7 |   PATCH = 0
 8 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
 9 | 
10 |   def self.version
11 |     VERSION
12 |   end
13 | 
14 |   def self.banner
15 |     "csvpack/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16 |   end
17 | 
18 |   def self.root
19 |     File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
20 |   end
21 | 
22 | end  # module CsvPack
23 | 


--------------------------------------------------------------------------------
/csvpack/test/helper.rb:
--------------------------------------------------------------------------------
1 | 
2 | ## minitest setup
3 | require 'minitest/autorun'
4 | 
5 | 
6 | ## our own code
7 | require 'csvpack'
8 | 


--------------------------------------------------------------------------------
/csvpack/test/pack/beer/data.csv:
--------------------------------------------------------------------------------
1 | Brewery,City,Name,Abv
2 | Andechser Klosterbrauerei,Andechs,Doppelbock Dunkel,7%
3 | Augustiner Bräu München,München,Edelstoff,5.6%
4 | Bayerische Staatsbrauerei Weihenstephan,Freising,Hefe Weissbier,5.4%
5 | Brauerei Spezial,Bamberg,Rauchbier Märzen,5.1%
6 | Hacker-Pschorr Bräu,München,Münchner Dunkel,5.0%
7 | Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
8 | 


--------------------------------------------------------------------------------
/csvpack/test/pack/beer/datapackage.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "beer",
 3 |   "resources": [
 4 |     {
 5 |       "path": "data.csv",
 6 |       "schema": {
 7 |         "fields": [{ "name": "Brewery",   "type": "string" },
 8 |                    { "name": "City",      "type": "string" },
 9 |                    { "name": "Name",      "type": "string" },
10 |                    { "name": "Abv",       "type": "number" }]
11 |       }
12 |     }
13 |   ]
14 | }
15 | 


--------------------------------------------------------------------------------
/csvpack/test/test_companies.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_companies.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## Smoke test: load the (already downloaded) s-and-p-500-companies
## data package, import it into an in-memory SQLite database
## and run some queries via the auto-generated ActiveRecord class.
##
## note: uses Minitest::Test - the legacy MiniTest alias is no longer
##       defined by default in newer minitest releases.
class TestCompanies < Minitest::Test

  def test_s_and_p_500_companies

    pack = CsvPack::Pack.new( './pack/s-and-p-500-companies/datapackage.json' )

    meta = pack.meta
    puts "name: #{meta.name}"
    puts "title: #{meta.title}"
    puts "license: #{meta.license}"

    pp pack.tables
    pp pack.table[0]['Symbol']
    pp pack.table[495]['Symbol']

    ## pack.table.each do |row|
    ##  pp row
    ## end

    puts pack.tables[0].dump_schema

    # database setup 'n' config
    ActiveRecord::Base.establish_connection( adapter:  'sqlite3',
                                             database: ':memory:' )
    ActiveRecord::Base.logger = Logger.new( STDOUT )

    pack.table.up!
    pack.table.import!

    ## pack.tables[0].up!
    ## pack.tables[0].import!


    pp pack.table.ar_clazz


    company = pack.table.ar_clazz

    puts "Company:"
    pp company.count
    pp company.first
    pp company.find_by!( symbol: 'MMM' )
    pp company.find_by!( name: '3M Company' )
    pp company.where( sector: 'Industrials' ).count
    pp company.where( sector: 'Industrials' ).all


    ### todo: try a join w/ belongs_to ??

    assert true  # if we get here - test success
  end

end # class TestCompanies
63 | 


--------------------------------------------------------------------------------
/csvpack/test/test_countries.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_countries.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## Smoke test: load the (already downloaded) country-list data package
## and import it into an in-memory SQLite database.
##
## note: uses Minitest::Test - the legacy MiniTest alias is no longer
##       defined by default in newer minitest releases.
class TestCountries < Minitest::Test

  def test_country_list
    pack = CsvPack::Pack.new( './pack/country-list/datapackage.json' )

    meta = pack.meta
    puts "name: #{meta.name}"
    puts "title: #{meta.title}"
    puts "license: #{meta.license}"

    pp pack.tables

    ## pack.table.each do |row|
    ##  pp row
    ## end

    puts pack.table.dump_schema

    # database setup 'n' config
    ActiveRecord::Base.establish_connection( adapter:  'sqlite3',
                                             database: ':memory:' )
    ActiveRecord::Base.logger = Logger.new( STDOUT )

    pack.table.up!
    pack.table.import!

    pp pack.table.ar_clazz

    assert true  # if we get here - test success
  end

end # class TestCountries
42 | 


--------------------------------------------------------------------------------
/csvpack/test/test_downloader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_downloader.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## Smoke test: download a couple of data packages by name
## (into the downloader's default cache folder).
##
## note: uses Minitest::Test - the legacy MiniTest alias is no longer
##       defined by default in newer minitest releases.
class TestDownloader < Minitest::Test

  def test_download

    names = [
      'country-list',
      'country-codes',
      'language-codes',
      'cpi',                    ## Annual Consumer Price Index (CPI)
      'gdp',                    ## Country, Regional and World GDP (Gross Domestic Product)
      's-and-p-500-companies',  ## S&P 500 Companies with Financial Information
      'un-locode',              ## UN-LOCODE Codelist  - note: incl. country-codes.csv
    ]

    dl = CsvPack::Downloader.new
    names.each do |name|
      dl.fetch( name )
    end

    assert true  # if we get here - test success
  end

end # class TestDownloader
33 | 


--------------------------------------------------------------------------------
/csvpack/test/test_import.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_import.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## Smoke test: download and import data packages in one go
## using the CsvPack.import convenience helper.
##
## note: uses Minitest::Test - the legacy MiniTest alias is no longer
##       defined by default in newer minitest releases.
class TestImport < Minitest::Test

  def test_import

    CsvPack.import(
      'cpi',                    ## Annual Consumer Price Index (CPI)
      'gdp',                    ## Country, Regional and World GDP (Gross Domestic Product)
    )

    assert true  # if we get here - test success
  end

end # class TestImport
23 | 


--------------------------------------------------------------------------------
/csvreader/.gitignore:
--------------------------------------------------------------------------------
 1 | #######################
 2 | # ignore ruby rake generated folders
 3 | 
 4 | /pkg/
 5 | /doc/
 6 | 
 7 | 
 8 | ################
 9 | # ignore (top-level) datapackage folders
10 | 
11 | /pack/
12 | /.pack/
13 | 


--------------------------------------------------------------------------------
/csvreader/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2018-08-15
2 | 
3 | * Everything is new. First release.
4 | 


--------------------------------------------------------------------------------
/csvreader/ERRORS.md:
--------------------------------------------------------------------------------
 1 | # Errors / Error Recovery / Auto-Fixes
 2 | 
 3 | 
 4 | 
 5 | ###  Quoted Value with Trailing Data   (Auto-Fixed)
 6 | 
 7 | ```
 8 | Farrokh,"Freddy" Mercury,Bulsara
 9 | ```
10 | 
11 | See `"Freddy" Mercury` for example. 
12 | 
13 | How to handle?
14 | 
15 | Add new rule! 
16 | If quoted value is followed by more data auto-add all the data 
17 | until hitting the separator (that is, comma) 
18 | and turn the quotes into "literal" quotes as part of the value.
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/csvreader/Manifest.txt:
--------------------------------------------------------------------------------
 1 | CHANGELOG.md
 2 | Manifest.txt
 3 | README.md
 4 | Rakefile
 5 | datasets/beer.csv
 6 | datasets/beer11.csv
 7 | datasets/cars11.csv
 8 | datasets/cities11.csv
 9 | datasets/customers11.csv
10 | datasets/iris.attrib.csv
11 | datasets/iris11.csv
12 | datasets/lcc.attrib.csv
13 | datasets/shakespeare.csv
14 | datasets/test.csv
15 | lib/csvreader.rb
16 | lib/csvreader/base.rb
17 | lib/csvreader/buffer.rb
18 | lib/csvreader/builder.rb
19 | lib/csvreader/converter.rb
20 | lib/csvreader/parser.rb
21 | lib/csvreader/parser_fixed.rb
22 | lib/csvreader/parser_json.rb
23 | lib/csvreader/parser_std.rb
24 | lib/csvreader/parser_strict.rb
25 | lib/csvreader/parser_tab.rb
26 | lib/csvreader/parser_table.rb
27 | lib/csvreader/parser_yaml.rb
28 | lib/csvreader/reader.rb
29 | lib/csvreader/reader_hash.rb
30 | lib/csvreader/version.rb
31 | test/helper.rb
32 | test/test_buffer.rb
33 | test/test_converter.rb
34 | test/test_parser.rb
35 | test/test_parser_autofix.rb
36 | test/test_parser_directive.rb
37 | test/test_parser_fixed.rb
38 | test/test_parser_formats.rb
39 | test/test_parser_java.rb
40 | test/test_parser_meta.rb
41 | test/test_parser_null.rb
42 | test/test_parser_numeric.rb
43 | test/test_parser_quotes.rb
44 | test/test_parser_strict.rb
45 | test/test_parser_tab.rb
46 | test/test_parser_table.rb
47 | test/test_reader.rb
48 | test/test_reader_converters.rb
49 | test/test_reader_hash.rb
50 | test/test_reader_hash_converters.rb
51 | test/test_samples.rb
52 | 


--------------------------------------------------------------------------------
/csvreader/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/csvreader/version.rb'
 3 | 
 4 | 
 5 | Hoe.spec 'csvreader' do
 6 | 
 7 |   self.version = CsvReader::VERSION
 8 | 
 9 |   self.summary = "csvreader - read tabular data in the comma-separated values (csv) format the right way (uses best practices out-of-the-box with zero-configuration)"
10 |   self.description = summary
11 | 
12 |   self.urls = { home: 'https://github.com/csvreader/csvreader' }
13 | 
14 |   self.author = 'Gerald Bauer'
15 |   self.email  = 'wwwmake@googlegroups.com'
16 | 
 17 |   # switch extension to .markdown for github formatting
18 |   self.readme_file  = 'README.md'
19 |   self.history_file = 'CHANGELOG.md'
20 | 
21 |   self.extra_deps = [
22 |      ['tabreader',  '>=1.0.1'],
23 |      ['csvyaml',    '>=0.1.0'],
24 |      ['csvjson',    '>=1.0.0']
25 |    ]
26 | 
27 |   self.licenses = ['Public Domain']
28 | 
29 |   self.spec_extras = {
30 |     required_ruby_version: '>= 2.2.2'
31 |   }
32 | 
33 | end
34 | 


--------------------------------------------------------------------------------
/csvreader/datasets/beer.csv:
--------------------------------------------------------------------------------
1 | Brewery,City,Name,Abv
2 | Andechser Klosterbrauerei,Andechs,Doppelbock Dunkel,7%
3 | Augustiner Bräu München,München,Edelstoff,5.6%
4 | Bayerische Staatsbrauerei Weihenstephan,Freising,Hefe Weissbier,5.4%
5 | Brauerei Spezial,Bamberg,Rauchbier Märzen,5.1%
6 | Hacker-Pschorr Bräu,München,Münchner Dunkel,5.0%
7 | Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
8 | 


--------------------------------------------------------------------------------
/csvreader/datasets/beer11.csv:
--------------------------------------------------------------------------------
 1 | #######
 2 | #  try with some comments
 3 | #   and blank lines even before header
 4 | 
 5 | Brewery,City,Name,Abv
 6 | Andechser Klosterbrauerei,Andechs,Doppelbock Dunkel,7%
 7 | Augustiner Bräu München,München,Edelstoff,5.6%
 8 | Bayerische Staatsbrauerei Weihenstephan,Freising,Hefe Weissbier,5.4%
 9 | 
10 | Brauerei Spezial,      Bamberg,  Rauchbier Märzen,   5.1%
11 | 
12 | Hacker-Pschorr Bräu,   München,  Münchner Dunkel,    5.0%
13 | 
14 | ## some more comments here
15 | 
16 | Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
17 | 
18 | ## check for nil
19 | "", ,,"",
20 | 
21 | ## check for blank line with spaces
22 | ##    yes, will get added as a record!! e.g. ["", nil, nil, nil]
23 | ##    use regex to skip blank lines with spaces!!!!
24 |     
25 | 
26 | ## test double quotes and double quotes escaped
27 | ##   note: double quotes do NOT work with leading AND/OR trailing spaces
28 | ##    leads to:
29 | ##    CSV::MalformedCSVError - Missing or stray quote in line xxx
30 | ##
31 | ##   note: for now double quote does not accept leading AND/OR trailing spaces!!!!
32 | ##
33 | ##  todo/fix: check liberal_quote option starting in csv ruby 2.4 ???
34 | ##
35 | ## examples:
36 | ##  "value with comma, comma","some ""hello""","some ""hello""",
37 | ##    works - but does NOT work (note the leading and trailing spaces for double quotes):
38 | ##  "value with comma, comma" ,"some ""hello""",  "some ""hello""",
39 | ##
40 | ## check for "multi-line":
41 | ## "hello
42 | ##  and another line
43 | ##  and another",two,three,
44 | 
45 | 
46 | "value with comma, comma","some ""hello""","some ""hello""",
47 | 
48 | ## check for "multi-line"
49 | "hello
50 |   and another line
51 |   and another",two,three,
52 | 


--------------------------------------------------------------------------------
/csvreader/datasets/cars11.csv:
--------------------------------------------------------------------------------
 1 | #####
 2 | # csv sample from the wikipedia article "Comma-separated values"
 3 | #   see en.wikipedia.org/wiki/Comma-separated_values
 4 | 
 5 | Year,Make,Model,Description,Price
 6 | 1997, Ford,  E350,"ac, abs, moon",3000.00
 7 | 1999, Chevy, "Venture ""Extended Edition""","",4900.00
 8 | 1999, Chevy, "Venture ""Extended Edition, Very Large""",,5000.00
 9 | 1996, Jeep,  Grand Cherokee,"MUST SELL!
10 | air, moon roof, loaded",4799.00
11 | 


--------------------------------------------------------------------------------
/csvreader/datasets/cities11.csv:
--------------------------------------------------------------------------------
 1 | #####
 2 | # csv sample from the wikipedia article "Comma-separated values"
 3 | #   see en.wikipedia.org/wiki/Comma-separated_values
 4 | #
 5 | # note:
 6 | #  Double quote processing need only apply if the field starts
 7 | #  with a double quote. Note, however, that double quotes are not
 8 | #  allowed in unquoted fields according to RFC 4180
 9 | 
10 | Los Angeles,   34°03'N,    118°15'W
11 | New York City, 40°42'46"N, 74°00'21"W
12 | Paris,         48°51'24"N, 2°21'03"E
13 | 


--------------------------------------------------------------------------------
/csvreader/datasets/customers11.csv:
--------------------------------------------------------------------------------
 1 | #####
 2 | # csv sample from the article:
 3 | #  A Guide to the Ruby CSV Library, Part I
 4 | #  - sitepoint.com/guide-ruby-csv-library-part
 5 | 
 6 | Name,Times arrived,Total $ spent,Food feedback
 7 | Dan,      34, 2548, Lovin it!
 8 | Maria,    55, 5054, "Good, delicious food"
 9 | Carlos,   22, 4352, "I am ""pleased"", but could be better"
10 | Stephany, 34, 6542, I want bigger steaks!!!!!
11 | James,     1,   43, Not bad
12 | Robin,     1,   56, Fish is tasty
13 | Anna,      1,   79, "Good, better, the best!"
14 | 


--------------------------------------------------------------------------------
/csvreader/datasets/iris.attrib.csv:
--------------------------------------------------------------------------------
 1 | % 1. Title: Iris Plants Database 
 2 | % 
 3 | % 2. Sources: 
 4 | % (a) Creator: R.A. Fisher 
 5 |  
 6 | 
 7 | @RELATION iris
 8 | 
 9 | @ATTRIBUTE sepallength NUMERIC 
10 | @ATTRIBUTE sepalwidth NUMERIC 
11 | @ATTRIBUTE petallength NUMERIC 
12 | @ATTRIBUTE petalwidth NUMERIC 
13 | @ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
14 | 
15 | @DATA
16 | 5.1,3.5,1.4,0.2,Iris-setosa 
17 | 4.9,3.0,1.4,0.2,Iris-setosa 
18 | 4.7,3.2,1.3,0.2,Iris-setosa 
19 | 4.6,3.1,1.5,0.2,Iris-setosa 
20 | 5.0,3.6,1.4,0.2,Iris-setosa 
21 | 5.4,3.9,1.7,0.4,Iris-setosa 
22 | 4.6,3.4,1.4,0.3,Iris-setosa 
23 | 5.0,3.4,1.5,0.2,Iris-setosa 
24 | 4.4,2.9,1.4,0.2,Iris-setosa 
25 | 4.9,3.1,1.5,0.1,Iris-setosa
26 | 


--------------------------------------------------------------------------------
/csvreader/datasets/lcc.attrib.csv:
--------------------------------------------------------------------------------
 1 | %  Attribute-Relation File Format (ARFF) Example
 2 | %    see https://www.cs.waikato.ac.nz/ml/weka/arff.html
 3 | 
 4 | @relation LCCvsLCSH
 5 | 
 6 | @attribute LCC string
 7 | @attribute LCSH string
 8 | 
 9 | @data
10 | AG5,   'Encyclopedias and dictionaries.;Twentieth century.'
11 | AS262, 'Science -- Soviet Union -- History.'
12 | AE5,   'Encyclopedias and dictionaries.'
13 | AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Phases.'
14 | AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Tables.'
15 | 


--------------------------------------------------------------------------------
/csvreader/datasets/shakespeare.csv:
--------------------------------------------------------------------------------
 1 | Quote,Play,Cite
 2 | Sweet are the uses of adversity,As You Like It,"Act 2, scene 1, 12"
 3 | All the world's a stage,As You Like It,"Act 2, scene 7, 139"
 4 | "We few, we happy few",Henry V,
 5 | """Seems,"" madam! Nay it is; I know not ""seems.""",Hamlet,(1.ii.76)
 6 | "To be, or not to be",Hamlet,"Act 3, scene 1, 55"
 7 | What's in a name? That which we call a rose by any other name would smell as sweet.,Romeo and Juliet,"(II, ii, 1-2)"
 8 | "O Romeo, Romeo, wherefore art thou Romeo?",Romeo and Juliet,"Act 2, scene 2, 33"
 9 | "Tomorrow, and tomorrow, and tomorrow",Macbeth,"Act 5, scene 5, 19"
10 | 


--------------------------------------------------------------------------------
/csvreader/datasets/test.csv:
--------------------------------------------------------------------------------
 1 | ##################################################
 2 | ##   Apache Commons CSV Reader Test Sample
 3 | ##    see https://github.com/apache/commons-csv/blob/master/src/test/resources/CSVFileParser/test.csv
 4 | 
 5 | 
 6 | A,B,C,"D"
 7 | # plain values
 8 | a,b,c,d
 9 | # spaces before and after
10 |  e ,f , g,h
11 | # quoted: with spaces before and after
12 | " i ", " j " , " k "," l "
13 | # empty values
14 | ,,,
15 | # empty quoted values
16 | "","","",""
17 | # 3 empty lines
18 | 
19 | 
20 | 
21 | # EOF on next line
22 | 


--------------------------------------------------------------------------------
/csvreader/lib/csvreader.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ## our own code (without "top-level" shortcuts e.g. "modular version")
 4 | require 'csvreader/base'
 5 | 
 6 | 
 7 | ###
 8 | #  add convenience top-level shortcuts / aliases
 9 | #    e.g. Csv is the very same class as CsvReader (and CsvHash as CsvHashReader)
10 | Csv     = CsvReader
11 | CsvHash = CsvHashReader
12 | 


--------------------------------------------------------------------------------
/csvreader/lib/csvreader/buffer.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | class CsvReader
 3 | class Buffer   ## todo: find a better name:
 4 |                ##   BufferedReader
 5 |                ##   BufferedInput
 6 |                ##   BufferI
 7 |                ## - why? why not? is really just for reading (keep io?)
 8 | 
 9 |   def initialize( data )
10 |     # create the IO object we will read from
11 |     @io = data.is_a?(String) ? StringIO.new(data) : data
12 |     @buf = [] ## last (buffer) chars (used for peek)
13 |   end
14 | 
15 |   def eof?()   @buf.size == 0 && @io.eof?;  end
16 | 
17 |   def getc
18 |     if @buf.size > 0
19 |       @buf.shift  ## get first char from buffer
20 |     else
21 |       @io.getc
22 |     end
23 |   end # method getc
24 | 
25 | 
26 |   def peekn( lookahead )
27 |     ## todo/check:  use a new method peekstr or match or something
28 |     ##    for more than
29 |       if @buf.size == 0 && @io.eof?
30 |         ## puts "peek - hitting eof!!!"
31 |         return  "\0"   ## return NUL char (0) for now
32 |       end
33 | 
34 |       while @buf.size < lookahead do
35 |          ## todo/check: add/append NUL char (0) - why? why not?
36 |          break if @io.eof?    ## nothing more to read; break out of filling up buffer
37 | 
38 |          c = @io.getc
39 |          @buf.push( c )
40 |          ## puts "peek - fill buffer >#{c}< (#{c.ord})"
41 |       end
42 | 
43 |       @buf[0,lookahead].join
44 |   end
45 | 
46 | 
47 |   def peek1
48 |     if @buf.size == 0 && @io.eof?
49 |       ## puts "peek - hitting eof!!!"
50 |       return  "\0"   ## return NUL char (0) for now
51 |     end
52 | 
53 |     if @buf.size == 0
54 |         c = @io.getc
55 |         @buf.push( c )
56 |         ## puts "peek - fill buffer >#{c}< (#{c.ord})"
57 |     end
58 | 
59 |     @buf[0]    ## @buf.first
60 |   end # method peek1
61 |   alias :peek :peek1  ## for now alias for peek1
62 | 
63 | 
64 | 
65 | end # class Buffer
66 | end # class CsvReader
67 | 


--------------------------------------------------------------------------------
/csvreader/lib/csvreader/builder.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | class CsvReader
 3 | class Builder
 4 | 
 5 | 
 6 |   def initialize( parser )
 7 |     @parser = parser
 8 |   end
 9 | 
10 | 
11 |   ## (auto-)forward to wrapped parser
12 |   ##   note/fix: not all parser use/have config e.g. ParserTab, ParserFixed, etc.
13 |   ##
14 |   ## todo/fix:
15 |   ##   add parser config (attribute) setter e.g.
16 |   ##   - sep=(value)
17 |   ##   - comment=(value)
18 |   ##   - and so on!!!
19 |   def config() @parser.config; end
20 | 
21 | 
22 | 
23 |   def open( path, mode=nil, **kwargs, &block )
24 |     CsvReader.open( path, mode, parser: @parser, **kwargs, &block )
25 |   end
26 | 
27 |   def read( path, **kwargs )
28 |     CsvReader.read( path, parser: @parser, **kwargs )
29 |   end
30 | 
31 |   def header( path, **kwargs )
32 |     CsvReader.header( path, parser: @parser, **kwargs )
33 |   end
34 | 
35 |   def foreach( path, **kwargs, &block )
36 |     CsvReader.foreach( path, parser: @parser, **kwargs, &block )
37 |   end
38 | 
39 | 
40 |   def parse( str_or_readable, **kwargs, &block )
41 |     CsvReader.parse( str_or_readable, parser: @parser, **kwargs, &block )
42 |   end
43 | end # class Builder
44 | end # class CsvReader
45 | 
46 | 
47 | 
48 | class CsvHashReader
49 | class Builder
50 |   def initialize( parser )
51 |     @parser = parser
52 |   end
53 | 
54 |   ## (auto-)forward to wrapped parser
55 |   ##   note/fix: not all parser use/have config e.g. ParserTab, ParserFixed, etc.
56 |   ##
57 |   ## todo/fix:
58 |   ##   add parser config (attribute) setter e.g.
59 |   ##   - sep=(value)
60 |   ##   - comment=(value)
61 |   ##   - and so on!!!
62 |   def config() @parser.config; end
63 | 
64 | 
65 | 
66 |   def open( path, mode=nil, **kwargs, &block )
67 |     CsvHashReader.open( path, mode, parser: @parser, **kwargs, &block )
68 |   end
69 | 
70 |   def read( path, **kwargs )
71 |     CsvHashReader.read( path, parser: @parser, **kwargs )
72 |   end
73 | 
74 |   def foreach( path, **kwargs, &block )
75 |     CsvHashReader.foreach( path, parser: @parser, **kwargs, &block )
76 |   end
77 | 
78 | 
79 |   def parse( str_or_readable, **kwargs, &block )
80 |     CsvHashReader.parse( str_or_readable, parser: @parser, **kwargs, &block )
81 |   end
82 | end # class Builder
83 | end # class CsvHashReader
84 | 


--------------------------------------------------------------------------------
/csvreader/lib/csvreader/parser.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | class CsvReader
 3 | 
 4 | class Parser
 5 |   ## "forward" reference,
 6 |   ##     see base.rb for more
 7 | end
 8 | 
 9 | 
10 | ####################################
11 | # define errors / exceptions
12 | #   for all parsers for (re)use
13 | 
14 | class Error < StandardError
15 | end
16 | 
17 | ####
18 | # todo/check:
19 | #  use "common" error class - why? why not?
20 | 
21 | class ParseError < Error
22 |   attr_reader :message
23 | 
24 |   def initialize( message )
25 |     @message = message
26 |   end
27 | 
28 |   def to_s
29 |     "*** csv parse error: #{@message}"
30 |   end
31 | end # class ParseError
32 | end # class CsvReader
33 | 


--------------------------------------------------------------------------------
/csvreader/lib/csvreader/parser_fixed.rb:
--------------------------------------------------------------------------------
  1 | 
  2 | class CsvReader
  3 | 
  4 | class ParserFixed
  5 | 
  6 | ###################################
  7 | ## add simple logger with debug flag/switch
  8 | #
  9 | #  use Parser.debug = true   # to turn on
 10 | #
 11 | #  todo/fix: use logutils instead of std logger - why? why not?
 12 | 
 13 | def self.build_logger()
 14 |   l = Logger.new( STDOUT )
 15 |   l.level = :info    ## set to :info on start; note: is 0 (debug) by default
 16 |   l
 17 | end
 18 | def self.logger() @@logger ||= build_logger; end
 19 | def logger()  self.class.logger; end
 20 | 
 21 | 
 22 | def parse( data, width:, &block )
 23 | 
 24 |   ## note: input: required each_line (string or io/file for example)
 25 | 
 26 |   input = data   ## assume it's a string or io/file handle
 27 | 
 28 |   if block_given?
 29 |     parse_lines( input, width: width, &block )
 30 |   else
 31 |     records = []
 32 | 
 33 |     parse_lines( input, width: width ) do |record|
 34 |       records << record
 35 |     end
 36 | 
 37 |     records
 38 |   end
 39 | end ## method parse
 40 | 
 41 | 
 42 | 
 43 | private
 44 | 
 45 | def parse_lines( input, width:, &block )
 46 | 
 47 |   ## note: each line only works with \n (windows) or \r\n (unix)
 48 |   ##   will NOT work with \r (old mac, any others?) only!!!!
 49 |   input.each_line do |line|
 50 | 
 51 |     ##  note: chomp('') if is an empty string,
 52 |     ##    it will remove all trailing newlines from the string.
 53 |     ##    use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
 54 |     line = line.chomp( '' )
 55 |     logger.debug "line:"                if logger.debug?
 56 |     logger.debug line.pretty_inspect    if logger.debug?
 57 | 
 58 | 
 59 |     ## skip empty lines and comments
 60 |     if line =~ /^[ \t]*$/   ## skip blank lines (with whitespace only)
 61 |        logger.debug "skip blank line"    if logger.debug?
 62 |        next
 63 |     end
 64 | 
 65 |     if line =~ /^[ \t]*#/   # start_with?( "#" ) -- skip comment lines (note: allow leading whitespaces)
 66 |        logger.debug "skip comment line"   if logger.debug?
 67 |        next
 68 |     end
 69 | 
 70 | 
 71 |     if width.is_a?( String )
 72 |       ## assume it's String#unpack format e.g.
 73 |       ##   "209231-231992395    MoreData".unpack('aa5A1A9a4Z*')
 74 |       ##     returns an array as follows :
 75 |       ##   ["2", "09231", "-", "231992395", "    ", "MoreData"]
 76 |       ##  see String#unpack
 77 | 
 78 |       values = line.unpack( width )
 79 |     else  ## assume array with integers
 80 |       values = []
 81 |       offset = 0  # start position / offset
 82 |       width.each_with_index do |w,i|
 83 |          logger.debug "[#{i}] start: #{offset}, width: #{w}"   if logger.debug?
 84 | 
 85 |          if w < 0   ## convention - if width negative, skip column
 86 |             # note: minus (-) and minus (-) equal plus (+)
 87 |             ##   e.g. 2 - -2 = 4
 88 |            offset -= w
 89 |          else
 90 |            value = line[offset, w]
 91 |            value = value.strip   if value    ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
 92 |            values << value
 93 |            offset += w
 94 |          end
 95 |       end
 96 |     end
 97 | 
 98 |     ## note: requires block - enforce? how? why? why not?
 99 |     block.call( values )
100 |   end
101 | end # method parse_lines
102 | 
103 | 
104 | end # class ParserFixed
105 | end # class CsvReader
106 | 


--------------------------------------------------------------------------------
/csvreader/lib/csvreader/parser_json.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | class CsvReader
 4 | 
 5 | class ParserJson
 6 | 
 7 | def parse( data, **kwargs, &block )
 8 |   ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
 9 | 
10 |   ## note: input: required each_line (string or io/file for example)
11 |   ## assume data is a string or io/file handle
12 |   csv = CsvJson.new( data )
13 | 
14 |   if block_given?
15 |     csv.each( &block )
16 |   else
17 |     csv.to_a
18 |   end
19 | end ## method parse
20 | 
21 | 
22 | end # class ParserJson
23 | end # class CsvReader
24 | 


--------------------------------------------------------------------------------
/csvreader/lib/csvreader/parser_tab.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | class CsvReader
 3 | 
 4 | class ParserTab
 5 | 
 6 | def parse( data, **kwargs, &block )
 7 |   ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
 8 | 
 9 |   ## note: input: required each_line (string or io/file for example)
10 |   ## assume data is a string or io/file handle
11 |   tab = TabReader.new( data )
12 | 
13 |   if block_given?
14 |     tab.each( &block )
15 |   else
16 |     tab.to_a
17 |   end
18 | end ## method parse
19 | 
20 | 
21 | end # class ParserTab
22 | end # class CsvReader
23 | 


--------------------------------------------------------------------------------
/csvreader/lib/csvreader/parser_yaml.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | class CsvReader
 4 | 
 5 | class ParserYaml
 6 | 
 7 | def parse( data, **kwargs, &block )
 8 |   ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
 9 | 
10 |   ## note: input: required each_line (string or io/file for example)
11 |   ## assume data is a string or io/file handle
12 |   csv = CsvYaml.new( data )
13 | 
14 |   if block_given?
15 |     csv.each( &block )
16 |   else
17 |     csv.to_a
18 |   end
19 | end ## method parse
20 | 
21 | 
22 | end # class ParserYaml
23 | end # class CsvReader
24 | 


--------------------------------------------------------------------------------
/csvreader/lib/csvreader/version.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | class CsvReader   ## note: uses a class for now - change to module - why? why not?
 3 | 
 4 |   module Version
 5 |     MAJOR = 1    ## todo: namespace inside version or something - why? why not??
 6 |     MINOR = 2
 7 |     PATCH = 5
 8 | 
 9 |     ## self.to_s  - why? why not?
10 |   end
11 | 
12 |   VERSION = [Version::MAJOR,
13 |              Version::MINOR,
14 |              Version::PATCH].join('.')
15 | 
16 |   def self.version   ## keep (as an alternative to VERSION) - why? why not?
17 |     VERSION
18 |   end
19 | 
20 | 
21 | 
22 |   def self.banner
23 |     "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
24 |   end
25 | 
26 |   def self.root
27 |     File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
28 |   end
29 | 
30 | end # class CsvReader
31 | 


--------------------------------------------------------------------------------
/csvreader/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | ## minitest setup
 4 | 
 5 | require 'minitest/autorun'
 6 | 
 7 | 
 8 | ## our own code
 9 | require 'csvreader'
10 | ## require 'csvreader/base'    ## try modular version (that is, without Csv,CsvHash "top-level" shortcuts)
11 | 
12 | 
13 | ## add test_data_dir helper
14 | class CsvReader
15 |   def self.test_data_dir
16 |     "#{root}/datasets"
17 |   end
18 | end
19 | 
20 | 
21 | ## CsvReader::ParserStd.logger.level    = :debug   ## turn on "global" logging
22 | ## CsvReader::ParserStrict.logger.level = :debug   ## turn on "global" logging
23 | ## CsvReader::ParserFixed.logger.level = :debug   ## turn on "global" logging
24 | CsvReader::ParserTable.logger.level = :debug   ## turn on "global" logging
25 | 


--------------------------------------------------------------------------------
/csvreader/test/test_buffer.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_buffer.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
11 | class TestBuffer < MiniTest::Test
12 | 
13 | 
14 | def test_peek
15 | 
16 |   buf = CsvReader::Buffer.new( <<TXT )
17 | # hello
18 | 1,2,3
19 | TXT
20 | 
21 |   assert_equal '#',    buf.peek
22 |   assert_equal '#',    buf.peek1
23 |   assert_equal '#',    buf.peekn(1)
24 |   assert_equal '# ',   buf.peekn(2)
25 |   assert_equal '# h',  buf.peekn(3)
26 |   assert_equal '# he', buf.peekn(4)
27 | 
28 |   buf.getc   ## eat first char
29 | 
30 |   assert_equal ' ',    buf.peek
31 |   assert_equal ' ',    buf.peek1
32 |   assert_equal ' ',    buf.peekn(1)
33 |   assert_equal ' h',   buf.peekn(2)
34 |   assert_equal ' he',  buf.peekn(3)
35 |   assert_equal ' hel', buf.peekn(4)
36 | end
37 | 
38 | 
39 | end # class TestBuffer
40 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_autofix.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_autofix.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
11 | class TestParserAutofix < MiniTest::Test
12 | 
13 | 
14 | def parser
15 |   CsvReader::Parser::DEFAULT
16 | end
17 | 
18 | 
19 | def test_quote_with_trailing_value
20 |   recs = [[ "Farrokh", "\"Freddy\" Mercury", "Bulsara" ]]
21 | 
22 |   assert_equal recs, parser.parse( %Q{Farrokh,"Freddy" Mercury,Bulsara} )
23 |   assert_equal recs, parser.parse( %Q{  Farrokh , "Freddy" Mercury  , Bulsara } )
24 |   assert_equal recs, parser.parse( %Q{Farrokh,  "Freddy" Mercury   ,Bulsara} )
25 | end
26 | 
27 | 
28 | end # class TestParserAutofix
29 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_directive.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_directive.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestParserDirective < MiniTest::Test
11 | 
12 | 
13 | def parser
14 |   CsvReader::Parser::DEFAULT
15 | end
16 | 
17 | 
18 | def test_iris
19 |   records = [["5.1","3.5","1.4","0.2","Iris-setosa"],
20 |              ["4.9","3.0","1.4","0.2","Iris-setosa"]]
21 | 
22 | 
23 |   assert_equal records, parser.parse( <<TXT )
24 | % with meta data - arff (attribute relation file format)-style
25 | %
26 | 
27 | @RELATION iris
28 | 
29 | @ATTRIBUTE sepallength NUMERIC
30 | @ATTRIBUTE sepalwidth NUMERIC
31 | @ATTRIBUTE petallength NUMERIC
32 | @ATTRIBUTE petalwidth NUMERIC
33 | @ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
34 | 
35 | @DATA
36 | 5.1,3.5,1.4,0.2,Iris-setosa
37 | 4.9,3.0,1.4,0.2,Iris-setosa
38 | TXT
39 | end
40 | 
41 | 
42 | def test_lcc
43 |   records = [['AG5',   'Encyclopedias and dictionaries.;Twentieth century.'],
44 |              ['AS262', 'Science -- Soviet Union -- History.'],
45 |              ['AE5',   'Encyclopedias and dictionaries.'],
46 |              ['AS281', 'Astronomy, Assyro-Babylonian.;Moon -- Phases.'],
47 |              ['AS281', 'Astronomy, Assyro-Babylonian.;Moon -- Tables.']]
48 | 
49 | 
50 |   assert_equal records, parser.parse( <<TXT )
51 | %  Attribute-Relation File Format (ARFF) Example
52 | %    see https://www.cs.waikato.ac.nz/ml/weka/arff.html
53 | 
54 | @relation LCCvsLCSH
55 | 
56 | @attribute LCC string
57 | @attribute LCSH string
58 | 
59 | @data
60 | AG5,   'Encyclopedias and dictionaries.;Twentieth century.'
61 | AS262, 'Science -- Soviet Union -- History.'
62 | AE5,   'Encyclopedias and dictionaries.'
63 | AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Phases.'
64 | AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Tables.'
65 | TXT
66 | end
67 | 
68 | end # class TestParserDirective
69 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_fixed.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_fixed.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestParserFixed < MiniTest::Test
11 | 
12 | 
13 | def parser() CsvReader::Parser::FIXED;  end
14 | def reader() CsvReader.fixed;           end
15 | 
16 | 
17 | def test_numbers
18 |    numbers( parser )
19 |    numbers( reader )
20 | end
21 | 
22 | def test_contacts
23 |    contacts( parser )
24 |    contacts( reader )
25 | end
26 | 
27 | 
28 | def numbers( parser )
29 |   records = [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
30 | 
31 |   assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
32 | # fixed width with comments and blank lines
33 | 
34 | 12345678123456781234567890123456789012345678901212345678901234
35 | 
36 | TXT
37 | 
38 |   assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
39 | 12345678123456781234567890123456789012345678901212345678901234
40 | TXT
41 | 
42 |   ## note: negative width fields gets skipped
43 |   assert_equal records, parser.parse( <<TXT, width: [8,-2,8,-3,32,-2,14] )
44 | 12345678XX12345678XXX12345678901234567890123456789012XX12345678901234XXX
45 | TXT
46 | end
47 | 
48 | 
49 | def contacts( parser )
50 |   records = [["John",    "Smith",    "john@example.com",    "1-888-555-6666"],
51 |              ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
52 | 
53 |   assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
54 | # fixed width with comments and blank lines
55 | 
56 | John    Smith   john@example.com                1-888-555-6666
57 | Michele O'Reileymichele@example.com             1-333-321-8765
58 | 
59 | TXT
60 | 
61 | 
62 |    assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
63 | John    Smith   john@example.com                1-888-555-6666
64 | Michele O'Reileymichele@example.com             1-333-321-8765
65 | TXT
66 | end
67 | 
68 | 
69 | 
70 | def test_unpack_numbers
71 |   records = [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
72 | 
73 |   assert_equal records, parser.parse( <<TXT, width: 'a8 a8 a32 Z*' )
74 | 12345678123456781234567890123456789012345678901212345678901234
75 | TXT
76 | end
77 | 
78 | def test_unpack_contacts
79 |   records = [["John",    "Smith",    "john@example.com",    "1-888-555-6666"],
80 |              ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
81 | 
82 |   assert_equal records, parser.parse( <<TXT, width: 'A8 A8 A32 Z*' )
83 | John    Smith   john@example.com                1-888-555-6666
84 | Michele O'Reileymichele@example.com             1-333-321-8765
85 | TXT
86 | end
87 | 
88 | 
89 | 
90 | end # class TestParserFixed
91 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_formats.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_formats.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestParserFormats < MiniTest::Test
11 | 
12 | 
13 | def parser
14 |   CsvReader::Parser
15 | end
16 | 
17 | 
18 | def test_parse_whitespace
19 |    records = [["a", "b", "c"],
20 |               ["1", "2", "3"]]
21 | 
22 |    ## don't care about newlines (\r\n) ??? - fix? why? why not?
23 |    assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
24 |    assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
25 |    assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
26 |    assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
27 | 
28 |    assert_equal [["a", "b", "c"],
29 |                  [""],
30 |                  ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
31 |    assert_equal [["", ""],
32 |                  [""],
33 |                  ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
34 | 
35 | 
36 |    ## strict rfc4180 - no trim leading or trailing spaces or blank lines
37 |    assert_equal records,   parser.strict.parse( "a,b,c\n1,2,3" )
38 |    assert_equal [["a", "b", "c"],
39 |                  [""],
40 |                  ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
41 |    assert_equal [[" a", " b ", "c "],
42 |                  [""],
43 |                  ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
44 |     assert_equal [[" a", " b ", "c "],
45 |                   [" "],
46 |                   ["",""],
47 |                   ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
48 | end
49 | 
50 | 
51 | def test_parse_empties
52 |     assert_equal [], parser.default.parse( "\n \n \n" )
53 | 
54 |     ## strict rfc4180 - no trim leading or trailing spaces or blank lines
55 |     assert_equal [[""],
56 |                   [" "],
57 |                   [" "]], parser.strict.parse( "\n \n \n" )
58 |     assert_equal [[""],
59 |                   [" "],
60 |                   [" "]], parser.strict.parse( "\n \n " )
61 | 
62 |     assert_equal [[""]], parser.strict.parse( "\n" )
63 |     assert_equal [],     parser.strict.parse( "" )
64 | end
65 | 
66 | end # class TestParserFormats
67 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_meta.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_meta.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestParserMeta < MiniTest::Test
11 | 
12 | 
13 | def parser
14 |   CsvReader::Parser::DEFAULT
15 | end
16 | 
17 | 
18 | def test_parse
19 |   pp parser.config
20 |   
21 |   records = [["a", "b", "c"],
22 |              ["1", "2", "3"]]
23 | 
24 |   assert_equal records, parser.parse( <<TXT )
25 | # with meta data
26 | ## see https://blog.datacite.org/using-yaml-frontmatter-with-csv/
27 | ---
28 | columns:
29 | - title: Purchase Date
30 |   type: date
31 | - title: Item
32 |   type: string
33 | - title: Amount (€)
34 |   type: float
35 | ---
36 | a,b,c
37 | 1,2,3
38 | TXT
39 | 
40 |   pp parser.meta
41 |   meta = { "columns"=>
42 |              [{"title"=>"Purchase Date", "type"=>"date"},
43 |               {"title"=>"Item",          "type"=>"string"},
44 |               {"title"=>"Amount (€)",    "type"=>"float"}]
45 |          }
46 |   assert_equal meta, parser.meta
47 | 
48 | 
49 |   assert_equal records, parser.parse( <<TXT )
50 | # with (empty) meta data
51 | ---
52 | ---
53 | a,b,c
54 | 1,2,3
55 | TXT
56 | 
57 |   pp parser.meta
58 |   meta = {}
59 |   assert_equal meta, parser.meta
60 | 
61 | 
62 | 
63 |   assert_equal records, parser.parse( <<TXT )
64 | # without meta data
65 | a,b,c
66 | 1,2,3
67 | TXT
68 | 
69 |   assert_nil parser.meta
70 | end
71 | 
72 | 
73 | end # class TestParserMeta
74 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_null.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | ###
  4 | #  to run use
  5 | #     ruby -I ./lib -I ./test test/test_parser_null.rb
  6 | 
  7 | 
  8 | require 'helper'
  9 | 
 10 | 
 11 | class TestParserNull < MiniTest::Test
 12 | 
 13 | 
 14 | def parser
 15 |   CsvReader::Parser
 16 | end
 17 | 
 18 | 
 19 | def test_escaped_mysql_null_value
 20 |   ## MySQL uses \N to symbolize null values. We have to restore this
 21 | 
 22 |     ## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
 23 |     ##   only supports \n \r  (sep e.g \, or \t)  (quote e.g. \") for now - any others?
 24 |     assert_equal [[ "character\\NEscaped" ]],
 25 |                  parser.default.parse( "character\\NEscaped" )
 26 | 
 27 |     assert_equal [[ "character\\NEscaped" ]],
 28 |                  parser.strict.parse( "character\\NEscaped" )
 29 | end
 30 | 
 31 | 
 32 | def test_mysql_null_value
 33 |   default_null_values = parser.default.config[:null]  ## save default null settings
 34 | 
 35 |   assert_equal [[ nil, nil, "" ]],
 36 |                parser.default.parse( "\\N, \\N ," )
 37 | 
 38 |   ## escaped with quotes
 39 |   assert_equal [[ "\\N", "\\N", "" ]],
 40 |                parser.default.parse( %Q{"\\N", "\\N" ,} )
 41 | 
 42 |   ## try single \N setting
 43 |   parser.default.null = "\\N"
 44 |   assert_equal [[ nil, nil, "" ]],
 45 |                parser.default.parse( "\\N, \\N ," )
 46 | 
 47 |   ## try no null values setting
 48 |   parser.default.null = nil
 49 |   assert_equal [[ "\\N", "\\N", "" ]],
 50 |                parser.default.parse( "\\N, \\N ," )
 51 | 
 52 |   ## try postgresql unquoted empty string is nil/null
 53 |   parser.default.null = ""
 54 |   assert_equal [[ nil, nil, "" ],
 55 |                 [ nil, nil, "", nil ]],
 56 |                parser.default.parse( %Q{,,""\n ,  , "" ,} )
 57 | 
 58 |   ## try proc
 59 |   parser.default.null = ->(value) { value.downcase == 'nil' }
 60 |   assert_equal [[ nil, nil, nil, "" ]],
 61 |                parser.default.parse( "nil, Nil, NIL," )
 62 | 
 63 |   ## try array
 64 |   parser.default.null = ['nil', 'Nil', 'NIL']
 65 |   assert_equal [[ nil, nil, nil, "" ]],
 66 |                 parser.default.parse( "nil, Nil, NIL," )
 67 | 
 68 |   ## restore defaults
 69 |   parser.default.null = default_null_values  ## ['\N', 'NA']
 70 | end
 71 | 
 72 | 
 73 | def test_strict_mysql_null_value
 74 |   assert_equal [[ "\\N", " \\N ", "" ]],
 75 |                parser.strict.parse( "\\N, \\N ," )
 76 | 
 77 |   ## try single \N setting
 78 |   parser.strict.null = "\\N"
 79 |   assert_equal [[ nil, nil, " \\N", "\\N ", "" ]],
 80 |                parser.strict.parse( "\\N,\\N, \\N,\\N ," )
 81 | 
 82 |   ## escaped with quotes
 83 |   assert_equal [[ "\\N", "\\N", nil, "" ]],
 84 |                parser.strict.parse( %Q{"\\N","\\N",\\N,} )
 85 | 
 86 | 
 87 |   ## try postgresql unquoted empty string is nil/null
 88 |   parser.strict.null = ""
 89 |   assert_equal [[ nil, nil, "" ],
 90 |                 [ " ", "  ", "", nil ]],
 91 |                parser.strict.parse( %Q{,,""\n ,  ,"",} )
 92 | 
 93 |   ## try proc
 94 |   parser.strict.null = ->(value) { value.downcase == 'nil' }
 95 |   assert_equal [[ nil, nil, nil, "" ]],
 96 |                parser.strict.parse( "nil,Nil,NIL," )
 97 | 
 98 |   ## try array
 99 |   parser.strict.null = ['nil', 'Nil', 'NIL']
100 |   assert_equal [[ nil, nil, nil, "" ]],
101 |                 parser.strict.parse( "nil,Nil,NIL," )
102 | 
103 |   ## restore defaults
104 |   parser.strict.null = nil
105 | end
106 | 
107 | end # class TestParserNull
108 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_numeric.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_numeric.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
11 | class TestParserNumeric < MiniTest::Test
12 | 
13 | def parser
14 |   CsvReader::Parser::NUMERIC
15 | end
16 | 
17 | 
18 | def test_parser_numeric
19 |   pp CsvReader::Parser::NUMERIC
20 |   pp CsvReader::Parser.numeric
21 |   assert true
22 | end
23 | 
24 | def test_parse
25 |    assert_equal [[1.0,2.0,3.0],
26 |                  [4.0,5.0,6.0]], parser.parse( "1,2,3\n4,5,6" )
27 |    assert_equal [[1.0,2.0,3.0],
28 |                  ["4","5","6"]], parser.parse( %Q{ 1,2 , 3\n"4","5","6"} )
29 |    assert_equal [[1.0,2.0,3.0],
30 |                 ["4","5","6"]], parser.parse( %Q{ 1,2 , 3\n "4", "5" ,"6" } )
31 |    assert_equal [["a","b","c"]], parser.parse( %Q{"a","b","c"} )
32 | end
33 | 
34 | 
35 | def test_empty
36 |    assert_equal [[nil,nil,nil],
37 |                  ["","",""]],    parser.parse( %Q{,,\n"","",""} )
38 | end
39 | 
40 | end # class TestParserNumeric
41 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_quotes.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_quotes.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | 
11 | class TestParserQuotes < MiniTest::Test
12 | 
13 | 
14 | def parser
15 |   CsvReader::Parser::DEFAULT
16 | end
17 | 
18 | 
19 | def test_french_single
20 |   assert_equal [[ "a", "b", "c" ]],
21 |                parser.parse( " ‹a›, ‹b›, ›c‹ " )
22 | 
23 |   assert_equal [[ "a,1", " b,2", "c, 3" ]],
24 |                parser.parse( " ‹a,1›, ‹ b,2›, ›c, 3‹ " )
25 | 
26 |   assert_equal [[ %Q{"a"}, %Q{'b'}, %Q{c'"'"} ]],
27 |                parser.parse( %Q{ ‹"a"›, ‹'b'›, ›c'"'"‹} )
28 | 
29 |   # note: quote matches only if first non-whitespace char
30 |   assert_equal [[ "_‹a›", "_‹b›", "›c‹" ]],
31 |                parser.parse( %Q{ _‹a›, _‹b›, "›c‹"} )
32 | 
33 | end
34 | 
35 | 
36 | def test_french_double
37 |   assert_equal [[ "a", "b", "c" ]],
38 |                parser.parse( " «a», «b», »c« " )
39 | 
40 |   assert_equal [[ "a,1", " b,2", "c, 3" ]],
41 |                parser.parse( " «a,1», « b,2», »c, 3« " )
42 | 
43 |   assert_equal [[ %Q{"a"}, %Q{'b'}, %Q{c'"'"} ]],
44 |                parser.parse( %Q{ «"a"», «'b'», »c'"'"«} )
45 | 
46 |   # note: quote matches only if first non-whitespace char
47 |   assert_equal [[ "_«a»", "_«b»", "»c«" ]],
48 |                parser.parse( %Q{ _«a», _«b», "»c«"} )
49 | 
50 | end
51 | 
52 | 
53 | end # class TestParserQuotes
54 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_strict.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_strict.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestParserStrict < MiniTest::Test
11 | 
12 | 
13 | def parser
14 |   CsvReader::Parser::STRICT
15 | end
16 | 
17 | 
18 | def test_parser_strict
19 |   pp CsvReader::Parser::STRICT
20 |   pp CsvReader::Parser.strict
21 |   assert true
22 | end
23 | 
24 | def test_parse
25 |    records = [["a", "b", "c"],
26 |               ["1", "2", "3"],
27 |               ["4", "5", "6"]]
28 | 
29 |    ## don't care about newlines (\r\n) ??? - fix? why? why not?
30 |    assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6" )
31 |    assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6\n" )
32 |    assert_equal records, parser.parse( "a,b,c\r1,2,3\r4,5,6" )
33 |    assert_equal records, parser.parse( "a,b,c\r\n1,2,3\r\n4,5,6\r\n" )
34 | end
35 | 
36 | def test_parse_semicolon
37 |    records = [["a", "b", "c"],
38 |               ["1", "2", "3"],
39 |               ["4", "5", "6"]]
40 | 
41 |    ## don't care about newlines (\r\n) ??? - fix? why? why not?
42 |    assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6",         sep: ';' )
43 |    assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6\n",       sep: ';' )
44 |    assert_equal records, parser.parse( "a;b;c\r1;2;3\r4;5;6",         sep: ';' )
45 |    assert_equal records, parser.parse( "a;b;c\r\n1;2;3\r\n4;5;6\r\n", sep: ';' )
46 | end
47 | 
48 | def test_parse_tab
49 |    records = [["a", "b", "c"],
50 |               ["1", "2", "3"],
51 |               ["4", "5", "6"]]
52 | 
53 |    ## don't care about newlines (\r\n) ??? - fix? why? why not?
54 |    assert_equal records, parser.parse( "a\tb\tc\n1\t2\t3\n4\t5\t6",         sep: "\t" )
55 |    assert_equal records, parser.parse( "a\tb\tc\n1\t2\t3\n4\t5\t6\n",       sep: "\t" )
56 |    assert_equal records, parser.parse( "a\tb\tc\r1\t2\t3\r4\t5\t6",         sep: "\t" )
57 |    assert_equal records, parser.parse( "a\tb\tc\r\n1\t2\t3\r\n4\t5\t6\r\n", sep: "\t" )
58 | end
59 | 
60 | 
61 | 
62 | def test_parse_empties
63 |   assert_equal [["","",""],["","",""]],
64 |                parser.parse( %Q{"","",""\n,,} )
65 | 
66 |   parser.null = ""
67 |   assert_equal [["","",""," "],[nil,nil,nil," "]],
68 |                parser.parse( %Q{"","",""," "\n,,, } )
69 |   parser.null = [""]   ## try array (allows multiple null values)
70 |   assert_equal [[nil,nil,nil," "],["","",""," "]],
71 |                parser.parse( %Q{,,, \n"","",""," "} )
72 | 
73 |   ## reset to defaults
74 |   parser.null = nil
75 |   assert_equal [["","",""],["","",""]],
76 |                parser.parse( %Q{"","",""\n,,} )
77 | end
78 | 
79 | 
80 | end # class TestParserStrict
81 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_tab.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_tab.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestParserTab < MiniTest::Test
11 | 
12 | 
13 | def parser
14 |   CsvReader::Parser::TAB
15 | end
16 | 
17 | 
18 | def test_parser_tab
19 |   pp CsvReader::Parser::TAB
20 |   pp CsvReader::Parser.tab
21 |   assert true
22 | end
23 | 
24 | def test_parse
25 |   records = [["a", "b", "c"],
26 |              ["1", "2", "3"],
27 |              ["4", "5", "6"]]
28 | 
29 |   ## don't care about newlines (\r\n)
30 |   assert_equal records, parser.parse( "a\tb\tc\n1\t2\t3\n4\t5\t6" )
31 |   assert_equal records, parser.parse( "a\tb\tc\n1\t2\t3\n4\t5\t6\n" )
32 |   assert_equal records, parser.parse( "a\tb\tc\r\n1\t2\t3\r\n4\t5\t6\r\n" )
33 | end
34 | 
35 | def test_parse_empties
36 |   # note: trailing empty fields got (auto-)trimmed !!!!!!!;
37 |   #        add missing -1 limit option :-) now works
38 |   assert_equal [["","",""]],        parser.parse( "\t\t" )
39 |   assert_equal [["","","","",""]],  parser.parse( "\t\t\t\t" )
40 |   assert_equal [["1","",""]],       parser.parse( "1\t\t" )
41 |   assert_equal [["1","","","",""]], parser.parse( "1\t\t\t\t" )
42 |   assert_equal [["","","3"]],       parser.parse( "\t\t3" )
43 |   assert_equal [["","","","","5"]], parser.parse( "\t\t\t\t5" )
44 | 
45 |   assert_equal [], parser.parse( "" )
46 | end
47 | 
48 | 
49 | end # class TestParserTab
50 | 


--------------------------------------------------------------------------------
/csvreader/test/test_parser_table.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_table.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestParserTable < MiniTest::Test
11 | 
12 | 
13 | def parser() CsvReader::Parser::TABLE;  end
14 | 
15 | 
16 | def test_space
17 |   records = [["1", "Man City", "10", "8", "2", "0", "27", "3", "24", "26"],
18 |              ["2", "Liverpool", "10", "8", "2", "0", "20", "4", "16", "26"],
19 |              ["3", "Chelsea", "10", "7", "3", "0", "24", "7", "17", "24"],
20 |              ["4", "Arsenal", "10", "7", "1", "2", "24", "13", "11", "22"],
21 |              ["8", "Man Utd", "10", "5", "2", "3", "17", "17", "0", "17"],
22 |              ["13", "West Ham", "10", "2", "2", "6", "9", "15", "-6", "8"],
23 |              ["14", "Crystal Palace", "10", "2", "2", "6", "7", "13", "-6", "8"]]
24 | 
25 |   parser.space='_'
26 | 
27 |   assert_equal records, parser.parse( <<TXT )
28 |       1  Man_City 10 8 2 0 27 3 24 26
29 |       2  Liverpool 10 8 2 0 20 4 16 26
30 |       3  Chelsea 10 7 3 0 24 7 17 24
31 |       4  Arsenal 10 7 1 2 24 13 11 22
32 |       8  Man_Utd 10 5 2 3 17 17 0 17
33 |       13  West_Ham 10 2 2 6 9 15 -6 8
34 |       14  Crystal_Palace 10 2 2 6 7 13 -6 8
35 | TXT
36 | 
37 |   assert_equal [[" "," ","  "]], parser.parse( "_ _ __" )
38 | 
39 | 
40 |   parser.space='•'
41 | 
42 |   assert_equal records, parser.parse( <<TXT )
43 |       1  Man•City 10 8 2 0 27 3 24 26
44 |       2  Liverpool 10 8 2 0 20 4 16 26
45 |       3  Chelsea 10 7 3 0 24 7 17 24
46 |       4  Arsenal 10 7 1 2 24 13 11 22
47 |       8  Man•Utd 10 5 2 3 17 17 0 17
48 |       13  West•Ham 10 2 2 6 9 15 -6 8
49 |       14  Crystal•Palace 10 2 2 6 7 13 -6 8
50 | TXT
51 | 
52 |   assert_equal [[" "," ","  "]], parser.parse( "• • ••" )
53 | 
54 |   parser.space = nil  ## reset to default setting
55 | end
56 | 
57 | 
58 | def test_contacts
59 |   records = [["aa", "bbb"],
60 |              ["cc", "dd", "ee"]]
61 | 
62 |   assert_equal records, parser.parse( <<TXT )
63 | # space-separated with comments and blank lines
64 | 
65 |  aa bbb
66 | cc    dd ee
67 | 
68 | TXT
69 | 
70 |    assert_equal records, parser.parse( <<TXT )
71 |  aa bbb
72 | cc    dd ee
73 | TXT
74 | end
75 | 
76 | 
77 | end # class TestParserTable
78 | 


--------------------------------------------------------------------------------
/csvreader/test/test_reader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_reader.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestReader < MiniTest::Test
11 | 
12 | 
13 | 
14 | 
15 | def test_parse_line
16 |   puts "== parse_line:"
17 |   row = CsvReader.parse_line( <<TXT )
18 | Augustiner Bräu München,                 München,  Edelstoff,      5.6%
19 | Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
20 | TXT
21 | 
22 |   pp row
23 |   assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
24 | end
25 | 
26 | def test_parse_line11
27 |   puts "== parse_line:"
28 |   row = CsvReader.parse_line( <<TXT )
29 | #######
30 | #  try with some comments
31 | #   and blank lines even before header
32 | 
33 | Augustiner Bräu München,                 München,   Edelstoff,      5.6%
34 | Bayerische Staatsbrauerei Weihenstephan, Freising,  Hefe Weissbier, 5.4%
35 | TXT
36 | 
37 |   pp row
38 |   assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
39 | end
40 | 
41 | 
42 | 
43 | def test_read
44 |   puts "== read: beer.csv:"
45 |   rows = CsvReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
46 |   pp rows
47 | 
48 |   rows.each do |row|
49 |     pp row
50 |   end
51 |   puts "  #{rows.size} rows"
52 |   assert_equal 7, rows.size   ## note: include header row in count
53 | end
54 | 
55 | 
56 | def test_header
57 |   puts "== header: beer.csv:"
58 |   header = CsvReader.header( "#{CsvReader.test_data_dir}/beer.csv" )
59 |   pp header
60 |   assert_equal ['Brewery','City','Name','Abv'], header
61 | end
62 | 
63 | def test_header11
64 |   puts "== header: beer11.csv:"
65 |   header = CsvReader.header( "#{CsvReader.test_data_dir}/beer11.csv" )
66 |   pp header
67 |   assert_equal ['Brewery','City','Name','Abv'], header
68 | end
69 | 
70 | 
71 | 
72 | def test_foreach
73 |   puts "== foreach: beer11.csv:"
74 |   CsvReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
75 |     pp row
76 |   end
77 |   assert true
78 | end
79 | 
80 | 
81 | def test_enum
82 |   csv = CsvReader.new( "a,b,c" )
83 |   enum = csv.to_enum
84 |   assert_equal ["a","b","c"], enum.next
85 | 
86 |   ## test Csv == CsvReader class alias
87 |   if defined?( Csv )
88 |     csv = Csv.new( "a,b,c" )
89 |     enum = csv.to_enum
90 |     assert_equal ["a","b","c"], enum.next
91 |   end
92 | end
93 | 
94 | 
95 | end # class TestReader
96 | 


--------------------------------------------------------------------------------
/csvreader/test/test_reader_converters.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_reader_converters.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestReaderConverters < MiniTest::Test
11 | 
12 | 
13 | def test_all
14 |   rows = CsvReader.parse( <<TXT, :converters => :all )
15 | 1,2,3
16 | true,false,null
17 | ,,
18 | TXT
19 | 
20 |   pp rows
21 | 
22 |   assert_equal 3, rows.size
23 |   assert_equal [1,2,3],          rows[0]
24 |   assert_equal [true,false,nil], rows[1]
25 |   assert_equal [nil,nil,nil],    rows[2]
26 | end
27 | 
28 | 
29 | def test_all_quotes
30 |   ## only convert unquoted values - why? why not?
31 |   ##   e.g.  1      =>  1 (integer)
32 |   ##         "1"    => "1" (string)
33 |   ##         true   => true (boolean)
34 |   ##         "true" => "true" (string)
35 |   ##
36 |   ##
37 |   ##  note: use CsvRecord for by column types / converters
38 | 
39 |   rows = CsvReader.parse( <<TXT, :converters => :all )
40 | "1","2","3"
41 | "true","false","null"
42 | "","",""
43 | TXT
44 | 
45 |   pp rows
46 | 
47 |   assert_equal 3, rows.size
48 |   assert_equal [1,2,3],          rows[0]
49 |   assert_equal [true,false,nil], rows[1]
50 |   assert_equal [nil,nil,nil],    rows[2]
51 | end
52 | 
53 | 
54 | end # class TestReaderConverters
55 | 


--------------------------------------------------------------------------------
/csvreader/test/test_reader_hash.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_reader_hash.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestHashReader < MiniTest::Test
11 | 
12 | 
13 | def test_read
14 |   puts "== read (hash): beer.csv:"
15 |   rows = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer.csv" )
16 |   pp rows
17 |   pp rows.to_a
18 | 
19 |   rows.each do |row|   ## note: will skip (NOT include) header row!!
20 |     pp row
21 |   end
22 |   puts "  #{rows.size} rows"  ## note: again will skip (NOT include) header row in count!!!
23 |   assert_equal 6, rows.size
24 | end
25 | 
26 | def test_read11
27 |   puts "== read (hash): beer11.csv:"
28 |   rows = CsvHashReader.read( "#{CsvReader.test_data_dir}/beer11.csv" )
29 |   pp rows
30 |   pp rows.to_a   ## note: includes header (first row with column names)
31 | 
32 |   assert true
33 | end
34 | 
35 | 
36 | def test_foreach
37 |   puts "== foreach (hash): beer.csv:"
38 |   CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer.csv" ) do |row|
39 |     pp row
40 |   end
41 |   assert true
42 | end
43 | 
44 | def test_foreach11
45 |   puts "== foreach (hash): beer11.csv:"
46 |   CsvHashReader.foreach( "#{CsvReader.test_data_dir}/beer11.csv" ) do |row|
47 |     pp row
48 |   end
49 |   assert true
50 | end
51 | 
52 | end # class TestHashReader
53 | 


--------------------------------------------------------------------------------
/csvreader/test/test_reader_hash_converters.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | ###
  4 | #  to run use
  5 | #     ruby -I ./lib -I ./test test/test_reader_hash_converters.rb
  6 | 
  7 | 
  8 | require 'helper'
  9 | 
 10 | class TestReaderHashConverters < MiniTest::Test
 11 | 
 12 | 
 13 | def test_nil
 14 |   ## default no converters
 15 |   rows = CsvHashReader.parse( <<TXT )
 16 | a,b,c
 17 | 1,2,3
 18 | true,false,null
 19 | ,,
 20 | TXT
 21 | 
 22 |   pp rows
 23 | 
 24 |   assert_equal 3, rows.size
 25 |   assert_equal( {'a'=>'1',   'b'=>'2',    'c'=>'3'},    rows[0] )
 26 |   assert_equal( {'a'=>'true','b'=>'false','c'=>'null'}, rows[1] )
 27 |   assert_equal( {'a'=>'',    'b'=>'',     'c'=>'' },    rows[2] )
 28 | end
 29 | 
 30 | 
 31 | def test_all
 32 |   rows = CsvHashReader.parse( <<TXT, :converters => :all )
 33 | a,b,c
 34 | 1,2,3
 35 | true,false,null
 36 | ,,
 37 | TXT
 38 | 
 39 |   pp rows
 40 | 
 41 |   assert_equal 3, rows.size
 42 |   assert_equal( {'a'=>1,   'b'=>2,    'c'=>3},   rows[0] )
 43 |   assert_equal( {'a'=>true,'b'=>false,'c'=>nil}, rows[1] )
 44 |   assert_equal( {'a'=>nil, 'b'=>nil,  'c'=>nil}, rows[2] )
 45 | end
 46 | 
 47 | 
 48 | def test_downcase
 49 |   rows = CsvHashReader.parse( <<TXT, :converters => :all, :header_converters => :downcase )
 50 | A,B,C
 51 | 1,2,3
 52 | true,false,null
 53 | ,,
 54 | TXT
 55 | 
 56 |   pp rows
 57 | 
 58 |   assert_equal 3, rows.size
 59 |   assert_equal( {'a'=>1,   'b'=>2,    'c'=>3},   rows[0] )
 60 |   assert_equal( {'a'=>true,'b'=>false,'c'=>nil}, rows[1] )
 61 |   assert_equal( {'a'=>nil, 'b'=>nil,  'c'=>nil}, rows[2] )
 62 | end
 63 | 
 64 | 
 65 | def test_symbol
 66 |   rows = CsvHashReader.parse( <<TXT, :converters => :all, :header_converters => :symbol )
 67 | a,b,c
 68 | 1,2,3
 69 | true,false,null
 70 | ,,
 71 | TXT
 72 | 
 73 |   pp rows
 74 | 
 75 |   assert_equal 3, rows.size
 76 |   assert_equal( {a: 1,    b: 2,     c: 3},   rows[0] )
 77 |   assert_equal( {a: true, b: false, c: nil}, rows[1] )
 78 |   assert_equal( {a: nil,  b: nil,   c: nil}, rows[2] )
 79 | end
 80 | 
 81 | 
 82 | 
 83 | def test_all_quotes
 84 |   ## only convert unquoted values - why? why not?
 85 |   ##   e.g.  1      =>  1 (integer)
 86 |   ##         "1"    => "1" (string)
 87 |   ##         true   => true (boolean)
 88 |   ##         "true" => "true" (string)
 89 |   ##
 90 |   ##
 91 |   ##  note: use CsvRecord for by column types / converters
 92 | 
 93 |   rows = CsvHashReader.parse( <<TXT, :converters => :all )
 94 | "a","b","c"
 95 | "1","2","3"
 96 | "true","false","null"
 97 | "","",""
 98 | TXT
 99 | 
100 |   pp rows
101 | 
102 |   assert_equal 3, rows.size
103 |   assert_equal( {'a'=>1,   'b'=>2,    'c'=>3},   rows[0] )
104 |   assert_equal( {'a'=>true,'b'=>false,'c'=>nil}, rows[1] )
105 |   assert_equal( {'a'=>nil, 'b'=>nil,  'c'=>nil}, rows[2] )
106 | end
107 | 
108 | 
109 | end # class TestReaderHashConverters
110 | 


--------------------------------------------------------------------------------
/csvreader/test/test_samples.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_samples.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestSamples < MiniTest::Test
11 | 
12 | 
13 | def test_cities11
14 |   records = CsvReader.read( "#{CsvReader.test_data_dir}/cities11.csv" )
15 |   pp records
16 | 
17 |   assert_equal [["Los Angeles",   "34°03'N",      "118°15'W"],
18 |                 ["New York City", %Q{40°42'46"N}, %Q{74°00'21"W}],
19 |                 ["Paris",         %Q{48°51'24"N}, %Q{2°21'03"E}]], records
20 | end
21 | 
22 | 
23 | def test_cars11
24 |   records = CsvReader.read( "#{CsvReader.test_data_dir}/cars11.csv" )
25 |   pp records
26 | 
27 |   assert_equal [["Year", "Make",  "Model",  "Description", "Price"],
28 |                 ["1997", "Ford",  "E350",   "ac, abs, moon", "3000.00"],
29 |                 ["1999", "Chevy", %Q{Venture "Extended Edition"}, "", "4900.00"],
30 |                 ["1999", "Chevy", %Q{Venture "Extended Edition, Very Large"}, "", "5000.00"],
31 |                 ["1996", "Jeep",  "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00"]], records
32 | end
33 | 
34 | 
35 | def test_customers11
36 |   records = CsvReader.read( "#{CsvReader.test_data_dir}/customers11.csv" )
37 |   pp records
38 | 
39 |   assert_equal [["Name",   "Times arrived", "Total $ spent", "Food feedback"],
40 |                 ["Dan",      "34", "2548", "Lovin it!"],
41 |                 ["Maria",    "55", "5054", "Good, delicious food"],
42 |                 ["Carlos",   "22", "4352", %Q{I am "pleased", but could be better}],
43 |                 ["Stephany", "34", "6542", "I want bigger steaks!!!!!"],
44 |                 ["James",    "1",    "43", "Not bad"],
45 |                 ["Robin",    "1",    "56", "Fish is tasty"],
46 |                 ["Anna",     "1",    "79", "Good, better, the best!"]], records
47 | end
48 | 
49 | def test_shakespeare11
50 |   records = CsvReader.read( "#{CsvReader.test_data_dir}/shakespeare.csv" )
51 |   pp records
52 | 
53 |   assert_equal [["Quote", "Play", "Cite"],
54 |                 ["Sweet are the uses of adversity", "As You Like It", "Act 2, scene 1, 12"],
55 |                 ["All the world's a stage", "As You Like It", "Act 2, scene 7, 139"],
56 |                 ["We few, we happy few", "Henry V", ""],
57 |                 [%Q{"Seems," madam! Nay it is; I know not "seems."}, "Hamlet", "(1.ii.76)"],
58 |                 ["To be, or not to be", "Hamlet", "Act 3, scene 1, 55"],
59 |                 ["What's in a name? That which we call a rose by any other name would smell as sweet.", "Romeo and Juliet", "(II, ii, 1-2)"],
60 |                 ["O Romeo, Romeo, wherefore art thou Romeo?", "Romeo and Juliet", "Act 2, scene 2, 33"],
61 |                 ["Tomorrow, and tomorrow, and tomorrow", "Macbeth", "Act 5, scene 5, 19"]], records
62 | end
63 | 
64 | 
65 | def test_test
66 |   records = CsvReader.read( "#{CsvReader.test_data_dir}/test.csv" )
67 |   pp records
68 | 
69 |   assert_equal [["A", "B", "C", "D"],
70 |                 ["a", "b", "c", "d"],
71 |                 ["e", "f", "g", "h"],
72 |                 [" i ", " j ", " k ", " l "],
73 |                 ["", "", "", ""],
74 |                 ["", "", "", ""]], records
75 | end
76 | 
77 | 
78 | end # class TestSamples
79 | 


--------------------------------------------------------------------------------
/csvrecord/.gitignore:
--------------------------------------------------------------------------------
 1 | #######################
 2 | # ignore ruby rake generated folders
 3 | 
 4 | /pkg/
 5 | /doc/
 6 | 
 7 | 
 8 | ################
 9 | # ignore (top-level) datapackage folders
10 | 
11 | /pack/
12 | /.pack/
13 | 


--------------------------------------------------------------------------------
/csvrecord/HISTORY.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2018-08-11
2 | 
3 | * Everything is new. First release.
4 | 


--------------------------------------------------------------------------------
/csvrecord/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | LICENSE.md
 3 | Manifest.txt
 4 | README.md
 5 | Rakefile
 6 | lib/csvrecord.rb
 7 | lib/csvrecord/base.rb
 8 | lib/csvrecord/version.rb
 9 | test/data/beer.csv
10 | test/data/beer11.csv
11 | test/helper.rb
12 | test/test_record.rb
13 | test/test_record_auto.rb
14 | test/test_version.rb
15 | 


--------------------------------------------------------------------------------
/csvrecord/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/csvrecord/version.rb'
 3 | 
 4 | Hoe.spec 'csvrecord' do
 5 | 
 6 |   self.version = CsvRecord::VERSION
 7 | 
 8 |   self.summary = "csvrecord - read in comma-separated values (csv) records with typed structs / schemas"
 9 |   self.description = summary
10 | 
11 |   self.urls = ['https://github.com/csvreader/csvrecord']
12 | 
13 |   self.author = 'Gerald Bauer'
14 |   self.email = 'wwwmake@googlegroups.com'
15 | 
16 |   # switch extension to .markdown for github formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.extra_deps = [
21 |      ['record',     '>=1.2.0'],
22 |      ['csvreader',  '>=1.1.4']
23 |    ]
24 | 
25 |   self.licenses = ['Public Domain']
26 | 
27 |   self.spec_extras = {
28 |    required_ruby_version: '>= 2.2.2'
29 |   }
30 | 
31 | end
32 | 


--------------------------------------------------------------------------------
/csvrecord/lib/csvrecord.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | # 3rd party gems
 5 | require 'record'
 6 | require 'csvreader'
 7 | 
 8 | 
 9 | ###
10 | # our own code
11 | require 'csvrecord/version' # let version always go first
12 | require 'csvrecord/base'
13 | 
14 | 
15 | # say hello
16 | puts CsvRecord.banner     if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
17 | 


--------------------------------------------------------------------------------
/csvrecord/lib/csvrecord/base.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | module CsvRecord
  5 | 
  6 | ########################
  7 | # Base
  8 | 
  9 | class Base < Record::Base
 10 | 
 11 | def self.foreach( path, sep: nil, headers: true )
 12 | 
 13 |   ## note: always use reader w/o headers to get row/record values as array of strings
 14 |   ##   if headers: true -> skip first row
 15 |   names = nil
 16 | 
 17 |   CsvReader.foreach( path, sep: sep ) do |row|
 18 |     if headers && names.nil?
 19 |       names = row   ## store header row / a.k.a. field/column names
 20 |     else
 21 |       rec = new
 22 |       rec.parse( row )
 23 | 
 24 |       yield( rec )    ## check: use block.class( rec ) - why? why not?
 25 |     end
 26 |   end
 27 | end
 28 | 
 29 | 
 30 | 
 31 | 
 32 | def self.parse( txt_or_rows, sep: nil, headers: true )  ## note: returns an (lazy) enumarator
 33 |   if txt_or_rows.is_a? String
 34 |     txt = txt_or_rows
 35 |     ## note: always use reader w/o headers to get row/record values as array of strings
 36 |     ##   if headers: true -> skip first row
 37 |     rows = CsvReader.parse( txt, sep: sep )
 38 |   else
 39 |     ### todo/fix: use only self.create( array-like ) for array-like data  - why? why not?
 40 |     rows = txt_or_rows
 41 |   end
 42 | 
 43 |   ## pp rows
 44 | 
 45 | 
 46 |   names = nil
 47 | 
 48 |   Enumerator.new do |yielder|
 49 |     rows.each do |row|
 50 |       if headers && names.nil?
 51 |         names = row   ## store header row / a.k.a. field/column names
 52 |       else
 53 |         rec = new
 54 |         rec.parse( row )
 55 | 
 56 |         yielder.yield( rec )
 57 |       end
 58 |     end
 59 |   end
 60 | end
 61 | 
 62 | 
 63 | def self.read( path, sep: nil, headers: true )  ## not returns an enumarator
 64 |   txt  = File.open( path, 'r:utf-8' ).read
 65 |   parse( txt, sep: sep, headers: headers )
 66 | end
 67 | 
 68 | 
 69 | 
 70 | def to_csv   ## use/rename/alias to to_row too - why? why not?
 71 |   ## todo/fix: check for date and use own date to string format!!!!
 72 |   @values.map{ |value| value.to_s }
 73 | end
 74 | 
 75 | end # class Base
 76 | 
 77 | 
 78 | 
 79 | 
 80 | 
 81 | ###########################################
 82 | ## "magic" lazy auto-build schema from headers versions
 83 | 
 84 | def self.build_class( headers )   ## check: find a better name - why? why not?
 85 |   ## (auto-)build record class from an array of headers
 86 |   ##   add fields (all types will be string for now)
 87 |   clazz = Class.new( Base )
 88 |   headers.each do |header|
 89 |     ## downcase and remove all non-ascii chars etc.
 90 |     ##  todo/fix: remove all non-ascii chars!!!
 91 |     ##  todo: check if header starts with a number too!!
 92 |     name = header.downcase.gsub( ' ', '_' )
 93 |     name = name.to_sym   ## symbol-ify
 94 |     clazz.field( name )
 95 |   end
 96 |   clazz
 97 | end
 98 | 
 99 | def self.read( path, sep: nil )
100 |   headers = CsvReader.header( path, sep: sep )
101 | 
102 |   clazz = build_class( headers )
103 |   clazz.read( path, sep: sep )
104 | end
105 | 
106 | def self.foreach( path, sep: nil, &block )
107 |   headers = CsvReader.header( path, sep: sep )
108 | 
109 |   clazz = build_class( headers )
110 |   clazz.foreach( path, sep: sep, &block )
111 | end
112 | 
113 | 
114 | #########
115 | # alternative class (record) builder
116 | #   note: passes Base and the block on to Record.define (and returns its result)
117 | def self.define( &block )   ## check: rename super_class to base - why? why not?
118 |   Record.define( Base, &block )
119 | end
120 | 
121 | end # module CsvRecord
122 | 


--------------------------------------------------------------------------------
/csvrecord/lib/csvrecord/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | module CsvRecord
 5 | 
 6 |   module Version
 7 |     MAJOR = 0
 8 |     MINOR = 4
 9 |     PATCH = 3
10 |   end
11 |   VERSION = [Version::MAJOR,
12 |              Version::MINOR,
13 |              Version::PATCH].join('.')
14 | 
15 | 
16 |   def self.version
17 |     VERSION
18 |   end
19 | 
20 |   def self.banner
21 |     "csvrecord/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
22 |   end
23 | 
24 |   def self.root
25 |     File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
26 |   end
27 | 
28 | end # module CsvRecord
29 | 


--------------------------------------------------------------------------------
/csvrecord/test/data/beer.csv:
--------------------------------------------------------------------------------
1 | Brewery,City,Name,Abv
2 | Andechser Klosterbrauerei,Andechs,Doppelbock Dunkel,7%
3 | Augustiner Bräu München,München,Edelstoff,5.6%
4 | Bayerische Staatsbrauerei Weihenstephan,Freising,Hefe Weissbier,5.4%
5 | Brauerei Spezial,Bamberg,Rauchbier Märzen,5.1%
6 | Hacker-Pschorr Bräu,München,Münchner Dunkel,5.0%
7 | Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
8 | 


--------------------------------------------------------------------------------
/csvrecord/test/data/beer11.csv:
--------------------------------------------------------------------------------
 1 | #######
 2 | #  try with some comments
 3 | #   and blank lines even before header
 4 | 
 5 | Brewery,City,Name,Abv
 6 | Andechser Klosterbrauerei,Andechs,Doppelbock Dunkel,7%
 7 | Augustiner Bräu München,München,Edelstoff,5.6%
 8 | Bayerische Staatsbrauerei Weihenstephan,Freising,Hefe Weissbier,5.4%
 9 | 
10 | Brauerei Spezial,      Bamberg,  Rauchbier Märzen,   5.1%
11 | 
12 | Hacker-Pschorr Bräu,   München,  Münchner Dunkel,    5.0%
13 | 
14 | ## some more comments here
15 | 
16 | Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
17 | 
18 | ## check for nil
19 | "", ,,"",
20 | 
21 | ## check for blank line with spaces
22 | ##    yes, will get added as a record!! e.g. ["", nil, nil, nil]
23 | ##    use regex to skip blank lines with spaces!!!!
24 |     
25 | 
26 | ## test double quotes and double quotes escaped
27 | ##   note: double quotes do NOT work with leading AND/OR trailing spaces
28 | ##    leads to:
29 | ##    CSV::MalformedCSVError - Missing or stray quote in line xxx
30 | ##
31 | ##   note: for now double quote does not accept leading AND/OR trailing spaces!!!!
32 | ##
33 | ##  todo/fix: check liberal_quote option starting in csv ruby 2.4 ???
34 | ##
35 | ## examples:
36 | ##  "value with comma, comma","some ""hello""","some ""hello""",
37 | ##    works - but does NOT work (note the leading and trailing spaces for double quotes):
38 | ##  "value with comma, comma" ,"some ""hello""",  "some ""hello""",
39 | ##
40 | ## check for "multi-line":
41 | ## "hello
42 | ##  and another line
43 | ##  and another",two,three,
44 | 
45 | 
46 | "value with comma, comma","some ""hello""","some ""hello""",
47 | 
48 | ## check for "multi-line"
49 | "hello
50 |   and another line
51 |   and another",two,three,
52 | 


--------------------------------------------------------------------------------
/csvrecord/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | ## minitest setup
 4 | 
 5 | require 'minitest/autorun'
 6 | 
 7 | 
 8 | ## our own code
 9 | require 'csvrecord'
10 | 
11 | ## add test_data_dir helper
12 | module CsvRecord
13 |   def self.test_data_dir
14 |     "#{root}/test/data"
15 |   end
16 | end
17 | 


--------------------------------------------------------------------------------
/csvrecord/test/test_record_auto.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_record_auto.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestRecordAuto < MiniTest::Test
11 | 
12 | 
13 | def test_read
14 |   beers = CsvRecord.read( "#{CsvRecord.test_data_dir}/beer.csv" ).to_a
15 |   pp beers
16 | 
17 |   assert_equal 6, beers.size
18 |   assert_equal 'Andechser Klosterbrauerei', beers[0].brewery
19 |   assert_equal 'Andechs',                   beers[0].city
20 |   assert_equal 'Doppelbock Dunkel',         beers[0].name
21 |   assert_equal '7%',                        beers[0].abv
22 | end
23 | 
24 | 
25 | def test_foreach
26 |   CsvRecord.foreach( "#{CsvRecord.test_data_dir}/beer.csv" ) do |rec|
27 |     pp rec
28 |     puts "#{rec.name} (#{rec.abv}%) by #{rec.brewery}, #{rec.city}"
29 |   end
30 | 
31 |   assert true
32 | end
33 | 
34 | end # class TestRecordAuto
35 | 


--------------------------------------------------------------------------------
/csvrecord/test/test_version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_version.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
10 | class TestVersion < MiniTest::Test
11 | 
12 |   def test_version
13 |     pp CsvRecord::VERSION
14 |     pp CsvRecord.banner
15 |     pp CsvRecord.root
16 | 
17 |     assert true  ## assume ok if we get here
18 |   end
19 | 
20 | end # class TestVersion
21 | 


--------------------------------------------------------------------------------
/csvutils/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /test/tmp/
 9 | /test/version_tmp/
10 | /tmp/
11 | 
12 | ## Specific to RubyMotion:
13 | .dat*
14 | .repl_history
15 | build/
16 | 
17 | ## Documentation cache and generated files:
18 | /.yardoc/
19 | /_yardoc/
20 | /doc/
21 | /rdoc/
22 | 
23 | ## Environment normalisation:
24 | /.bundle/
25 | /lib/bundler/man/
26 | 
27 | # for a library or gem, you might want to ignore these files since the code is
28 | # intended to run in multiple environments; otherwise, check them in:
29 | # Gemfile.lock
30 | # .ruby-version
31 | # .ruby-gemset
32 | 
33 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34 | .rvmrc
35 | 
36 | 
37 | ####
38 | # add some auto-generated getting started samples
39 | 
40 | getting-started-samples/AUT_2016-2017.csv
41 | getting-started-samples/AUT_2017-2018.csv
42 | 


--------------------------------------------------------------------------------
/csvutils/HISTORY.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2018-08-04
2 | 
3 | * Everything is new. First release.
4 | 


--------------------------------------------------------------------------------
/csvutils/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | LICENSE.md
 3 | Manifest.txt
 4 | README.md
 5 | Rakefile
 6 | bin/csvcut
 7 | bin/csvhead
 8 | bin/csvheader
 9 | bin/csvsplit
10 | bin/csvstat
11 | datasets/at-austria/AUT.csv
12 | datasets/de-deutschland/bundesliga.csv
13 | datasets/eng-england/2017-18/E0.csv
14 | lib/csvutils.rb
15 | lib/csvutils/commands/cut.rb
16 | lib/csvutils/commands/head.rb
17 | lib/csvutils/commands/header.rb
18 | lib/csvutils/commands/split.rb
19 | lib/csvutils/commands/stat.rb
20 | lib/csvutils/cut.rb
21 | lib/csvutils/head.rb
22 | lib/csvutils/header.rb
23 | lib/csvutils/split.rb
24 | lib/csvutils/stat.rb
25 | lib/csvutils/test.rb
26 | lib/csvutils/utils.rb
27 | lib/csvutils/version.rb
28 | test/helper.rb
29 | test/test_cut.rb
30 | test/test_head.rb
31 | test/test_header.rb
32 | test/test_misc.rb
33 | test/test_split.rb
34 | test/test_version.rb
35 | 


--------------------------------------------------------------------------------
/csvutils/NOTES.md:
--------------------------------------------------------------------------------
 1 | # Notes
 2 | 
 3 | 
 4 | ## Todos
 5 | 
 6 | - [ ] use line-by-line reading / streaming for utils - do NOT read all into memory
 7 | - [ ] add "classic" stdin (standard input) support too (e.g. `-` on the command line) or check tty?
 8 | - [ ] package (include) test datasets in gem - why? why not?
 9 | 
10 | 
11 | 
12 | ## More CSV Tools
13 | 
14 | ### Ruby
15 | 
16 | See csvlint in ruby - <https://github.com/theodi/csvlint.rb>  -
17 |  supports validating CSV files to check their syntax and contents;
18 |  by Stuart Harrison (pezholio) et al
19 | 
20 | See ?
21 | 
22 | Add more CSV tools here.
23 | 
24 | 
25 | ### Other Langs
26 | 
See xsv in rust - <https://github.com/BurntSushi/xsv> -
28 |  fast CSV command line toolkit;
29 |  written in Rust by Andrew Gallant (burntsushi) et al
30 | 
31 | See csvkit in python - <https://github.com/wireservice/csvkit>, <https://csvkit.readthedocs.io> -
32 |  a suite of command-line tools for converting to and working with CSV, the king of tabular file formats;
33 |  written in Python by Christopher Groskopf (onyxfish) et al
34 | 
35 | See ?
36 | 
37 | Add more CSV tools here.
38 | 


--------------------------------------------------------------------------------
/csvutils/Rakefile:
--------------------------------------------------------------------------------
## Gem specification for csvutils (via the hoe gem packaging helper).
##  The version is read from lib/csvutils/version.rb.
require 'hoe'
require './lib/csvutils/version.rb'

Hoe.spec 'csvutils' do

  self.version = CsvUtils::VERSION

  self.summary = "csvutils - tools 'n' scripts for working with comma-separated values (csv) datafiles - the world's most popular tabular data interchange format in text"
  self.description = summary

  self.urls = ['https://github.com/csvreader/csvutils']

  self.author = 'Gerald Bauer'
  self.email = 'wwwmake@googlegroups.com'

  # switch extension to .markdown for github formatting
  self.readme_file = 'README.md'
  self.history_file = 'HISTORY.md'

  # runtime dependencies (gem name, version requirement)
  self.extra_deps = [
     ['csvreader',  '>=1.2.3']
  ]

  self.licenses = ['Public Domain']

  # extra gemspec settings passed through as-is
  self.spec_extras = {
   required_ruby_version: '>= 2.2.2'
  }

end
31 | 


--------------------------------------------------------------------------------
/csvutils/bin/csvcut:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | ###################
 4 | # DEV TIPS:
 5 | #
 6 | # For local testing run like:
 7 | #
 8 | #    ruby -Ilib bin/csvcut
 9 | #
10 | # Set the executable bit in Linux. Example:
11 | #
12 | #    % chmod a+x bin/csvcut
13 | #
14 | 
15 | require 'csvutils'
16 | 
17 | CsvTool.cut( ARGV )
18 | 


--------------------------------------------------------------------------------
/csvutils/bin/csvhead:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | ###################
 4 | # DEV TIPS:
 5 | #
 6 | # For local testing run like:
 7 | #
 8 | #    ruby -Ilib bin/csvhead
 9 | #
10 | # Set the executable bit in Linux. Example:
11 | #
12 | #    % chmod a+x bin/csvhead
13 | #
14 | 
15 | require 'csvutils'
16 | 
17 | CsvTool.head( ARGV )
18 | 


--------------------------------------------------------------------------------
/csvutils/bin/csvheader:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | ###################
 4 | # DEV TIPS:
 5 | #
 6 | # For local testing run like:
 7 | #
 8 | #    ruby -Ilib bin/csvheader
 9 | #
10 | # Set the executable bit in Linux. Example:
11 | #
12 | #    % chmod a+x bin/csvheader
13 | #
14 | 
15 | require 'csvutils'
16 | 
17 | CsvTool.header( ARGV )
18 | 


--------------------------------------------------------------------------------
/csvutils/bin/csvsplit:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | ###################
 4 | # DEV TIPS:
 5 | #
 6 | # For local testing run like:
 7 | #
 8 | #    ruby -Ilib bin/csvsplit
 9 | #
10 | # Set the executable bit in Linux. Example:
11 | #
12 | #    % chmod a+x bin/csvsplit
13 | #
14 | 
15 | require 'csvutils'
16 | 
17 | CsvTool.split( ARGV )
18 | 


--------------------------------------------------------------------------------
/csvutils/bin/csvstat:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | ###################
 4 | # DEV TIPS:
 5 | #
 6 | # For local testing run like:
 7 | #
 8 | #    ruby -Ilib bin/csvstat
 9 | #
10 | # Set the executable bit in Linux. Example:
11 | #
12 | #    % chmod a+x bin/csvstat
13 | #
14 | 
15 | require 'csvutils'
16 | 
17 | CsvTool.stat( ARGV )
18 | 


--------------------------------------------------------------------------------
/csvutils/getting-started-samples/start.rb:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  ruby script (data work flow) getting started sample from the csvutils readme
 3 | #    see https://github.com/csv11/csvutils
 4 | #
 5 | 
require 'csvutils'


## print the first records (default: 4) of the datafile
CsvUtils.head( 'ENG.csv' )
# same as:
#  $ csvhead ENG.csv

## read the header row (names of columns); note: returns the row - does NOT print it
CsvUtils.header( 'ENG.csv' )
# same as:
#  $ csvheader ENG.csv

## print row count, null/empty value counts and the unique values of the passed-in columns
CsvUtils.stat( 'ENG.csv', 'Team1', 'Team2' )
# same as:
#  $ csvstat -c Team1,Team2 ENG.csv


CsvUtils.stat( 'AUT.csv', 'Season' )
# same as:
#  $ csvstat -c Season AUT.csv


## split into one datafile per season; the new datafiles get saved next to
##   the source datafile as AUT_<season>.csv (with "/" in values changed to "-")
CsvUtils.split( 'AUT.csv', 'Season' )
# same as:
#  $ csvsplit -c Season AUT.csv

## keep (and reorder) the listed columns only;
##   note: no output passed in - overwrites the datafile in place!!!
CsvUtils.cut( 'AUT_2016-2017.csv', 'Date', 'Team1', 'Team2', 'FT1', 'FT2' )
# same as:
#  $ csvcut -c Date,Team1,Team2,FT1,FT2 AUT_2016-2017.csv
34 | 


--------------------------------------------------------------------------------
/csvutils/getting-started-samples/start.sh:
--------------------------------------------------------------------------------
 1 | ###
 2 | #  command line shell script getting started sample from the csvutils readme
 3 | #    see https://github.com/csv11/csvutils
 4 | #
 5 | 
 6 | ##########################################
 7 | ## try help output of tools
 8 | csvhead -h          # or
 9 | csvhead --help
10 | 
11 | csvheader -h
12 | csvstat -h
13 | csvsplit -h
14 | csvcut -h
15 | 
16 | ####################################################
17 | # Working with Comma-Separated Values (CSV) Datafile Examples
18 | 
19 | csvhead ENG.csv
20 | csvheader ENG.csv
21 | csvstat -c Team1,Team2 ENG.csv
22 | 
23 | #####################################################
24 | # Split & Cut - Split One Datafile into Many or Cut / Reorder Columns
25 | 
26 | csvstat -c Season AUT.csv
27 | csvsplit -c Season AUT.csv
28 | csvcut -c Date,Team1,Team2,FT1,FT2 AUT_2016-2017.csv
29 | csvhead AUT_2016-2017.csv
30 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'date'
 4 | require 'fileutils'
 5 | require 'optparse'
 6 | 
 7 | 
 8 | require 'csvreader'
 9 | 
10 | 
11 | 
12 | ###
13 | # our own code
14 | require 'csvutils/version' # let version always go first
15 | require 'csvutils/utils'
16 | require 'csvutils/split'
17 | require 'csvutils/cut'
18 | require 'csvutils/test'
19 | require 'csvutils/stat'
20 | require 'csvutils/header'
21 | require 'csvutils/head'
22 | 
23 | require 'csvutils/commands/head'
24 | require 'csvutils/commands/header'
25 | require 'csvutils/commands/stat'
26 | require 'csvutils/commands/cut'
27 | require 'csvutils/commands/split'
28 | 
29 | 
30 | 
# say hello - print the version banner, but only in debug mode
#   (that is, if ruby's $DEBUG flag or the $RUBYCOCO_DEBUG global is set)
puts CsvUtils.banner    if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
33 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/commands/cut.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvTool
 5 | 
 6 | ## command line tools
 7 | def self.cut( args )
 8 | 
 9 |   config = { columns: [] }
10 | 
11 |   parser = OptionParser.new do |opts|
12 |      opts.banner = "Usage: csvcut [OPTS] source [dest]"
13 | 
14 |      opts.on("-c", "--columns=COLUMNS", "Name of header columns" ) do |columns|
15 |        config[:columns] = columns.split(/[,|;]/)   ## allow differnt separators
16 |      end
17 | 
18 |      opts.on("-h", "--help", "Prints this help") do
19 |        puts opts
20 |        exit
21 |      end
22 |   end
23 | 
24 |   parser.parse!( args )
25 | 
26 |   ## pp config
27 |   ## pp args
28 | 
29 |   source = args[0]
30 |   dest   = args[1] || source   ## default to same as source (note: overwrites datafile in place!!!)
31 | 
32 |   unless args[0]
33 |     puts "** error: arg missing - source filepath required - #{args.inspect}"
34 |     exit 1
35 |   end
36 | 
37 |   columns = config[:columns]
38 | 
39 |   CsvUtils.cut( source, *columns, output: dest )
40 | end
41 | 
42 | 
43 | end # class CsvTool
44 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/commands/head.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvTool
 5 | 
 6 | ## command line tools
 7 | def self.head( args )
 8 | 
 9 |   config = { n: 4 }
10 | 
11 |   parser = OptionParser.new do |opts|
12 |      opts.banner = "Usage: csvhead [OPTS] datafile ..."
13 | 
14 |      opts.on("-n", "--num=NUM", "Number of rows" ) do |num|
15 |        config[:n] = num.to_i
16 |      end
17 | 
18 |      opts.on("-h", "--help", "Prints this help") do
19 |        puts opts
20 |        exit
21 |      end
22 |   end
23 | 
24 |   parser.parse!( args )
25 | 
26 |   ## pp config
27 |   ## pp args
28 | 
29 |   args.each do |arg|
30 |     path = arg
31 |     n = config[:n]
32 | 
33 |     puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
34 |     puts
35 |     CsvUtils.head( path, n: n )
36 |     puts
37 |   end # each arg
38 | end
39 | 
40 | end # class CsvTool
41 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/commands/header.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvTool
 5 | 
 6 | ## command line tools
 7 | def self.header( args )
 8 | 
 9 |   config = {}
10 | 
11 |   parser = OptionParser.new do |opts|
12 |      opts.banner = "Usage: csvheader [OPTS] datafile ..."
13 | 
14 |      opts.on("-h", "--help", "Prints this help") do
15 |        puts opts
16 |        exit
17 |      end
18 |   end
19 | 
20 |   parser.parse!( args )
21 | 
22 |   ## pp config
23 |   ## pp args
24 | 
25 |   args.each do |arg|
26 |     path = arg
27 | 
28 |     puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
29 |     puts
30 |     CsvUtils.pp_header( CsvUtils.header( path ) )
31 |     puts
32 |   end # each arg
33 | end
34 | 
35 | end # class CsvTool
36 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/commands/split.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvTool
 5 | 
 6 | ## command line tools
 7 | def self.split( args )
 8 | 
 9 |   config = { columns: [] }
10 | 
11 |   parser = OptionParser.new do |opts|
12 |      opts.banner = "Usage: csvsplit [OPTS] datafile ..."
13 | 
14 |      opts.on("-c", "--columns=COLUMNS", "Name of header columns" ) do |columns|
15 |        config[:columns] = columns.split(/[,|;]/)   ## allow differnt separators
16 |      end
17 | 
18 |      opts.on("-h", "--help", "Prints this help") do
19 |        puts opts
20 |        exit
21 |      end
22 |   end
23 | 
24 |   parser.parse!( args )
25 | 
26 |   ## pp config
27 |   ## pp args
28 | 
29 |   args.each do |arg|
30 |     path = arg
31 |     columns = config[:columns]
32 | 
33 |     puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
34 |     puts
35 |     CsvUtils.split( path, *columns )
36 |     puts
37 |   end
38 | end
39 | 
40 | 
41 | end # class CsvTool
42 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/commands/stat.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvTool
 5 | 
 6 | ## command line tools
 7 | def self.stat( args )
 8 | 
 9 |   config = { columns: [] }
10 | 
11 |   parser = OptionParser.new do |opts|
12 |      opts.banner = "Usage: csvstat [OPTS] datafile ..."
13 | 
14 |      opts.on("-c", "--columns=COLUMNS", "Name of header columns" ) do |columns|
15 |        config[:columns] = columns.split(/[,|;]/)   ## allow differnt separators
16 |      end
17 | 
18 |      opts.on("-h", "--help", "Prints this help") do
19 |        puts opts
20 |        exit
21 |      end
22 |   end
23 | 
24 |   parser.parse!( args )
25 | 
26 |   ## pp config
27 |   ## pp args
28 | 
29 |   args.each do |arg|
30 |     path = arg
31 |     columns = config[:columns]
32 | 
33 |     puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
34 |     puts
35 |     CsvUtils.stat( path, *columns )
36 |     puts
37 |   end # each arg
38 | end
39 | 
40 | 
41 | end # class CsvTool
42 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/cut.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ## check/use class or module ???
 4 | 
 5 | 
 6 | class CsvUtils
 7 | 
 8 |   def self.cut( path, *columns, output: path, sep: ',' )
 9 | 
10 |     inpath  = path
11 |     outpath = output   # note: output defaults to inpath (overwrites datafile in-place!!!)
12 | 
13 |     puts "cvscut in: >#{inpath}<  out: >#{outpath}<"
14 | 
15 |     ##  ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
16 |     puts "columns:"
17 |     pp columns
18 | 
19 |     csv_options = { sep: sep }
20 | 
21 |     recs = CsvHash.read( inpath, csv_options )
22 | 
23 | 
24 |     ## for convenience - make sure parent folders/directories exist
25 |     FileUtils.mkdir_p( File.dirname( outpath ))  unless Dir.exists?( File.dirname( outpath ))
26 | 
27 | 
28 |     ## note:
29 |     ##  todo/fix: add two trailing spaces for pretty printing - why? why not?
30 |     File.open( outpath, 'w:utf-8' ) do |out|
31 |       out << csv_row( *columns, sep: sep ).join( sep )   ## for row add headers/columns
32 |       out << "\n"
33 |       recs.each do |rec|
34 |         values = columns.map { |col| rec[col] }  ## find data for column
35 |         out << csv_row( *values, sep: sep ).join( sep )
36 |         out << "\n"
37 |       end
38 |     end
39 | 
40 |     puts 'Done.'
41 |   end  ## method self.cut
42 | 
43 | end # class CsvUtils
44 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/head.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvUtils
 5 | 
 6 |   ## test or dry run to check if rows can get read/scanned
 7 |   def self.head( path, sep: ',', n: 4 )
 8 |     i = 0
 9 |     csv_options = { sep: sep }
10 | 
11 |     CsvHash.foreach( path, csv_options ) do |rec|
12 |       i += 1
13 | 
14 |       pp rec
15 | 
16 |       break if i >= n
17 |     end
18 | 
19 |     puts " #{i} records"
20 |   end
21 | 
22 | end  # class CsvUtils
23 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/header.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvUtils
 5 | 
 6 |   def self.header( path, sep: ',', debug: false )   ## use header or headers - or use both (with alias)?
 7 |     row = CsvReader.header( path, sep: sep )
 8 | 
 9 |     pp row   if debug
10 |     ## e.g.:
11 |     #  "Country,League,Season,Date,Time,Home,Away,HG,AG,Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA\n"
12 | 
13 |     row
14 |   end  # method self.header
15 | 
16 | end  # class CsvUtils
17 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/split.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | 
  4 | 
  5 | 
  6 | class CsvUtils
  7 | 
  8 |   def self.split( path, *columns, sep: ',', &blk )
  9 | 
 10 |     puts "cvssplit in: >#{path}<"
 11 | 
 12 |     ##  ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
 13 |     puts "columns:"
 14 |     pp columns
 15 | 
 16 |     ## note: do NOT use headers
 17 |     ##   for easy sorting use "plain" array of array for records
 18 |     csv_options = { sep: sep }
 19 | 
 20 |     data = CsvReader.read( path, csv_options )
 21 | 
 22 |     ## todo/check: (auto-) strip (remove all leading and trailing spaces)
 23 |     ##     from all values - why? why not?
 24 |     ##   check if CSV.parse has an option for it?
 25 | 
 26 |     headers = data.shift   ## remove top array item (that is, row with headers)
 27 | 
 28 |     header_mapping = {}
 29 |     headers.each_with_index  { | header,i | header_mapping[header]=i }
 30 |     pp header_mapping
 31 | 
 32 |     ## map columns to array indices e.g. ['Season', 'Div'] => [1,2]
 33 |     column_indices = columns.map { |col| header_mapping[col] }
 34 |     pp column_indices
 35 | 
 36 | 
 37 |     ###################################################
 38 |     ## note: sort data by columns (before split)
 39 |     data = data.sort do |row1,row2|
 40 |        res = 0
 41 |        column_indices.each do |col|
 42 |          res = row1[col] <=> row2[col]    if res == 0
 43 |        end
 44 |        res
 45 |     end
 46 | 
 47 |     chunk = []
 48 |     data.each_with_index do |row,i|
 49 |       chunk << row
 50 | 
 51 |       next_row = data[i+1]
 52 | 
 53 |       changed = false
 54 |       if next_row.nil?   ## end-of-file
 55 |         changed = true
 56 |       else
 57 |         column_indices.each do |col|
 58 |           if row[col] != next_row[col]
 59 |              changed = true
 60 |              break   ## out of each column_indices loop
 61 |            end
 62 |         end
 63 |       end
 64 | 
 65 |       if changed
 66 |         puts "save new chunk:"
 67 |         column_values = column_indices.map {|col| row[col] }
 68 |         pp column_values
 69 | 
 70 |         # note: add header(s) row upfront (as first row) to chunk (with unshift)
 71 |         chunk_with_headers = chunk.unshift( headers )
 72 |         if blk
 73 |           yield( column_values, chunk_with_headers )
 74 |         else
 75 |           ## auto-save (write-to-file) by default - why? why not?
 76 |           split_write( path, column_values, chunk_with_headers, sep: sep )
 77 |         end
 78 | 
 79 |         chunk = []   ## reset chunk for next batch of records
 80 |       end
 81 |     end
 82 | 
 83 |     puts 'Done.'
 84 |   end  ## method self.split
 85 | 
 86 | 
 87 |   def self.split_write( inpath, values, chunk, sep: )
 88 |     basename = File.basename( inpath, '.*' )
 89 |     dirname  = File.dirname( inpath )
 90 | 
 91 |     ## check/change invalid filename chars
 92 |     ##  e.g. change 1990/91 to 1990-91
 93 |     extraname = values.map {|value| value.tr('/','-')}.join('~')
 94 | 
 95 |     outpath = "#{dirname}/#{basename}_#{extraname}.csv"
 96 |     puts "saving >#{basename}_#{extraname}.csv<..."
 97 | 
 98 |     File.open( outpath, 'w:utf-8' ) do |out|
 99 |       chunk.each do |row|
100 |         out << csv_row( *row, sep: sep ).join( sep )
101 |         out << "\n"
102 |       end
103 |     end
104 |   end
105 | 
106 | end # class CsvUtils
107 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/stat.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvUtils
 5 | 
 6 |   def self.stat( path, *columns, sep: ',', debug: false )
 7 | 
 8 |     csv_options = { sep: sep }
 9 | 
10 |     values = {}
11 |     nulls  = {}
12 |     # check 1) nulls/nils (e.g. empty strings ""),
13 |     #       2) not/appliation or available  n/a NA or NaN or ...
14 |     #       3) missing - e.g. ?
15 | 
16 |     i=0
17 |     CsvHash.foreach( path, csv_options ) do |rec|
18 |       i += 1
19 | 
20 |       pp rec    if i == 1 && debug
21 | 
22 |       print '.' if i % 100 == 0
23 | 
24 |       ## collect unique values for passed in columns
25 |       columns.each do |col|
26 |         value = rec[col]    ## note: value might be nil!!!!!
27 | 
28 |         values[col] ||= Hash.new(0)
29 |         values[col][ value ? value : '<nil>' ] +=1
30 |       end
31 | 
32 |       ## alway track nulls - why? why not
33 |       rec.each do |col,value|
34 |         ## if value.nil?    ## todo/check - nil value possible (not always empty string - why? why not?)
35 |         ##   puts "[debug] nil value in row:"
36 |         ##   puts "#{col} = #{value.inspect} : #{value.class.name}"
37 |         ## end
38 | 
39 |         if value.nil?
40 |           nulls[col] ||= Hash.new(0)
41 |           nulls[col]['nil'] +=1
42 |         elsif value.empty?
43 |           nulls[col] ||= Hash.new(0)
44 |           nulls[col]['empty'] +=1
45 |         elsif ['na', 'n/a', '-'].include?( value.downcase )
46 |           nulls[col] ||= Hash.new(0)
47 |           nulls[col]['na'] +=1
48 |         elsif value == '?'    ## check for (?) e.g. value.include?( '(?)') - why? why not?
49 |           nulls[col] ||= Hash.new(0)
50 |           nulls[col]['?'] +=1
51 |         else
52 |           # do nothing; "regular" value
53 |         end
54 |       end
55 |     end
56 | 
57 |     puts " #{i} rows"
58 |     puts
59 |     puts " nils/nulls :: empty strings :: na / n/a / undefined :: missing (?):"
60 |     puts "   #{nulls.inspect}"
61 |     puts
62 | 
63 |     ## dump headers first (first row with names of columns)
64 |     headers = header( path, sep: sep, debug: debug )
65 |     pp_header( headers )  ## pretty print header columns
66 |     puts
67 | 
68 |     if values.any?
69 |        ## pretty print (pp) / dump unique values for passed in columns
70 |        values.each do |col,h|
71 |          puts " column >#{col}< #{h.size} unique values:"
72 |          ## sort by name/value for now (not frequency) - change - why? why not?
73 |          sorted_values = h.to_a.sort {|l,r| l[0] <=> r[0] }
74 |          sorted_values.each do |rec|
75 |            puts "   #{rec[1]} x  #{rec[0]}"
76 |          end
77 |        end
78 |     end
79 |   end # method self.stat
80 | 
81 | end  # class CsvUtils
82 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/test.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvUtils
 5 | 
 6 |   ## test or dry run to check if rows can get read/scanned
 7 |   def self.test( path, sep: ',' )
 8 |     i = 0
 9 |     csv_options = { sep: sep }
10 | 
11 |     CsvHash.foreach( path, csv_options ) do |rec|
12 |       i += 1
13 |       print '.' if i % 100 == 0
14 |     end
15 | 
16 |     puts " #{i} rows"
17 |   end
18 | 
19 | end  # class CsvUtils
20 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/utils.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvUtils
 5 | 
 6 |   def self.pp_header( headers )  ## check: rename to print_headers or prettyprint_header - why? why not?
 7 |     puts "#{headers.size} columns:"
 8 |     headers.each_with_index do |header,i|
 9 |       puts "  #{i+1}: #{header}"
10 |     end
11 |   end
12 | 
13 | 
14 |   ###################
15 |   ## (simple) helper for "csv-encoding" values / row
16 |   ##
17 |   ##  todo: check for newline in value too? why? why not?
18 |   def self.csv_row( *values, sep: ',' )
19 |     values.map do |value|
20 |        if value && (value.index( sep ) || value.index('"'))
21 |          ## double quotes and enclose in double qoutes
22 |          value = %Q{"#{value.gsub('"', '""')}"}
23 |        else
24 |          value
25 |        end
26 |     end
27 |   end
28 | 
29 | end  # class CsvUtils
30 | 


--------------------------------------------------------------------------------
/csvutils/lib/csvutils/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | ## note: for now CsvUtils is a class!!! NOT a module - change - why? why not?
 5 | class CsvUtils
 6 | 
 7 |   MAJOR = 0    ## todo: namespace inside version or something - why? why not??
 8 |   MINOR = 3
 9 |   PATCH = 0
10 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
11 | 
12 |   def self.version
13 |     VERSION
14 |   end
15 | 
16 |   def self.banner
17 |     "csvutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18 |   end
19 | 
20 |   def self.root
21 |     File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
22 |   end
23 | 
24 | end # class CsvUtils
25 | 


--------------------------------------------------------------------------------
/csvutils/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | ## minitest setup
 4 | 
 5 | require 'minitest/autorun'
 6 | 
 7 | 
 8 | ## our own code
 9 | require 'csvutils'
10 | 
11 | ## add test_data_dir helper
class CsvUtils
  class << self
    ## path of the folder holding the test datafiles
    ##   (that is, "datasets" below the folder returned by root)
    def test_data_dir
      [root, 'datasets'].join( '/' )
    end
  end
end
17 | 


--------------------------------------------------------------------------------
/csvutils/test/test_cut.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_cut.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## Tests cutting (keeping and reordering) columns via CsvUtils.cut;
##   the output datafiles get saved to the ./tmp folder.
##
## note: uses Minitest::Test - the (legacy) MiniTest alias is no longer
##   loaded by default in newer minitest versions
class TestCut < Minitest::Test

  def test_eng
    path    = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
    columns = [ 'HomeTeam', 'FTHG', 'FTAG', 'AwayTeam', 'Date' ]
    output  = './tmp/cut_test_eng.csv'
    CsvUtils.cut( path, *columns, output: output )

    assert File.exist?( output )
  end

  def test_at
    path    = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
    columns = [ 'Home', 'HG', 'AG', 'Away', 'Date', 'Time' ]
    output  = './tmp/cut_test_at.csv'
    CsvUtils.cut( path, *columns, output: output )

    assert File.exist?( output )
  end

  ## note: the german datafile uses a semicolon (;) as separator
  def test_de
    path    = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
    columns = ['Saison', 'Spieltag',
               'Heim', 'Ergebnis', 'Gast', 'Datum', 'Uhrzeit' ]
    output  = './tmp/cut_test_de.csv'
    CsvUtils.cut( path, *columns, sep: ';', output: output )

    assert File.exist?( output )
  end

end # class TestCut
32 | 


--------------------------------------------------------------------------------
/csvutils/test/test_head.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_head.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## Smoke tests for CsvUtils.head (printing the first records of a datafile);
##   a test passes if no exception gets raised.
##
## note: uses Minitest::Test - the (legacy) MiniTest alias is no longer
##   loaded by default in newer minitest versions
class TestHead < Minitest::Test

  def test_eng
    path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"

    CsvUtils.head( path )
  end

  def test_at
    path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"

    CsvUtils.head( path )
  end

  ## note: the german datafile uses a semicolon (;) as separator
  def test_de
    path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"

    CsvUtils.head( path, sep: ';' )
  end

end # class TestHead
31 | 


--------------------------------------------------------------------------------
/csvutils/test/test_header.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_header.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
## Tests reading the header row (names of columns) via CsvUtils.header.
##
## note: uses Minitest::Test - the (legacy) MiniTest alias is no longer
##   loaded by default in newer minitest versions
class TestHeader < Minitest::Test


##
# Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,
#   Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,
#    B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,
#    WHH,WHD,WHA,VCH,VCD,VCA,
#    Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,
#    BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
  def test_eng
    path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"

    headers = CsvUtils.header( path )
    pp headers

    assert_equal ['Date','HomeTeam','AwayTeam','FTHG','FTAG','HTHG','HTAG'], headers
  end

###
# Country,League,Season,Date,Time,Home,Away,HG,AG,
#  Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA
  def test_at
    path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"

    headers =  CsvUtils.header( path )
    pp headers

    assert_equal ['Season','Date','Time','Home','Away','HG','AG'], headers
  end

  ## note: the german datafile uses a semicolon (;) as separator
  def test_de
    path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"

    headers =  CsvUtils.header( path, sep: ';' )
    pp headers

    assert_equal ['Spielzeit','Saison','Spieltag','Datum','Uhrzeit','Heim','Gast','Ergebnis','Halbzeit'], headers
  end

end # class TestHeader
51 | 


--------------------------------------------------------------------------------
/csvutils/test/test_misc.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_misc.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
##
# Smoke tests for CsvUtils.test and CsvUtils.stat
#   (stat is called both without and with column names).
#   A test passes if none of the calls raises.
#
# note: uses Minitest::Test - the old camel-case MiniTest alias
#   is no longer defined by default in newer minitest releases (5.19+).
class TestMiscellaneous < Minitest::Test

  def test_eng
    path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"

    CsvUtils.test( path )

    CsvUtils.stat( path )
    CsvUtils.stat( path, 'HomeTeam', 'AwayTeam' )

    assert true   ## assume ok if we get here
  end

  def test_test_de
    path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"

    ## note: this datafile is read with a semicolon (;) separator
    CsvUtils.test( path, sep: ';' )

    CsvUtils.stat( path, sep: ';' )
    CsvUtils.stat( path, 'Spielzeit', 'Saison', 'Heim', 'Gast', sep: ';' )

    assert true   ## assume ok if we get here
  end

  def test_test_at
    path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"

    CsvUtils.test( path )

    CsvUtils.stat( path )
    CsvUtils.stat( path, 'Season', 'Home', 'Away' )
    assert true   ## assume ok if we get here
  end

end # class TestMiscellaneous
45 | 


--------------------------------------------------------------------------------
/csvutils/test/test_split.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_split.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
##
# Smoke tests for CsvUtils.split - pretty prints the two block
#   arguments (values, chunk) for every yield; makes no assertions.
#
# note: uses Minitest::Test - the old camel-case MiniTest alias
#   is no longer defined by default in newer minitest releases (5.19+).
class TestSplit < Minitest::Test

  def test_eng
    path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
    columns = [ 'HomeTeam' ]
    CsvUtils.split( path, *columns ) do |values, chunk|
      pp values
      pp chunk
    end
  end


  def test_de
    path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
    columns = ['Saison', 'Spieltag' ]
    ## note: this datafile is read with a semicolon (;) separator
    CsvUtils.split( path, *columns, sep: ';' ) do |values, chunk|
      pp values
      pp chunk
    end
  end

end # class TestSplit
32 | 


--------------------------------------------------------------------------------
/csvutils/test/test_version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_version.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
##
# Smoke test - pretty prints the version, banner and root of CsvUtils.
#
# note: uses Minitest::Test - the old camel-case MiniTest alias
#   is no longer defined by default in newer minitest releases (5.19+).
class TestVersion < Minitest::Test

  def test_version
    pp CsvUtils::VERSION
    pp CsvUtils.banner
    pp CsvUtils.root

    assert true  ## assume ok if we get here
  end

end # class TestVersion
21 | 


--------------------------------------------------------------------------------
/csvyaml/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /test/tmp/
 9 | /test/version_tmp/
10 | /tmp/
11 | 
12 | ## Specific to RubyMotion:
13 | .dat*
14 | .repl_history
15 | build/
16 | 
17 | ## Documentation cache and generated files:
18 | /.yardoc/
19 | /_yardoc/
20 | /doc/
21 | /rdoc/
22 | 
23 | ## Environment normalisation:
24 | /.bundle/
25 | /vendor/bundle
26 | /lib/bundler/man/
27 | 
28 | # for a library or gem, you might want to ignore these files since the code is
29 | # intended to run in multiple environments; otherwise, check them in:
30 | # Gemfile.lock
31 | # .ruby-version
32 | # .ruby-gemset
33 | 
34 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
35 | .rvmrc
36 | 


--------------------------------------------------------------------------------
/csvyaml/HISTORY.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2018-10-18
2 | 
3 | * Everything is new. First release
4 | 


--------------------------------------------------------------------------------
/csvyaml/Manifest.txt:
--------------------------------------------------------------------------------
 1 | HISTORY.md
 2 | LICENSE.md
 3 | Manifest.txt
 4 | README.md
 5 | Rakefile
 6 | datasets/hello.yaml.csv
 7 | datasets/hello11.yaml.csv
 8 | lib/csvyaml.rb
 9 | lib/csvyaml/parser.rb
10 | lib/csvyaml/version.rb
11 | test/helper.rb
12 | test/test_parser.rb
13 | test/test_parser_misc.rb
14 | 


--------------------------------------------------------------------------------
/csvyaml/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/csvyaml/version.rb'
 3 | 
 4 | Hoe.spec 'csvyaml' do
 5 | 
 6 |   self.version = CsvYaml::VERSION
 7 | 
 8 |   self.summary = "csvyaml - read tabular data in the CSV <3 YAML format, that is, comma-separated values CSV (line-by-line) records with yaml ain't markup language (YAML) encoding rules"
 9 |   self.description = summary
10 | 
11 |   self.urls    = ['https://github.com/csvreader/csvyaml']
12 | 
13 |   self.author  = 'Gerald Bauer'
14 |   self.email   = 'wwwmake@googlegroups.com'
15 | 
16 |   # switch extension to .markdown for gihub formatting
17 |   self.readme_file  = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.extra_deps = [
21 |   ]
22 | 
23 |   self.licenses = ['Public Domain']
24 | 
25 |   self.spec_extras = {
26 |     required_ruby_version: '>= 2.2.2'
27 |   }
28 | 
29 | end
30 | 


--------------------------------------------------------------------------------
/csvyaml/datasets/hello.yaml.csv:
--------------------------------------------------------------------------------
1 | 1,John,12 Totem Rd. Aspen,true
2 | 2,Bob,null,false
3 | 3,Sue,"Bigsby, 345 Carnival, WA 23009",false
4 | 


--------------------------------------------------------------------------------
/csvyaml/datasets/hello11.yaml.csv:
--------------------------------------------------------------------------------
1 | # hello world
2 | 
3 | 1, John, 12 Totem Rd. Aspen,               true
4 | 2, Bob,  null,                             false
5 | 3, Sue,  "Bigsby, 345 Carnival, WA 23009", false
6 | 


--------------------------------------------------------------------------------
/csvyaml/lib/csvyaml.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'pp'
 4 | require 'yaml'
 5 | require 'logger'
 6 | 
 7 | ## our own code
 8 | require 'csvyaml/version'    # note: let version always go first
 9 | require 'csvyaml/parser'
10 | 
11 | 
12 | 
## add some "alternative" shortcut aliases
##   note: all four constants reference the very same CsvYaml class object
CSV_YAML = CsvYaml
CSVYAML  = CsvYaml
CSVY     = CsvYaml
CsvY     = CsvYaml


# say hello
#   note: prints the banner only if ruby runs in debug mode ($DEBUG)
#         or the $RUBYCOCO_DEBUG global is defined and truthy
puts CsvYaml.banner     if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
22 | 


--------------------------------------------------------------------------------
/csvyaml/lib/csvyaml/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | class CsvYaml
 5 | 
 6 |   MAJOR = 1
 7 |   MINOR = 0
 8 |   PATCH = 0
 9 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
10 | 
11 | 
12 |   def self.version
13 |     VERSION
14 |   end
15 | 
16 |   def self.banner
17 |     "csvyaml/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18 |   end
19 | 
20 |   def self.root
21 |     "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
22 |   end
23 | 
24 | end # module CsvYaml
25 | 


--------------------------------------------------------------------------------
/csvyaml/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | ## minitest setup
 4 | 
 5 | require 'minitest/autorun'
 6 | 
 7 | 
 8 | ## our own code
 9 | require 'csvyaml'
10 | 
11 | 
12 | ## add test_data_dir helper
class CsvYaml
  class << self
    ## test helper: the datasets directory below the root directory
    def test_data_dir
      "#{root}/datasets"
    end
  end
end
18 | 
19 | 
20 | CsvYaml.logger.level = :debug   ## turn on "global" logging
21 | 


--------------------------------------------------------------------------------
/csvyaml/test/test_parser.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | ###
  4 | #  to run use
  5 | #     ruby -I ./lib -I ./test test/test_parser.rb
  6 | 
  7 | 
  8 | require 'helper'
  9 | 
 10 | class TestParser < MiniTest::Test
 11 | 
 12 | 
 13 | def parser
 14 |   CsvYaml
 15 | end
 16 | 
 17 | 
 18 | def records   ## "standard" records for testing
 19 |   [[1, "John", "12 Totem Rd. Aspen",             true],
 20 |    [2, "Bob",  nil,                              false],
 21 |    [3, "Sue",  "Bigsby, 345 Carnival, WA 23009", false]]
 22 | end
 23 | 
 24 | 
 25 | 
 26 | def test_parse
 27 |   assert_equal records, parser.parse( <<TXT )
 28 | 1,John,12 Totem Rd. Aspen,true
 29 | 2,Bob,null,false
 30 | 3,Sue,"Bigsby, 345 Carnival, WA 23009",false
 31 | TXT
 32 | 
 33 |   assert_equal records, parser.parse( <<TXT )
 34 | # hello world
 35 | 
 36 | 1,John,12 Totem Rd. Aspen,true
 37 | 2,Bob,null,false
 38 | 3,Sue,"Bigsby, 345 Carnival, WA 23009",false
 39 | TXT
 40 | 
 41 |   assert_equal records, parser.parse( <<TXT )
 42 |   # hello world (pretty printed)
 43 | 
 44 |   1, John, 12 Totem Rd. Aspen,               true
 45 |   2, Bob,  null,                             false
 46 |   3, Sue,  "Bigsby, 345 Carnival, WA 23009", false
 47 | 
 48 |   # try more comments and empty lines
 49 | 
 50 | TXT
 51 | 
 52 | 
 53 |   txt =<<TXT
 54 |   # hello world
 55 |   1,John,12 Totem Rd. Aspen,true
 56 |   2,Bob,null,false
 57 |   3,Sue,"Bigsby, 345 Carnival, WA 23009",false
 58 | TXT
 59 | 
 60 |   recs = []
 61 |   parser.parse( txt ) { |rec| recs << rec }
 62 |   assert_equal records, recs
 63 | end
 64 | 
 65 | 
 66 | def test_read
 67 |   assert_equal records, parser.read( "#{CsvYaml.test_data_dir}/hello.yaml.csv" )
 68 |   assert_equal records, parser.read( "#{CsvYaml.test_data_dir}/hello11.yaml.csv" )
 69 | end
 70 | 
 71 | 
 72 | def test_open
 73 |   assert_equal records, parser.open( "#{CsvYaml.test_data_dir}/hello.yaml.csv", "r:bom|utf-8" ).read
 74 |   assert_equal records, parser.open( "#{CsvYaml.test_data_dir}/hello11.yaml.csv", "r:bom|utf-8" ).read
 75 | end
 76 | 
 77 | 
 78 | def test_foreach
 79 |   recs = []
 80 |   parser.foreach( "#{CsvYaml.test_data_dir}/hello.yaml.csv" ) { |rec| recs << rec }
 81 |   assert_equal records, recs
 82 | 
 83 |   recs = []
 84 |   parser.foreach( "#{CsvYaml.test_data_dir}/hello11.yaml.csv" ) { |rec| recs << rec }
 85 |   assert_equal records, recs
 86 | end
 87 | 
 88 | 
 89 | def test_enum
 90 |   csv = CsvYaml.new( <<TXT )
 91 |   # hello world
 92 | 
 93 |   1,John,12 Totem Rd. Aspen,true
 94 |   2,Bob,null,false
 95 |   3,Sue,"Bigsby, 345 Carnival, WA 23009",false
 96 | TXT
 97 | 
 98 |   it = csv.to_enum
 99 |   assert_equal [1, "John", "12 Totem Rd. Aspen",             true],  it.next
100 |   assert_equal [2, "Bob",  nil,                              false], it.next
101 |   assert_equal [3, "Sue",  "Bigsby, 345 Carnival, WA 23009", false], it.next
102 | end
103 | 
104 | end # class TestParser
105 | 


--------------------------------------------------------------------------------
/csvyaml/test/test_parser_misc.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_parser_misc.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
##
# More CsvYaml.parse tests - quotes and commas inside values,
#   array values and a mix of all kinds of values.
#
# note: uses Minitest::Test - the old camel-case MiniTest alias
#   is no longer defined by default in newer minitest releases (5.19+).
class TestParserMisc < Minitest::Test


## the parser (class) under test
def parser
  CsvYaml
end


def test_quotes_and_commas
  assert_equal [
    [1, "John", "12 Totem Rd., Aspen", true],
    [2, "Bob",  nil,                   false],
    [3, "Sue", "\"Bigsby\", 345 Carnival, WA 23009", false]
    ], parser.parse( <<TXT )
  1,John,"12 Totem Rd., Aspen",true
  2,Bob,null,false
  3,Sue,"\\"Bigsby\\", 345 Carnival, WA 23009",false
TXT
end


def test_arrays
  assert_equal [
    [1, "directions", ["north","south","east","west"]],
    [2, "colors", ["red","green","blue"]],
    [3, "drinks", ["soda","water","tea","coffe"]],
    [4, "spells", []],
  ], parser.parse( <<TXT )
  # CSV <3 YAML with array values

  1,directions,[north,south,east,west]
  2,colors,[red,green,blue]
  3,drinks,[soda,water,tea,coffe]
  4,spells,[]
TXT
end

def test_misc
  ## note:
  ##   in the csv <3 yaml source text (inside the ruby heredoc)
  ##   backslash needs to get doubled / escaped twice e.g.
  ##   \\"  for quotes
  ##   \\n  for newlines and so on

  assert_equal [
    ["index", "value1", "value2"],
    ["number", 1, 2],
    ["boolean", false, true],
    ["null", nil, "non null"],
    ["array of numbers", [1], [1,2]],
    ["simple object", {"a" => 1}, {"a" => 1, "b" => 2}],
    ["array with mixed objects", [1, nil,"ball"], [2,{"a" => 10, "b" => 20},"cube"]],
    ["string with quotes", "a\"b", "alert(\"Hi!\")"],
    ["string with bell&newlines","bell is \u0007","multi\nline\ntext"]
  ], parser.parse( <<TXT )
  # CSV with all kinds of values

  index,value1,value2
  number,1,2
  boolean,false,true
  "null",null,non null
  array of numbers,[1],[1,2]
  ## note: key:value pairs need a space after colon!!! NOT working {a:1},{a:1, b:2}
  simple object,{a: 1},{a: 1, b: 2}
  ## note: again - key:value pairs need a space after colon!!! NOT working {a:10, b:20}
  array with mixed objects,[1,null,ball],[2,{a: 10,b: 20},cube]
  string with quotes,"a\\"b","alert(\\"Hi!\\")"
  string with bell&newlines,"bell is \\u0007","multi\\nline\\ntext"
TXT

end


end # class TestParserMisc
83 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # Comma-separated values (csv) scripts & tools docs
 2 | 
 3 | 
 4 | 
 5 | ## Article Series - Why the CSV standard library is broken (and how to fix it)
 6 | 
 7 | <!--  comment out introduction
 8 |         
 9 | ### Introduction
10 | 
11 | <details>
12 |   <summary>Show/Hide Text</summary>
13 | 
14 | 
15 | Reminder:  Dear [James Edward Gray II](https://twitter.com/JEG2), We love you. We thank you for your code.
16 | You're a genius. You're beautiful. [We stand on your shoulders. You're a giant.¹](https://en.wikipedia.org/wiki/Standing_on_the_shoulders_of_giants)
17 | Please, please, please - these articles are NOT about you.
18 | It's about the code and how to fix it.
19 | 
20 | >  I'm seeing from you is that we should not consider people's feelings when criticizing their work. [...]
21 | >  Please take time to sit down [..] and offer an apology to the author of the CSV library.
22 | 
23 | [I Apologize - Sorry, Sorry, Sorry - Why the standard CSV library author deserves our hugs and thank yous and why new giants are wanted »](sorry-sorry-sorry.md)
24 | 
25 | 
26 | ---
27 | ¹: stand on someone's shoulders - to make discoveries, insights, or progress due to the discoveries or previous work of those who have come before.
28 | 
29 | </details>
30 | 
31 | -->
32 | 
33 | 
34 | <!--
35 | ### Content
36 | -->
37 | 
38 | 
39 | > "Criticism is something we can avoid easily by saying nothing, doing nothing, and being nothing."
40 | >
41 | > --  Aristotle
42 | 
43 | 
44 | _What's broken (and wrong, wrong, wrong) in the CSV standard library? Let's count the ways:_
45 | 
46 | - [**Part I or A (Simplistic) String#split Kludge vs A Purpose Built CSV Parser**](why-the-csv-stdlib-is-broken.md)
47 | - [**Part II or The Wonders of CSV Formats / Dialects**](csv-formats.md)
48 | - [**Part III or Returning a CSV Record as an Array? Hash? Struct? Row?**](csv-array-hash-struct.md)
49 | - [**Part IV or Numerics a.k.a. Auto-Magic Type Inference for Strings and Numbers**](csv-numerics.md)
50 | - [**Part V or Escaping the Stray Quote Error Hell - Do You Want Single, Double, or French Quotes With That Comma?**](csv-quotes.md)
51 | - [**Part VI or Fixes in Alternative CSV Libraries or Evolve or Die or Fast, Faster, Fasterer, Fastest**](csv-libraries.md)
52 | - [**Part VII or What's Your Type? Guess. Again. And Again. And Again. Guess What's a Schema For?**](csv-types.md)
53 | 
54 | 
55 | 
56 | <!--
57 | 
58 | > "He has a right to criticize, who has a heart to help."
59 | >
60 | > -- Abraham Lincoln
61 | 
62 | 
63 | -->
64 | 
65 | 
66 | 
67 | 
68 | ## Migrate / Upgrade from ___  - Side-by-Side Examples
69 | 
70 | - [**Migrate / Upgrade from Smarter CSV to CSV Reader - Side-by-Side Examples**](smarter-csv.md)
71 | 
72 | 


--------------------------------------------------------------------------------
/docs/csv_stdlib_human.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'csv'
 4 | require 'pp'
 5 | 
 6 | 
 7 | txt = <<TXT
 8 | #######
 9 | # try with some comments
10 | #   and blank lines even before header (first row)
11 | 
12 | Brewery,City,Name,Abv
13 | Andechser Klosterbrauerei,Andechs,Doppelbock Dunkel,7%
14 | Augustiner Bräu München,München,Edelstoff,5.6%
15 | 
16 | Bayerische Staatsbrauerei Weihenstephan,  Freising,  Hefe Weissbier,   5.4%
17 | Brauerei Spezial,                         Bamberg,   Rauchbier Märzen, 5.1%
18 | Hacker-Pschorr Bräu,                      München,   Münchner Dunkel,  5.0%
19 | Staatliches Hofbräuhaus München,          München,   Hofbräu Oktoberfestbier, 6.3%
20 | TXT
21 | 
22 | 
23 | COMMENTS_REGEX = /^\s*#/
24 | BLANK_REGEX    = /^\s*$/   ## skip all whitespace lines - note: use "" for a blank record
25 | SKIP_REGEX = Regexp.union( COMMENTS_REGEX, BLANK_REGEX )
26 | 
27 | ##  register our own converters
28 | CSV::Converters[:strip] = ->(field) { field.strip }
29 | 
30 | csv_opts = {
31 |   skip_lines:  SKIP_REGEX,
32 |   skip_blanks: true,    ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
33 |   :converters => [:strip],
34 |   encoding: 'utf-8'
35 | }
36 | 
37 | pp CSV.parse( txt, csv_opts )
38 | 
39 | # => [["Brewery", "City", "Name", "Abv"],
40 | #     ["Andechser Klosterbrauerei", "Andechs", "Doppelbock Dunkel", "7%"],
41 | #     ["Augustiner Br\u00E4u M\u00FCnchen", "M\u00FCnchen", "Edelstoff", "5.6%"],
42 | #     ["Bayerische Staatsbrauerei Weihenstephan", "Freising", "Hefe Weissbier", "5.4%"],
43 | #     ["Brauerei Spezial", "Bamberg", "Rauchbier M\u00E4rzen", "5.1%"],
44 | #     ["Hacker-Pschorr Br\u00E4u", "M\u00FCnchen", "M\u00FCnchner Dunkel", "5.0%"],
45 | #     ["Staatliches Hofbr\u00E4uhaus M\u00FCnchen", "M\u00FCnchen", "Hofbr\u00E4u Oktoberfestbier", "6.3%"]]
46 | 


--------------------------------------------------------------------------------
/docs/csv_stdlib_test.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | require 'csv'
 4 | require 'pp'
 5 | 
 6 | 
 7 | 
## try a quoted value with a leading space (after the comma)
begin
  CSV.parse( %{1, "2"})
rescue CSV::MalformedCSVError => ex
  pp ex
end
# => #<CSV::MalformedCSVError: Illegal quoting in line 1.>
#    note: the exact message text may differ between csv library versions

## try a quoted value with a trailing space (before the comma)
begin
  CSV.parse( %{"3" , 4})
rescue CSV::MalformedCSVError => ex
  pp ex
end
# => #<CSV::MalformedCSVError: Unclosed quoted field on line 1.>
#    note: the exact message text may differ between csv library versions

## quoted empty values ("") vs unquoted empty values
pp CSV.parse( %{"","",,} )
# => [["", "", nil, nil]]
#    note: CSV.parse returns an array of rows (here - one row only)
24 | 


--------------------------------------------------------------------------------
/tabreader/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /test/tmp/
 9 | /test/version_tmp/
10 | /tmp/
11 | 
12 | ## Specific to RubyMotion:
13 | .dat*
14 | .repl_history
15 | build/
16 | 
17 | ## Documentation cache and generated files:
18 | /.yardoc/
19 | /_yardoc/
20 | /doc/
21 | /rdoc/
22 | 
23 | ## Environment normalisation:
24 | /.bundle/
25 | /lib/bundler/man/
26 | 
27 | # for a library or gem, you might want to ignore these files since the code is
28 | # intended to run in multiple environments; otherwise, check them in:
29 | # Gemfile.lock
30 | # .ruby-version
31 | # .ruby-gemset
32 | 
33 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34 | .rvmrc
35 | 
36 | 
37 | ####
38 | # add some auto-generated getting started samples
39 | 
40 | getting-started-samples/AUT_2016-2017.csv
41 | getting-started-samples/AUT_2017-2018.csv
42 | 


--------------------------------------------------------------------------------
/tabreader/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ### 0.0.1 / 2018-08-17
2 | 
3 | * Everything is new. First release.
4 | 


--------------------------------------------------------------------------------
/tabreader/Manifest.txt:
--------------------------------------------------------------------------------
 1 | CHANGELOG.md
 2 | LICENSE.md
 3 | Manifest.txt
 4 | README.md
 5 | Rakefile
 6 | datasets/empty.tab
 7 | datasets/test.tab
 8 | lib/tabreader.rb
 9 | lib/tabreader/reader.rb
10 | lib/tabreader/reader_hash.rb
11 | lib/tabreader/version.rb
12 | test/helper.rb
13 | test/test_reader.rb
14 | test/test_reader_hash.rb
15 | 


--------------------------------------------------------------------------------
/tabreader/NOTES.md:
--------------------------------------------------------------------------------
1 | # Notes
2 | 
3 | ## Todos
4 | 
5 | - [ ] add encoding option to all file convenience methods
6 | - [ ] add (auto) skip blank lines?
7 | - [ ] add (auto) skip comment lines (`#`) or (`%`)? why? why not?
8 | 


--------------------------------------------------------------------------------
/tabreader/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'hoe'
 2 | require './lib/tabreader/version.rb'
 3 | 
 4 | Hoe.spec 'tabreader' do
 5 | 
 6 |   self.version = TabReader::VERSION
 7 | 
 8 |   self.summary = "tabreader - read in tabular datafiles in text in the tabular (TAB) format"
 9 |   self.description = summary
10 | 
11 |   self.urls = ['https://github.com/csvreader/tabreader']
12 | 
13 |   self.author = 'Gerald Bauer'
14 |   self.email = 'wwwmake@googlegroups.com'
15 | 
16 |   # switch extension to .markdown for gihub formatting
17 |   self.readme_file = 'README.md'
18 |   self.history_file = 'HISTORY.md'
19 | 
20 |   self.licenses = ['Public Domain']
21 | 
22 |   self.spec_extras = {
23 |     required_ruby_version: '>= 2.2.2'
24 |   }
25 | 
26 | end
27 | 


--------------------------------------------------------------------------------
/tabreader/datasets/empty.tab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rubycocos/csvreader/5f33603e8f2f8db57bebfb167561111317ed7d0a/tabreader/datasets/empty.tab


--------------------------------------------------------------------------------
/tabreader/datasets/test.tab:
--------------------------------------------------------------------------------
1 | a	b	c
2 | 1	2	3
3 | 4	5	6
4 | {"one":1,"two":2,[3,4,5],null}	7	8
5 | test newline \n and tab \t literals	9	10
6 | 


--------------------------------------------------------------------------------
/tabreader/lib/tabreader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | require 'pp'
 5 | require 'logger'
 6 | 
 7 | 
 8 | ###
 9 | # our own code
10 | #   check: use require_relative - why? why not?
11 | require 'tabreader/version' # let version always go first
12 | require 'tabreader/reader'
13 | require 'tabreader/reader_hash'
14 | 
15 | 
16 | 
17 | ## add some "convenience" shortcuts
## note: TAB and Tab both reference the very same TabReader class object;
##       TabHash references the TabHashReader class
TAB     = TabReader
Tab     = TabReader
TabHash = TabHashReader


# say hello
#   note: prints the banner only if ruby runs in debug mode ($DEBUG)
#         or the $RUBYCOCO_DEBUG global is defined and truthy
puts TabReader.banner     if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)
25 | 


--------------------------------------------------------------------------------
/tabreader/lib/tabreader/reader_hash.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | 
  3 | class TabHashReader
  4 | 
  5 | 
  6 | def self.open( path, mode=nil, headers: nil, &block )   ## rename path to filename or name - why? why not?
  7 | 
  8 |     ## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
  9 |     f = File.open( path, mode ? mode : 'r:bom|utf-8' )
 10 |     tab = new(f, headers: headers )
 11 | 
 12 |     # handle blocks like Ruby's open()
 13 |     if block_given?
 14 |       begin
 15 |         block.call( tab )
 16 |       ensure
 17 |         tab.close
 18 |       end
 19 |     else
 20 |       tab
 21 |     end
 22 | end # method self.open
 23 | 
 24 | 
 25 | def self.read( path, headers: nil )
 26 |     open( path, headers: headers ) { |tab| tab.read }
 27 | end
 28 | 
 29 | 
 30 | 
 31 | def self.foreach( path, headers: nil, &block )
 32 |   tab = open( path, headers: headers)
 33 | 
 34 |   if block_given?
 35 |     begin
 36 |       tab.each( &block )
 37 |     ensure
 38 |       tab.close
 39 |     end
 40 |   else
 41 |     tab.to_enum    ## note: caller (responsible) must close file!!!
 42 |     ## remove version without block given - why? why not?
 43 |     ## use Tab.open().to_enum  or Tab.open().each
 44 |     ##   or Tab.new( File.new() ).to_enum or Tab.new( File.new() ).each ???
 45 |   end
 46 | end # method self.foreach
 47 | 
 48 | 
 49 | def self.parse( data, headers: nil, &block )
 50 |   tab = new( data, headers: headers )
 51 | 
 52 |   if block_given?
 53 |     tab.each( &block )  ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
 54 |   else  # slurp contents, if no block is given
 55 |     tab.read            ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
 56 |   end
 57 | end # method self.parse
 58 | 
 59 | 
 60 | 
 61 | 
 62 | def initialize( data, headers: nil )
 63 |       raise ArgumentError.new( "Cannot parse nil as TAB" )  if data.nil?
 64 | 
 65 |       if data.is_a?( String )
 66 |         @input = data   # note: just needs each for each_line
 67 |       else  ## assume io
 68 |         @input = data
 69 |       end
 70 | 
 71 |       ## pass in headers as array e.g. ['A', 'B', 'C']
 72 |       @names = headers ? headers : nil
 73 | end
 74 | 
 75 | 
 76 | 
 77 |  include Enumerable
 78 | 
 79 | 
 80 |  def each( &block )
 81 | 
 82 |    ## todo/fix:
 83 |    ##   add case for headers/names.size != values.size
 84 |    ##   - add rest option? for if less headers than values (see python csv.DictReader - why? why not?)
 85 |    ##
 86 |    ##   handle case with duplicate and empty header names etc.
 87 | 
 88 | 
 89 |    if block_given?
 90 |      TabReader.parse( @input ) do |values|
 91 |         if @names.nil?    ## check for (first) headers row
 92 |           @names = values   ## store header row / a.k.a. field/column names
 93 |         else    ## "regular" record
 94 |           record = @names.zip( values ).to_h    ## todo/fix: check for more values than names/headers!!!
 95 |           block.call( record )
 96 |         end
 97 |      end
 98 |    else
 99 |      to_enum
100 |    end
101 |  end # method each
102 | 
103 |  def read() to_a; end # method read
104 | 
105 | 
106 |  def close
107 |    @input.close   if @input.respond_to?(:close)   ## note: string needs no close
108 |  end
109 | 
110 | 
111 | end # class TabHashReader
112 | 


--------------------------------------------------------------------------------
/tabreader/lib/tabreader/version.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | 
 4 | ## note: for now TabReader is a class!!! NOT a module - change - why? why not?
 5 | class TabReader
 6 | 
 7 |   MAJOR = 1    ## todo: namespace inside version or something - why? why not??
 8 |   MINOR = 0
 9 |   PATCH = 1
10 |   VERSION = [MAJOR,MINOR,PATCH].join('.')
11 | 
12 |   def self.version
13 |     VERSION
14 |   end
15 | 
16 |   def self.banner
17 |     "tabreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18 |   end
19 | 
20 |   def self.root
21 |     File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
22 |   end
23 | 
24 | end # class TabReader
25 | 


--------------------------------------------------------------------------------
/tabreader/test/helper.rb:
--------------------------------------------------------------------------------
 1 | ## $:.unshift(File.dirname(__FILE__))
 2 | 
 3 | ## minitest setup
 4 | 
 5 | require 'minitest/autorun'
 6 | 
 7 | 
 8 | ## our own code
 9 | require 'tabreader'
10 | 
11 | ## add test_data_dir helper
class TabReader
  class << self
    ## test helper: the datasets directory below the root directory
    def test_data_dir
      "#{root}/datasets"
    end
  end
end
17 | 
18 | 
19 | 
20 | TabReader.logger.level = :debug   ## turn on "global" logging
21 | 


--------------------------------------------------------------------------------
/tabreader/test/test_reader.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_reader.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
##
# Smoke tests for TabReader (parse, parse_line, read, header, foreach) -
#   the results get pretty printed only; nothing gets compared.
#
# note: uses Minitest::Test - the old camel-case MiniTest alias
#   is no longer defined by default in newer minitest releases (5.19+).
class TestReader < Minitest::Test


def test_parse

## note: txt1 uses \t escapes and txt2 uses literal tab characters
txt1 = <<TXT
a\tb\tc
1\t2\t3
4\t5\t6
TXT

txt2 = <<TXT
a	b	c	d
1	2	3	4
5	6	7	8
TXT

puts "== parse:"
pp TabReader.parse( txt1 )

puts "== parse:"
pp TabReader.parse( txt2 )

puts "== parse_line:"
pp TabReader.parse_line( "1\t2\t3" )

puts "== parse_line:"
pp TabReader.parse_line( "1	2	3	4" )

puts "== parse_line:"
pp TabReader.parse_line( "1\t2\t3\r\n" )

  assert true   ## assume ok if we get here
end


def test_read

puts "== read:"
pp TabReader.read( "#{TabReader.test_data_dir}/test.tab" )
puts "== header:"
pp TabReader.header( "#{TabReader.test_data_dir}/test.tab" )
puts "== foreach:"
TabReader.foreach( "#{TabReader.test_data_dir}/test.tab" ) do |row|
  pp row
end
end


## same calls as in test_read but with the empty.tab datafile and empty strings
def test_read_empty

puts "== read (empty):"
pp TabReader.read( "#{TabReader.test_data_dir}/empty.tab" )
puts "== header (empty):"
pp TabReader.header( "#{TabReader.test_data_dir}/empty.tab" )
puts "== foreach (empty):"
TabReader.foreach( "#{TabReader.test_data_dir}/empty.tab" ) do |row|
  pp row
end
puts "== parse (empty):"
pp TabReader.parse( "" )
pp TabReader.parse_line( "" )
end

end # class TestReader
75 | 


--------------------------------------------------------------------------------
/tabreader/test/test_reader_hash.rb:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | ###
 4 | #  to run use
 5 | #     ruby -I ./lib -I ./test test/test_reader_hash.rb
 6 | 
 7 | 
 8 | require 'helper'
 9 | 
##
# Smoke tests for TabHashReader (parse, read, foreach) -
#   the results get pretty printed only; nothing gets compared.
#
# note: named TestHashReader (NOT TestReader) - test/test_reader.rb
#   already defines a TestReader class with the very same test method names
#   (test_parse, test_read, test_read_empty); if both files get loaded
#   in one test run the methods loaded last would replace the others.
#
# note: uses Minitest::Test - the old camel-case MiniTest alias
#   is no longer defined by default in newer minitest releases (5.19+).
class TestHashReader < Minitest::Test


def test_parse

## note: txt1 uses \t escapes and txt2 uses literal tab characters
txt1 = <<TXT
a\tb\tc
1\t2\t3
4\t5\t6
TXT

txt2 = <<TXT
a	b	c	d
1	2	3	4
5	6	7	8
TXT

puts "== parse:"
pp TabHashReader.parse( txt1 )

puts "== parse:"
pp TabHashReader.parse( txt2 )

  assert true   ## assume ok if we get here
end


def test_read

puts "== read:"
pp TabHashReader.read( "#{TabReader.test_data_dir}/test.tab" )
puts "== foreach:"
TabHashReader.foreach( "#{TabReader.test_data_dir}/test.tab" ) do |row|
  pp row
end
end


## same calls as in test_read but with the empty.tab datafile and an empty string
def test_read_empty

puts "== read (empty):"
pp TabHashReader.read( "#{TabReader.test_data_dir}/empty.tab" )
puts "== foreach (empty):"
TabHashReader.foreach( "#{TabReader.test_data_dir}/empty.tab" ) do |row|
  pp row
end
puts "== parse (empty):"
pp TabHashReader.parse( "" )
end

end # class TestHashReader
61 | 


--------------------------------------------------------------------------------