├── lib
    ├── fastest-csv.rb
    ├── fastest-csv
    │   └── version.rb
    └── fastest_csv.rb
├── Gemfile
├── .gitignore
├── ext
    └── csv_parser
    │   ├── extconf.rb
    │   ├── parser.c
    │   ├── CsvParser.java
    │   └── CsvParserService.java
├── Rakefile
├── fastest-csv.gemspec
├── LICENSE
├── README.md
└── test
    ├── tc_interface.rb
    └── tc_csv_parsing.rb


/lib/fastest-csv.rb:
--------------------------------------------------------------------------------
1 | require 'fastest_csv'
2 | 


--------------------------------------------------------------------------------
/lib/fastest-csv/version.rb:
--------------------------------------------------------------------------------
class FastestCSV
  # Version of the fastest-csv gem. Frozen so the shared constant cannot be
  # mutated in place by calling code.
  VERSION = "0.0.4".freeze
end
4 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | 
3 | # Specify your gem's dependencies in fastest-csv.gemspec
4 | gemspec
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | .bundle
 4 | .config
 5 | .yardoc
 6 | .DS_Store
 7 | Gemfile.lock
 8 | InstalledFiles
 9 | _yardoc
10 | coverage
11 | doc/
12 | lib/bundler/man
13 | pkg
14 | rdoc
15 | spec/reports
16 | test/tmp
17 | test/version_tmp
18 | tmp
19 | lib/*.bundle
20 | lib/*.jar
21 | 


--------------------------------------------------------------------------------
/ext/csv_parser/extconf.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/ruby -w
 2 | 
 3 | require 'mkmf'
 4 | extension_name = 'csv_parser'
 5 | #dir_config(extension_name)
 6 | 
 7 | if RUBY_VERSION =~ /1.8/ then
 8 |   $CPPFLAGS += " -DRUBY_18"
 9 | end
10 | 
11 | #if CONFIG["arch"] =~ /mswin32|mingw/
12 | #  $CFLAGS += " -march=i686"
13 | #end
14 | 
15 | create_makefile(extension_name)
16 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env rake
 2 | require "bundler/gem_tasks"
 3 | 
 4 | spec = Gem::Specification.load('fastest-csv.gemspec')
 5 | 
 6 | if RUBY_PLATFORM =~ /java/
 7 |   require 'rake/javaextensiontask'
 8 |   Rake::JavaExtensionTask.new('csv_parser', spec)
 9 | else
10 |   require 'rake/extensiontask'
11 |   Rake::ExtensionTask.new('csv_parser', spec)
12 | end
13 | 
14 | require 'rake/testtask'
15 | Rake::TestTask.new do |t|
16 |   t.libs << "test"
17 |   t.test_files = FileList['test/tc_*.rb']
18 |   #test.libs << 'lib' << 'test'
19 |   #test.pattern = 'test/**/test_*.rb'
20 |   #test.verbose = true
21 | end
22 | 
23 | 


--------------------------------------------------------------------------------
/fastest-csv.gemspec:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | require File.expand_path('../lib/fastest-csv/version', __FILE__)
 3 | 
 4 | Gem::Specification.new do |gem|
 5 |   gem.authors       = ["Maarten Oelering"]
 6 |   gem.email         = ["maarten@brightcode.nl"]
 7 |   gem.description   = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
 8 |   gem.summary       = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
 9 |   gem.homepage      = "https://github.com/brightcode/fastest-csv"
10 | 
11 |   gem.files         = `git ls-files`.split($\)
12 |   #gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13 |   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
14 |   gem.name          = "fastest-csv"
15 |   gem.require_paths = ["lib"]
16 |   gem.version       = FastestCSV::VERSION
17 | 
18 |   if RUBY_PLATFORM =~ /java/
19 |     gem.platform = "java"
20 |     gem.files << "lib/csv_parser.jar"
21 |   else
22 |     gem.extensions  = ['ext/csv_parser/extconf.rb']
23 |   end
24 |   
25 |   gem.add_development_dependency "rake-compiler"
26 |   
27 |   gem.license = 'MIT'
28 | end
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2012, 2013 Maarten Oelering
 2 | 
 3 | MIT License
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # FastestCSV
 2 | 
 3 | Fastest CSV class for MRI Ruby and JRuby. Faster than faster_csv and fasterer-csv. 
 4 | 
 5 | Uses native C code to parse CSV lines in MRI Ruby and Java in JRuby.
 6 | 
 7 | Supports standard CSV according to RFC4180. Not the so-called "csv" from Excel.
 8 | 
 9 | The interface is a subset of the CSV interface in Ruby 1.9.3. The options parameter is not supported.
10 | 
11 | Originally developed to parse large CSV log files from PowerMTA.
12 | 
13 | ## Installation
14 | 
15 | Add this line to your application's Gemfile:
16 | 
17 | ```ruby
18 | gem 'fastest-csv'
19 | ```
20 | 
21 | And then execute:
22 | 
23 |     $ bundle
24 | 
25 | Or install it yourself as:
26 | 
27 |     $ gem install fastest-csv
28 | 
29 | ## Usage
30 | 
31 | Parse single line
32 | 
33 | ```ruby
34 | FastestCSV.parse_line("one,two,three")
35 |  => ["one", "two", "three"]
36 | 
37 | "one,two,three".parse_csv
38 |  => ["one", "two", "three"]
39 |  ```
40 | 
41 | Parse file without header
42 | 
43 | ```ruby
44 | FastestCSV.foreach("path/to/file.csv") do |row|
45 |   # ...
46 | end
47 | ```
48 | 
49 | Parse file with header
50 | 
51 | ```ruby
52 | FastestCSV.open("path/to/file.csv") do |csv|
53 |   fields = csv.shift
54 |   while values = csv.shift
55 |     # ...
56 |   end
57 | end
58 | ```
59 | 
60 | Parse file into an array of arrays
61 | 
62 | ```ruby
63 | rows = FastestCSV.read("path/to/file.csv")
64 | ```
65 | 
66 | Parse string into an array of arrays
67 | 
68 | ```ruby
69 | rows = FastestCSV.parse(csv_data)
70 | ```
71 | 
72 | ## Contributing
73 | 
74 | 1. Fork it
75 | 2. Create your feature branch (`git checkout -b my-new-feature`)
76 | 3. Commit your changes (`git commit -am 'Added some feature'`)
77 | 4. Push to the branch (`git push origin my-new-feature`)
78 | 5. Create new Pull Request
79 | 


--------------------------------------------------------------------------------
/ext/csv_parser/parser.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Maarten Oelering, BrightCode BV
 3 |  */
 4 | 
#include "ruby.h"
#ifdef RUBY_18
  #include "rubyio.h"
#else
  #include "ruby/io.h"
  #include "ruby/encoding.h"
#endif
11 | 
12 | #define DEF_ARRAY_LEN 32
13 | 
14 | #define UNQUOTED 0
15 | #define IN_QUOTED 1
16 | #define QUOTE_IN_QUOTED 2
17 | 
18 | static VALUE mCsvParser;
19 | 
20 | static VALUE parse_line(VALUE self, VALUE str)
21 | {
22 |     if (NIL_P(str))
23 |         return Qnil;
24 |     
25 |     const char *ptr = RSTRING_PTR(str);
26 |     int len = (int) RSTRING_LEN(str);  /* cast to prevent warning in 64-bit OS */
27 | 
28 |     if (len == 0)
29 |         return Qnil;
30 |     
31 |     VALUE array = rb_ary_new2(DEF_ARRAY_LEN); /* default allocated size is 16 */
32 |     char value[len];  /* field value, no longer than line */
33 |     int state = 0;
34 |     int index = 0;
35 |     int i;
36 |     char c;
37 |     for (i = 0; i < len; i++)
38 |     {
39 |         c = ptr[i];
40 |         switch (c)
41 |         {
42 |             case ',':
43 |                 if (state == UNQUOTED) {
44 |                     rb_ary_push(array, (index == 0 ? Qnil: rb_str_new(value, index)));
45 |                     index = 0;
46 |                 }
47 |                 else if (state == IN_QUOTED) {
48 |                     value[index++] = c;
49 |                 }
50 |                 else if (state == QUOTE_IN_QUOTED) {
51 |                     rb_ary_push(array, rb_str_new(value, index));
52 |                     index = 0;
53 |                     state = UNQUOTED;
54 |                 }
55 |                 break;
56 |             case '"':
57 |                 if (state == UNQUOTED) {
58 |                     state = IN_QUOTED;
59 |                 }
60 |                 else if (state == 1) {
61 |                     state = QUOTE_IN_QUOTED;
62 |                 }
63 |                 else if (state == QUOTE_IN_QUOTED) {
64 |                     value[index++] = c;  /* escaped quote */
65 |                     state = IN_QUOTED;
66 |                 }
67 |                 break;
68 |             case 13:  /* \r */
69 |             case 10:  /* \n */
70 |                 if (state == IN_QUOTED) {
71 |                     value[index++] = c;
72 |                 }
73 |                 else {
74 |                     i = len;  /* only parse first line if multiline */
75 |                 }
76 |                 break;
77 |             default:
78 |                 value[index++] = c;
79 |         }
80 |     }
81 |     
82 |     if (state == UNQUOTED) {
83 |         rb_ary_push(array, (index == 0 ? Qnil: rb_str_new(value, index)));
84 |     }
85 |     else if (state == QUOTE_IN_QUOTED) {
86 |         rb_ary_push(array, rb_str_new(value, index));
87 |     }
88 |     return array;
89 | }
90 | 
91 | void Init_csv_parser()
92 | {
93 |     mCsvParser = rb_define_module("CsvParser");
94 |     rb_define_module_function(mCsvParser, "parse_line", parse_line, 1);
95 | }
96 | 


--------------------------------------------------------------------------------
/ext/csv_parser/CsvParser.java:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright (c) Maarten Oelering, BrightCode BV
 3 | //
 4 | 
 5 | package org.brightcode;
 6 | 
 7 | import java.util.ArrayList;
 8 | import java.util.List;
 9 | 
/**
 * Single-line CSV parser implemented as a three-state machine
 * (unquoted, inside quotes, quote seen inside quotes).
 */
public class CsvParser {

    /** Initial capacity of the list of fields. */
    private static final int DEF_ARRAY_LEN = 32;

    private static final int UNQUOTED = 0;
    private static final int IN_QUOTED = 1;
    private static final int QUOTE_IN_QUOTED = 2;

    /**
     * Parses the first CSV record found in {@code line}.
     *
     * @param line the text to parse; may be {@code null}
     * @return {@code null} when {@code line} is {@code null} or empty; otherwise the
     *         list of fields, where an unquoted empty field is {@code null} and a
     *         quoted empty field is {@code ""}. Parsing stops at the first CR or LF
     *         outside a quoted field. A quoted field that is never closed is not
     *         added to the result.
     */
    public static List<String> parseLine(String line) {
        // Treat a missing line like an empty one instead of throwing a NullPointerException.
        if (line == null)
            return null;

        int length = line.length();
        if (length == 0)
            return null;

        int state = UNQUOTED;
        StringBuilder value = new StringBuilder(length);   // field value, no longer than line
        List<String> array = new ArrayList<String>(DEF_ARRAY_LEN);

        for (int i = 0; i < length; i++) {
            char c = line.charAt(i);
            switch (c) {
                case ',':
                    if (state == UNQUOTED) {
                        if (value.length() == 0) {
                            array.add(null);
                        }
                        else {
                            array.add(value.toString());
                            value.setLength(0);
                        }
                    }
                    else if (state == IN_QUOTED) {
                        value.append(c);
                    }
                    else if (state == QUOTE_IN_QUOTED) {
                        // closing quote followed by separator: end of quoted field
                        array.add(value.toString());
                        value.setLength(0);
                        state = UNQUOTED;
                    }
                    break;
                case '"':
                    if (state == UNQUOTED) {
                        state = IN_QUOTED;
                    }
                    else if (state == IN_QUOTED) {
                        state = QUOTE_IN_QUOTED;
                    }
                    else if (state == QUOTE_IN_QUOTED) {
                        value.append(c);   // escaped quote
                        state = IN_QUOTED;
                    }
                    break;
                case '\r':
                case '\n':
                    if (state == IN_QUOTED) {
                        value.append(c);
                    }
                    else {
                        i = length;  // only parse first line if multiline
                    }
                    break;
                default:
                    value.append(c);
                    break;
            }
        }

        // Flush the last field; an unterminated quoted field (IN_QUOTED) is dropped.
        if (state == UNQUOTED) {
            if (value.length() == 0) {
                array.add(null);
            }
            else {
                array.add(value.toString());
                value.setLength(0);
            }
        }
        else if (state == QUOTE_IN_QUOTED) {
            array.add(value.toString());
        }
        return array;
    }
}
90 | 


--------------------------------------------------------------------------------
/lib/fastest_csv.rb:
--------------------------------------------------------------------------------
  1 | # This loads either csv_parser.so, csv_parser.bundle or
  2 | # csv_parser.jar, depending on your Ruby platform and OS
  3 | require 'csv_parser'
  4 | require 'stringio'
  5 | 
  6 | # Fast CSV parser using native code
  7 | class FastestCSV
  8 |   include Enumerable
  9 |   
 10 |   if RUBY_PLATFORM =~ /java/
 11 |     if JRUBY_VERSION =~ /^1\.[0-6]/
 12 |       require 'jruby'
 13 |       org.brightcode.CsvParserService.new.basicLoad(JRuby.runtime)
 14 |     else
 15 |       include_package "org.brightcode"
 16 |     end
 17 |   end
 18 | 
 19 |   # Pass each line of the specified +path+ as array to the provided +block+
 20 |   def self.foreach(path, &block)
 21 |     open(path) do |reader|
 22 |       reader.each(&block)
 23 |     end
 24 |   end
 25 | 
 26 |   # Opens a csv file. Pass a FastestCSV instance to the provided block,
 27 |   # or return it when no block is provided
 28 |   def self.open(path, mode = "rb")
 29 |     csv = new(File.open(path, mode))
 30 |     if block_given?
 31 |       begin
 32 |         yield csv
 33 |       ensure
 34 |         csv.close
 35 |       end
 36 |     else
 37 |       csv
 38 |     end
 39 |   end
 40 | 
 41 |   # Read all lines from the specified +path+ into an array of arrays
 42 |   def self.read(path)
 43 |     open(path, "rb") { |csv| csv.read }
 44 |   end
 45 | 
 46 |   # Alias for FastestCSV.read
 47 |   def self.readlines(path)
 48 |     read(path)
 49 |   end
 50 | 
 51 |   # Read all lines from the specified String into an array of arrays
 52 |   def self.parse(data, &block)
 53 |     csv = new(StringIO.new(data))
 54 |     if block.nil?
 55 |       begin
 56 |         csv.read
 57 |       ensure
 58 |         csv.close
 59 |       end
 60 |     else
 61 |       csv.each(&block)
 62 |     end
 63 |   end
 64 |   
 65 |   def self.parse_line(line)
 66 |     CsvParser.parse_line(line)
 67 |   end
 68 | 
 69 |   # Create new FastestCSV wrapping the specified IO object
 70 |   def initialize(io)
 71 |     @io = io
 72 |   end
 73 |   
 74 |   # Read from the wrapped IO passing each line as array to the specified block
 75 |   def each
 76 |     if block_given?
 77 |       while row = shift
 78 |         yield row
 79 |       end
 80 |     else
 81 |       to_enum # return enumerator
 82 |     end
 83 |   end
 84 |   
 85 |   # Read all remaining lines from the wrapped IO into an array of arrays
 86 |   def read
 87 |     table = Array.new
 88 |     each {|row| table << row}
 89 |     table
 90 |   end
 91 |   alias_method :readlines, :read
 92 |   
 93 |   # Rewind the underlying IO object and reset line counter
 94 |   def rewind
 95 |     @io.rewind
 96 |   end
 97 | 
 98 |   # Read next line from the wrapped IO and return as array or nil at EOF
 99 |   def shift
100 |     if line = @io.gets
101 |       CsvParser.parse_line(line)
102 |     else
103 |       nil
104 |     end
105 |   end
106 |   alias_method :gets,     :shift
107 |   alias_method :readline, :shift
108 |   
109 |   # Close the wrapped IO
110 |   def close
111 |     @io.close
112 |   end
113 |   
114 |   def closed?
115 |     @io.closed?
116 |   end
117 | end
118 | 
class String
  # Equivalent to <tt>FastestCSV.parse_line(self)</tt>
  #
  # Delegates to FastestCSV rather than referencing the parser constant
  # directly, so the parser is looked up from within FastestCSV, where the
  # platform-specific setup is performed.
  def parse_csv
    FastestCSV.parse_line(self)
  end
end
125 | 
126 | 


--------------------------------------------------------------------------------
/test/tc_interface.rb:
--------------------------------------------------------------------------------
  1 | #
  2 | # Tests copied from faster_csv by James Edward Gray II
  3 | #
  4 | 
  5 | require 'test/unit'
  6 | require 'fastest_csv'
  7 | 
  8 | class TestFastestCSVInterface < Test::Unit::TestCase
  9 | 
 10 |   def setup
 11 |     @path = File.join(File.dirname(__FILE__), "temp_test_data.csv")
 12 |     
 13 |     File.open(@path, "w") do |file|
 14 |       file << "1,2,3\r\n"
 15 |       file << "4,5\r\n"
 16 |     end
 17 | 
 18 |     @expected = [%w{1 2 3}, %w{4 5}]
 19 |   end
 20 |   
 21 |   def teardown
 22 |     File.unlink(@path)
 23 |   end
 24 |   
 25 |   ### Test Read Interface ###
 26 |   
 27 |   def test_foreach
 28 |     FastestCSV.foreach(@path) do |row|
 29 |       assert_equal(@expected.shift, row)
 30 |     end
 31 |   end
 32 |   
 33 |   def test_open_and_close
 34 |     csv = FastestCSV.open(@path, "r+")
 35 |     assert_not_nil(csv)
 36 |     assert_instance_of(FastestCSV, csv)
 37 |     assert_equal(false, csv.closed?)
 38 |     csv.close
 39 |     assert(csv.closed?)
 40 |     
 41 |     ret = FastestCSV.open(@path) do |csv|
 42 |       assert_instance_of(FastestCSV, csv)
 43 |       "Return value."
 44 |     end
 45 |     assert(csv.closed?)
 46 |     assert_equal("Return value.", ret)
 47 |   end
 48 |   
 49 |   def test_parse
 50 |     data = File.read(@path)
 51 |     assert_equal( @expected,
 52 |                   FastestCSV.parse(data) )
 53 | 
 54 |     FastestCSV.parse(data) do |row|
 55 |       assert_equal(@expected.shift, row)
 56 |     end
 57 |   end
 58 |   
 59 |   #def test_parse_line
 60 |   #  row = FasterCSV.parse_line("1;2;3", :col_sep => ";")
 61 |   #  assert_not_nil(row)
 62 |   #  assert_instance_of(Array, row)
 63 |   #  assert_equal(%w{1 2 3}, row)
 64 |   #  
 65 |   #  # shortcut interface
 66 |   #  row = "1;2;3".parse_csv(:col_sep => ";")
 67 |   #  assert_not_nil(row)
 68 |   #  assert_instance_of(Array, row)
 69 |   #  assert_equal(%w{1 2 3}, row)
 70 |   #end
 71 |   
 72 |   def test_parse_line_with_empty_lines
 73 |     assert_equal(nil,       FastestCSV.parse_line(""))  # to signal eof
 74 |     #assert_equal(Array.new, FastestCSV.parse_line("\n1,2,3"))
 75 |     assert_equal([nil], FastestCSV.parse_line("\n1,2,3"))
 76 |   end
 77 |   
 78 |   def test_read_and_readlines
 79 |     assert_equal( @expected,
 80 |                   FastestCSV.read(@path) )
 81 |     assert_equal( @expected,
 82 |                   FastestCSV.readlines(@path))
 83 |     
 84 |     
 85 |     data = FastestCSV.open(@path) do |csv|
 86 |       csv.read
 87 |     end
 88 |     assert_equal(@expected, data)
 89 |     data = FastestCSV.open(@path) do |csv|
 90 |       csv.readlines
 91 |     end
 92 |     assert_equal(@expected, data)
 93 |   end
 94 |   
 95 |   #def test_table
 96 |   #  table = FastestCSV.table(@path)
 97 |   #  assert_instance_of(FastestCSV::Table, table)
 98 |   #  assert_equal([[:"1", :"2", :"3"], [4, 5, nil]], table.to_a)
 99 |   #end
100 |   
101 |   def test_shift  # aliased as gets() and readline()
102 |     FastestCSV.open(@path, "r+") do |csv|
103 |       assert_equal(@expected.shift, csv.shift)
104 |       assert_equal(@expected.shift, csv.shift)
105 |       assert_equal(nil, csv.shift)
106 |     end
107 |   end
108 | 
109 |   def test_long_line # ruby's regex parser may have problems with long rows
110 |     File.unlink(@path)
111 | 
112 |     long_field_length = 2800
113 |     File.open(@path, "w") do |file|
114 |       file << "1,2,#{'3' * long_field_length}\r\n"
115 |     end
116 |     @expected = [%w{1 2} + ['3' * long_field_length]]
117 |     test_shift
118 |   end
119 |   
120 |   def test_enumerable
121 |     FastestCSV.open(@path) do |csv|
122 |       assert(csv.include?(["1", "2", "3"]))
123 |       csv.rewind
124 |       assert_equal([["1", "2", "3"], ["4", "5"]], csv.to_a)
125 |     end
126 |   end
127 |   
128 | end
129 | 


--------------------------------------------------------------------------------
/ext/csv_parser/CsvParserService.java:
--------------------------------------------------------------------------------
  1 | //
  2 | // Copyright (c) Maarten Oelering, BrightCode BV
  3 | //
  4 | 
  5 | package org.brightcode;
  6 | 
  7 | import java.io.IOException;
  8 | 
  9 | import org.jruby.Ruby;
 10 | import org.jruby.RubyArray;
 11 | import org.jruby.RubyModule;
 12 | import org.jruby.RubyString;
 13 | import org.jruby.runtime.Block;
 14 | import org.jruby.runtime.CallbackFactory;
 15 | import org.jruby.runtime.builtin.IRubyObject;
 16 | import org.jruby.runtime.load.BasicLibraryService;
 17 | 
 18 | public class CsvParserService implements BasicLibraryService {
 19 | 
 20 |     private Ruby runtime;
 21 | 
 22 |     private static int DEF_ARRAY_LEN = 32;
 23 | 
 24 |     private static int UNQUOTED = 0;
 25 |     private static int IN_QUOTED = 1;
 26 |     private static int QUOTE_IN_QUOTED = 2;
 27 |     
 28 |     // Initial setup function. Takes a reference to the current JRuby runtime and
 29 |     // sets up our modules.
 30 |     public boolean basicLoad(Ruby runtime) throws IOException {
 31 |         this.runtime = runtime;
 32 | 
 33 |         RubyModule mCsvParser = runtime.defineModule("CsvParser");
 34 |         // TODO: CallbackFactory#getSingletonMethod is deprecated
 35 |         CallbackFactory callbackFactory = runtime.callbackFactory(CsvParserService.class);
 36 |         mCsvParser.defineModuleFunction("parse_line", 
 37 |             callbackFactory.getSingletonMethod("parseLine", RubyString.class));
 38 |         return true;
 39 |     }
 40 |     
 41 |     public static IRubyObject parseLine(IRubyObject recv, RubyString line, Block unusedBlock) {
 42 |         Ruby runtime = recv.getRuntime();
 43 |  
 44 |         CharSequence seq = line.getValue();
 45 |         int length = seq.length();
 46 |         if (length == 0)
 47 |             return runtime.getNil();
 48 |         
 49 |         int state = UNQUOTED;
 50 |         StringBuilder value = new StringBuilder(length);   // field value, no longer than line
 51 |         RubyArray array = RubyArray.newArray(runtime, DEF_ARRAY_LEN);
 52 |         
 53 |         for (int i = 0; i < length; i++) {
 54 |             char c = seq.charAt(i);
 55 |             switch (c) {
 56 |                 case ',':
 57 |                     if (state == UNQUOTED) {
 58 |                         if (value.length() == 0) {
 59 |                             array.append(runtime.getNil());
 60 |                         } 
 61 |                         else {
 62 |                             array.append(RubyString.newString(runtime, value));
 63 |                             value.setLength(0);
 64 |                         }
 65 |                     }
 66 |                     else if (state == IN_QUOTED) {
 67 |                         value.append(c);
 68 |                     }
 69 |                     else if (state == 2) {
 70 |                         array.append(RubyString.newString(runtime, value));
 71 |                         value.setLength(0);
 72 |                         state = UNQUOTED;
 73 |                     }
 74 |                     break;
 75 |                 case '"':
 76 |                     if (state == UNQUOTED) {
 77 |                         state = IN_QUOTED;
 78 |                     }
 79 |                     else if (state == IN_QUOTED) {
 80 |                         state = QUOTE_IN_QUOTED;
 81 |                     }
 82 |                     else if (state == QUOTE_IN_QUOTED) {
 83 |                         value.append(c);   // escaped quote
 84 |                         state = IN_QUOTED;
 85 |                     }
 86 |                     break;
 87 |                 case '\r':
 88 |                 case '\n':
 89 |                     if (state == IN_QUOTED) {
 90 |                         value.append(c);
 91 |                     }
 92 |                     else {
 93 |                         i = length;  // only parse first line if multiline
 94 |                     }
 95 |                     break;
 96 |                 default:
 97 |                     value.append(c);
 98 |                     break;
 99 |             }
100 |         }
101 |         if (state == UNQUOTED) {
102 |             if (value.length() == 0) {
103 |                 array.append(runtime.getNil());
104 |             } 
105 |             else {
106 |                 array.append(RubyString.newString(runtime, value));
107 |                 value.setLength(0);
108 |             }
109 |         }
110 |         else if (state == QUOTE_IN_QUOTED) {
111 |             array.append(RubyString.newString(runtime, value));
112 |         }
113 |         return array;
114 |     }
115 | }
116 | 


--------------------------------------------------------------------------------
/test/tc_csv_parsing.rb:
--------------------------------------------------------------------------------
  1 | #
  2 | # Tests copied from faster_csv by James Edward Gray II
  3 | #
  4 | 
  5 | require 'test/unit'
  6 | require 'fastest_csv'
  7 | 
  8 | # 
  9 | # Following tests are my interpretation of the 
 10 | # {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt].  I only deviate from that 
 11 | # document in one place (intentionally) and that is to make the default row
 12 | # separator <tt>$/</tt>.
 13 | # 
# Table-driven tests that call CsvParser.parse_line directly. Each table entry
# is a pair of [input line, expected array of fields].
class TestCSVParsing < Test::Unit::TestCase

  # On JRuby, include the Java package in this test class
  # (presumably so that CsvParser resolves to the Java class; verify on JRuby).
  if RUBY_PLATFORM =~ /java/
    include_package "org.brightcode"
  end

  # Mixed line: plain fields, padded field, empty field, quoted comma and
  # doubled (escaped) quotes.
  def test_mastering_regex_example
    ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
    assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
                    "It's \"10 Grand\", baby", "10K" ],
                  CsvParser.parse_line(ex) )
  end

  # Pulled from:  http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/ruby/test/csv/test_csv.rb?rev=1.12.2.2;content-type=text%2Fplain
  # Both tables below are run through the same CsvParser.parse_line call.
  def test_std_lib_csv
    [ ["\t", ["\t"]],
      ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
      ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
      ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
      ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
      ["\"\"", [""]],
      ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
      ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
      ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
      ["foo,\"\",baz", ["foo", "", "baz"]],
      ["\",\"", [","]],
      ["foo", ["foo"]],
      [",,", [nil, nil, nil]],
      [",", [nil, nil]],
      ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
      ["foo,,baz", ["foo", nil, "baz"]],
      ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
      ["\",\",\",\"", [",", ","]],
      ["foo,bar,", ["foo", "bar", nil]],
      [",foo,bar", [nil, "foo", "bar"]],
      ["foo,bar", ["foo", "bar"]],
      [";", [";"]],
      ["\t,\t", ["\t", "\t"]],
      ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
      ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
      ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
      [";,;", [";", ";"]] ].each do |csv_test|
      assert_equal(csv_test.last, CsvParser.parse_line(csv_test.first))
    end
    
    [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
      ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
      ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
      ["\"\"", [""]],
      ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
      ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
      ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
      ["foo,\"\",baz", ["foo", "", "baz"]],
      ["foo", ["foo"]],
      [",,", [nil, nil, nil]],
      [",", [nil, nil]],
      ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
      ["foo,,baz", ["foo", nil, "baz"]],
      ["foo,bar", ["foo", "bar"]],
      ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
      ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |csv_test|
      assert_equal(csv_test.last, CsvParser.parse_line(csv_test.first))
     end
  end

  # From:  http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
  # Escaped quotes, newlines inside quotes and empty trailing fields.
  def test_aras_edge_cases
    [ [%Q{a,b},               ["a", "b"]],
      [%Q{a,"""b"""},         ["a", "\"b\""]],
      [%Q{a,"""b"},           ["a", "\"b"]],
      [%Q{a,"b"""},           ["a", "b\""]],
      [%Q{a,"\nb"""},         ["a", "\nb\""]],
      [%Q{a,"""\nb"},         ["a", "\"\nb"]],
      [%Q{a,"""\nb\n"""},     ["a", "\"\nb\n\""]],
      [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
      [%Q{a,,,},              ["a", nil, nil, nil]],
      [%Q{,},                 [nil, nil]],
      [%Q{"",""},             ["", ""]],
      [%Q{""""},              ["\""]],
      [%Q{"""",""},           ["\"",""]],
      [%Q{,""},               [nil,""]],
      [%Q{,"\r"},             [nil,"\r"]],
      [%Q{"\r\n,"},           ["\r\n,"]],
      [%Q{"\r\n,",},          ["\r\n,", nil]] ].each do |edge_case|
        assert_equal(edge_case.last, CsvParser.parse_line(edge_case.first))
      end
  end

  def test_james_edge_cases
    # A read at eof? should return nil.
    assert_equal(nil, CsvParser.parse_line(""))
    # 
    # With CSV it's impossible to tell an empty line from a line containing a
    # single +nil+ field.  The standard CSV library returns <tt>[nil]</tt>
    # in these cases, but <tt>Array.new</tt> makes more sense to me.
    # 
    #assert_equal(Array.new, FastestCSV.parse_line("\n1,2,3\n"))
    assert_equal([nil], CsvParser.parse_line("\n1,2,3\n"))
  end

  # Newlines and CRLFs embedded in quoted fields are kept in the field value;
  # the last entry also has a trailing newline after the closing quote.
  def test_rob_edge_cases
    [ [%Q{"a\nb"},                         ["a\nb"]],
      [%Q{"\n\n\n"},                       ["\n\n\n"]],
      [%Q{a,"b\n\nc"},                     ['a', "b\n\nc"]],
      [%Q{,"\r\n"},                        [nil,"\r\n"]],
      [%Q{,"\r\n."},                       [nil,"\r\n."]],
      [%Q{"a\na","one newline"},           ["a\na", 'one newline']],
      [%Q{"a\n\na","two newlines"},        ["a\n\na", 'two newlines']],
      [%Q{"a\r\na","one CRLF"},            ["a\r\na", 'one CRLF']],
      [%Q{"a\r\n\r\na","two CRLFs"},       ["a\r\n\r\na", 'two CRLFs']],
      [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
    ].each do |edge_case|
      assert_equal(edge_case.last, CsvParser.parse_line(edge_case.first))
    end
  end

end
131 | 


--------------------------------------------------------------------------------