├── .github ├── dependabot.yml └── workflows │ ├── benchmark.yml │ ├── release.yml │ └── test.yml ├── .gitignore ├── Gemfile ├── LICENSE.txt ├── NEWS.md ├── README.md ├── Rakefile ├── benchmark ├── convert_nil.yaml ├── parse.yaml ├── parse_liberal_parsing.yaml ├── parse_quote_char_nil.yaml ├── parse_strip.yaml ├── read.yaml ├── shift.yaml └── write.yaml ├── bin ├── console └── setup ├── csv.gemspec ├── doc └── csv │ ├── arguments │ └── io.rdoc │ ├── options │ ├── common │ │ ├── col_sep.rdoc │ │ ├── quote_char.rdoc │ │ └── row_sep.rdoc │ ├── generating │ │ ├── force_quotes.rdoc │ │ ├── quote_empty.rdoc │ │ ├── write_converters.rdoc │ │ ├── write_empty_value.rdoc │ │ ├── write_headers.rdoc │ │ └── write_nil_value.rdoc │ └── parsing │ │ ├── converters.rdoc │ │ ├── empty_value.rdoc │ │ ├── field_size_limit.rdoc │ │ ├── header_converters.rdoc │ │ ├── headers.rdoc │ │ ├── liberal_parsing.rdoc │ │ ├── nil_value.rdoc │ │ ├── return_headers.rdoc │ │ ├── skip_blanks.rdoc │ │ ├── skip_lines.rdoc │ │ ├── strip.rdoc │ │ └── unconverted_fields.rdoc │ └── recipes │ ├── filtering.rdoc │ ├── generating.rdoc │ ├── parsing.rdoc │ └── recipes.rdoc ├── lib ├── csv.rb └── csv │ ├── core_ext │ ├── array.rb │ └── string.rb │ ├── fields_converter.rb │ ├── input_record_separator.rb │ ├── parser.rb │ ├── row.rb │ ├── table.rb │ ├── version.rb │ └── writer.rb ├── profile ├── parse.rb └── write.rb ├── run-test.rb └── test ├── csv ├── helper.rb ├── interface │ ├── test_delegation.rb │ ├── test_read.rb │ ├── test_read_write.rb │ └── test_write.rb ├── line_endings.gz ├── parse │ ├── test_column_separator.rb │ ├── test_convert.rb │ ├── test_each.rb │ ├── test_general.rb │ ├── test_header.rb │ ├── test_inputs_scanner.rb │ ├── test_invalid.rb │ ├── test_liberal_parsing.rb │ ├── test_quote_char_nil.rb │ ├── test_read.rb │ ├── test_rewind.rb │ ├── test_row_separator.rb │ ├── test_skip_lines.rb │ ├── test_strip.rb │ └── test_unconverted_fields.rb ├── test_data_converters.rb ├── test_encodings.rb ├── 
test_features.rb ├── test_patterns.rb ├── test_row.rb ├── test_table.rb ├── test_tsv.rb └── write │ ├── test_converters.rb │ ├── test_force_quotes.rb │ ├── test_general.rb │ └── test_quote_empty.rb └── lib └── with_different_ofs.rb /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: 'github-actions' 4 | directory: '/' 5 | schedule: 6 | interval: 'weekly' 7 | -------------------------------------------------------------------------------- /.github/workflows/benchmark.yml: -------------------------------------------------------------------------------- 1 | name: Benchmark 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | benchmark: 9 | name: "Benchmark: ${{ matrix.runs-on }}" 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | runs-on: 14 | - macos-latest 15 | - ubuntu-latest 16 | - windows-latest 17 | runs-on: ${{ matrix.runs-on }} 18 | timeout-minutes: 10 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: ruby/setup-ruby@v1 22 | with: 23 | ruby-version: ruby 24 | - name: Install dependencies 25 | run: | 26 | bundle install 27 | gem install csv -v 3.0.1 28 | gem install csv -v 3.0.2 29 | - name: Benchmark 30 | run: | 31 | rake benchmark 32 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - "*" 6 | jobs: 7 | github: 8 | name: GitHub 9 | runs-on: ubuntu-latest 10 | timeout-minutes: 10 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Extract release note 14 | run: | 15 | ruby \ 16 | -e 'print("## csv "); \ 17 | puts(ARGF.read.split(/^## /)[1]. \ 18 | gsub(/ {.+?}/, ""). 
\ 19 | gsub(/\[(.+?)\]\[.+?\]/) {$1})' \ 20 | NEWS.md > release-note.md 21 | - name: Upload to release 22 | run: | 23 | title=$(head -n1 release-note.md | sed -e 's/^## //') 24 | tail -n +2 release-note.md > release-note-without-version.md 25 | gh release create ${GITHUB_REF_NAME} \ 26 | --discussion-category Announcements \ 27 | --notes-file release-note-without-version.md \ 28 | --title "${title}" 29 | env: 30 | GH_TOKEN: ${{ github.token }} 31 | 32 | rubygems: 33 | name: RubyGems 34 | runs-on: ubuntu-latest 35 | timeout-minutes: 10 36 | permissions: 37 | id-token: write 38 | environment: release 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: ruby/setup-ruby@v1 42 | with: 43 | ruby-version: ruby 44 | bundler-cache: true 45 | - uses: rubygems/configure-rubygems-credentials@v1.0.0 46 | - name: Push gems 47 | run: | 48 | bundle exec rake release:rubygem_push 49 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | ruby-versions: 9 | uses: ruby/actions/.github/workflows/ruby_versions.yml@master 10 | with: 11 | engine: cruby 12 | min_version: 2.5 13 | versions: '["debug"]' 14 | 15 | in-place: 16 | needs: ruby-versions 17 | name: "In-place test: Ruby ${{ matrix.ruby-version }}: ${{ matrix.runs-on }}" 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | ruby-version: ${{ fromJson(needs.ruby-versions.outputs.versions) }} 22 | runs-on: 23 | - macos-latest 24 | - ubuntu-latest 25 | - windows-latest 26 | exclude: 27 | - { runs-on: macos-latest, ruby-version: "2.5" } 28 | # Can't mix newer Psych and old RubyGems/Bundler 29 | - { runs-on: ubuntu-latest, ruby-version: "2.5" } 30 | - { runs-on: windows-latest, ruby-version: "3.1" } 31 | - { runs-on: windows-latest, ruby-version: debug } 32 | include: 33 | - { runs-on: windows-latest, ruby-version: mingw } 
34 | - { runs-on: windows-latest, ruby-version: mswin } 35 | runs-on: ${{ matrix.runs-on }} 36 | timeout-minutes: 10 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: ruby/setup-ruby@v1 40 | with: 41 | ruby-version: ${{ matrix.ruby-version }} 42 | bundler-cache: true 43 | - name: Test 44 | run: | 45 | bundle exec rake 46 | 47 | scanner: 48 | needs: ruby-versions 49 | name: "Scanner test: Ruby ${{ matrix.ruby-version }}: ${{ matrix.runs-on }}" 50 | strategy: 51 | fail-fast: false 52 | matrix: 53 | ruby-version: ${{ fromJson(needs.ruby-versions.outputs.versions) }} 54 | runs-on: 55 | - macos-latest 56 | - ubuntu-latest 57 | - windows-latest 58 | exclude: 59 | - { runs-on: macos-latest, ruby-version: "2.5" } 60 | # Can't mix newer Psych and old RubyGems/Bundler 61 | - { runs-on: ubuntu-latest, ruby-version: "2.5" } 62 | - { runs-on: windows-latest, ruby-version: "3.1" } 63 | - { runs-on: windows-latest, ruby-version: debug } 64 | include: 65 | - { runs-on: windows-latest, ruby-version: mingw } 66 | - { runs-on: windows-latest, ruby-version: mswin } 67 | runs-on: ${{ matrix.runs-on }} 68 | timeout-minutes: 10 69 | env: 70 | CSV_PARSER_SCANNER_TEST: "yes" 71 | steps: 72 | - uses: actions/checkout@v4 73 | - uses: ruby/setup-ruby@v1 74 | with: 75 | ruby-version: ${{ matrix.ruby-version }} 76 | bundler-cache: true 77 | - name: Test 78 | run: | 79 | bundle exec rake 80 | 81 | gem: 82 | name: "Gem test" 83 | runs-on: ubuntu-latest 84 | timeout-minutes: 10 85 | steps: 86 | - uses: actions/checkout@v4 87 | - uses: ruby/setup-ruby@v1 88 | with: 89 | ruby-version: ruby 90 | - name: Install 91 | run: | 92 | rake install 93 | gem install test-unit 94 | - name: Test 95 | run: | 96 | mkdir -p tmp 97 | cd tmp 98 | cp -a ../test/ ./ 99 | ../run-test.rb 100 | 101 | document: 102 | name: "Document test" 103 | runs-on: ubuntu-latest 104 | timeout-minutes: 10 105 | steps: 106 | - uses: actions/checkout@v4 107 | - uses: ruby/setup-ruby@v1 108 | with: 109 | ruby-version: ruby 110 | 
bundler-cache: true 111 | - name: Build document 112 | run: | 113 | bundle exec rake warning:error rdoc 114 | - uses: actions/checkout@v4 115 | if: | 116 | github.event_name == 'push' 117 | with: 118 | ref: gh-pages 119 | path: gh-pages 120 | - name: Deploy 121 | if: | 122 | github.event_name == 'push' 123 | run: | 124 | rm html/created.rid 125 | touch html/.nojekyll 126 | cd gh-pages 127 | rsync \ 128 | -a \ 129 | --delete \ 130 | --exclude "/.git/" \ 131 | ../html/ \ 132 | ./ 133 | if [ "$(git status --porcelain)" != "" ]; then 134 | git add --all 135 | git config user.name "github-actions[bot]" 136 | git config user.email "github-actions[bot]@users.noreply.github.com" 137 | git commit -m "Generate (${GITHUB_SHA})" 138 | git push 139 | fi 140 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /Gemfile.lock 3 | /coverage/ 4 | /html/ 5 | /pkg/ 6 | /spec/reports/ 7 | /tmp/ 8 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in csv.gemspec 4 | gemspec 5 | 6 | group :development do 7 | gem "bundler" 8 | gem "rake" 9 | gem "rdoc" 10 | gem "benchmark_driver" 11 | gem "test-unit", ">= 3.4.8" 12 | end 13 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2005-2016 James Edward Gray II. All rights reserved. 2 | Copyright (C) 2007-2017 Yukihiro Matsumoto. All rights reserved. 3 | Copyright (C) 2017 SHIBATA Hiroshi. All rights reserved. 4 | Copyright (C) 2017 Olivier Lacan. All rights reserved. 5 | Copyright (C) 2017 Espartaco Palma. All rights reserved. 
6 | Copyright (C) 2017 Marcus Stollsteimer. All rights reserved. 7 | Copyright (C) 2017 pavel. All rights reserved. 8 | Copyright (C) 2017-2018 Steven Daniels. All rights reserved. 9 | Copyright (C) 2018 Tomohiro Ogoke. All rights reserved. 10 | Copyright (C) 2018 Kouhei Sutou. All rights reserved. 11 | Copyright (C) 2018 Mitsutaka Mimura. All rights reserved. 12 | Copyright (C) 2018 Vladislav. All rights reserved. 13 | 14 | Redistribution and use in source and binary forms, with or without 15 | modification, are permitted provided that the following conditions 16 | are met: 17 | 1. Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | 2. Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 | SUCH DAMAGE. 
34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CSV 2 | 3 | This library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed. 4 | 5 | ## Installation 6 | 7 | Add this line to your application's Gemfile: 8 | 9 | ```ruby 10 | gem 'csv' 11 | ``` 12 | 13 | And then execute: 14 | 15 | $ bundle 16 | 17 | Or install it yourself as: 18 | 19 | $ gem install csv 20 | 21 | ## Usage 22 | 23 | ```ruby 24 | require "csv" 25 | 26 | CSV.foreach("path/to/file.csv") do |row| 27 | # use row here... 28 | end 29 | ``` 30 | 31 | ## Documentation 32 | 33 | - [API](https://ruby.github.io/csv/): all classes, methods, and constants. 34 | - [Recipes](https://ruby.github.io/csv/doc/csv/recipes/recipes_rdoc.html): specific code for specific tasks. 35 | 36 | ## Development 37 | 38 | After checking out the repo, run `ruby run-test.rb` to check if your changes can pass the test. 39 | 40 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 41 | 42 | ## Contributing 43 | 44 | Bug reports and pull requests are welcome on GitHub at https://github.com/ruby/csv. 45 | 46 | ### NOTE: About RuboCop 47 | 48 | We don't use RuboCop because we can manage our coding style by ourselves. We want to accept small fluctuations in our coding style because we use Ruby. 49 | Please do not submit issues and PRs that aim to introduce RuboCop in this repository. 50 | 51 | ## License 52 | 53 | The gem is available as open source under the terms of the [2-Clause BSD License](https://opensource.org/licenses/BSD-2-Clause). 
54 | 55 | See LICENSE.txt for details. 56 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "rbconfig" 2 | require "rdoc/task" 3 | 4 | require "bundler/gem_tasks" 5 | 6 | spec = Bundler::GemHelper.gemspec 7 | 8 | desc "Run test" 9 | task :test do 10 | ruby("run-test.rb") 11 | end 12 | 13 | task :default => :test 14 | 15 | namespace :warning do 16 | desc "Treat warning as error" 17 | task :error do 18 | def Warning.warn(*message) 19 | super 20 | raise "Treat warning as error:\n" + message.join("\n") 21 | end 22 | end 23 | end 24 | 25 | RDoc::Task.new do |rdoc| 26 | rdoc.options = spec.rdoc_options 27 | rdoc.rdoc_files.include(*spec.source_paths) 28 | rdoc.rdoc_files.include(*spec.extra_rdoc_files) 29 | end 30 | 31 | benchmark_tasks = [] 32 | namespace :benchmark do 33 | Dir.glob("benchmark/*.yaml").sort.each do |yaml| 34 | name = File.basename(yaml, ".*") 35 | env = { 36 | "RUBYLIB" => nil, 37 | "BUNDLER_ORIG_RUBYLIB" => nil, 38 | } 39 | command_line = [ 40 | RbConfig.ruby, "-v", "-S", "benchmark-driver", File.expand_path(yaml), 41 | ] 42 | 43 | desc "Run #{name} benchmark" 44 | task name do 45 | puts("```") 46 | sh(env, *command_line) 47 | puts("```") 48 | end 49 | benchmark_tasks << "benchmark:#{name}" 50 | 51 | case name 52 | when /\Aparse/, "shift" 53 | namespace name do 54 | desc "Run #{name} benchmark: small" 55 | task :small do 56 | puts("```") 57 | sh(env.merge("N_COLUMNS" => "10"), 58 | *command_line) 59 | puts("```") 60 | end 61 | benchmark_tasks << "benchmark:#{name}:small" 62 | end 63 | end 64 | end 65 | end 66 | 67 | desc "Run all benchmarks" 68 | task :benchmark => benchmark_tasks 69 | 70 | release_task = Rake.application["release"] 71 | # We use Trusted Publishing. 
72 | release_task.prerequisites.delete("build") 73 | release_task.prerequisites.delete("release:rubygem_push") 74 | release_task_comment = release_task.comment 75 | if release_task_comment 76 | release_task.clear_comments 77 | release_task.comment = release_task_comment.gsub(/ and build.*$/, "") 78 | end 79 | -------------------------------------------------------------------------------- /benchmark/convert_nil.yaml: -------------------------------------------------------------------------------- 1 | loop_count: 100 2 | contexts: 3 | - gems: 4 | csv: 3.0.1 5 | - gems: 6 | csv: 3.0.2 7 | - gems: 8 | csv: 3.3.0 9 | - name: "master" 10 | prelude: | 11 | $LOAD_PATH.unshift(File.expand_path("lib")) 12 | require "csv" 13 | prelude: |- 14 | csv_text = <(s) {s || ""} 20 | benchmark: 21 | 'not convert': CSV.parse(csv_text) 22 | converter: |- 23 | CSV.parse(csv_text, converters: convert_nil) 24 | option: |- 25 | CSV.parse(csv_text, nil_value: "") 26 | -------------------------------------------------------------------------------- /benchmark/parse.yaml: -------------------------------------------------------------------------------- 1 | loop_count: 100 2 | contexts: 3 | - gems: 4 | csv: 3.0.1 5 | - gems: 6 | csv: 3.0.2 7 | - gems: 8 | csv: 3.3.0 9 | - name: "master" 10 | prelude: | 11 | $LOAD_PATH.unshift(File.expand_path("lib")) 12 | require "csv" 13 | prelude: |- 14 | n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) 15 | n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) 16 | alphas = ["AAAAA"] * n_columns 17 | unquoted = (alphas.join(",") + "\r\n") * n_rows 18 | quoted = (alphas.map { |s| %("#{s}") }.join(",") + "\r\n") * n_rows 19 | mixed = (alphas.map.with_index { |s, i| i.odd? ? 
s : %("#{s}") }.join(",") + "\r\n") * n_rows 20 | inc_col_sep = (alphas.map { |s| %(",#{s}") }.join(",") + "\r\n") * n_rows 21 | inc_row_sep = (alphas.map { |s| %("#{s}\r\n") }.join(",") + "\r\n") * n_rows 22 | hiraganas = ["あああああ"] * n_columns 23 | enc_utf8 = (hiraganas.join(",") + "\r\n") * n_rows 24 | enc_sjis = enc_utf8.encode("Windows-31J") 25 | benchmark: 26 | unquoted: CSV.parse(unquoted) 27 | quoted: CSV.parse(quoted) 28 | mixed: CSV.parse(mixed) 29 | include_col_sep: CSV.parse(inc_col_sep) 30 | include_row_sep: CSV.parse(inc_row_sep) 31 | encode_utf-8: CSV.parse(enc_utf8) 32 | encode_sjis: CSV.parse(enc_sjis) 33 | -------------------------------------------------------------------------------- /benchmark/parse_liberal_parsing.yaml: -------------------------------------------------------------------------------- 1 | loop_count: 100 2 | contexts: 3 | - gems: 4 | csv: 3.0.2 5 | - gems: 6 | csv: 3.3.0 7 | - name: "master" 8 | prelude: | 9 | $LOAD_PATH.unshift(File.expand_path("lib")) 10 | require "csv" 11 | prelude: |- 12 | n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) 13 | n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) 14 | alphas = ['\"\"a\"\"'] * n_columns 15 | unquoted = (alphas.join(",") + "\r\n") * n_rows 16 | quoted = (alphas.map { |s| %("#{s}") }.join(",") + "\r\n") * n_rows 17 | inc_col_sep = (alphas.map { |s| %(",#{s}") }.join(",") + "\r\n") * n_rows 18 | inc_row_sep = (alphas.map { |s| %("#{s}") }.join(",") + "\r\n") * n_rows 19 | hiraganas = ["あああああ"] * n_columns 20 | enc_utf8 = (hiraganas.join(",") + "\r\n") * n_rows 21 | enc_sjis = enc_utf8.encode("Windows-31J") 22 | benchmark: 23 | unquoted: |- 24 | CSV.parse(unquoted, liberal_parsing: true) 25 | unquoted_backslash_quote: |- 26 | CSV.parse(unquoted, liberal_parsing: { 27 | backslash_quote: true, 28 | }) 29 | quoted: |- 30 | CSV.parse(quoted, liberal_parsing: true) 31 | quoted_double_quote_outside_quote: |- 32 | CSV.parse(quoted, liberal_parsing: { 33 | double_quote_outside_quote: true 
34 | }) 35 | quoted_backslash_quote: |- 36 | CSV.parse(quoted, liberal_parsing: { 37 | double_quote_outside_quote: true, 38 | backslash_quote: true, 39 | }) 40 | include_col_sep: |- 41 | CSV.parse(inc_col_sep, liberal_parsing: true) 42 | include_row_sep: |- 43 | CSV.parse(inc_row_sep, liberal_parsing: true) 44 | encode_utf-8: |- 45 | CSV.parse(enc_utf8, liberal_parsing: true) 46 | encode_sjis: |- 47 | CSV.parse(enc_sjis, liberal_parsing: true) 48 | -------------------------------------------------------------------------------- /benchmark/parse_quote_char_nil.yaml: -------------------------------------------------------------------------------- 1 | loop_count: 100 2 | contexts: 3 | - gems: 4 | csv: 3.3.0 5 | - name: "master" 6 | prelude: | 7 | $LOAD_PATH.unshift(File.expand_path("lib")) 8 | require "csv" 9 | prelude: |- 10 | n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) 11 | n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) 12 | alphas = ["AAAAA"] * n_columns 13 | unquoted = (alphas.join(",") + "\r\n") * n_rows 14 | col_sep_space = (alphas.join(" ") + "\r\n") * n_rows 15 | 16 | benchmark: 17 | without_quote_char: |- 18 | CSV.parse(unquoted) 19 | quote_char_nil: |- 20 | CSV.parse(unquoted, quote_char: nil) 21 | col_sep_space: |- 22 | CSV.parse(col_sep_space, quote_char: nil, col_sep: " ") 23 | -------------------------------------------------------------------------------- /benchmark/parse_strip.yaml: -------------------------------------------------------------------------------- 1 | loop_count: 100 2 | contexts: 3 | - gems: 4 | csv: 3.3.0 5 | - name: "master" 6 | prelude: | 7 | $LOAD_PATH.unshift(File.expand_path("lib")) 8 | require "csv" 9 | prelude: |- 10 | n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) 11 | n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) 12 | alphas = ["AAAAA"] * n_columns 13 | quoted = (alphas.map { |s| %("#{s}") }.join(",") + "\r\n") * n_rows 14 | 15 | benchmark: 16 | default: |- 17 | CSV.parse(quoted) 18 | no_quote_strip: |- 
19 | CSV.parse(quoted, quote_char: nil, strip: '"') 20 | -------------------------------------------------------------------------------- /benchmark/read.yaml: -------------------------------------------------------------------------------- 1 | loop_count: 100 2 | contexts: 3 | - gems: 4 | csv: 3.0.1 5 | - gems: 6 | csv: 3.0.2 7 | - gems: 8 | csv: 3.3.0 9 | - name: "master" 10 | prelude: | 11 | $LOAD_PATH.unshift(File.expand_path("lib")) 12 | require "csv" 13 | prelude: |- 14 | CSV.open("/tmp/file.csv", "w") do |csv| 15 | csv << ["player", "gameA", "gameB"] 16 | 1000.times do 17 | csv << ['"Alice"', "84.0", "79.5"] 18 | csv << ['"Bob"', "20.0", "56.5"] 19 | end 20 | end 21 | benchmark: 22 | "CSV.foreach": |- 23 | CSV.foreach("/tmp/file.csv") do |row| 24 | end 25 | "CSV#shift": |- 26 | CSV.open("/tmp/file.csv") do |csv| 27 | while _line = csv.shift 28 | end 29 | end 30 | "CSV.read": CSV.read("/tmp/file.csv") 31 | "CSV.table": CSV.table("/tmp/file.csv") 32 | -------------------------------------------------------------------------------- /benchmark/shift.yaml: -------------------------------------------------------------------------------- 1 | loop_count: 100 2 | contexts: 3 | - gems: 4 | csv: 3.0.1 5 | - gems: 6 | csv: 3.0.2 7 | - gems: 8 | csv: 3.3.0 9 | - name: "master" 10 | prelude: | 11 | $LOAD_PATH.unshift(File.expand_path("lib")) 12 | require "csv" 13 | prelude: |- 14 | n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) 15 | n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) 16 | alphas = ["AAAAA"] * n_columns 17 | data = (alphas.join(",") + "\r\n") * n_rows 18 | benchmark: 19 | shift: |- 20 | csv = CSV.new(data) 21 | while csv.shift do 22 | end 23 | -------------------------------------------------------------------------------- /benchmark/write.yaml: -------------------------------------------------------------------------------- 1 | loop_count: 100 2 | contexts: 3 | - gems: 4 | csv: 3.0.1 5 | - gems: 6 | csv: 3.0.2 7 | - gems: 8 | csv: 3.3.0 9 | - name: 
"master" 10 | prelude: | 11 | $LOAD_PATH.unshift(File.expand_path("lib")) 12 | require "csv" 13 | prelude: |- 14 | n_columns = Integer(ENV.fetch("N_COLUMNS", "5"), 10) 15 | n_rows = Integer(ENV.fetch("N_ROWS", "100"), 10) 16 | fields = ["AAAAA"] * n_columns 17 | headers = n_columns.times.collect do |i| 18 | "header#{i}" 19 | end 20 | row = CSV::Row.new(headers, fields) 21 | raw_row = {} 22 | n_columns.times do |i| 23 | raw_row[headers[i]] = fields[i] 24 | end 25 | benchmark: 26 | "generate_line: fields": |- 27 | n_rows.times do 28 | CSV.generate_line(fields) 29 | end 30 | "generate_line: Row": |- 31 | n_rows.times do 32 | CSV.generate_line(row) 33 | end 34 | "generate_line: Hash": |- 35 | n_rows.times do 36 | CSV.generate_line(raw_row, headers: headers) 37 | end 38 | "<< fields": |- 39 | output = StringIO.new 40 | csv = CSV.new(output) 41 | n_rows.times do 42 | csv << fields 43 | end 44 | "<< Row": |- 45 | output = StringIO.new 46 | csv = CSV.new(output) 47 | n_rows.times do 48 | csv << row 49 | end 50 | "<< Hash": |- 51 | output = StringIO.new 52 | csv = CSV.new(output, headers: headers) 53 | n_rows.times do 54 | csv << raw_row 55 | end 56 | "<< fields: write headers": |- 57 | output = StringIO.new 58 | csv = CSV.new(output, headers: headers, write_headers: true) 59 | n_rows.times do 60 | csv << fields 61 | end 62 | "<< Row: write headers": |- 63 | output = StringIO.new 64 | csv = CSV.new(output, headers: headers, write_headers: true) 65 | n_rows.times do 66 | csv << row 67 | end 68 | "<< Hash: write headers": |- 69 | output = StringIO.new 70 | csv = CSV.new(output, headers: headers, write_headers: true) 71 | n_rows.times do 72 | csv << raw_row 73 | end 74 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "bundler/setup" 4 | require "csv" 5 | 6 | # You can add fixtures and/or initialization code 
here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 8 | 9 | # (If you use this, don't forget to add pry to your Gemfile!) 10 | # require "pry" 11 | # Pry.start 12 | 13 | require "irb" 14 | IRB.start(__FILE__) 15 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /csv.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | begin 4 | require_relative "lib/csv/version" 5 | rescue LoadError 6 | # for Ruby core repository 7 | require_relative "version" 8 | end 9 | 10 | Gem::Specification.new do |spec| 11 | spec.name = "csv" 12 | spec.version = CSV::VERSION 13 | spec.authors = ["James Edward Gray II", "Kouhei Sutou"] 14 | spec.email = [nil, "kou@cozmixng.org"] 15 | 16 | spec.summary = "CSV Reading and Writing" 17 | spec.description = "The CSV library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed." 
18 | spec.homepage = "https://github.com/ruby/csv" 19 | spec.licenses = ["Ruby", "BSD-2-Clause"] 20 | 21 | lib_path = "lib" 22 | spec.require_paths = [lib_path] 23 | files = [] 24 | lib_dir = File.join(__dir__, lib_path) 25 | if File.exist?(lib_dir) 26 | Dir.chdir(lib_dir) do 27 | Dir.glob("**/*.rb").each do |file| 28 | files << "lib/#{file}" 29 | end 30 | end 31 | end 32 | doc_dir = File.join(__dir__, "doc") 33 | if File.exist?(doc_dir) 34 | Dir.chdir(doc_dir) do 35 | Dir.glob("**/*.rdoc").each do |rdoc_file| 36 | files << "doc/#{rdoc_file}" 37 | end 38 | end 39 | end 40 | spec.files = files 41 | spec.rdoc_options.concat(["--main", "README.md"]) 42 | rdoc_files = [ 43 | "LICENSE.txt", 44 | "NEWS.md", 45 | "README.md", 46 | ] 47 | recipes_dir = File.join(doc_dir, "csv", "recipes") 48 | if File.exist?(recipes_dir) 49 | Dir.chdir(recipes_dir) do 50 | Dir.glob("**/*.rdoc").each do |recipe_file| 51 | rdoc_files << "doc/csv/recipes/#{recipe_file}" 52 | end 53 | end 54 | end 55 | spec.extra_rdoc_files = rdoc_files 56 | 57 | spec.required_ruby_version = ">= 2.5.0" 58 | 59 | spec.metadata["changelog_uri"] = "#{spec.homepage}/releases/tag/v#{spec.version}" 60 | end 61 | -------------------------------------------------------------------------------- /doc/csv/arguments/io.rdoc: -------------------------------------------------------------------------------- 1 | * Argument +io+ should be an IO object that is: 2 | * Open for reading; on return, the IO object will be closed. 3 | * Positioned at the beginning. 4 | To position at the end, for appending, use method CSV.generate. 5 | For any other positioning, pass a preset \StringIO object instead. 6 | -------------------------------------------------------------------------------- /doc/csv/options/common/col_sep.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +col_sep+ 2 | 3 | Specifies the \String column separator to be used 4 | for both parsing and generating. 
5 | The \String will be transcoded into the data's \Encoding before use. 6 | 7 | Default value: 8 | CSV::DEFAULT_OPTIONS.fetch(:col_sep) # => "," (comma) 9 | 10 | Using the default (comma): 11 | str = CSV.generate do |csv| 12 | csv << [:foo, 0] 13 | csv << [:bar, 1] 14 | csv << [:baz, 2] 15 | end 16 | str # => "foo,0\nbar,1\nbaz,2\n" 17 | ary = CSV.parse(str) 18 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] 19 | 20 | Using +:+ (colon): 21 | col_sep = ':' 22 | str = CSV.generate(col_sep: col_sep) do |csv| 23 | csv << [:foo, 0] 24 | csv << [:bar, 1] 25 | csv << [:baz, 2] 26 | end 27 | str # => "foo:0\nbar:1\nbaz:2\n" 28 | ary = CSV.parse(str, col_sep: col_sep) 29 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] 30 | 31 | Using +::+ (two colons): 32 | col_sep = '::' 33 | str = CSV.generate(col_sep: col_sep) do |csv| 34 | csv << [:foo, 0] 35 | csv << [:bar, 1] 36 | csv << [:baz, 2] 37 | end 38 | str # => "foo::0\nbar::1\nbaz::2\n" 39 | ary = CSV.parse(str, col_sep: col_sep) 40 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] 41 | 42 | Using '' (empty string): 43 | col_sep = '' 44 | str = CSV.generate(col_sep: col_sep) do |csv| 45 | csv << [:foo, 0] 46 | csv << [:bar, 1] 47 | csv << [:baz, 2] 48 | end 49 | str # => "foo0\nbar1\nbaz2\n" 50 | 51 | --- 52 | 53 | Raises an exception if parsing with the empty \String: 54 | col_sep = '' 55 | # Raises ArgumentError (:col_sep must be 1 or more characters: "") 56 | CSV.parse("foo0\nbar1\nbaz2\n", col_sep: col_sep) 57 | 58 | -------------------------------------------------------------------------------- /doc/csv/options/common/quote_char.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +quote_char+ 2 | 3 | Specifies the character (\String of length 1) used to quote fields 4 | in both parsing and generating. 5 | This String will be transcoded into the data's \Encoding before use. 
6 | 7 | Default value: 8 | CSV::DEFAULT_OPTIONS.fetch(:quote_char) # => "\"" (double quote) 9 | 10 | This is useful for an application that incorrectly uses ' (single-quote) 11 | to quote fields, instead of the correct " (double-quote). 12 | 13 | Using the default (double quote): 14 | str = CSV.generate do |csv| 15 | csv << ['foo', 0] 16 | csv << ["'bar'", 1] 17 | csv << ['"baz"', 2] 18 | end 19 | str # => "foo,0\n'bar',1\n\"\"\"baz\"\"\",2\n" 20 | ary = CSV.parse(str) 21 | ary # => [["foo", "0"], ["'bar'", "1"], ["\"baz\"", "2"]] 22 | 23 | Using ' (single-quote): 24 | quote_char = "'" 25 | str = CSV.generate(quote_char: quote_char) do |csv| 26 | csv << ['foo', 0] 27 | csv << ["'bar'", 1] 28 | csv << ['"baz"', 2] 29 | end 30 | str # => "foo,0\n'''bar''',1\n\"baz\",2\n" 31 | ary = CSV.parse(str, quote_char: quote_char) 32 | ary # => [["foo", "0"], ["'bar'", "1"], ["\"baz\"", "2"]] 33 | 34 | --- 35 | 36 | Raises an exception if the \String length is greater than 1: 37 | # Raises ArgumentError (:quote_char has to be nil or a single character String) 38 | CSV.new('', quote_char: 'xx') 39 | 40 | Raises an exception if the value is not a \String: 41 | # Raises ArgumentError (:quote_char has to be nil or a single character String) 42 | CSV.new('', quote_char: :foo) 43 | -------------------------------------------------------------------------------- /doc/csv/options/common/row_sep.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +row_sep+ 2 | 3 | Specifies the row separator, a \String or the \Symbol :auto (see below), 4 | to be used for both parsing and generating. 5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:row_sep) # => :auto 8 | 9 | --- 10 | 11 | When +row_sep+ is a \String, that \String becomes the row separator. 12 | The String will be transcoded into the data's Encoding before use. 
13 | 14 | Using "\n": 15 | row_sep = "\n" 16 | str = CSV.generate(row_sep: row_sep) do |csv| 17 | csv << [:foo, 0] 18 | csv << [:bar, 1] 19 | csv << [:baz, 2] 20 | end 21 | str # => "foo,0\nbar,1\nbaz,2\n" 22 | ary = CSV.parse(str) 23 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] 24 | 25 | Using | (pipe): 26 | row_sep = '|' 27 | str = CSV.generate(row_sep: row_sep) do |csv| 28 | csv << [:foo, 0] 29 | csv << [:bar, 1] 30 | csv << [:baz, 2] 31 | end 32 | str # => "foo,0|bar,1|baz,2|" 33 | ary = CSV.parse(str, row_sep: row_sep) 34 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] 35 | 36 | Using -- (two hyphens): 37 | row_sep = '--' 38 | str = CSV.generate(row_sep: row_sep) do |csv| 39 | csv << [:foo, 0] 40 | csv << [:bar, 1] 41 | csv << [:baz, 2] 42 | end 43 | str # => "foo,0--bar,1--baz,2--" 44 | ary = CSV.parse(str, row_sep: row_sep) 45 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] 46 | 47 | Using '' (empty string): 48 | row_sep = '' 49 | str = CSV.generate(row_sep: row_sep) do |csv| 50 | csv << [:foo, 0] 51 | csv << [:bar, 1] 52 | csv << [:baz, 2] 53 | end 54 | str # => "foo,0bar,1baz,2" 55 | ary = CSV.parse(str, row_sep: row_sep) 56 | ary # => [["foo", "0bar", "1baz", "2"]] 57 | 58 | --- 59 | 60 | When +row_sep+ is the \Symbol +:auto+ (the default), 61 | generating uses "\n" as the row separator: 62 | str = CSV.generate do |csv| 63 | csv << [:foo, 0] 64 | csv << [:bar, 1] 65 | csv << [:baz, 2] 66 | end 67 | str # => "foo,0\nbar,1\nbaz,2\n" 68 | 69 | Parsing, on the other hand, invokes auto-discovery of the row separator. 70 | 71 | Auto-discovery reads ahead in the data looking for the next \r\n, +\n+, or +\r+ sequence. 72 | The sequence will be selected even if it occurs in a quoted field, 73 | assuming that you would have the same line endings there. 
74 | 75 | Example: 76 | str = CSV.generate do |csv| 77 | csv << [:foo, 0] 78 | csv << [:bar, 1] 79 | csv << [:baz, 2] 80 | end 81 | str # => "foo,0\nbar,1\nbaz,2\n" 82 | ary = CSV.parse(str) 83 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] 84 | 85 | The default $INPUT_RECORD_SEPARATOR ($/) is used 86 | if any of the following is true: 87 | * None of those sequences is found. 88 | * Data is +ARGF+, +STDIN+, +STDOUT+, or +STDERR+. 89 | * The stream is only available for output. 90 | 91 | Obviously, discovery takes a little time. Set manually if speed is important. Also note that IO objects should be opened in binary mode on Windows if this feature will be used as the line-ending translation can cause problems with resetting the document position to where it was before the read ahead. 92 | -------------------------------------------------------------------------------- /doc/csv/options/generating/force_quotes.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +force_quotes+ 2 | 3 | Specifies the boolean that determines whether each output field is to be double-quoted. 4 | 5 | Default value: 6 | CSV::DEFAULT_OPTIONS.fetch(:force_quotes) # => false 7 | 8 | For examples in this section: 9 | ary = ['foo', 0, nil] 10 | 11 | Using the default, +false+: 12 | str = CSV.generate_line(ary) 13 | str # => "foo,0,\n" 14 | 15 | Using +true+: 16 | str = CSV.generate_line(ary, force_quotes: true) 17 | str # => "\"foo\",\"0\",\"\"\n" 18 | -------------------------------------------------------------------------------- /doc/csv/options/generating/quote_empty.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +quote_empty+ 2 | 3 | Specifies the boolean that determines whether an empty value is to be double-quoted. 
4 | 5 | Default value: 6 | CSV::DEFAULT_OPTIONS.fetch(:quote_empty) # => true 7 | 8 | With the default +true+: 9 | CSV.generate_line(['"', ""]) # => "\"\"\"\",\"\"\n" 10 | 11 | With +false+: 12 | CSV.generate_line(['"', ""], quote_empty: false) # => "\"\"\"\",\n" 13 | -------------------------------------------------------------------------------- /doc/csv/options/generating/write_converters.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +write_converters+ 2 | 3 | Specifies converters to be used in generating fields. 4 | See {Write Converters}[#class-CSV-label-Write+Converters] 5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:write_converters) # => nil 8 | 9 | With no write converter: 10 | str = CSV.generate_line(["\na\n", "\tb\t", " c "]) 11 | str # => "\"\na\n\",\tb\t, c \n" 12 | 13 | With a write converter: 14 | strip_converter = proc {|field| field.strip } 15 | str = CSV.generate_line(["\na\n", "\tb\t", " c "], write_converters: strip_converter) 16 | str # => "a,b,c\n" 17 | 18 | With two write converters (called in order): 19 | upcase_converter = proc {|field| field.upcase } 20 | downcase_converter = proc {|field| field.downcase } 21 | write_converters = [upcase_converter, downcase_converter] 22 | str = CSV.generate_line(['a', 'b', 'c'], write_converters: write_converters) 23 | str # => "a,b,c\n" 24 | 25 | See also {Write Converters}[#class-CSV-label-Write+Converters] 26 | -------------------------------------------------------------------------------- /doc/csv/options/generating/write_empty_value.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +write_empty_value+ 2 | 3 | Specifies the object that is to be substituted for each field 4 | that has an empty \String. 
5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:write_empty_value) # => "" 8 | 9 | Without the option: 10 | str = CSV.generate_line(['a', '', 'c', '']) 11 | str # => "a,\"\",c,\"\"\n" 12 | 13 | With the option: 14 | str = CSV.generate_line(['a', '', 'c', ''], write_empty_value: "x") 15 | str # => "a,x,c,x\n" 16 | -------------------------------------------------------------------------------- /doc/csv/options/generating/write_headers.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +write_headers+ 2 | 3 | Specifies the boolean that determines whether a header row is included in the output; 4 | ignored if there are no headers. 5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:write_headers) # => nil 8 | 9 | Without +write_headers+: 10 | file_path = 't.csv' 11 | CSV.open(file_path,'w', 12 | :headers => ['Name','Value'] 13 | ) do |csv| 14 | csv << ['foo', '0'] 15 | end 16 | CSV.open(file_path) do |csv| 17 | csv.shift 18 | end # => ["foo", "0"] 19 | 20 | With +write_headers+: 21 | CSV.open(file_path,'w', 22 | :write_headers => true, 23 | :headers => ['Name','Value'] 24 | ) do |csv| 25 | csv << ['foo', '0'] 26 | end 27 | CSV.open(file_path) do |csv| 28 | csv.shift 29 | end # => ["Name", "Value"] 30 | -------------------------------------------------------------------------------- /doc/csv/options/generating/write_nil_value.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +write_nil_value+ 2 | 3 | Specifies the object that is to be substituted for each +nil+-valued field.
4 | 5 | Default value: 6 | CSV::DEFAULT_OPTIONS.fetch(:write_nil_value) # => nil 7 | 8 | Without the option: 9 | str = CSV.generate_line(['a', nil, 'c', nil]) 10 | str # => "a,,c,\n" 11 | 12 | With the option: 13 | str = CSV.generate_line(['a', nil, 'c', nil], write_nil_value: "x") 14 | str # => "a,x,c,x\n" 15 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/converters.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +converters+ 2 | 3 | Specifies converters to be used in parsing fields. 4 | See {Field Converters}[#class-CSV-label-Field+Converters] 5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:converters) # => nil 8 | 9 | The value may be a field converter name 10 | (see {Stored Converters}[#class-CSV-label-Stored+Converters]): 11 | str = '1,2,3' 12 | # Without a converter 13 | array = CSV.parse_line(str) 14 | array # => ["1", "2", "3"] 15 | # With built-in converter :integer 16 | array = CSV.parse_line(str, converters: :integer) 17 | array # => [1, 2, 3] 18 | 19 | The value may be a converter list 20 | (see {Converter Lists}[#class-CSV-label-Converter+Lists]): 21 | str = '1,3.14159' 22 | # Without converters 23 | array = CSV.parse_line(str) 24 | array # => ["1", "3.14159"] 25 | # With built-in converters 26 | array = CSV.parse_line(str, converters: [:integer, :float]) 27 | array # => [1, 3.14159] 28 | 29 | The value may be a \Proc custom converter 30 | (see {Custom Field Converters}[#class-CSV-label-Custom+Field+Converters]): 31 | str = ' foo , bar , baz ' 32 | # Without a converter 33 | array = CSV.parse_line(str) 34 | array # => [" foo ", " bar ", " baz "] 35 | # With a custom converter 36 | array = CSV.parse_line(str, converters: proc {|field| field.strip }) 37 | array # => ["foo", "bar", "baz"] 38 | 39 | See also {Custom Field Converters}[#class-CSV-label-Custom+Field+Converters] 40 | 41 | --- 42 | 43 | Raises an exception if the
converter is not a converter name or a \Proc: 44 | str = 'foo,0' 45 | # Raises NoMethodError (undefined method `arity' for nil:NilClass) 46 | CSV.parse(str, converters: :foo) 47 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/empty_value.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +empty_value+ 2 | 3 | Specifies the object that is to be substituted 4 | for each field that has an empty \String. 5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:empty_value) # => "" (empty string) 8 | 9 | With the default, "": 10 | CSV.parse_line('a,"",b,"",c') # => ["a", "", "b", "", "c"] 11 | 12 | With a different object: 13 | CSV.parse_line('a,"",b,"",c', empty_value: 'x') # => ["a", "x", "b", "x", "c"] 14 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/field_size_limit.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +field_size_limit+ 2 | 3 | Specifies the \Integer field size limit. 4 | 5 | Default value: 6 | CSV::DEFAULT_OPTIONS.fetch(:field_size_limit) # => nil 7 | 8 | This is a maximum size CSV will read ahead looking for the closing quote for a field. 9 | (In truth, it reads to the first line ending beyond this size.) 10 | If a quote cannot be found within the limit CSV will raise a MalformedCSVError, 11 | assuming the data is faulty. 12 | You can use this limit to prevent what are effectively DoS attacks on the parser. 13 | However, this limit can cause a legitimate parse to fail; 14 | therefore the default value is +nil+ (no limit). 
15 | 16 | For the examples in this section: 17 | str = <<~EOT 18 | "a","b" 19 | " 20 | 2345 21 | ","" 22 | EOT 23 | str # => "\"a\",\"b\"\n\"\n2345\n\",\"\"\n" 24 | 25 | Using the default +nil+: 26 | ary = CSV.parse(str) 27 | ary # => [["a", "b"], ["\n2345\n", ""]] 28 | 29 | Using 50: 30 | field_size_limit = 50 31 | ary = CSV.parse(str, field_size_limit: field_size_limit) 32 | ary # => [["a", "b"], ["\n2345\n", ""]] 33 | 34 | --- 35 | 36 | Raises an exception if a field is too long: 37 | big_str = "123456789\n" * 1024 38 | # Raises CSV::MalformedCSVError (Field size exceeded in line 1.) 39 | CSV.parse('valid,fields,"' + big_str + '"', field_size_limit: 2048) 40 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/header_converters.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +header_converters+ 2 | 3 | Specifies converters to be used in parsing headers. 4 | See {Header Converters}[#class-CSV-label-Header+Converters] 5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:header_converters) # => nil 8 | 9 | Identical in functionality to option {converters}[#class-CSV-label-Option+converters] 10 | except that: 11 | - The converters apply only to the header row. 12 | - The built-in header converters are +:downcase+ and +:symbol+. 
13 | 14 | This section assumes prior execution of: 15 | str = <<-EOT 16 | Name,Value 17 | foo,0 18 | bar,1 19 | baz,2 20 | EOT 21 | # With no header converter 22 | table = CSV.parse(str, headers: true) 23 | table.headers # => ["Name", "Value"] 24 | 25 | The value may be a header converter name 26 | (see {Stored Converters}[#class-CSV-label-Stored+Converters]): 27 | table = CSV.parse(str, headers: true, header_converters: :downcase) 28 | table.headers # => ["name", "value"] 29 | 30 | The value may be a converter list 31 | (see {Converter Lists}[#class-CSV-label-Converter+Lists]): 32 | header_converters = [:downcase, :symbol] 33 | table = CSV.parse(str, headers: true, header_converters: header_converters) 34 | table.headers # => [:name, :value] 35 | 36 | The value may be a \Proc custom converter 37 | (see {Custom Header Converters}[#class-CSV-label-Custom+Header+Converters]): 38 | upcase_converter = proc {|field| field.upcase } 39 | table = CSV.parse(str, headers: true, header_converters: upcase_converter) 40 | table.headers # => ["NAME", "VALUE"] 41 | 42 | See also {Custom Header Converters}[#class-CSV-label-Custom+Header+Converters] 43 | 44 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/headers.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +headers+ 2 | 3 | Specifies a boolean, \Symbol, \Array, or \String to be used 4 | to define column headers. 
5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:headers) # => false 8 | 9 | --- 10 | 11 | Without +headers+: 12 | str = <<-EOT 13 | Name,Count 14 | foo,0 15 | bar,1 16 | bax,2 17 | EOT 18 | csv = CSV.new(str) 19 | csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> 20 | csv.headers # => nil 21 | csv.shift # => ["Name", "Count"] 22 | 23 | --- 24 | 25 | If set to +true+ or the \Symbol +:first_row+, 26 | the first row of the data is treated as a row of headers: 27 | str = <<-EOT 28 | Name,Count 29 | foo,0 30 | bar,1 31 | bax,2 32 | EOT 33 | csv = CSV.new(str, headers: true) 34 | csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"" headers:true> 35 | csv.headers # => ["Name", "Count"] 36 | csv.shift # => #<CSV::Row "Name":"foo" "Count":"0"> 37 | 38 | --- 39 | 40 | If set to an \Array, the \Array elements are treated as headers: 41 | str = <<-EOT 42 | foo,0 43 | bar,1 44 | bax,2 45 | EOT 46 | csv = CSV.new(str, headers: ['Name', 'Count']) 47 | csv 48 | csv.headers # => ["Name", "Count"] 49 | csv.shift # => #<CSV::Row "Name":"foo" "Count":"0"> 50 | 51 | --- 52 | 53 | If set to a \String +str+, method CSV::parse_line(str, options) is called 54 | with the current +options+, and the returned \Array is treated as headers: 55 | str = <<-EOT 56 | foo,0 57 | bar,1 58 | bax,2 59 | EOT 60 | csv = CSV.new(str, headers: 'Name,Count') 61 | csv 62 | csv.headers # => ["Name", "Count"] 63 | csv.shift # => #<CSV::Row "Name":"foo" "Count":"0"> 64 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/liberal_parsing.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +liberal_parsing+ 2 | 3 | Specifies the boolean or hash value that determines whether 4 | CSV will attempt to parse input not conformant with RFC 4180, 5 | such as double quotes in unquoted fields. 6 | 7 | Default value: 8 | CSV::DEFAULT_OPTIONS.fetch(:liberal_parsing) # => false 9 | 10 | For the next two examples: 11 | str = 'is,this "three, or four",fields' 12 | 13 | Without +liberal_parsing+: 14 | # Raises CSV::MalformedCSVError (Illegal quoting in line 1.)
15 | CSV.parse_line(str) 16 | 17 | With +liberal_parsing+: 18 | ary = CSV.parse_line(str, liberal_parsing: true) 19 | ary # => ["is", "this \"three", " or four\"", "fields"] 20 | 21 | Use the +backslash_quote+ sub-option to parse values that use 22 | a backslash to escape a double-quote character. This 23 | causes the parser to treat \" as if it were 24 | "". 25 | 26 | For the next two examples: 27 | str = 'Show,"Harry \"Handcuff\" Houdini, the one and only","Tampa Theater"' 28 | 29 | With +liberal_parsing+, but without the +backslash_quote+ sub-option: 30 | # Incorrect interpretation of backslash; incorrectly interprets the quoted comma as a field separator. 31 | ary = CSV.parse_line(str, liberal_parsing: true) 32 | ary # => ["Show", "\"Harry \\\"Handcuff\\\" Houdini", " the one and only\"", "Tampa Theater"] 33 | puts ary[1] # => "Harry \"Handcuff\" Houdini 34 | 35 | With +liberal_parsing+ and its +backslash_quote+ sub-option: 36 | ary = CSV.parse_line(str, liberal_parsing: { backslash_quote: true }) 37 | ary # => ["Show", "Harry \"Handcuff\" Houdini, the one and only", "Tampa Theater"] 38 | puts ary[1] # => Harry "Handcuff" Houdini, the one and only 39 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/nil_value.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +nil_value+ 2 | 3 | Specifies the object that is to be substituted for each null (no-text) field. 
4 | 5 | Default value: 6 | CSV::DEFAULT_OPTIONS.fetch(:nil_value) # => nil 7 | 8 | With the default, +nil+: 9 | CSV.parse_line('a,,b,,c') # => ["a", nil, "b", nil, "c"] 10 | 11 | With a different object: 12 | CSV.parse_line('a,,b,,c', nil_value: 0) # => ["a", 0, "b", 0, "c"] 13 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/return_headers.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +return_headers+ 2 | 3 | Specifies the boolean that determines whether method #shift 4 | returns or ignores the header row. 5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:return_headers) # => false 8 | 9 | Examples: 10 | str = <<-EOT 11 | Name,Count 12 | foo,0 13 | bar,1 14 | bax,2 15 | EOT 16 | # Without return_headers, the first row returned is data. 17 | csv = CSV.new(str, headers: true) 18 | csv.shift # => #<CSV::Row "Name":"foo" "Count":"0"> 19 | # With return_headers, the first row returned is the header row. 20 | csv = CSV.new(str, headers: true, return_headers: true) 21 | csv.shift # => #<CSV::Row "Name":"Name" "Count":"Count"> 22 | 23 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/skip_blanks.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +skip_blanks+ 2 | 3 | Specifies a boolean that determines whether blank lines in the input will be ignored; 4 | a line that contains a column separator is not considered to be blank. 5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:skip_blanks) # => false 8 | 9 | See also option {skip_lines}[#class-CSV-label-Option+skip_lines].
10 | 11 | For examples in this section: 12 | str = <<-EOT 13 | foo,0 14 | 15 | bar,1 16 | baz,2 17 | 18 | , 19 | EOT 20 | 21 | Using the default, +false+: 22 | ary = CSV.parse(str) 23 | ary # => [["foo", "0"], [], ["bar", "1"], ["baz", "2"], [], [nil, nil]] 24 | 25 | Using +true+: 26 | ary = CSV.parse(str, skip_blanks: true) 27 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"], [nil, nil]] 28 | 29 | Using a truthy value: 30 | ary = CSV.parse(str, skip_blanks: :foo) 31 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"], [nil, nil]] 32 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/skip_lines.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +skip_lines+ 2 | 3 | Specifies an object to use in identifying comment lines in the input that are to be ignored: 4 | * If a \Regexp, ignores lines that match it. 5 | * If a \String, converts it to a \Regexp, ignores lines that match it. 6 | * If +nil+, no lines are considered to be comments. 
7 | 8 | Default value: 9 | CSV::DEFAULT_OPTIONS.fetch(:skip_lines) # => nil 10 | 11 | For examples in this section: 12 | str = <<-EOT 13 | # Comment 14 | foo,0 15 | bar,1 16 | baz,2 17 | # Another comment 18 | EOT 19 | str # => "# Comment\nfoo,0\nbar,1\nbaz,2\n# Another comment\n" 20 | 21 | Using the default, +nil+: 22 | ary = CSV.parse(str) 23 | ary # => [["# Comment"], ["foo", "0"], ["bar", "1"], ["baz", "2"], ["# Another comment"]] 24 | 25 | Using a \Regexp: 26 | ary = CSV.parse(str, skip_lines: /^#/) 27 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] 28 | 29 | Using a \String: 30 | ary = CSV.parse(str, skip_lines: '#') 31 | ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] 32 | 33 | --- 34 | 35 | Raises an exception if given an object that is not a \Regexp, a \String, or +nil+: 36 | # Raises ArgumentError (:skip_lines has to respond to #match: 0) 37 | CSV.parse(str, skip_lines: 0) 38 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/strip.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +strip+ 2 | 3 | Specifies the boolean value that determines whether 4 | whitespace is stripped from each input field. 5 | 6 | Default value: 7 | CSV::DEFAULT_OPTIONS.fetch(:strip) # => false 8 | 9 | With default value +false+: 10 | ary = CSV.parse_line(' a , b ') 11 | ary # => [" a ", " b "] 12 | 13 | With value +true+: 14 | ary = CSV.parse_line(' a , b ', strip: true) 15 | ary # => ["a", "b"] 16 | -------------------------------------------------------------------------------- /doc/csv/options/parsing/unconverted_fields.rdoc: -------------------------------------------------------------------------------- 1 | ====== Option +unconverted_fields+ 2 | 3 | Specifies the boolean that determines whether unconverted field values are to be available. 
4 | 5 | Default value: 6 | CSV::DEFAULT_OPTIONS.fetch(:unconverted_fields) # => nil 7 | 8 | The unconverted field values are those found in the source data, 9 | prior to any conversions performed via option +converters+. 10 | 11 | When option +unconverted_fields+ is +true+, 12 | each returned row (\Array or \CSV::Row) has an added method, 13 | +unconverted_fields+, that returns the unconverted field values: 14 | str = <<-EOT 15 | foo,0 16 | bar,1 17 | baz,2 18 | EOT 19 | # Without unconverted_fields 20 | csv = CSV.parse(str, converters: :integer) 21 | csv # => [["foo", 0], ["bar", 1], ["baz", 2]] 22 | csv.first.respond_to?(:unconverted_fields) # => false 23 | # With unconverted_fields 24 | csv = CSV.parse(str, converters: :integer, unconverted_fields: true) 25 | csv # => [["foo", 0], ["bar", 1], ["baz", 2]] 26 | csv.first.respond_to?(:unconverted_fields) # => true 27 | csv.first.unconverted_fields # => ["foo", "0"] 28 | -------------------------------------------------------------------------------- /doc/csv/recipes/filtering.rdoc: -------------------------------------------------------------------------------- 1 | == Recipes for Filtering \CSV 2 | 3 | These recipes are specific code examples for specific \CSV filtering tasks. 4 | 5 | For other recipes, see {Recipes for CSV}[./recipes_rdoc.html]. 
6 | 7 | All code snippets on this page assume that the following has been executed: 8 | require 'csv' 9 | 10 | === Contents 11 | 12 | - {Source and Output Formats}[#label-Source+and+Output+Formats] 13 | - {Filtering String to String}[#label-Filtering+String+to+String] 14 | - {Recipe: Filter String to String parsing Headers}[#label-Recipe-3A+Filter+String+to+String+parsing+Headers] 15 | - {Recipe: Filter String to String parsing and writing Headers}[#label-Recipe-3A+Filter+String+to+String+parsing+and+writing+Headers] 16 | - {Recipe: Filter String to String Without Headers}[#label-Recipe-3A+Filter+String+to+String+Without+Headers] 17 | - {Filtering String to IO Stream}[#label-Filtering+String+to+IO+Stream] 18 | - {Recipe: Filter String to IO Stream parsing Headers}[#label-Recipe-3A+Filter+String+to+IO+Stream+parsing+Headers] 19 | - {Recipe: Filter String to IO Stream parsing and writing Headers}[#label-Recipe-3A+Filter+String+to+IO+Stream+parsing+and+writing+Headers] 20 | - {Recipe: Filter String to IO Stream Without Headers}[#label-Recipe-3A+Filter+String+to+IO+Stream+Without+Headers] 21 | - {Filtering IO Stream to String}[#label-Filtering+IO+Stream+to+String] 22 | - {Recipe: Filter IO Stream to String parsing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+String+parsing+Headers] 23 | - {Recipe: Filter IO Stream to String parsing and writing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+String+parsing+and+writing+Headers] 24 | - {Recipe: Filter IO Stream to String Without Headers}[#label-Recipe-3A+Filter+IO+Stream+to+String+Without+Headers] 25 | - {Filtering IO Stream to IO Stream}[#label-Filtering+IO+Stream+to+IO+Stream] 26 | - {Recipe: Filter IO Stream to IO Stream parsing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+IO+Stream+parsing+Headers] 27 | - {Recipe: Filter IO Stream to IO Stream parsing and writing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+IO+Stream+parsing+and+writing+Headers] 28 | - {Recipe: Filter IO Stream to IO Stream Without 
Headers}[#label-Recipe-3A+Filter+IO+Stream+to+IO+Stream+Without+Headers] 29 | 30 | === Source and Output Formats 31 | 32 | You can use a Unix-style "filter" for \CSV data. 33 | The filter reads source \CSV data and writes output \CSV data as modified by the filter. 34 | The input and output \CSV data may be any mixture of \Strings and \IO streams. 35 | 36 | ==== Filtering \String to \String 37 | 38 | You can filter one \String to another, with or without headers. 39 | 40 | ===== Recipe: Filter \String to \String parsing Headers 41 | 42 | Use class method CSV.filter with option +headers+ to filter a \String to another \String: 43 | in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" 44 | out_string = '' 45 | CSV.filter(in_string, out_string, headers: true) do |row| 46 | row['Name'] = row['Name'].upcase 47 | row['Value'] *= 4 48 | end 49 | out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" 50 | 51 | ===== Recipe: Filter \String to \String parsing and writing Headers 52 | 53 | Use class method CSV.filter with option +headers+ and +out_write_headers+ to filter a \String to another \String including header row: 54 | in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" 55 | out_string = '' 56 | CSV.filter(in_string, out_string, headers: true, out_write_headers: true) do |row| 57 | unless row.is_a?(Array) 58 | row['Name'] = row['Name'].upcase 59 | row['Value'] *= 4 60 | end 61 | end 62 | out_string # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n" 63 | 64 | ===== Recipe: Filter \String to \String Without Headers 65 | 66 | Use class method CSV.filter without option +headers+ to filter a \String to another \String: 67 | in_string = "foo,0\nbar,1\nbaz,2\n" 68 | out_string = '' 69 | CSV.filter(in_string, out_string) do |row| 70 | row[0] = row[0].upcase 71 | row[1] *= 4 72 | end 73 | out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" 74 | 75 | ==== Filtering \String to \IO Stream 76 | 77 | You can filter a \String to an \IO stream, with or without headers. 
78 | 79 | ===== Recipe: Filter \String to \IO Stream parsing Headers 80 | 81 | Use class method CSV.filter with option +headers+ to filter a \String to an \IO stream: 82 | in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" 83 | path = 't.csv' 84 | File.open(path, 'w') do |out_io| 85 | CSV.filter(in_string, out_io, headers: true) do |row| 86 | row['Name'] = row['Name'].upcase 87 | row['Value'] *= 4 88 | end 89 | end 90 | p File.read(path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n" 91 | 92 | ===== Recipe: Filter \String to \IO Stream parsing and writing Headers 93 | 94 | Use class method CSV.filter with option +headers+ and +out_write_headers+ to filter a \String to an \IO stream including header row: 95 | in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" 96 | path = 't.csv' 97 | File.open(path, 'w') do |out_io| 98 | CSV.filter(in_string, out_io, headers: true, out_write_headers: true ) do |row| 99 | unless row.is_a?(Array) 100 | row['Name'] = row['Name'].upcase 101 | row['Value'] *= 4 102 | end 103 | end 104 | end 105 | p File.read(path) # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n" 106 | 107 | ===== Recipe: Filter \String to \IO Stream Without Headers 108 | 109 | Use class method CSV.filter without option +headers+ to filter a \String to an \IO stream: 110 | in_string = "foo,0\nbar,1\nbaz,2\n" 111 | path = 't.csv' 112 | File.open(path, 'w') do |out_io| 113 | CSV.filter(in_string, out_io) do |row| 114 | row[0] = row[0].upcase 115 | row[1] *= 4 116 | end 117 | end 118 | p File.read(path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n" 119 | 120 | ==== Filtering \IO Stream to \String 121 | 122 | You can filter an \IO stream to a \String, with or without headers. 
123 | 124 | ===== Recipe: Filter \IO Stream to \String parsing Headers 125 | 126 | Use class method CSV.filter with option +headers+ to filter an \IO stream to a \String: 127 | in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" 128 | path = 't.csv' 129 | File.write(path, in_string) 130 | out_string = '' 131 | File.open(path) do |in_io| 132 | CSV.filter(in_io, out_string, headers: true) do |row| 133 | row['Name'] = row['Name'].upcase 134 | row['Value'] *= 4 135 | end 136 | end 137 | out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" 138 | 139 | ===== Recipe: Filter \IO Stream to \String parsing and writing Headers 140 | 141 | Use class method CSV.filter with option +headers+ and +out_write_headers+ to filter an \IO stream to a \String including header row: 142 | in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" 143 | path = 't.csv' 144 | File.write(path, in_string) 145 | out_string = '' 146 | File.open(path) do |in_io| 147 | CSV.filter(in_io, out_string, headers: true, out_write_headers: true) do |row| 148 | unless row.is_a?(Array) 149 | row['Name'] = row['Name'].upcase 150 | row['Value'] *= 4 151 | end 152 | end 153 | end 154 | out_string # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n" 155 | 156 | ===== Recipe: Filter \IO Stream to \String Without Headers 157 | 158 | Use class method CSV.filter without option +headers+ to filter an \IO stream to a \String: 159 | in_string = "foo,0\nbar,1\nbaz,2\n" 160 | path = 't.csv' 161 | File.write(path, in_string) 162 | out_string = '' 163 | File.open(path) do |in_io| 164 | CSV.filter(in_io, out_string) do |row| 165 | row[0] = row[0].upcase 166 | row[1] *= 4 167 | end 168 | end 169 | out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" 170 | 171 | ==== Filtering \IO Stream to \IO Stream 172 | 173 | You can filter an \IO stream to another \IO stream, with or without headers. 
174 | 175 | ===== Recipe: Filter \IO Stream to \IO Stream parsing Headers 176 | 177 | Use class method CSV.filter with option +headers+ to filter an \IO stream to another \IO stream: 178 | in_path = 't.csv' 179 | in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" 180 | File.write(in_path, in_string) 181 | out_path = 'u.csv' 182 | File.open(in_path) do |in_io| 183 | File.open(out_path, 'w') do |out_io| 184 | CSV.filter(in_io, out_io, headers: true) do |row| 185 | row['Name'] = row['Name'].upcase 186 | row['Value'] *= 4 187 | end 188 | end 189 | end 190 | p File.read(out_path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n" 191 | 192 | ===== Recipe: Filter \IO Stream to \IO Stream parsing and writing Headers 193 | 194 | Use class method CSV.filter with option +headers+ and +out_write_headers+ to filter an \IO stream to another \IO stream including header row: 195 | in_path = 't.csv' 196 | in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" 197 | File.write(in_path, in_string) 198 | out_path = 'u.csv' 199 | File.open(in_path) do |in_io| 200 | File.open(out_path, 'w') do |out_io| 201 | CSV.filter(in_io, out_io, headers: true, out_write_headers: true) do |row| 202 | unless row.is_a?(Array) 203 | row['Name'] = row['Name'].upcase 204 | row['Value'] *= 4 205 | end 206 | end 207 | end 208 | end 209 | p File.read(out_path) # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n" 210 | 211 | ===== Recipe: Filter \IO Stream to \IO Stream Without Headers 212 | 213 | Use class method CSV.filter without option +headers+ to filter an \IO stream to another \IO stream: 214 | in_path = 't.csv' 215 | in_string = "foo,0\nbar,1\nbaz,2\n" 216 | File.write(in_path, in_string) 217 | out_path = 'u.csv' 218 | File.open(in_path) do |in_io| 219 | File.open(out_path, 'w') do |out_io| 220 | CSV.filter(in_io, out_io) do |row| 221 | row[0] = row[0].upcase 222 | row[1] *= 4 223 | end 224 | end 225 | end 226 | p File.read(out_path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n" 227 | 
-------------------------------------------------------------------------------- /doc/csv/recipes/generating.rdoc: -------------------------------------------------------------------------------- 1 | == Recipes for Generating \CSV 2 | 3 | These recipes are specific code examples for specific \CSV generating tasks. 4 | 5 | For other recipes, see {Recipes for CSV}[./recipes_rdoc.html]. 6 | 7 | All code snippets on this page assume that the following has been executed: 8 | require 'csv' 9 | 10 | === Contents 11 | 12 | - {Output Formats}[#label-Output+Formats] 13 | - {Generating to a String}[#label-Generating+to+a+String] 14 | - {Recipe: Generate to String with Headers}[#label-Recipe-3A+Generate+to+String+with+Headers] 15 | - {Recipe: Generate to String Without Headers}[#label-Recipe-3A+Generate+to+String+Without+Headers] 16 | - {Generating to a File}[#label-Generating+to+a+File] 17 | - {Recipe: Generate to File with Headers}[#label-Recipe-3A+Generate+to+File+with+Headers] 18 | - {Recipe: Generate to File Without Headers}[#label-Recipe-3A+Generate+to+File+Without+Headers] 19 | - {Generating to an IO Stream}[#label-Generating+to+an+IO+Stream] 20 | - {Recipe: Generate to IO Stream with Headers}[#label-Recipe-3A+Generate+to+IO+Stream+with+Headers] 21 | - {Recipe: Generate to IO Stream Without Headers}[#label-Recipe-3A+Generate+to+IO+Stream+Without+Headers] 22 | - {Converting Fields}[#label-Converting+Fields] 23 | - {Recipe: Filter Generated Field Strings}[#label-Recipe-3A+Filter+Generated+Field+Strings] 24 | - {Recipe: Specify Multiple Write Converters}[#label-Recipe-3A+Specify+Multiple+Write+Converters] 25 | - {RFC 4180 Compliance}[#label-RFC+4180+Compliance] 26 | - {Row Separator}[#label-Row+Separator] 27 | - {Recipe: Generate Compliant Row Separator}[#label-Recipe-3A+Generate+Compliant+Row+Separator] 28 | - {Recipe: Generate Non-Compliant Row Separator}[#label-Recipe-3A+Generate+Non-Compliant+Row+Separator] 29 | - {Column Separator}[#label-Column+Separator] 30 | - 
{Recipe: Generate Compliant Column Separator}[#label-Recipe-3A+Generate+Compliant+Column+Separator] 31 | - {Recipe: Generate Non-Compliant Column Separator}[#label-Recipe-3A+Generate+Non-Compliant+Column+Separator] 32 | - {Quotes}[#label-Quotes] 33 | - {Recipe: Quote All Fields}[#label-Recipe-3A+Quote+All+Fields] 34 | - {Recipe: Quote Empty Fields}[#label-Recipe-3A+Quote+Empty+Fields] 35 | - {Recipe: Generate Compliant Quote Character}[#label-Recipe-3A+Generate+Compliant+Quote+Character] 36 | - {Recipe: Generate Non-Compliant Quote Character}[#label-Recipe-3A+Generate+Non-Compliant+Quote+Character] 37 | 38 | === Output Formats 39 | 40 | You can generate \CSV output to a \String, to a \File (via its path), or to an \IO stream. 41 | 42 | ==== Generating to a \String 43 | 44 | You can generate \CSV output to a \String, with or without headers. 45 | 46 | ===== Recipe: Generate to \String with Headers 47 | 48 | Use class method CSV.generate with option +headers+ to generate to a \String. 49 | 50 | This example uses method CSV#<< to append the rows 51 | that are to be generated: 52 | output_string = CSV.generate('', headers: ['Name', 'Value'], write_headers: true) do |csv| 53 | csv << ['Foo', 0] 54 | csv << ['Bar', 1] 55 | csv << ['Baz', 2] 56 | end 57 | output_string # => "Name,Value\nFoo,0\nBar,1\nBaz,2\n" 58 | 59 | ===== Recipe: Generate to \String Without Headers 60 | 61 | Use class method CSV.generate without option +headers+ to generate to a \String. 62 | 63 | This example uses method CSV#<< to append the rows 64 | that are to be generated: 65 | output_string = CSV.generate do |csv| 66 | csv << ['Foo', 0] 67 | csv << ['Bar', 1] 68 | csv << ['Baz', 2] 69 | end 70 | output_string # => "Foo,0\nBar,1\nBaz,2\n" 71 | 72 | ==== Generating to a \File 73 | 74 | You can generate \CSV data to a \File, with or without headers. 75 | 76 | ===== Recipe: Generate to \File with Headers 77 | 78 | Use class method CSV.open with option +headers+ to generate to a \File. 
79 | 80 | This example uses method CSV#<< to append the rows 81 | that are to be generated: 82 | path = 't.csv' 83 | CSV.open(path, 'w', headers: ['Name', 'Value'], write_headers: true) do |csv| 84 | csv << ['Foo', 0] 85 | csv << ['Bar', 1] 86 | csv << ['Baz', 2] 87 | end 88 | p File.read(path) # => "Name,Value\nFoo,0\nBar,1\nBaz,2\n" 89 | 90 | ===== Recipe: Generate to \File Without Headers 91 | 92 | Use class method CSV.open without option +headers+ to generate to a \File. 93 | 94 | This example uses method CSV#<< to append the rows 95 | that are to be generated: 96 | path = 't.csv' 97 | CSV.open(path, 'w') do |csv| 98 | csv << ['Foo', 0] 99 | csv << ['Bar', 1] 100 | csv << ['Baz', 2] 101 | end 102 | p File.read(path) # => "Foo,0\nBar,1\nBaz,2\n" 103 | 104 | ==== Generating to an \IO Stream 105 | 106 | You can generate \CSV data to an \IO stream, with or without headers. 107 | 108 | ===== Recipe: Generate to \IO Stream with Headers 109 | 110 | Use class method CSV.new with option +headers+ to generate \CSV data to an \IO stream: 111 | path = 't.csv' 112 | File.open(path, 'w') do |file| 113 | csv = CSV.new(file, headers: ['Name', 'Value'], write_headers: true) 114 | csv << ['Foo', 0] 115 | csv << ['Bar', 1] 116 | csv << ['Baz', 2] 117 | end 118 | p File.read(path) # => "Name,Value\nFoo,0\nBar,1\nBaz,2\n" 119 | 120 | ===== Recipe: Generate to \IO Stream Without Headers 121 | 122 | Use class method CSV.new without option +headers+ to generate \CSV data to an \IO stream: 123 | path = 't.csv' 124 | File.open(path, 'w') do |file| 125 | csv = CSV.new(file) 126 | csv << ['Foo', 0] 127 | csv << ['Bar', 1] 128 | csv << ['Baz', 2] 129 | end 130 | p File.read(path) # => "Foo,0\nBar,1\nBaz,2\n" 131 | 132 | === Converting Fields 133 | 134 | You can use _write_ _converters_ to convert fields when generating \CSV. 135 | 136 | ==== Recipe: Filter Generated Field Strings 137 | 138 | Use option :write_converters and a custom converter to convert field values when generating \CSV. 
139 | 140 | This example defines and uses a custom write converter to strip whitespace from generated fields: 141 | strip_converter = proc {|field| field.respond_to?(:strip) ? field.strip : field } 142 | output_string = CSV.generate(write_converters: strip_converter) do |csv| 143 | csv << [' foo ', 0] 144 | csv << [' bar ', 1] 145 | csv << [' baz ', 2] 146 | end 147 | output_string # => "foo,0\nbar,1\nbaz,2\n" 148 | 149 | ==== Recipe: Specify Multiple Write Converters 150 | 151 | Use option :write_converters and multiple custom converters 152 | to convert field values when generating \CSV. 153 | 154 | This example defines and uses two custom write converters to strip and upcase generated fields: 155 | strip_converter = proc {|field| field.respond_to?(:strip) ? field.strip : field } 156 | upcase_converter = proc {|field| field.respond_to?(:upcase) ? field.upcase : field } 157 | converters = [strip_converter, upcase_converter] 158 | output_string = CSV.generate(write_converters: converters) do |csv| 159 | csv << [' foo ', 0] 160 | csv << [' bar ', 1] 161 | csv << [' baz ', 2] 162 | end 163 | output_string # => "FOO,0\nBAR,1\nBAZ,2\n" 164 | 165 | === RFC 4180 Compliance 166 | 167 | By default, \CSV generates data that is compliant with 168 | {RFC 4180}[https://www.rfc-editor.org/rfc/rfc4180] 169 | with respect to: 170 | - Column separator. 171 | - Quote character. 172 | 173 | ==== Row Separator 174 | 175 | RFC 4180 specifies the row separator CRLF (Ruby "\r\n"). 
176 | 177 | ===== Recipe: Generate Compliant Row Separator 178 | 179 | For strict compliance, use option +:row_sep+ to specify row separator "\r\n": 180 | output_string = CSV.generate('', row_sep: "\r\n") do |csv| 181 | csv << ['Foo', 0] 182 | csv << ['Bar', 1] 183 | csv << ['Baz', 2] 184 | end 185 | output_string # => "Foo,0\r\nBar,1\r\nBaz,2\r\n" 186 | 187 | ===== Recipe: Generate Non-Compliant Row Separator 188 | 189 | For data with non-compliant row separators, use option +:row_sep+ with a different value. 190 | This example source uses semicolon (";") as its row separator: 191 | output_string = CSV.generate('', row_sep: ";") do |csv| 192 | csv << ['Foo', 0] 193 | csv << ['Bar', 1] 194 | csv << ['Baz', 2] 195 | end 196 | output_string # => "Foo,0;Bar,1;Baz,2;" 197 | 198 | ==== Column Separator 199 | 200 | RFC 4180 specifies column separator COMMA (Ruby ","). 201 | 202 | ===== Recipe: Generate Compliant Column Separator 203 | 204 | Because the \CSV default column separator is ",", 205 | you need not specify option +:col_sep+ for compliant data: 206 | output_string = CSV.generate('') do |csv| 207 | csv << ['Foo', 0] 208 | csv << ['Bar', 1] 209 | csv << ['Baz', 2] 210 | end 211 | output_string # => "Foo,0\nBar,1\nBaz,2\n" 212 | 213 | ===== Recipe: Generate Non-Compliant Column Separator 214 | 215 | For data with non-compliant column separators, use option +:col_sep+. 216 | This example source uses TAB ("\t") as its column separator: 217 | output_string = CSV.generate('', col_sep: "\t") do |csv| 218 | csv << ['Foo', 0] 219 | csv << ['Bar', 1] 220 | csv << ['Baz', 2] 221 | end 222 | output_string # => "Foo\t0\nBar\t1\nBaz\t2\n" 223 | 224 | ==== Quotes 225 | 226 | RFC 4180 allows most fields to be quoted or not. 227 | By default, \CSV does not quote most fields. 228 | 229 | However, a field containing the current row separator, column separator, 230 | or quote character is automatically quoted, producing RFC 4180 compliance: 231 | # Field contains row separator. 
232 | output_string = CSV.generate('') do |csv| 233 | row_sep = csv.row_sep 234 | csv << ["Foo#{row_sep}Foo", 0] 235 | csv << ['Bar', 1] 236 | csv << ['Baz', 2] 237 | end 238 | output_string # => "\"Foo\nFoo\",0\nBar,1\nBaz,2\n" 239 | # Field contains column separator. 240 | output_string = CSV.generate('') do |csv| 241 | col_sep = csv.col_sep 242 | csv << ["Foo#{col_sep}Foo", 0] 243 | csv << ['Bar', 1] 244 | csv << ['Baz', 2] 245 | end 246 | output_string # => "\"Foo,Foo\",0\nBar,1\nBaz,2\n" 247 | # Field contains quote character. 248 | output_string = CSV.generate('') do |csv| 249 | quote_char = csv.quote_char 250 | csv << ["Foo#{quote_char}Foo", 0] 251 | csv << ['Bar', 1] 252 | csv << ['Baz', 2] 253 | end 254 | output_string # => "\"Foo\"\"Foo\",0\nBar,1\nBaz,2\n" 255 | 256 | ===== Recipe: Quote All Fields 257 | 258 | Use option +:force_quotes+ to force quoted fields: 259 | output_string = CSV.generate('', force_quotes: true) do |csv| 260 | csv << ['Foo', 0] 261 | csv << ['Bar', 1] 262 | csv << ['Baz', 2] 263 | end 264 | output_string # => "\"Foo\",\"0\"\n\"Bar\",\"1\"\n\"Baz\",\"2\"\n" 265 | 266 | ===== Recipe: Quote Empty Fields 267 | 268 | Use option +:quote_empty+ to force quoting for empty fields: 269 | output_string = CSV.generate('', quote_empty: true) do |csv| 270 | csv << ['Foo', 0] 271 | csv << ['Bar', 1] 272 | csv << ['', 2] 273 | end 274 | output_string # => "Foo,0\nBar,1\n\"\",2\n" 275 | 276 | ===== Recipe: Generate Compliant Quote Character 277 | 278 | RFC 4180 specifies quote character DQUOTE (Ruby "\""). 
279 | 280 | Because the \CSV default quote character is also "\"", 281 | you need not specify option +:quote_char+ for compliant data: 282 | output_string = CSV.generate('', force_quotes: true) do |csv| 283 | csv << ['Foo', 0] 284 | csv << ['Bar', 1] 285 | csv << ['Baz', 2] 286 | end 287 | output_string # => "\"Foo\",\"0\"\n\"Bar\",\"1\"\n\"Baz\",\"2\"\n" 288 | 289 | ===== Recipe: Generate Non-Compliant Quote Character 290 | 291 | For data with non-compliant quote characters, use option +:quote_char+. 292 | This example source uses SQUOTE ("'") as its quote character: 293 | output_string = CSV.generate('', quote_char: "'", force_quotes: true) do |csv| 294 | csv << ['Foo', 0] 295 | csv << ['Bar', 1] 296 | csv << ['Baz', 2] 297 | end 298 | output_string # => "'Foo','0'\n'Bar','1'\n'Baz','2'\n" 299 | -------------------------------------------------------------------------------- /doc/csv/recipes/recipes.rdoc: -------------------------------------------------------------------------------- 1 | == Recipes for \CSV 2 | 3 | The recipes are specific code examples for specific tasks. 
See: 4 | - {Recipes for Parsing CSV}[./parsing_rdoc.html] 5 | - {Recipes for Generating CSV}[./generating_rdoc.html] 6 | - {Recipes for Filtering CSV}[./filtering_rdoc.html] 7 | -------------------------------------------------------------------------------- /lib/csv/core_ext/array.rb: -------------------------------------------------------------------------------- 1 | class Array 2 | # Equivalent to CSV::generate_line(self, options) 3 | # 4 | # ["CSV", "data"].to_csv 5 | # #=> "CSV,data\n" 6 | def to_csv(**options) 7 | CSV.generate_line(self, **options) 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /lib/csv/core_ext/string.rb: -------------------------------------------------------------------------------- 1 | class String 2 | # Equivalent to CSV::parse_line(self, options) 3 | # 4 | # "CSV,data".parse_csv 5 | # #=> ["CSV", "data"] 6 | def parse_csv(**options) 7 | CSV.parse_line(self, **options) 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /lib/csv/fields_converter.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class CSV 4 | # Note: Don't use this class directly. This is an internal class. 5 | class FieldsConverter 6 | include Enumerable 7 | 8 | NO_QUOTED_FIELDS = [] # :nodoc: 9 | def NO_QUOTED_FIELDS.[](_index) 10 | false 11 | end 12 | NO_QUOTED_FIELDS.freeze 13 | 14 | # 15 | # A CSV::FieldsConverter is a data structure for storing the 16 | # fields converter properties to be passed as a parameter 17 | # when parsing a new file (e.g. 
CSV::Parser.new(@io, parser_options)) 18 | # 19 | 20 | def initialize(options={}) 21 | @converters = [] 22 | @nil_value = options[:nil_value] 23 | @empty_value = options[:empty_value] 24 | @empty_value_is_empty_string = (@empty_value == "") 25 | @accept_nil = options[:accept_nil] 26 | @builtin_converters_name = options[:builtin_converters_name] 27 | @need_static_convert = need_static_convert? 28 | end 29 | 30 | def add_converter(name=nil, &converter) 31 | if name.nil? # custom converter 32 | @converters << converter 33 | else # named converter 34 | combo = builtin_converters[name] 35 | case combo 36 | when Array # combo converter 37 | combo.each do |sub_name| 38 | add_converter(sub_name) 39 | end 40 | else # individual named converter 41 | @converters << combo 42 | end 43 | end 44 | end 45 | 46 | def each(&block) 47 | @converters.each(&block) 48 | end 49 | 50 | def empty? 51 | @converters.empty? 52 | end 53 | 54 | def convert(fields, headers, lineno, quoted_fields=NO_QUOTED_FIELDS) 55 | return fields unless need_convert? 56 | 57 | fields.collect.with_index do |field, index| 58 | if field.nil? 59 | field = @nil_value 60 | elsif field.is_a?(String) and field.empty? 61 | field = @empty_value unless @empty_value_is_empty_string 62 | end 63 | @converters.each do |converter| 64 | break if field.nil? and @accept_nil 65 | if converter.arity == 1 # straight field converter 66 | field = converter[field] 67 | else # FieldInfo converter 68 | if headers 69 | header = headers[index] 70 | else 71 | header = nil 72 | end 73 | quoted = quoted_fields[index] 74 | field = converter[field, FieldInfo.new(index, lineno, header, quoted)] 75 | end 76 | break unless field.is_a?(String) # short-circuit pipeline for speed 77 | end 78 | field # final state of each field, converted or original 79 | end 80 | end 81 | 82 | private 83 | def need_static_convert? 84 | not (@nil_value.nil? and @empty_value_is_empty_string) 85 | end 86 | 87 | def need_convert? 
88 | @need_static_convert or 89 | (not @converters.empty?) 90 | end 91 | 92 | def builtin_converters 93 | @builtin_converters ||= ::CSV.const_get(@builtin_converters_name) 94 | end 95 | end 96 | end 97 | -------------------------------------------------------------------------------- /lib/csv/input_record_separator.rb: -------------------------------------------------------------------------------- 1 | require "English" 2 | require "stringio" 3 | 4 | class CSV 5 | module InputRecordSeparator 6 | class << self 7 | if RUBY_VERSION >= "3.0.0" 8 | def value 9 | "\n" 10 | end 11 | else 12 | def value 13 | $INPUT_RECORD_SEPARATOR 14 | end 15 | end 16 | end 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /lib/csv/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class CSV 4 | # The version of the installed library. 5 | VERSION = "3.3.6" 6 | end 7 | -------------------------------------------------------------------------------- /lib/csv/writer.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative "input_record_separator" 4 | require_relative "row" 5 | 6 | class CSV 7 | # Note: Don't use this class directly. This is an internal class. 8 | class Writer 9 | # 10 | # A CSV::Writer receives an output, prepares the header, format and output. 11 | # It allows us to write new rows in the object and rewind it. 
12 | # 13 | attr_reader :lineno 14 | attr_reader :headers 15 | 16 | def initialize(output, options) 17 | @output = output 18 | @options = options 19 | @lineno = 0 20 | @fields_converter = nil 21 | prepare 22 | if @options[:write_headers] and @headers 23 | self << @headers 24 | end 25 | @fields_converter = @options[:fields_converter] 26 | end 27 | 28 | # 29 | # Adds a new row 30 | # 31 | def <<(row) 32 | case row 33 | when Row 34 | row = row.fields 35 | when Hash 36 | row = @headers.collect {|header| row[header]} 37 | end 38 | 39 | @headers ||= row if @use_headers 40 | @lineno += 1 41 | 42 | if @fields_converter 43 | row = @fields_converter.convert(row, nil, lineno) 44 | end 45 | 46 | i = -1 47 | converted_row = row.collect do |field| 48 | i += 1 49 | quote(field, i) 50 | end 51 | line = converted_row.join(@column_separator) + @row_separator 52 | if @output_encoding 53 | line = line.encode(@output_encoding) 54 | end 55 | @output << line 56 | 57 | self 58 | end 59 | 60 | # 61 | # Winds back to the beginning 62 | # 63 | def rewind 64 | @lineno = 0 65 | @headers = nil if @options[:headers].nil? 
66 | end 67 | 68 | private 69 | def prepare 70 | @encoding = @options[:encoding] 71 | 72 | prepare_header 73 | prepare_format 74 | prepare_output 75 | end 76 | 77 | def prepare_header 78 | headers = @options[:headers] 79 | case headers 80 | when Array 81 | @headers = headers 82 | @use_headers = true 83 | when String 84 | @headers = CSV.parse_line(headers, 85 | col_sep: @options[:column_separator], 86 | row_sep: @options[:row_separator], 87 | quote_char: @options[:quote_character]) 88 | @use_headers = true 89 | when true 90 | @headers = nil 91 | @use_headers = true 92 | else 93 | @headers = nil 94 | @use_headers = false 95 | end 96 | return unless @headers 97 | 98 | converter = @options[:header_fields_converter] 99 | @headers = converter.convert(@headers, nil, 0, []) 100 | @headers.each do |header| 101 | header.freeze if header.is_a?(String) 102 | end 103 | end 104 | 105 | def prepare_force_quotes_fields(force_quotes) 106 | @force_quotes_fields = {} 107 | force_quotes.each do |name_or_index| 108 | case name_or_index 109 | when Integer 110 | index = name_or_index 111 | @force_quotes_fields[index] = true 112 | when String, Symbol 113 | name = name_or_index.to_s 114 | if @headers.nil? 115 | message = ":headers is required when you use field name " + 116 | "in :force_quotes: " + 117 | "#{name_or_index.inspect}: #{force_quotes.inspect}" 118 | raise ArgumentError, message 119 | end 120 | index = @headers.index(name) 121 | next if index.nil? 
122 | @force_quotes_fields[index] = true 123 | else 124 | message = ":force_quotes element must be " + 125 | "field index or field name: " + 126 | "#{name_or_index.inspect}: #{force_quotes.inspect}" 127 | raise ArgumentError, message 128 | end 129 | end 130 | end 131 | 132 | def prepare_format 133 | @column_separator = @options[:column_separator].to_s.encode(@encoding) 134 | row_separator = @options[:row_separator] 135 | if row_separator == :auto 136 | @row_separator = InputRecordSeparator.value.encode(@encoding) 137 | else 138 | @row_separator = row_separator.to_s.encode(@encoding) 139 | end 140 | @quote_character = @options[:quote_character] 141 | force_quotes = @options[:force_quotes] 142 | if force_quotes.is_a?(Array) 143 | prepare_force_quotes_fields(force_quotes) 144 | @force_quotes = false 145 | elsif force_quotes 146 | @force_quotes_fields = nil 147 | @force_quotes = true 148 | else 149 | @force_quotes_fields = nil 150 | @force_quotes = false 151 | end 152 | unless @force_quotes 153 | @quotable_pattern = 154 | Regexp.new("[\r\n".encode(@encoding) + 155 | Regexp.escape(@column_separator) + 156 | Regexp.escape(@quote_character.encode(@encoding)) + 157 | "]".encode(@encoding)) 158 | end 159 | @quote_empty = @options.fetch(:quote_empty, true) 160 | end 161 | 162 | def prepare_output 163 | @output_encoding = nil 164 | return unless @output.is_a?(StringIO) 165 | 166 | output_encoding = @output.internal_encoding || @output.external_encoding 167 | if @encoding != output_encoding 168 | if @options[:force_encoding] 169 | @output_encoding = output_encoding 170 | else 171 | compatible_encoding = Encoding.compatible?(@encoding, output_encoding) 172 | if compatible_encoding 173 | @output.set_encoding(compatible_encoding) 174 | @output.seek(0, IO::SEEK_END) 175 | end 176 | end 177 | end 178 | end 179 | 180 | def quote_field(field) 181 | field = String(field) 182 | encoded_quote_character = @quote_character.encode(field.encoding) 183 | encoded_quote_character + 184 | 
field.gsub(encoded_quote_character, 185 | encoded_quote_character * 2) + 186 | encoded_quote_character 187 | end 188 | 189 | def quote(field, i) 190 | if @force_quotes 191 | quote_field(field) 192 | elsif @force_quotes_fields and @force_quotes_fields[i] 193 | quote_field(field) 194 | else 195 | if field.nil? # represent +nil+ fields as empty unquoted fields 196 | "" 197 | else 198 | field = String(field) # Stringify fields 199 | # represent empty fields as empty quoted fields 200 | if (@quote_empty and field.empty?) or (field.valid_encoding? and @quotable_pattern.match?(field)) 201 | quote_field(field) 202 | else 203 | field # unquoted field 204 | end 205 | end 206 | end 207 | end 208 | end 209 | end 210 | -------------------------------------------------------------------------------- /profile/parse.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "csv" 4 | require "optparse" 5 | 6 | n_columns = 1000 7 | n_rows = 1000 8 | type = "unquoted" 9 | 10 | alphas = nil 11 | hiraganas = nil 12 | 13 | builders = { 14 | "unquoted" => lambda {(alphas.join(",") + "\r\n") * n_rows}, 15 | "quoted" => lambda {(alphas.map {|s| %("#{s}")}.join(",") + "\r\n") * n_rows}, 16 | "include-column-separator" => 17 | lambda {(alphas.map {|s| %(",#{s}")}.join(",") + "\r\n") * n_rows}, 18 | "include-row-separator" => 19 | lambda {(alphas.map {|s| %("#{s}\r\n")}.join(",") + "\r\n") * n_rows}, 20 | "utf-8" => lambda {((hiraganas.join(",") + "\r\n") * n_rows).encode("UTF-8")}, 21 | "windows-31j" => 22 | lambda {((hiraganas.join(",") + "\r\n") * n_rows).encode("Windows-31J")}, 23 | } 24 | 25 | parser = OptionParser.new 26 | parser.on("--n-columns=N", Integer, 27 | "The number of columns to be parsed", 28 | "(#{n_columns})") do |n| 29 | n_columns = n 30 | end 31 | parser.on("--n-rows=N", Integer, 32 | "The number of rows to be parsed", 33 | "(#{n_rows})") do |n| 34 | n_rows = n 35 | end 36 | parser.on("--type=TYPE", 
builders.keys, 37 | "The type for profile", 38 | "(#{type})") do |t| 39 | type = t 40 | end 41 | parser.parse!(ARGV) 42 | 43 | alphas = ["AAAAA"] * n_columns 44 | hiraganas = ["あああああ"] * n_columns 45 | 46 | data = builders[type].call 47 | 48 | require "profile" 49 | CSV.parse(data) 50 | -------------------------------------------------------------------------------- /profile/write.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "csv" 4 | require "optparse" 5 | 6 | n_columns = 5 7 | n_rows = 100 8 | type = "generate-line" 9 | 10 | parser = OptionParser.new 11 | parser.on("--n-columns=N", Integer, 12 | "The number of columns to be generated", 13 | "(#{n_columns})") do |n| 14 | n_columns = n 15 | end 16 | parser.on("--n-rows=N", Integer, 17 | "The number of rows to be generated", 18 | "(#{n_rows})") do |n| 19 | n_rows = n 20 | end 21 | parser.on("--type=TYPE", 22 | "The type to write", 23 | "(#{type})") do |t| 24 | type = t 25 | end 26 | parser.parse!(ARGV) 27 | 28 | fields = ["AAAAA"] * n_columns 29 | headers = n_columns.times.collect do |i| 30 | "header#{i}" 31 | end 32 | row = CSV::Row.new(headers, fields) 33 | raw_row = {} 34 | n_columns.times do |i| 35 | raw_row[headers[i]] = fields[i] 36 | end 37 | 38 | require "profile" 39 | 40 | case type 41 | when "generate-line" 42 | n_rows.times do 43 | CSV.generate_line(fields) 44 | end 45 | when "add" 46 | output = StringIO.new 47 | csv = CSV.new(output) 48 | n_rows.times do 49 | csv << row 50 | end 51 | else 52 | raise "unknown type: #{type.inspect}" 53 | end 54 | -------------------------------------------------------------------------------- /run-test.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | $VERBOSE = true 4 | 5 | $LOAD_PATH.unshift("test") 6 | $LOAD_PATH.unshift("test/lib") 7 | $LOAD_PATH.unshift("lib") 8 | 9 | Dir.glob("test/csv/**/*test_*.rb") do |test_rb| 10 | # 
Ensure we only load syntax that we can handle 11 | next if RUBY_VERSION < "2.7" && test_rb.end_with?("test_patterns.rb") 12 | 13 | require File.expand_path(test_rb) 14 | end 15 | -------------------------------------------------------------------------------- /test/csv/helper.rb: -------------------------------------------------------------------------------- 1 | require "tempfile" 2 | require "test/unit" 3 | 4 | require "csv" 5 | 6 | require_relative "../lib/with_different_ofs" 7 | 8 | module CSVHelper 9 | def with_chunk_size(chunk_size) 10 | chunk_size_keep = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] 11 | begin 12 | ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] = chunk_size 13 | yield 14 | ensure 15 | ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] = chunk_size_keep 16 | end 17 | end 18 | 19 | def with_verbose(verbose) 20 | original = $VERBOSE 21 | begin 22 | $VERBOSE = verbose 23 | yield 24 | ensure 25 | $VERBOSE = original 26 | end 27 | end 28 | 29 | def with_default_internal(encoding) 30 | original = Encoding.default_internal 31 | begin 32 | with_verbose(false) do 33 | Encoding.default_internal = encoding 34 | end 35 | yield 36 | ensure 37 | with_verbose(false) do 38 | Encoding.default_internal = original 39 | end 40 | end 41 | end 42 | end 43 | 44 | class Ractor 45 | alias value take unless method_defined? 
:value 46 | end if defined?(Ractor) 47 | -------------------------------------------------------------------------------- /test/csv/interface/test_delegation.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: false 2 | 3 | require_relative "../helper" 4 | 5 | class TestCSVInterfaceDelegation < Test::Unit::TestCase 6 | class TestStringIO < self 7 | def setup 8 | @csv = CSV.new("h1,h2") 9 | end 10 | 11 | def test_flock 12 | assert_raise(NotImplementedError) do 13 | @csv.flock(File::LOCK_EX) 14 | end 15 | end 16 | 17 | def test_ioctl 18 | assert_raise(NotImplementedError) do 19 | @csv.ioctl(0) 20 | end 21 | end 22 | 23 | def test_stat 24 | assert_raise(NotImplementedError) do 25 | @csv.stat 26 | end 27 | end 28 | 29 | def test_to_i 30 | assert_raise(NotImplementedError) do 31 | @csv.to_i 32 | end 33 | end 34 | 35 | def test_binmode? 36 | assert_equal(false, @csv.binmode?) 37 | end 38 | 39 | def test_path 40 | assert_equal(nil, @csv.path) 41 | end 42 | 43 | def test_to_io 44 | assert_instance_of(StringIO, @csv.to_io) 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /test/csv/interface/test_read.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: false 2 | 3 | require_relative "../helper" 4 | 5 | class TestCSVInterfaceRead < Test::Unit::TestCase 6 | extend DifferentOFS 7 | 8 | def setup 9 | super 10 | @data = "" 11 | @data << "1\t2\t3\r\n" 12 | @data << "4\t5\r\n" 13 | @input = Tempfile.new(["interface-read", ".csv"], binmode: true) 14 | @input << @data 15 | @input.rewind 16 | @rows = [ 17 | ["1", "2", "3"], 18 | ["4", "5"], 19 | ] 20 | end 21 | 22 | def teardown 23 | @input.close(true) 24 | super 25 | end 26 | 27 | def test_foreach 28 | rows = [] 29 | CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n") do |row| 30 | rows << row 31 | end 32 | assert_equal(@rows, rows) 33 | end 34 
| 35 | def test_foreach_stringio 36 | string_io = StringIO.new(@data) 37 | rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a 38 | assert_equal(@rows, rows) 39 | end 40 | 41 | def test_foreach_stringio_with_bom 42 | if RUBY_VERSION < "2.7" 43 | # Support to StringIO was dropped for Ruby 2.6 and earlier without BOM support: 44 | # https://github.com/ruby/stringio/pull/47 45 | omit("StringIO's BOM support isn't available with Ruby < 2.7") 46 | end 47 | 48 | string_io = StringIO.new("\ufeff#{@data}") # U+FEFF ZERO WIDTH NO-BREAK SPACE 49 | rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a 50 | assert_equal(@rows, rows) 51 | end 52 | 53 | if respond_to?(:ractor) 54 | ractor 55 | def test_foreach_in_ractor 56 | ractor = Ractor.new(@input.path) do |path| 57 | rows = [] 58 | CSV.foreach(path, col_sep: "\t", row_sep: "\r\n") do |row| 59 | rows << row 60 | end 61 | rows 62 | end 63 | rows = [ 64 | ["1", "2", "3"], 65 | ["4", "5"], 66 | ] 67 | assert_equal(rows, ractor.value) 68 | end 69 | end 70 | 71 | def test_foreach_mode 72 | rows = [] 73 | CSV.foreach(@input.path, "r", col_sep: "\t", row_sep: "\r\n") do |row| 74 | rows << row 75 | end 76 | assert_equal(@rows, rows) 77 | end 78 | 79 | def test_foreach_enumerator 80 | rows = CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").to_a 81 | assert_equal(@rows, rows) 82 | end 83 | 84 | def test_closed? 85 | csv = CSV.open(@input.path, "r+", col_sep: "\t", row_sep: "\r\n") 86 | assert_not_predicate(csv, :closed?) 87 | csv.close 88 | assert_predicate(csv, :closed?) 89 | end 90 | 91 | def test_open_auto_close 92 | csv = nil 93 | CSV.open(@input.path) do |_csv| 94 | csv = _csv 95 | end 96 | assert_predicate(csv, :closed?) 97 | end 98 | 99 | def test_open_closed 100 | csv = nil 101 | CSV.open(@input.path) do |_csv| 102 | csv = _csv 103 | csv.close 104 | end 105 | assert_predicate(csv, :closed?) 
106 | end 107 | 108 | def test_open_block_return_value 109 | return_value = CSV.open(@input.path) do 110 | "Return value." 111 | end 112 | assert_equal("Return value.", return_value) 113 | end 114 | 115 | def test_open_mode_integer 116 | CSV.open(@input.path, File::RDONLY, col_sep: "\t") do |csv| 117 | assert_equal(@rows, csv.read) 118 | end 119 | end 120 | 121 | def test_open_encoding_valid 122 | # U+1F600 GRINNING FACE 123 | # U+1F601 GRINNING FACE WITH SMILING EYES 124 | File.open(@input.path, "w") do |file| 125 | file << "\u{1F600},\u{1F601}" 126 | end 127 | CSV.open(@input.path, encoding: "utf-8") do |csv| 128 | assert_equal([["\u{1F600}", "\u{1F601}"]], 129 | csv.to_a) 130 | end 131 | end 132 | 133 | def test_open_encoding_invalid 134 | # U+1F600 GRINNING FACE 135 | # U+1F601 GRINNING FACE WITH SMILING EYES 136 | File.open(@input.path, "w") do |file| 137 | file << "\u{1F600},\u{1F601}" 138 | end 139 | CSV.open(@input.path, encoding: "EUC-JP") do |csv| 140 | error = assert_raise(CSV::InvalidEncodingError) do 141 | csv.shift 142 | end 143 | assert_equal([Encoding::EUC_JP, "Invalid byte sequence in EUC-JP in line 1."], 144 | [error.encoding, error.message]) 145 | end 146 | end 147 | 148 | def test_open_encoding_nonexistent 149 | _output, error = capture_output do 150 | CSV.open(@input.path, encoding: "nonexistent") do 151 | end 152 | end 153 | assert_equal("path:0: warning: Unsupported encoding nonexistent ignored\n", 154 | error.gsub(/\A.+:\d+: /, "path:0: ")) 155 | end 156 | 157 | def test_open_encoding_utf_8_with_bom 158 | # U+FEFF ZERO WIDTH NO-BREAK SPACE, BOM 159 | # U+1F600 GRINNING FACE 160 | # U+1F601 GRINNING FACE WITH SMILING EYES 161 | File.open(@input.path, "w") do |file| 162 | file << "\u{FEFF}\u{1F600},\u{1F601}" 163 | end 164 | CSV.open(@input.path, encoding: "bom|utf-8") do |csv| 165 | assert_equal([["\u{1F600}", "\u{1F601}"]], 166 | csv.to_a) 167 | end 168 | end 169 | 170 | def test_open_invalid_byte_sequence_in_utf_8 171 | 
CSV.open(@input.path, "w", encoding: Encoding::CP932) do |rows| 172 | error = assert_raise(Encoding::InvalidByteSequenceError) do 173 | rows << ["\x82\xa0"] 174 | end 175 | assert_equal('"\x82" on UTF-8', 176 | error.message) 177 | end 178 | end 179 | 180 | def test_open_with_invalid_nil 181 | CSV.open(@input.path, "w", encoding: Encoding::CP932, invalid: nil) do |rows| 182 | error = assert_raise(Encoding::InvalidByteSequenceError) do 183 | rows << ["\x82\xa0"] 184 | end 185 | assert_equal('"\x82" on UTF-8', 186 | error.message) 187 | end 188 | end 189 | 190 | def test_open_with_invalid_replace 191 | CSV.open(@input.path, "w", encoding: Encoding::CP932, invalid: :replace) do |rows| 192 | rows << ["\x82\xa0".force_encoding(Encoding::UTF_8)] 193 | end 194 | CSV.open(@input.path, encoding: Encoding::CP932) do |csv| 195 | assert_equal([["??"]], 196 | csv.to_a) 197 | end 198 | end 199 | 200 | def test_open_with_invalid_replace_and_replace_string 201 | CSV.open(@input.path, "w", encoding: Encoding::CP932, invalid: :replace, replace: "X") do |rows| 202 | rows << ["\x82\xa0".force_encoding(Encoding::UTF_8)] 203 | end 204 | CSV.open(@input.path, encoding: Encoding::CP932) do |csv| 205 | assert_equal([["XX"]], 206 | csv.to_a) 207 | end 208 | end 209 | 210 | def test_open_with_undef_replace 211 | # U+00B7 Middle Dot 212 | CSV.open(@input.path, "w", encoding: Encoding::CP932, undef: :replace) do |rows| 213 | rows << ["\u00B7"] 214 | end 215 | CSV.open(@input.path, encoding: Encoding::CP932) do |csv| 216 | assert_equal([["?"]], 217 | csv.to_a) 218 | end 219 | end 220 | 221 | def test_open_with_undef_replace_and_replace_string 222 | # U+00B7 Middle Dot 223 | CSV.open(@input.path, "w", encoding: Encoding::CP932, undef: :replace, replace: "X") do |rows| 224 | rows << ["\u00B7"] 225 | end 226 | CSV.open(@input.path, encoding: Encoding::CP932) do |csv| 227 | assert_equal([["X"]], 228 | csv.to_a) 229 | end 230 | end 231 | 232 | def test_open_with_newline 233 | CSV.open(@input.path, 
col_sep: "\t", universal_newline: true) do |csv| 234 | assert_equal(@rows, csv.to_a) 235 | end 236 | File.binwrite(@input.path, "1,2,3\r\n" "4,5\n") 237 | CSV.open(@input.path, newline: :universal) do |csv| 238 | assert_equal(@rows, csv.to_a) 239 | end 240 | end 241 | 242 | def test_open_with_bom 243 | if /mingw|mswin/.match?(RUBY_PLATFORM) 244 | omit("BOM detection on Windows may be buggy: Bug #20526") 245 | end 246 | csv_data = @input.read 247 | bom = "\ufeff" # U+FEFF ZERO WIDTH NO-BREAK SPACE 248 | File.binwrite(@input.path, "#{bom}#{csv_data}") 249 | @input.rewind 250 | CSV.open(@input.path, col_sep: "\t") do |csv| 251 | assert_equal(@rows, csv.to_a) 252 | end 253 | end 254 | 255 | def test_parse 256 | assert_equal(@rows, 257 | CSV.parse(@data, col_sep: "\t", row_sep: "\r\n")) 258 | end 259 | 260 | def test_parse_block 261 | rows = [] 262 | CSV.parse(@data, col_sep: "\t", row_sep: "\r\n") do |row| 263 | rows << row 264 | end 265 | assert_equal(@rows, rows) 266 | end 267 | 268 | def test_parse_enumerator 269 | rows = CSV.parse(@data, col_sep: "\t", row_sep: "\r\n").to_a 270 | assert_equal(@rows, rows) 271 | end 272 | 273 | def test_parse_headers_only 274 | table = CSV.parse("a,b,c", headers: true) 275 | assert_equal([ 276 | ["a", "b", "c"], 277 | [], 278 | ], 279 | [ 280 | table.headers, 281 | table.each.to_a, 282 | ]) 283 | end 284 | 285 | def test_parse_line 286 | assert_equal(["1", "2", "3"], 287 | CSV.parse_line("1;2;3", col_sep: ";")) 288 | end 289 | 290 | def test_parse_line_shortcut 291 | assert_equal(["1", "2", "3"], 292 | "1;2;3".parse_csv(col_sep: ";")) 293 | end 294 | 295 | def test_parse_line_empty 296 | assert_equal(nil, CSV.parse_line("")) # to signal eof 297 | end 298 | 299 | def test_parse_line_empty_line 300 | assert_equal([], CSV.parse_line("\n1,2,3")) 301 | end 302 | 303 | def test_read 304 | assert_equal(@rows, 305 | CSV.read(@input.path, col_sep: "\t", row_sep: "\r\n")) 306 | end 307 | 308 | if respond_to?(:ractor) 309 | ractor 310 | def 
test_read_in_ractor 311 | ractor = Ractor.new(@input.path) do |path| 312 | CSV.read(path, col_sep: "\t", row_sep: "\r\n") 313 | end 314 | rows = [ 315 | ["1", "2", "3"], 316 | ["4", "5"], 317 | ] 318 | assert_equal(rows, ractor.value) 319 | end 320 | end 321 | 322 | def test_readlines 323 | assert_equal(@rows, 324 | CSV.readlines(@input.path, col_sep: "\t", row_sep: "\r\n")) 325 | end 326 | 327 | def test_open_read 328 | rows = CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| 329 | csv.read 330 | end 331 | assert_equal(@rows, rows) 332 | end 333 | 334 | def test_open_readlines 335 | rows = CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| 336 | csv.readlines 337 | end 338 | assert_equal(@rows, rows) 339 | end 340 | 341 | def test_table 342 | table = CSV.table(@input.path, col_sep: "\t", row_sep: "\r\n") 343 | assert_equal(CSV::Table.new([ 344 | CSV::Row.new([:"1", :"2", :"3"], [4, 5, nil]), 345 | ]), 346 | table) 347 | end 348 | 349 | def test_shift # aliased as gets() and readline() 350 | CSV.open(@input.path, "rb+", col_sep: "\t", row_sep: "\r\n") do |csv| 351 | rows = [ 352 | csv.shift, 353 | csv.shift, 354 | csv.shift, 355 | ] 356 | assert_equal(@rows + [nil], 357 | rows) 358 | end 359 | end 360 | 361 | def test_enumerator 362 | CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| 363 | assert_equal(@rows, csv.each.to_a) 364 | end 365 | end 366 | 367 | def test_shift_and_each 368 | CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| 369 | rows = [] 370 | rows << csv.shift 371 | rows.concat(csv.each.to_a) 372 | assert_equal(@rows, rows) 373 | end 374 | end 375 | 376 | def test_each_twice 377 | CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| 378 | assert_equal([ 379 | @rows, 380 | [], 381 | ], 382 | [ 383 | csv.each.to_a, 384 | csv.each.to_a, 385 | ]) 386 | end 387 | end 388 | 389 | def test_eof? 
390 | eofs = [] 391 | CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| 392 | eofs << csv.eof? 393 | csv.shift 394 | eofs << csv.eof? 395 | csv.shift 396 | eofs << csv.eof? 397 | end 398 | assert_equal([false, false, true], 399 | eofs) 400 | end 401 | 402 | def test_new_nil 403 | assert_raise_with_message ArgumentError, "Cannot parse nil as CSV" do 404 | CSV.new(nil) 405 | end 406 | end 407 | 408 | def test_options_not_modified 409 | options = {}.freeze 410 | CSV.foreach(@input.path, **options) 411 | CSV.open(@input.path, **options) {} 412 | CSV.parse("", **options) 413 | CSV.parse_line("", **options) 414 | CSV.read(@input.path, **options) 415 | CSV.readlines(@input.path, **options) 416 | CSV.table(@input.path, **options) 417 | end 418 | end 419 | -------------------------------------------------------------------------------- /test/csv/interface/test_read_write.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: false 2 | 3 | require_relative "../helper" 4 | 5 | class TestCSVInterfaceReadWrite < Test::Unit::TestCase 6 | extend DifferentOFS 7 | 8 | def test_filter 9 | input = <<-CSV.freeze 10 | 1;2;3 11 | 4;5 12 | CSV 13 | output = "" 14 | CSV.filter(input, output, 15 | in_col_sep: ";", 16 | out_col_sep: ",", 17 | converters: :all) do |row| 18 | row.map! 
{|n| n * 2} 19 | row << "Added\r" 20 | end 21 | assert_equal(<<-CSV, output) 22 | 2,4,6,"Added\r" 23 | 8,10,"Added\r" 24 | CSV 25 | end 26 | 27 | def test_filter_headers_true 28 | input = <<-CSV.freeze 29 | Name,Value 30 | foo,0 31 | bar,1 32 | baz,2 33 | CSV 34 | output = "" 35 | CSV.filter(input, output, headers: true) do |row| 36 | row[0] += "X" 37 | row[1] = row[1].to_i + 1 38 | end 39 | assert_equal(<<-CSV, output) 40 | fooX,1 41 | barX,2 42 | bazX,3 43 | CSV 44 | end 45 | 46 | def test_filter_headers_true_write_headers 47 | input = <<-CSV.freeze 48 | Name,Value 49 | foo,0 50 | bar,1 51 | baz,2 52 | CSV 53 | output = "" 54 | CSV.filter(input, output, headers: true, out_write_headers: true) do |row| 55 | if row.is_a?(Array) 56 | row[0] += "X" 57 | row[1] += "Y" 58 | else 59 | row[0] += "X" 60 | row[1] = row[1].to_i + 1 61 | end 62 | end 63 | assert_equal(<<-CSV, output) 64 | NameX,ValueY 65 | fooX,1 66 | barX,2 67 | bazX,3 68 | CSV 69 | end 70 | 71 | def test_filter_headers_array_write_headers 72 | input = <<-CSV.freeze 73 | foo,0 74 | bar,1 75 | baz,2 76 | CSV 77 | output = "" 78 | CSV.filter(input, output, 79 | headers: ["Name", "Value"], 80 | out_write_headers: true) do |row| 81 | row[0] += "X" 82 | row[1] = row[1].to_i + 1 83 | end 84 | assert_equal(<<-CSV, output) 85 | Name,Value 86 | fooX,1 87 | barX,2 88 | bazX,3 89 | CSV 90 | end 91 | 92 | def test_instance_same 93 | data = "" 94 | assert_equal(CSV.instance(data, col_sep: ";").object_id, 95 | CSV.instance(data, col_sep: ";").object_id) 96 | end 97 | 98 | def test_instance_append 99 | output = "" 100 | CSV.instance(output, col_sep: ";") << ["a", "b", "c"] 101 | assert_equal(<<-CSV, output) 102 | a;b;c 103 | CSV 104 | CSV.instance(output, col_sep: ";") << [1, 2, 3] 105 | assert_equal(<<-CSV, output) 106 | a;b;c 107 | 1;2;3 108 | CSV 109 | end 110 | 111 | def test_instance_shortcut 112 | assert_equal(CSV.instance, 113 | CSV {|csv| csv}) 114 | end 115 | 116 | def test_instance_shortcut_with_io 117 | io = 
StringIO.new 118 | from_instance = CSV.instance(io, col_sep: ";") { |csv| csv << ["a", "b", "c"] } 119 | from_shortcut = CSV(io, col_sep: ";") { |csv| csv << ["e", "f", "g"] } 120 | 121 | assert_equal(from_instance, from_shortcut) 122 | assert_equal(from_instance.string, "a;b;c\ne;f;g\n") 123 | end 124 | end 125 | -------------------------------------------------------------------------------- /test/csv/interface/test_write.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: false 2 | 3 | require_relative "../helper" 4 | 5 | class TestCSVInterfaceWrite < Test::Unit::TestCase 6 | extend DifferentOFS 7 | 8 | def setup 9 | super 10 | @output = Tempfile.new(["interface-write", ".csv"]) 11 | end 12 | 13 | def teardown 14 | @output.close(true) 15 | super 16 | end 17 | 18 | def test_generate_default 19 | csv_text = CSV.generate do |csv| 20 | csv << [1, 2, 3] << [4, nil, 5] 21 | end 22 | assert_equal(<<-CSV, csv_text) 23 | 1,2,3 24 | 4,,5 25 | CSV 26 | end 27 | 28 | if respond_to?(:ractor) 29 | ractor 30 | def test_generate_default_in_ractor 31 | ractor = Ractor.new do 32 | CSV.generate do |csv| 33 | csv << [1, 2, 3] << [4, nil, 5] 34 | end 35 | end 36 | assert_equal(<<-CSV, ractor.value) 37 | 1,2,3 38 | 4,,5 39 | CSV 40 | end 41 | end 42 | 43 | def test_generate_append 44 | csv_text = <<-CSV 45 | 1,2,3 46 | 4,,5 47 | CSV 48 | CSV.generate(csv_text) do |csv| 49 | csv << ["last", %Q{"row"}] 50 | end 51 | assert_equal(<<-CSV, csv_text) 52 | 1,2,3 53 | 4,,5 54 | last,"""row""" 55 | CSV 56 | end 57 | 58 | def test_generate_no_new_line 59 | csv_text = CSV.generate("test") do |csv| 60 | csv << ["row"] 61 | end 62 | assert_equal(<<-CSV, csv_text) 63 | testrow 64 | CSV 65 | end 66 | 67 | def test_generate_line_col_sep 68 | line = CSV.generate_line(["1", "2", "3"], col_sep: ";") 69 | assert_equal(<<-LINE, line) 70 | 1;2;3 71 | LINE 72 | end 73 | 74 | def test_generate_line_row_sep 75 | line = CSV.generate_line(["1", 
"2"], row_sep: nil) 76 | assert_equal(<<-LINE.chomp, line) 77 | 1,2 78 | LINE 79 | end 80 | 81 | def test_generate_line_shortcut 82 | line = ["1", "2", "3"].to_csv(col_sep: ";") 83 | assert_equal(<<-LINE, line) 84 | 1;2;3 85 | LINE 86 | end 87 | 88 | def test_generate_lines 89 | lines = CSV.generate_lines([["foo", "bar"], [1, 2], [3, 4]]) 90 | assert_equal(<<-LINES, lines) 91 | foo,bar 92 | 1,2 93 | 3,4 94 | LINES 95 | end 96 | 97 | def test_headers_detection 98 | headers = ["a", "b", "c"] 99 | CSV.open(@output.path, "w", headers: true) do |csv| 100 | csv << headers 101 | csv << ["1", "2", "3"] 102 | assert_equal(headers, csv.headers) 103 | end 104 | end 105 | 106 | def test_lineno 107 | CSV.open(@output.path, "w") do |csv| 108 | n_lines = 20 109 | n_lines.times do 110 | csv << ["a", "b", "c"] 111 | end 112 | assert_equal(n_lines, csv.lineno) 113 | end 114 | end 115 | 116 | def test_append_row 117 | CSV.open(@output.path, "wb") do |csv| 118 | csv << 119 | CSV::Row.new([], ["1", "2", "3"]) << 120 | CSV::Row.new([], ["a", "b", "c"]) 121 | end 122 | assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 123 | 1,2,3 124 | a,b,c 125 | CSV 126 | end 127 | 128 | if respond_to?(:ractor) 129 | ractor 130 | def test_append_row_in_ractor 131 | ractor = Ractor.new(@output.path) do |path| 132 | CSV.open(path, "wb") do |csv| 133 | csv << 134 | CSV::Row.new([], ["1", "2", "3"]) << 135 | CSV::Row.new([], ["a", "b", "c"]) 136 | end 137 | end 138 | ractor.value 139 | assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 140 | 1,2,3 141 | a,b,c 142 | CSV 143 | end 144 | end 145 | 146 | def test_append_hash 147 | CSV.open(@output.path, "wb", headers: true) do |csv| 148 | csv << [:a, :b, :c] 149 | csv << {a: 1, b: 2, c: 3} 150 | csv << {a: 4, b: 5, c: 6} 151 | end 152 | assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 153 | a,b,c 154 | 1,2,3 155 | 4,5,6 156 | CSV 157 | end 158 | 159 | def test_append_hash_headers_array 160 | CSV.open(@output.path, "wb", headers: [:b, 
:a, :c]) do |csv| 161 | csv << {a: 1, b: 2, c: 3} 162 | csv << {a: 4, b: 5, c: 6} 163 | end 164 | assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 165 | 2,1,3 166 | 5,4,6 167 | CSV 168 | end 169 | 170 | def test_append_hash_headers_string 171 | CSV.open(@output.path, "wb", headers: "b|a|c", col_sep: "|") do |csv| 172 | csv << {"a" => 1, "b" => 2, "c" => 3} 173 | csv << {"a" => 4, "b" => 5, "c" => 6} 174 | end 175 | assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 176 | 2|1|3 177 | 5|4|6 178 | CSV 179 | end 180 | 181 | def test_write_headers 182 | CSV.open(@output.path, 183 | "wb", 184 | headers: "b|a|c", 185 | write_headers: true, 186 | col_sep: "|" ) do |csv| 187 | csv << {"a" => 1, "b" => 2, "c" => 3} 188 | csv << {"a" => 4, "b" => 5, "c" => 6} 189 | end 190 | assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 191 | b|a|c 192 | 2|1|3 193 | 5|4|6 194 | CSV 195 | end 196 | 197 | def test_write_headers_empty 198 | CSV.open(@output.path, 199 | "wb", 200 | headers: "b|a|c", 201 | write_headers: true, 202 | col_sep: "|" ) do |csv| 203 | end 204 | assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 205 | b|a|c 206 | CSV 207 | end 208 | 209 | def test_options_not_modified 210 | options = {}.freeze 211 | CSV.generate(**options) {} 212 | CSV.generate_line([], **options) 213 | CSV.filter("", "", **options) 214 | CSV.instance("", **options) 215 | end 216 | end 217 | -------------------------------------------------------------------------------- /test/csv/line_endings.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby/csv/650b9c2e69ba025312574624cae67bf56aa3a189/test/csv/line_endings.gz -------------------------------------------------------------------------------- /test/csv/parse/test_column_separator.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative 
"../helper" 5 | 6 | class TestCSVParseColumnSeparator < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def test_comma 10 | assert_equal([["a", "b", nil, "d"]], 11 | CSV.parse("a,b,,d", col_sep: ",")) 12 | end 13 | 14 | def test_space 15 | assert_equal([["a", "b", nil, "d"]], 16 | CSV.parse("a b d", col_sep: " ")) 17 | end 18 | 19 | def test_tab 20 | assert_equal([["a", "b", nil, "d"]], 21 | CSV.parse("a\tb\t\td", col_sep: "\t")) 22 | end 23 | 24 | def test_multiple_characters_include_sub_separator 25 | assert_equal([["a b", nil, "d"]], 26 | CSV.parse("a b d", col_sep: " ")) 27 | end 28 | 29 | def test_multiple_characters_leading_empty_fields 30 | data = <<-CSV 31 | <=><=>A<=>B<=>C 32 | 1<=>2<=>3 33 | CSV 34 | assert_equal([ 35 | [nil, nil, "A", "B", "C"], 36 | ["1", "2", "3"], 37 | ], 38 | CSV.parse(data, col_sep: "<=>")) 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /test/csv/parse/test_convert.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseConvert < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def setup 10 | super 11 | @data = "Numbers,:integer,1,:float,3.015" 12 | @parser = CSV.new(@data) 13 | 14 | @custom = lambda {|field| /\A:(\S.*?)\s*\Z/ =~ field ? $1.to_sym : field} 15 | 16 | @time = Time.utc(2018, 12, 30, 6, 41, 29) 17 | @windows_safe_time_data = @time.strftime("%a %b %d %H:%M:%S %Y") 18 | 19 | @preserving_converter = lambda do |field, info| 20 | f = field.encode(CSV::ConverterEncoding) 21 | return f if info.quoted? 22 | begin 23 | Integer(f, 10) 24 | rescue 25 | f 26 | end 27 | end 28 | 29 | @quoted_header_converter = lambda do |field, info| 30 | f = field.encode(CSV::ConverterEncoding) 31 | return f if info.quoted? 
32 | f.to_sym 33 | end 34 | end 35 | 36 | def test_integer 37 | @parser.convert(:integer) 38 | assert_equal(["Numbers", ":integer", 1, ":float", "3.015"], 39 | @parser.shift) 40 | end 41 | 42 | def test_float 43 | @parser.convert(:float) 44 | assert_equal(["Numbers", ":integer", 1.0, ":float", 3.015], 45 | @parser.shift) 46 | end 47 | 48 | def test_float_integer 49 | @parser.convert(:float) 50 | @parser.convert(:integer) 51 | assert_equal(["Numbers", ":integer", 1.0, ":float", 3.015], 52 | @parser.shift) 53 | end 54 | 55 | def test_integer_float 56 | @parser.convert(:integer) 57 | @parser.convert(:float) 58 | assert_equal(["Numbers", ":integer", 1, ":float", 3.015], 59 | @parser.shift) 60 | end 61 | 62 | def test_numeric 63 | @parser.convert(:numeric) 64 | assert_equal(["Numbers", ":integer", 1, ":float", 3.015], 65 | @parser.shift) 66 | end 67 | 68 | def test_all 69 | @data << ",#{@windows_safe_time_data}" 70 | @parser = CSV.new(@data) 71 | @parser.convert(:all) 72 | assert_equal(["Numbers", ":integer", 1, ":float", 3.015, @time.to_datetime], 73 | @parser.shift) 74 | end 75 | 76 | def test_custom 77 | @parser.convert do |field| 78 | /\A:(\S.*?)\s*\Z/ =~ field ? $1.to_sym : field 79 | end 80 | assert_equal(["Numbers", :integer, "1", :float, "3.015"], 81 | @parser.shift) 82 | end 83 | 84 | def test_builtin_custom 85 | @parser.convert(:numeric) 86 | @parser.convert(&@custom) 87 | assert_equal(["Numbers", :integer, 1, :float, 3.015], 88 | @parser.shift) 89 | end 90 | 91 | def test_custom_field_info_line 92 | @parser.convert do |field, info| 93 | assert_equal(1, info.line) 94 | info.index == 4 ? Float(field).floor : field 95 | end 96 | assert_equal(["Numbers", ":integer", "1", ":float", 3], 97 | @parser.shift) 98 | end 99 | 100 | def test_custom_field_info_header 101 | headers = ["one", "two", "three", "four", "five"] 102 | @parser = CSV.new(@data, headers: headers) 103 | @parser.convert do |field, info| 104 | info.header == "three" ? 
Integer(field) * 100 : field 105 | end 106 | assert_equal(CSV::Row.new(headers, 107 | ["Numbers", ":integer", 100, ":float", "3.015"]), 108 | @parser.shift) 109 | end 110 | 111 | def test_custom_blank_field 112 | converter = lambda {|field| field.nil?} 113 | row = CSV.parse_line('nil,', converters: converter) 114 | assert_equal([false, true], row) 115 | end 116 | 117 | def test_nil_value 118 | assert_equal(["nil", "", "a"], 119 | CSV.parse_line(',"",a', nil_value: "nil")) 120 | end 121 | 122 | def test_empty_value 123 | assert_equal([nil, "empty", "a"], 124 | CSV.parse_line(',"",a', empty_value: "empty")) 125 | end 126 | 127 | def test_quoted_parse_line 128 | row = CSV.parse_line('1,"2",3', converters: @preserving_converter) 129 | assert_equal([1, "2", 3], row) 130 | end 131 | 132 | def test_quoted_parse 133 | expected = [["quoted", "unquoted"], ["109", 1], ["10A", 2]] 134 | rows = CSV.parse(<<~CSV, converters: @preserving_converter) 135 | "quoted",unquoted 136 | "109",1 137 | "10A",2 138 | CSV 139 | assert_equal(expected, rows) 140 | end 141 | 142 | def test_quoted_alternating_quote 143 | row = CSV.parse_line('"1",2,"3"', converters: @preserving_converter) 144 | assert_equal(['1', 2, '3'], row) 145 | end 146 | 147 | def test_quoted_parse_headers 148 | expected = [["quoted", :unquoted], ["109", "1"], ["10A", "2"]] 149 | table = CSV.parse(<<~CSV, headers: true, header_converters: @quoted_header_converter) 150 | "quoted",unquoted 151 | "109",1 152 | "10A",2 153 | CSV 154 | assert_equal(expected, table.to_a) 155 | end 156 | 157 | def test_quoted_parse_with_string_headers 158 | expected = [["quoted", :unquoted], %w[109 1], %w[10A 2]] 159 | table = CSV.parse(<<~CSV, headers: '"quoted",unquoted', header_converters: @quoted_header_converter) 160 | "109",1 161 | "10A",2 162 | CSV 163 | assert_equal(expected, table.to_a) 164 | end 165 | end 166 | -------------------------------------------------------------------------------- /test/csv/parse/test_each.rb: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseEach < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def test_twice 10 | data = <<-CSV 11 | Ruby,2.6.0,script 12 | CSV 13 | csv = CSV.new(data) 14 | assert_equal([ 15 | [["Ruby", "2.6.0", "script"]], 16 | [], 17 | ], 18 | [ 19 | csv.to_a, 20 | csv.to_a, 21 | ]) 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /test/csv/parse/test_general.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require "timeout" 5 | 6 | require_relative "../helper" 7 | 8 | # 9 | # Following tests are my interpretation of the 10 | # {CSV RFC}[https://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that 11 | # document in one place (intentionally) and that is to make the default row 12 | # separator $/. 13 | # 14 | class TestCSVParseGeneral < Test::Unit::TestCase 15 | extend DifferentOFS 16 | 17 | BIG_DATA = "123456789\n" * 512 18 | 19 | def test_mastering_regex_example 20 | ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K} 21 | assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000", 22 | "It's \"10 Grand\", baby", "10K" ], 23 | CSV.parse_line(ex) ) 24 | end 25 | 26 | # Old Ruby 1.8 CSV library tests. 
def test_std_lib_csv
  # Each fixture is a pair: [raw CSV line, fields CSV.parse_line must return].
  # An unquoted empty field parses to nil; a quoted empty field ("") parses
  # to an empty String.
  complete_fixtures = [
    ["\t", ["\t"]],
    ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
    ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
    ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
    ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
    ["\"\"", [""]],
    ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
    ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
    ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
    ["foo,\"\",baz", ["foo", "", "baz"]],
    ["\",\"", [","]],
    ["foo", ["foo"]],
    [",,", [nil, nil, nil]],
    [",", [nil, nil]],
    ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
    ["foo,,baz", ["foo", nil, "baz"]],
    ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
    ["\",\",\",\"", [",", ","]],
    ["foo,bar,", ["foo", "bar", nil]],
    [",foo,bar", [nil, "foo", "bar"]],
    ["foo,bar", ["foo", "bar"]],
    [";", [";"]],
    ["\t,\t", ["\t", "\t"]],
    ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
    ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
    ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
    [";,;", [";", ";"]],
  ]

  # Second table: these pairs all appear in the table above as well and are
  # checked again in a second pass.
  repeated_fixtures = [
    ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
    ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
    ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
    ["\"\"", [""]],
    ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
    ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
    ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
    ["foo,\"\",baz", ["foo", "", "baz"]],
    ["foo", ["foo"]],
    [",,", [nil, nil, nil]],
    [",", [nil, nil]],
    ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
    ["foo,,baz", ["foo", nil, "baz"]],
    ["foo,bar", ["foo", "bar"]],
    ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
    ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
  ]

  # One shared check, applied to every pair in table order.
  verify = lambda do |(line, fields)|
    assert_equal(fields, CSV.parse_line(line))
  end

  complete_fixtures.each(&verify)
  repeated_fixtures.each(&verify)
end
77 | 78 | # From: [ruby-core:6496] 79 | def test_aras_edge_cases 80 | [ [%Q{a,b}, ["a", "b"]], 81 | [%Q{a,"""b"""}, ["a", "\"b\""]], 82 | [%Q{a,"""b"}, ["a", "\"b"]], 83 | [%Q{a,"b"""}, ["a", "b\""]], 84 | [%Q{a,"\nb"""}, ["a", "\nb\""]], 85 | [%Q{a,"""\nb"}, ["a", "\"\nb"]], 86 | [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]], 87 | [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]], 88 | [%Q{a,,,}, ["a", nil, nil, nil]], 89 | [%Q{,}, [nil, nil]], 90 | [%Q{"",""}, ["", ""]], 91 | [%Q{""""}, ["\""]], 92 | [%Q{"""",""}, ["\"",""]], 93 | [%Q{,""}, [nil,""]], 94 | [%Q{,"\r"}, [nil,"\r"]], 95 | [%Q{"\r\n,"}, ["\r\n,"]], 96 | [%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |edge_case| 97 | assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) 98 | end 99 | end 100 | 101 | def test_james_edge_cases 102 | # A read at eof? should return nil. 103 | assert_equal(nil, CSV.parse_line("")) 104 | # 105 | # With Ruby 1.8 CSV it's impossible to tell an empty line from a line 106 | # containing a single +nil+ field. The old CSV library returns 107 | # [nil] in these cases, but Array.new makes more sense to 108 | # me. 
109 | # 110 | assert_equal(Array.new, CSV.parse_line("\n1,2,3\n")) 111 | end 112 | 113 | def test_rob_edge_cases 114 | [ [%Q{"a\nb"}, ["a\nb"]], 115 | [%Q{"\n\n\n"}, ["\n\n\n"]], 116 | [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]], 117 | [%Q{,"\r\n"}, [nil,"\r\n"]], 118 | [%Q{,"\r\n."}, [nil,"\r\n."]], 119 | [%Q{"a\na","one newline"}, ["a\na", 'one newline']], 120 | [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']], 121 | [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']], 122 | [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']], 123 | [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]], 124 | ].each do |edge_case| 125 | assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) 126 | end 127 | end 128 | 129 | def test_non_regex_edge_cases 130 | # An early version of the non-regex parser fails this test 131 | [ [ "foo,\"foo,bar,baz,foo\",\"foo\"", 132 | ["foo", "foo,bar,baz,foo", "foo"] ] ].each do |edge_case| 133 | assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) 134 | end 135 | 136 | assert_raise(CSV::MalformedCSVError) do 137 | CSV.parse_line("1,\"23\"4\"5\", 6") 138 | end 139 | end 140 | 141 | def test_malformed_csv_cr_first_line 142 | error = assert_raise(CSV::MalformedCSVError) do 143 | CSV.parse_line("1,2\r,3", row_sep: "\n") 144 | end 145 | assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.", 146 | error.message) 147 | end 148 | 149 | def test_malformed_csv_cr_middle_line 150 | csv = <<-CSV 151 | line,1,abc 152 | line,2,"def\nghi" 153 | 154 | line,4,some\rjunk 155 | line,5,jkl 156 | CSV 157 | 158 | error = assert_raise(CSV::MalformedCSVError) do 159 | CSV.parse(csv) 160 | end 161 | assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.", 162 | error.message) 163 | end 164 | 165 | def test_malformed_csv_unclosed_quote 166 | error = assert_raise(CSV::MalformedCSVError) do 167 | CSV.parse_line('1,2,"3...') 168 | end 169 | assert_equal("Unclosed quoted field in line 1.", 170 | 
error.message) 171 | end 172 | 173 | def test_malformed_csv_illegal_quote_middle_line 174 | csv = <<-CSV 175 | line,1,abc 176 | line,2,"def\nghi" 177 | 178 | line,4,8'10" 179 | line,5,jkl 180 | CSV 181 | 182 | error = assert_raise(CSV::MalformedCSVError) do 183 | CSV.parse(csv) 184 | end 185 | assert_equal("Illegal quoting in line 4.", 186 | error.message) 187 | end 188 | 189 | def test_the_parse_fails_fast_when_it_can_for_unquoted_fields 190 | assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA) 191 | end 192 | 193 | def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes 194 | assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA) 195 | end 196 | 197 | def test_field_size_limit_controls_lookahead 198 | assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"', 199 | field_size_limit: 2048 ) 200 | end 201 | 202 | def test_field_size_limit_max_allowed 203 | column = "abcde" 204 | assert_equal([[column]], 205 | CSV.parse("\"#{column}\"", 206 | field_size_limit: column.size + 1)) 207 | end 208 | 209 | def test_field_size_limit_quote_simple 210 | column = "abcde" 211 | assert_parse_errors_out("\"#{column}\"", 212 | field_size_limit: column.size) 213 | end 214 | 215 | def test_field_size_limit_no_quote_implicitly 216 | column = "abcde" 217 | assert_parse_errors_out("#{column}", 218 | field_size_limit: column.size) 219 | end 220 | 221 | def test_field_size_limit_no_quote_explicitly 222 | column = "abcde" 223 | assert_parse_errors_out("#{column}", 224 | field_size_limit: column.size, 225 | quote_char: nil) 226 | end 227 | 228 | def test_field_size_limit_in_extended_column_not_exceeding 229 | data = <<~DATA 230 | "a","b" 231 | " 232 | 2 233 | ","" 234 | DATA 235 | assert_nothing_raised(CSV::MalformedCSVError) do 236 | CSV.parse(data, field_size_limit: 4) 237 | end 238 | end 239 | 240 | def test_field_size_limit_in_extended_column_exceeding 241 | data = <<~DATA 242 | "a","b" 243 | " 244 | 2345 245 | ","" 246 | DATA 247 | 
assert_parse_errors_out(data, field_size_limit: 5) 248 | end 249 | 250 | def test_max_field_size_controls_lookahead 251 | assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"', 252 | max_field_size: 2048 ) 253 | end 254 | 255 | def test_max_field_size_max_allowed 256 | column = "abcde" 257 | assert_equal([[column]], 258 | CSV.parse("\"#{column}\"", 259 | max_field_size: column.size)) 260 | end 261 | 262 | def test_max_field_size_quote_simple 263 | column = "abcde" 264 | assert_parse_errors_out("\"#{column}\"", 265 | max_field_size: column.size - 1) 266 | end 267 | 268 | def test_max_field_size_no_quote_implicitly 269 | column = "abcde" 270 | assert_parse_errors_out("#{column}", 271 | max_field_size: column.size - 1) 272 | end 273 | 274 | def test_max_field_size_no_quote_explicitly 275 | column = "abcde" 276 | assert_parse_errors_out("#{column}", 277 | max_field_size: column.size - 1, 278 | quote_char: nil) 279 | end 280 | 281 | def test_max_field_size_in_extended_column_not_exceeding 282 | data = <<~DATA 283 | "a","b" 284 | " 285 | 2 286 | ","" 287 | DATA 288 | assert_nothing_raised(CSV::MalformedCSVError) do 289 | CSV.parse(data, max_field_size: 3) 290 | end 291 | end 292 | 293 | def test_max_field_size_in_extended_column_exceeding 294 | data = <<~DATA 295 | "a","b" 296 | " 297 | 2345 298 | ","" 299 | DATA 300 | assert_parse_errors_out(data, max_field_size: 4) 301 | end 302 | 303 | def test_row_sep_auto_cr 304 | assert_equal([["a"]], CSV.parse("a\r")) 305 | end 306 | 307 | def test_row_sep_auto_lf 308 | assert_equal([["a"]], CSV.parse("a\n")) 309 | end 310 | 311 | def test_row_sep_auto_cr_lf 312 | assert_equal([["a"]], CSV.parse("a\r\n")) 313 | end 314 | 315 | def test_seeked_string_io 316 | input_with_bom = StringIO.new("\ufeffあ,い,う\r\na,b,c\r\n") 317 | input_with_bom.read(3) 318 | assert_equal([ 319 | ["あ", "い", "う"], 320 | ["a", "b", "c"], 321 | ], 322 | CSV.new(input_with_bom).each.to_a) 323 | end 324 | 325 | def test_quoted_col_sep_and_empty_line 326 | 
assert_equal([["one,"], [], ["three"]], 327 | CSV.parse(<<-CSV)) 328 | "one," 329 | 330 | "three" 331 | CSV 332 | end 333 | 334 | private 335 | 336 | { 337 | "YJIT"=>1, # for --yjit-call-threshold=1 338 | "MJIT"=>5, "RJIT"=>5, # for --jit-wait 339 | }.any? do |jit, timeout| 340 | if (RubyVM.const_defined?(jit) and 341 | jit = RubyVM.const_get(jit) and 342 | jit.respond_to?(:enabled?) and 343 | jit.enabled?) 344 | PARSE_ERROR_TIMEOUT = timeout 345 | end 346 | end 347 | PARSE_ERROR_TIMEOUT ||= 0.2 348 | 349 | def assert_parse_errors_out(data, timeout: PARSE_ERROR_TIMEOUT, **options) 350 | assert_raise(CSV::MalformedCSVError) do 351 | Timeout.timeout(timeout) do 352 | CSV.parse(data, **options) 353 | fail("Parse didn't error out") 354 | end 355 | end 356 | end 357 | end 358 | -------------------------------------------------------------------------------- /test/csv/parse/test_header.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVHeaders < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def setup 10 | super 11 | @data = <<-CSV 12 | first,second,third 13 | A,B,C 14 | 1,2,3 15 | CSV 16 | end 17 | 18 | def test_first_row 19 | [:first_row, true].each do |setting| # two names for the same setting 20 | # activate headers 21 | csv = nil 22 | assert_nothing_raised(Exception) do 23 | csv = CSV.parse(@data, headers: setting) 24 | end 25 | 26 | # first data row - skipping headers 27 | row = csv[0] 28 | assert_not_nil(row) 29 | assert_instance_of(CSV::Row, row) 30 | assert_equal([%w{first A}, %w{second B}, %w{third C}], row.to_a) 31 | 32 | # second data row 33 | row = csv[1] 34 | assert_not_nil(row) 35 | assert_instance_of(CSV::Row, row) 36 | assert_equal([%w{first 1}, %w{second 2}, %w{third 3}], row.to_a) 37 | 38 | # empty 39 | assert_nil(csv[2]) 40 | end 41 | end 42 | 43 | def test_array_of_headers 44 | # activate 
headers 45 | csv = nil 46 | assert_nothing_raised(Exception) do 47 | csv = CSV.parse(@data, headers: [:my, :new, :headers]) 48 | end 49 | 50 | # first data row - skipping headers 51 | row = csv[0] 52 | assert_not_nil(row) 53 | assert_instance_of(CSV::Row, row) 54 | assert_equal( [[:my, "first"], [:new, "second"], [:headers, "third"]], 55 | row.to_a ) 56 | 57 | # second data row 58 | row = csv[1] 59 | assert_not_nil(row) 60 | assert_instance_of(CSV::Row, row) 61 | assert_equal([[:my, "A"], [:new, "B"], [:headers, "C"]], row.to_a) 62 | 63 | # third data row 64 | row = csv[2] 65 | assert_not_nil(row) 66 | assert_instance_of(CSV::Row, row) 67 | assert_equal([[:my, "1"], [:new, "2"], [:headers, "3"]], row.to_a) 68 | 69 | # empty 70 | assert_nil(csv[3]) 71 | 72 | # with return and convert 73 | assert_nothing_raised(Exception) do 74 | csv = CSV.parse( @data, headers: [:my, :new, :headers], 75 | return_headers: true, 76 | header_converters: lambda { |h| h.to_s } ) 77 | end 78 | row = csv[0] 79 | assert_not_nil(row) 80 | assert_instance_of(CSV::Row, row) 81 | assert_equal([["my", :my], ["new", :new], ["headers", :headers]], row.to_a) 82 | assert_predicate(row, :header_row?) 83 | assert_not_predicate(row, :field_row?) 
84 | end 85 | 86 | def test_csv_header_string 87 | # activate headers 88 | csv = nil 89 | assert_nothing_raised(Exception) do 90 | csv = CSV.parse(@data, headers: "my,new,headers") 91 | end 92 | 93 | # first data row - skipping headers 94 | row = csv[0] 95 | assert_not_nil(row) 96 | assert_instance_of(CSV::Row, row) 97 | assert_equal([%w{my first}, %w{new second}, %w{headers third}], row.to_a) 98 | 99 | # second data row 100 | row = csv[1] 101 | assert_not_nil(row) 102 | assert_instance_of(CSV::Row, row) 103 | assert_equal([%w{my A}, %w{new B}, %w{headers C}], row.to_a) 104 | 105 | # third data row 106 | row = csv[2] 107 | assert_not_nil(row) 108 | assert_instance_of(CSV::Row, row) 109 | assert_equal([%w{my 1}, %w{new 2}, %w{headers 3}], row.to_a) 110 | 111 | # empty 112 | assert_nil(csv[3]) 113 | 114 | # with return and convert 115 | assert_nothing_raised(Exception) do 116 | csv = CSV.parse( @data, headers: "my,new,headers", 117 | return_headers: true, 118 | header_converters: :symbol ) 119 | end 120 | row = csv[0] 121 | assert_not_nil(row) 122 | assert_instance_of(CSV::Row, row) 123 | assert_equal([[:my, "my"], [:new, "new"], [:headers, "headers"]], row.to_a) 124 | assert_predicate(row, :header_row?) 125 | assert_not_predicate(row, :field_row?) 
126 | end 127 | 128 | def test_csv_header_string_inherits_separators 129 | # parse with custom col_sep 130 | csv = nil 131 | assert_nothing_raised(Exception) do 132 | csv = CSV.parse( @data.tr(",", "|"), col_sep: "|", 133 | headers: "my|new|headers" ) 134 | end 135 | 136 | # verify headers were recognized 137 | row = csv[0] 138 | assert_not_nil(row) 139 | assert_instance_of(CSV::Row, row) 140 | assert_equal([%w{my first}, %w{new second}, %w{headers third}], row.to_a) 141 | end 142 | 143 | def test_return_headers 144 | # activate headers and request they are returned 145 | csv = nil 146 | assert_nothing_raised(Exception) do 147 | csv = CSV.parse(@data, headers: true, return_headers: true) 148 | end 149 | 150 | # header row 151 | row = csv[0] 152 | assert_not_nil(row) 153 | assert_instance_of(CSV::Row, row) 154 | assert_equal( [%w{first first}, %w{second second}, %w{third third}], 155 | row.to_a ) 156 | assert_predicate(row, :header_row?) 157 | assert_not_predicate(row, :field_row?) 158 | 159 | # first data row - skipping headers 160 | row = csv[1] 161 | assert_not_nil(row) 162 | assert_instance_of(CSV::Row, row) 163 | assert_equal([%w{first A}, %w{second B}, %w{third C}], row.to_a) 164 | assert_not_predicate(row, :header_row?) 165 | assert_predicate(row, :field_row?) 166 | 167 | # second data row 168 | row = csv[2] 169 | assert_not_nil(row) 170 | assert_instance_of(CSV::Row, row) 171 | assert_equal([%w{first 1}, %w{second 2}, %w{third 3}], row.to_a) 172 | assert_not_predicate(row, :header_row?) 173 | assert_predicate(row, :field_row?) 
174 | 175 | # empty 176 | assert_nil(csv[3]) 177 | end 178 | 179 | def test_converters 180 | # create test data where headers and fields look alike 181 | data = <<-CSV 182 | 1,2,3 183 | 1,2,3 184 | CSV 185 | 186 | # normal converters do not affect headers 187 | csv = CSV.parse( data, headers: true, 188 | return_headers: true, 189 | converters: :numeric ) 190 | assert_equal([%w{1 1}, %w{2 2}, %w{3 3}], csv[0].to_a) 191 | assert_equal([["1", 1], ["2", 2], ["3", 3]], csv[1].to_a) 192 | assert_nil(csv[2]) 193 | 194 | # header converters do affect headers (only) 195 | assert_nothing_raised(Exception) do 196 | csv = CSV.parse( data, headers: true, 197 | return_headers: true, 198 | converters: :numeric, 199 | header_converters: :symbol ) 200 | end 201 | assert_equal([[:"1", "1"], [:"2", "2"], [:"3", "3"]], csv[0].to_a) 202 | assert_equal([[:"1", 1], [:"2", 2], [:"3", 3]], csv[1].to_a) 203 | assert_nil(csv[2]) 204 | end 205 | 206 | def test_builtin_downcase_converter 207 | csv = CSV.parse( "One,TWO Three", headers: true, 208 | return_headers: true, 209 | header_converters: :downcase ) 210 | assert_equal(%w{one two\ three}, csv.headers) 211 | end 212 | 213 | def test_builtin_symbol_converter 214 | # Note that the trailing space is intentional 215 | csv = CSV.parse( "One,TWO Three ", headers: true, 216 | return_headers: true, 217 | header_converters: :symbol ) 218 | assert_equal([:one, :two_three], csv.headers) 219 | end 220 | 221 | def test_builtin_symbol_raw_converter 222 | csv = CSV.parse( "a b,c d", headers: true, 223 | return_headers: true, 224 | header_converters: :symbol_raw ) 225 | assert_equal([:"a b", :"c d"], csv.headers) 226 | end 227 | 228 | def test_builtin_symbol_converter_with_punctuation 229 | csv = CSV.parse( "One, Two & Three ($)", headers: true, 230 | return_headers: true, 231 | header_converters: :symbol ) 232 | assert_equal([:one, :two_three], csv.headers) 233 | end 234 | 235 | def test_builtin_converters_with_blank_header 236 | csv = CSV.parse( 
"one,,three", headers: true, 237 | return_headers: true, 238 | header_converters: [:downcase, :symbol, :symbol_raw] ) 239 | assert_equal([:one, nil, :three], csv.headers) 240 | end 241 | 242 | def test_custom_converter 243 | converter = lambda { |header| header.tr(" ", "_") } 244 | csv = CSV.parse( "One,TWO Three", 245 | headers: true, 246 | return_headers: true, 247 | header_converters: converter ) 248 | assert_equal(%w{One TWO_Three}, csv.headers) 249 | end 250 | 251 | def test_table_support 252 | csv = nil 253 | assert_nothing_raised(Exception) do 254 | csv = CSV.parse(@data, headers: true) 255 | end 256 | 257 | assert_instance_of(CSV::Table, csv) 258 | end 259 | 260 | def test_skip_blanks 261 | @data = <<-CSV 262 | 263 | 264 | A,B,C 265 | 266 | 1,2,3 267 | 268 | 269 | 270 | CSV 271 | 272 | expected = [%w[1 2 3]] 273 | CSV.parse(@data, headers: true, skip_blanks: true) do |row| 274 | assert_equal(expected.shift, row.fields) 275 | end 276 | 277 | expected = [%w[A B C], %w[1 2 3]] 278 | CSV.parse( @data, 279 | headers: true, 280 | return_headers: true, 281 | skip_blanks: true ) do |row| 282 | assert_equal(expected.shift, row.fields) 283 | end 284 | end 285 | 286 | def test_headers_reader 287 | # no headers 288 | assert_nil(CSV.new(@data).headers) 289 | 290 | # headers 291 | csv = CSV.new(@data, headers: true) 292 | assert_equal(true, csv.headers) # before headers are read 293 | csv.shift # set headers 294 | assert_equal(%w[first second third], csv.headers) # after headers are read 295 | end 296 | 297 | def test_blank_row 298 | @data += "\n#{@data}" # add a blank row 299 | 300 | # ensure that everything returned is a Row object 301 | CSV.parse(@data, headers: true) do |row| 302 | assert_instance_of(CSV::Row, row) 303 | end 304 | end 305 | 306 | def test_nil_row_header 307 | @data = <<-CSV 308 | A 309 | 310 | 1 311 | CSV 312 | 313 | csv = CSV.parse(@data, headers: true) 314 | 315 | # ensure nil row creates Row object with headers 316 | row = csv[0] 317 | 
assert_equal([["A"], [nil]], 318 | [row.headers, row.fields]) 319 | end 320 | 321 | def test_parse_empty 322 | assert_equal(CSV::Table.new([]), 323 | CSV.parse("", headers: true)) 324 | end 325 | 326 | def test_parse_empty_line 327 | assert_equal(CSV::Table.new([]), 328 | CSV.parse("\n", headers: true)) 329 | end 330 | 331 | def test_specified_empty 332 | assert_equal(CSV::Table.new([], 333 | headers: ["header1"]), 334 | CSV.parse("", headers: ["header1"])) 335 | end 336 | 337 | def test_specified_empty_line 338 | assert_equal(CSV::Table.new([CSV::Row.new(["header1"], [])], 339 | headers: ["header1"]), 340 | CSV.parse("\n", headers: ["header1"])) 341 | end 342 | end 343 | -------------------------------------------------------------------------------- /test/csv/parse/test_inputs_scanner.rb: -------------------------------------------------------------------------------- 1 | require_relative "../helper" 2 | 3 | class TestCSVParseInputsScanner < Test::Unit::TestCase 4 | include CSVHelper 5 | 6 | def test_scan_keep_nested_back 7 | input = CSV::Parser::UnoptimizedStringIO.new("abcdef") 8 | scanner = CSV::Parser::InputsScanner.new([input], 9 | Encoding::UTF_8, 10 | nil) 11 | scanner.keep_start 12 | assert_equal("abc", scanner.scan_all(/[a-c]+/)) 13 | scanner.keep_start 14 | assert_equal("def", scanner.scan_all(/[d-f]+/)) 15 | scanner.keep_back 16 | scanner.keep_back 17 | assert_equal("abcdef", scanner.scan_all(/[a-f]+/)) 18 | end 19 | 20 | def test_scan_keep_over_chunks_nested_back 21 | input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") 22 | scanner = CSV::Parser::InputsScanner.new([input], 23 | Encoding::UTF_8, 24 | nil, 25 | chunk_size: 2) 26 | scanner.keep_start 27 | assert_equal("abc", scanner.scan_all(/[a-c]+/)) 28 | scanner.keep_start 29 | assert_equal("def", scanner.scan_all(/[d-f]+/)) 30 | scanner.keep_back 31 | scanner.keep_back 32 | assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) 33 | end 34 | 35 | def 
test_scan_keep_over_chunks_nested_drop_back 36 | input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") 37 | scanner = CSV::Parser::InputsScanner.new([input], 38 | Encoding::UTF_8, 39 | nil, 40 | chunk_size: 3) 41 | scanner.keep_start 42 | assert_equal("ab", scanner.scan(/../)) 43 | scanner.keep_start 44 | assert_equal("c", scanner.scan(/./)) 45 | assert_equal("d", scanner.scan(/./)) 46 | scanner.keep_drop 47 | scanner.keep_back 48 | assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) 49 | end 50 | 51 | def test_each_line_keep_over_chunks_multibyte 52 | input = CSV::Parser::UnoptimizedStringIO.new("ab\n\u{3000}a\n") 53 | scanner = CSV::Parser::InputsScanner.new([input], 54 | Encoding::UTF_8, 55 | nil, 56 | chunk_size: 1) 57 | each_line = scanner.each_line("\n") 58 | assert_equal("ab\n", each_line.next) 59 | scanner.keep_start 60 | assert_equal("\u{3000}a\n", each_line.next) 61 | scanner.keep_back 62 | assert_equal("\u{3000}a\n", scanner.scan_all(/[^,]+/)) 63 | end 64 | 65 | def test_each_line_keep_over_chunks_fit_chunk_size 66 | input = CSV::Parser::UnoptimizedStringIO.new("\na") 67 | scanner = CSV::Parser::InputsScanner.new([input], 68 | Encoding::UTF_8, 69 | nil, 70 | chunk_size: 1) 71 | each_line = scanner.each_line("\n") 72 | assert_equal("\n", each_line.next) 73 | scanner.keep_start 74 | assert_equal("a", each_line.next) 75 | scanner.keep_back 76 | end 77 | end 78 | -------------------------------------------------------------------------------- /test/csv/parse/test_invalid.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseInvalid < Test::Unit::TestCase 7 | def test_no_column_mixed_new_lines 8 | error = assert_raise(CSV::MalformedCSVError) do 9 | CSV.parse("\n" + 10 | "\r") 11 | end 12 | assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.", 13 | error.message) 14 | end 15 | 16 | 
def test_ignore_invalid_line 17 | csv = CSV.new(<<-CSV, headers: true, return_headers: true) 18 | head1,head2,head3 19 | aaa,bbb,ccc 20 | ddd,ee"e.fff 21 | ggg,hhh,iii 22 | CSV 23 | headers = ["head1", "head2", "head3"] 24 | assert_equal(CSV::Row.new(headers, headers), 25 | csv.shift) 26 | assert_equal(CSV::Row.new(headers, ["aaa", "bbb", "ccc"]), 27 | csv.shift) 28 | assert_equal(false, csv.eof?) 29 | error = assert_raise(CSV::MalformedCSVError) do 30 | csv.shift 31 | end 32 | assert_equal("Illegal quoting in line 3.", 33 | error.message) 34 | assert_equal(false, csv.eof?) 35 | assert_equal(CSV::Row.new(headers, ["ggg", "hhh", "iii"]), 36 | csv.shift) 37 | assert_equal(true, csv.eof?) 38 | end 39 | 40 | def test_ignore_invalid_line_cr_lf 41 | data = <<-CSV 42 | "1","OK"\r 43 | "2",""NOT" OK"\r 44 | "3","OK"\r 45 | CSV 46 | csv = CSV.new(data) 47 | 48 | assert_equal(['1', 'OK'], csv.shift) 49 | assert_raise(CSV::MalformedCSVError) { csv.shift } 50 | assert_equal(['3', 'OK'], csv.shift) 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /test/csv/parse/test_liberal_parsing.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseLiberalParsing < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def test_middle_quote_start 10 | input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson' 11 | error = assert_raise(CSV::MalformedCSVError) do 12 | CSV.parse_line(input) 13 | end 14 | assert_equal("Illegal quoting in line 1.", 15 | error.message) 16 | assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'], 17 | CSV.parse_line(input, liberal_parsing: true)) 18 | end 19 | 20 | def test_middle_quote_end 21 | input = '"quoted" field' 22 | error = assert_raise(CSV::MalformedCSVError) do 23 | CSV.parse_line(input) 24 | end 25 | assert_equal("Any value after quoted field 
isn't allowed in line 1.", 26 | error.message) 27 | assert_equal(['"quoted" field'], 28 | CSV.parse_line(input, liberal_parsing: true)) 29 | end 30 | 31 | def test_endline_after_quoted_field_end 32 | csv = CSV.new("A\r\n\"B\"\nC\r\n", liberal_parsing: true) 33 | assert_equal(["A"], csv.gets) 34 | error = assert_raise(CSV::MalformedCSVError) do 35 | csv.gets 36 | end 37 | assert_equal('Illegal end-of-line sequence outside of a quoted field <"\n"> in line 2.', 38 | error.message) 39 | assert_equal(["C"], csv.gets) 40 | end 41 | 42 | def test_quote_after_column_separator 43 | error = assert_raise(CSV::MalformedCSVError) do 44 | CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true) 45 | end 46 | assert_equal("Unclosed quoted field in line 1.", 47 | error.message) 48 | end 49 | 50 | def test_quote_before_column_separator 51 | assert_equal(["is", 'this "three', ' or four"', "fields"], 52 | CSV.parse_line('is,this "three, or four",fields', 53 | liberal_parsing: true)) 54 | end 55 | 56 | def test_backslash_quote 57 | assert_equal([ 58 | "1", 59 | "\"Hamlet says, \\\"Seems", 60 | "\\\" madam! Nay it is; I know not \\\"seems.\\\"\"", 61 | ], 62 | CSV.parse_line('1,' + 63 | '"Hamlet says, \"Seems,' + 64 | '\" madam! 
Nay it is; I know not \"seems.\""', 65 | liberal_parsing: true)) 66 | end 67 | 68 | def test_space_quote 69 | input = <<~CSV 70 | Los Angeles, 34°03'N, 118°15'W 71 | New York City, 40°42'46"N, 74°00'21"W 72 | Paris, 48°51'24"N, 2°21'03"E 73 | CSV 74 | assert_equal( 75 | [ 76 | ["Los Angeles", " 34°03'N", " 118°15'W"], 77 | ["New York City", " 40°42'46\"N", " 74°00'21\"W"], 78 | ["Paris", " 48°51'24\"N", " 2°21'03\"E"], 79 | ], 80 | CSV.parse(input, liberal_parsing: true)) 81 | end 82 | 83 | def test_double_quote_outside_quote 84 | data = %Q{a,""b""} 85 | error = assert_raise(CSV::MalformedCSVError) do 86 | CSV.parse(data) 87 | end 88 | assert_equal("Any value after quoted field isn't allowed in line 1.", 89 | error.message) 90 | assert_equal([ 91 | [["a", %Q{""b""}]], 92 | [["a", %Q{"b"}]], 93 | ], 94 | [ 95 | CSV.parse(data, liberal_parsing: true), 96 | CSV.parse(data, 97 | liberal_parsing: { 98 | double_quote_outside_quote: true, 99 | }), 100 | ]) 101 | end 102 | 103 | class TestBackslashQuote < Test::Unit::TestCase 104 | extend ::DifferentOFS 105 | 106 | def test_double_quote_outside_quote 107 | data = %Q{a,""b""} 108 | assert_equal([ 109 | [["a", %Q{""b""}]], 110 | [["a", %Q{"b"}]], 111 | ], 112 | [ 113 | CSV.parse(data, 114 | liberal_parsing: { 115 | backslash_quote: true 116 | }), 117 | CSV.parse(data, 118 | liberal_parsing: { 119 | backslash_quote: true, 120 | double_quote_outside_quote: true 121 | }), 122 | ]) 123 | end 124 | 125 | def test_unquoted_value 126 | data = %q{\"\"a\"\"} 127 | assert_equal([ 128 | [[%q{\"\"a\"\"}]], 129 | [[%q{""a""}]], 130 | ], 131 | [ 132 | CSV.parse(data, liberal_parsing: true), 133 | CSV.parse(data, 134 | liberal_parsing: { 135 | backslash_quote: true 136 | }), 137 | ]) 138 | end 139 | 140 | def test_unquoted_value_multiple_characters_col_sep 141 | data = %q{a<\\"b<=>x} 142 | assert_equal([[%Q{a<"b}, "x"]], 143 | CSV.parse(data, 144 | col_sep: "<=>", 145 | liberal_parsing: { 146 | backslash_quote: true 147 | })) 148 | end 149 
| 150 | def test_quoted_value 151 | data = %q{"\"\"a\"\""} 152 | assert_equal([ 153 | [[%q{"\"\"a\"\""}]], 154 | [[%q{""a""}]], 155 | [[%q{""a""}]], 156 | ], 157 | [ 158 | CSV.parse(data, liberal_parsing: true), 159 | CSV.parse(data, 160 | liberal_parsing: { 161 | backslash_quote: true 162 | }), 163 | CSV.parse(data, 164 | liberal_parsing: { 165 | backslash_quote: true, 166 | double_quote_outside_quote: true 167 | }), 168 | ]) 169 | end 170 | end 171 | end 172 | -------------------------------------------------------------------------------- /test/csv/parse/test_quote_char_nil.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseQuoteCharNil < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def test_full 10 | assert_equal(["a", "b"], CSV.parse_line(%Q{a,b}, quote_char: nil)) 11 | end 12 | 13 | def test_end_with_nil 14 | assert_equal(["a", nil, nil, nil], CSV.parse_line(%Q{a,,,}, quote_char: nil)) 15 | end 16 | 17 | def test_nil_nil 18 | assert_equal([nil, nil], CSV.parse_line(%Q{,}, quote_char: nil)) 19 | end 20 | 21 | def test_unquoted_value_multiple_characters_col_sep 22 | data = %q{a<b<=>x} 23 | assert_equal([[%Q{a<b}, "x"]], CSV.parse(data, col_sep: "<=>", quote_char: nil)) 24 | end 25 | 26 | def test_csv_header_string 27 | data = <<~DATA 28 | first,second,third 29 | A,B,C 30 | 1,2,3 31 | DATA 32 | assert_equal( 33 | CSV::Table.new([ 34 | CSV::Row.new(["my", "new", "headers"], ["first", "second", "third"]), 35 | CSV::Row.new(["my", "new", "headers"], ["A", "B", "C"]), 36 | CSV::Row.new(["my", "new", "headers"], ["1", "2", "3"]) 37 | ]), 38 | CSV.parse(data, headers: "my,new,headers", quote_char: nil) 39 | ) 40 | end 41 | 42 | def test_comma 43 | assert_equal([["a", "b", nil, "d"]], 44 | CSV.parse("a,b,,d", col_sep: ",", quote_char: nil)) 45 | end 46 | 47 | def test_space 48 | assert_equal([["a", "b", nil, "d"]], 49 | CSV.parse("a b  d", col_sep: " 
", quote_char: nil)) 50 | end 51 | 52 | def encode_array(array, encoding) 53 | array.collect do |element| 54 | element ? element.encode(encoding) : element 55 | end 56 | end 57 | 58 | def test_space_no_ascii 59 | encoding = Encoding::UTF_16LE 60 | assert_equal([encode_array(["a", "b", nil, "d"], encoding)], 61 | CSV.parse("a b  d".encode(encoding), 62 | col_sep: " ".encode(encoding), 63 | quote_char: nil)) 64 | end 65 | 66 | def test_multiple_space 67 | assert_equal([["a b", nil, "d"]], 68 | CSV.parse("a b    d", col_sep: "  ", quote_char: nil)) 69 | end 70 | 71 | def test_multiple_characters_leading_empty_fields 72 | data = <<-CSV 73 | <=><=>A<=>B<=>C 74 | 1<=>2<=>3 75 | CSV 76 | assert_equal([ 77 | [nil, nil, "A", "B", "C"], 78 | ["1", "2", "3"], 79 | ], 80 | CSV.parse(data, col_sep: "<=>", quote_char: nil)) 81 | end 82 | 83 | def test_line 84 | lines = [ 85 | "abc,def\n", 86 | ] 87 | csv = CSV.new(lines.join(""), quote_char: nil) 88 | lines.each do |line| 89 | csv.shift 90 | assert_equal(line, csv.line) 91 | end 92 | end 93 | end 94 | -------------------------------------------------------------------------------- /test/csv/parse/test_read.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseRead < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def test_shift 10 | data = <<-CSV 11 | 1 12 | 2 13 | 3 14 | CSV 15 | csv = CSV.new(data) 16 | assert_equal([ 17 | ["1"], 18 | [["2"], ["3"]], 19 | nil, 20 | ], 21 | [ 22 | csv.shift, 23 | csv.read, 24 | csv.shift, 25 | ]) 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /test/csv/parse/test_rewind.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseRewind 
< Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def parse(data, **options) 10 | csv = CSV.new(data, **options) 11 | records = csv.to_a 12 | csv.rewind 13 | [records, csv.to_a] 14 | end 15 | 16 | def test_default 17 | data = <<-CSV 18 | Ruby,2.6.0,script 19 | CSV 20 | assert_equal([ 21 | [["Ruby", "2.6.0", "script"]], 22 | [["Ruby", "2.6.0", "script"]], 23 | ], 24 | parse(data)) 25 | end 26 | 27 | def test_have_headers 28 | data = <<-CSV 29 | Language,Version,Type 30 | Ruby,2.6.0,script 31 | CSV 32 | assert_equal([ 33 | [CSV::Row.new(["Language", "Version", "Type"], 34 | ["Ruby", "2.6.0", "script"])], 35 | [CSV::Row.new(["Language", "Version", "Type"], 36 | ["Ruby", "2.6.0", "script"])], 37 | ], 38 | parse(data, headers: true)) 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /test/csv/parse/test_row_separator.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseRowSeparator < Test::Unit::TestCase 7 | extend DifferentOFS 8 | include CSVHelper 9 | 10 | def test_multiple_characters 11 | with_chunk_size("1") do 12 | assert_equal([["a"], ["b"]], 13 | CSV.parse("a\r\nb\r\n", row_sep: "\r\n")) 14 | end 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /test/csv/parse/test_skip_lines.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: false 2 | 3 | require_relative "../helper" 4 | 5 | class TestCSVParseSkipLines < Test::Unit::TestCase 6 | extend DifferentOFS 7 | include CSVHelper 8 | 9 | def test_default 10 | csv = CSV.new("a,b,c\n") 11 | assert_nil(csv.skip_lines) 12 | end 13 | 14 | def parse(data, **options) 15 | # We use Tempfile here to use CSV::Parser::InputsScanner. 
16 | Tempfile.open(["csv-", ".csv"]) do |file| 17 | file.binmode 18 | file.print(data) 19 | file.close 20 | CSV.open(file, **options) do |csv| 21 | csv.read 22 | end 23 | end 24 | end 25 | 26 | def test_regexp 27 | csv = <<-CSV 28 | 1 29 | #2 30 | #3 31 | 4 32 | CSV 33 | assert_equal([ 34 | ["1"], 35 | ["4"], 36 | ], 37 | parse(csv, :skip_lines => /\A\s*#/)) 38 | end 39 | 40 | def test_regexp_quoted 41 | csv = <<-CSV 42 | 1 43 | #2 44 | "#3" 45 | 4 46 | CSV 47 | assert_equal([ 48 | ["1"], 49 | ["#3"], 50 | ["4"], 51 | ], 52 | parse(csv, :skip_lines => /\A\s*#/)) 53 | end 54 | 55 | def test_string 56 | csv = <<-CSV 57 | 1 58 | .2 59 | 3. 60 | 4 61 | CSV 62 | assert_equal([ 63 | ["1"], 64 | ["4"], 65 | ], 66 | parse(csv, :skip_lines => ".")) 67 | end 68 | 69 | class RegexStub 70 | end 71 | 72 | def test_not_matchable 73 | regex_stub = RegexStub.new 74 | csv = CSV.new("1\n", :skip_lines => regex_stub) 75 | error = assert_raise(ArgumentError) do 76 | csv.shift 77 | end 78 | assert_equal(":skip_lines has to respond to #match: #{regex_stub.inspect}", 79 | error.message) 80 | end 81 | 82 | class Matchable 83 | def initialize(pattern) 84 | @pattern = pattern 85 | end 86 | 87 | def match(line) 88 | @pattern.match(line) 89 | end 90 | end 91 | 92 | def test_matchable 93 | csv = <<-CSV 94 | 1 95 | # 2 96 | 3 97 | # 4 98 | CSV 99 | assert_equal([ 100 | ["1"], 101 | ["3"], 102 | ], 103 | parse(csv, :skip_lines => Matchable.new(/\A#/))) 104 | end 105 | 106 | def test_multibyte_data 107 | # U+3042 HIRAGANA LETTER A 108 | # U+3044 HIRAGANA LETTER I 109 | # U+3046 HIRAGANA LETTER U 110 | value = "\u3042\u3044\u3046" 111 | with_chunk_size("5") do 112 | assert_equal([[value], [value]], 113 | parse("#{value}\n#{value}\n", 114 | :skip_lines => /\A#/)) 115 | end 116 | end 117 | 118 | def test_empty_line_and_liberal_parsing 119 | assert_equal([["a", "b"]], 120 | parse("a,b\n", 121 | :liberal_parsing => true, 122 | :skip_lines => /^$/)) 123 | end 124 | 125 | def test_crlf 126 | 
assert_equal([["a", "b"]], 127 | parse("a,b\r\n,\r\n", 128 | :skip_lines => /^,+$/)) 129 | end 130 | 131 | def test_crlf_strip_no_last_crlf 132 | assert_equal([["a"], ["b"]], 133 | parse("a\r\nb", 134 | row_sep: "\r\n", 135 | skip_lines: /^ *$/, 136 | strip: true)) 137 | end 138 | 139 | def test_crlf_quoted_lf 140 | assert_equal([["\n", ""]], 141 | parse("\"\n\",\"\"\r\n", 142 | row_sep: "\r\n", 143 | skip_lines: /not matched/)) 144 | end 145 | end 146 | -------------------------------------------------------------------------------- /test/csv/parse/test_strip.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseStrip < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def test_both 10 | assert_equal(["a", "b"], 11 | CSV.parse_line(%Q{ a , b }, strip: true)) 12 | end 13 | 14 | def test_left 15 | assert_equal(["a", "b"], 16 | CSV.parse_line(%Q{ a, b}, strip: true)) 17 | end 18 | 19 | def test_right 20 | assert_equal(["a", "b"], 21 | CSV.parse_line(%Q{a ,b }, strip: true)) 22 | end 23 | 24 | def test_middle 25 | assert_equal(["a b"], 26 | CSV.parse_line(%Q{a b}, strip: true)) 27 | end 28 | 29 | def test_quoted 30 | assert_equal([" a ", " b "], 31 | CSV.parse_line(%Q{" a "," b "}, strip: true)) 32 | end 33 | 34 | def test_liberal_parsing 35 | assert_equal([" a ", "b", " c ", " d "], 36 | CSV.parse_line(%Q{" a ", b , " c "," d " }, 37 | strip: true, 38 | liberal_parsing: true)) 39 | end 40 | 41 | def test_string 42 | assert_equal(["a", " b"], 43 | CSV.parse_line(%Q{ a , " b" }, 44 | strip: " ")) 45 | end 46 | 47 | def test_no_quote 48 | assert_equal([" a ", " b "], 49 | CSV.parse_line(%Q{" a ", b }, 50 | strip: %Q{"}, 51 | quote_char: nil)) 52 | end 53 | 54 | def test_do_not_strip_cr 55 | assert_equal([ 56 | ["a", "b "], 57 | ["a", "b "], 58 | ], 59 | CSV.parse(%Q{"a" ,"b " \r} + 60 | %Q{"a" ,"b " \r}, 61 | 
strip: true)) 62 | end 63 | 64 | def test_do_not_strip_lf 65 | assert_equal([ 66 | ["a", "b "], 67 | ["a", "b "], 68 | ], 69 | CSV.parse(%Q{"a" ,"b " \n} + 70 | %Q{"a" ,"b " \n}, 71 | strip: true)) 72 | end 73 | 74 | def test_do_not_strip_crlf 75 | assert_equal([ 76 | ["a", "b "], 77 | ["a", "b "], 78 | ], 79 | CSV.parse(%Q{"a" ,"b " \r\n} + 80 | %Q{"a" ,"b " \r\n}, 81 | strip: true)) 82 | end 83 | 84 | def test_col_sep_incompatible_true 85 | message = "The provided strip (true) and " \ 86 | "col_sep (\\t) options are incompatible." 87 | assert_raise_with_message(ArgumentError, message) do 88 | CSV.parse_line(%Q{"a"\t"b"\n}, 89 | col_sep: "\t", 90 | strip: true) 91 | end 92 | end 93 | 94 | def test_col_sep_incompatible_string 95 | message = "The provided strip (\\t) and " \ 96 | "col_sep (\\t) options are incompatible." 97 | assert_raise_with_message(ArgumentError, message) do 98 | CSV.parse_line(%Q{"a"\t"b"\n}, 99 | col_sep: "\t", 100 | strip: "\t") 101 | end 102 | end 103 | 104 | def test_col_sep_compatible_string 105 | assert_equal( 106 | ["a", "b"], 107 | CSV.parse_line(%Q{\va\tb\v\n}, 108 | col_sep: "\t", 109 | strip: "\v") 110 | ) 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /test/csv/parse/test_unconverted_fields.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | class TestCSVParseUnconvertedFields < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def setup 10 | super 11 | @custom = lambda {|field| /\A:(\S.*?)\s*\Z/ =~ field ? 
$1.to_sym : field} 12 | 13 | @headers = ["first", "second", "third"] 14 | @data = <<-CSV 15 | first,second,third 16 | 1,2,3 17 | CSV 18 | end 19 | 20 | 21 | def test_custom 22 | row = CSV.parse_line("Numbers,:integer,1,:float,3.015", 23 | converters: [:numeric, @custom], 24 | unconverted_fields: true) 25 | assert_equal([ 26 | ["Numbers", :integer, 1, :float, 3.015], 27 | ["Numbers", ":integer", "1", ":float", "3.015"], 28 | ], 29 | [ 30 | row, 31 | row.unconverted_fields, 32 | ]) 33 | end 34 | 35 | def test_no_fields 36 | row = CSV.parse_line("\n", 37 | converters: [:numeric, @custom], 38 | unconverted_fields: true) 39 | assert_equal([ 40 | [], 41 | [], 42 | ], 43 | [ 44 | row, 45 | row.unconverted_fields, 46 | ]) 47 | end 48 | 49 | def test_parsed_header 50 | row = CSV.parse_line(@data, 51 | converters: :numeric, 52 | unconverted_fields: true, 53 | headers: :first_row) 54 | assert_equal([ 55 | CSV::Row.new(@headers, 56 | [1, 2, 3]), 57 | ["1", "2", "3"], 58 | ], 59 | [ 60 | row, 61 | row.unconverted_fields, 62 | ]) 63 | end 64 | 65 | def test_return_headers 66 | row = CSV.parse_line(@data, 67 | converters: :numeric, 68 | unconverted_fields: true, 69 | headers: :first_row, 70 | return_headers: true) 71 | assert_equal([ 72 | CSV::Row.new(@headers, 73 | @headers), 74 | @headers, 75 | ], 76 | [ 77 | row, 78 | row.unconverted_fields, 79 | ]) 80 | end 81 | 82 | def test_header_converters 83 | row = CSV.parse_line(@data, 84 | converters: :numeric, 85 | unconverted_fields: true, 86 | headers: :first_row, 87 | return_headers: true, 88 | header_converters: :symbol) 89 | assert_equal([ 90 | CSV::Row.new(@headers.collect(&:to_sym), 91 | @headers), 92 | @headers, 93 | ], 94 | [ 95 | row, 96 | row.unconverted_fields, 97 | ]) 98 | end 99 | 100 | def test_specified_headers 101 | row = CSV.parse_line("\n", 102 | converters: :numeric, 103 | unconverted_fields: true, 104 | headers: %w{my new headers}, 105 | return_headers: true, 106 | header_converters: :symbol) 107 | assert_equal([ 
108 | CSV::Row.new([:my, :new, :headers], 109 | ["my", "new", "headers"]), 110 | [], 111 | ], 112 | [ 113 | row, 114 | row.unconverted_fields, 115 | ]) 116 | end 117 | end 118 | -------------------------------------------------------------------------------- /test/csv/test_data_converters.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "helper" 5 | 6 | class TestCSVDataConverters < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def setup 10 | super 11 | @win_safe_time_str = Time.now.strftime("%a %b %d %H:%M:%S %Y") 12 | end 13 | 14 | def test_builtin_integer_converter 15 | # does convert 16 | [-5, 1, 10000000000].each do |n| 17 | assert_equal(n, CSV::Converters[:integer][n.to_s]) 18 | end 19 | 20 | # does not convert 21 | (%w{junk 1.0} + [""]).each do |str| 22 | assert_equal(str, CSV::Converters[:integer][str]) 23 | end 24 | end 25 | 26 | def test_builtin_float_converter 27 | # does convert 28 | [-5.1234, 0, 2.3e-11].each do |n| 29 | assert_equal(n, CSV::Converters[:float][n.to_s]) 30 | end 31 | 32 | # does not convert 33 | (%w{junk 1..0 .015F} + [""]).each do |str| 34 | assert_equal(str, CSV::Converters[:float][str]) 35 | end 36 | end 37 | 38 | def test_builtin_date_converter 39 | # does convert 40 | assert_instance_of( 41 | Date, 42 | CSV::Converters[:date][@win_safe_time_str.sub(/\d+:\d+:\d+ /, "")] 43 | ) 44 | 45 | # does not convert 46 | assert_instance_of(String, CSV::Converters[:date]["junk"]) 47 | end 48 | 49 | def test_builtin_date_time_converter 50 | # does convert 51 | assert_instance_of( DateTime, 52 | CSV::Converters[:date_time][@win_safe_time_str] ) 53 | 54 | # does not convert 55 | assert_instance_of(String, CSV::Converters[:date_time]["junk"]) 56 | end 57 | 58 | def test_builtin_date_time_converter_iso8601_date 59 | iso8601_string = "2018-01-14" 60 | datetime = DateTime.new(2018, 1, 14) 61 | assert_equal(datetime, 62 | 
CSV::Converters[:date_time][iso8601_string]) 63 | end 64 | 65 | def test_builtin_date_time_converter_iso8601_minute 66 | iso8601_string = "2018-01-14T22:25" 67 | datetime = DateTime.new(2018, 1, 14, 22, 25) 68 | assert_equal(datetime, 69 | CSV::Converters[:date_time][iso8601_string]) 70 | end 71 | 72 | def test_builtin_date_time_converter_iso8601_second 73 | iso8601_string = "2018-01-14T22:25:19" 74 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19) 75 | assert_equal(datetime, 76 | CSV::Converters[:date_time][iso8601_string]) 77 | end 78 | 79 | def test_builtin_date_time_converter_iso8601_under_second 80 | iso8601_string = "2018-01-14T22:25:19.1" 81 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) 82 | assert_equal(datetime, 83 | CSV::Converters[:date_time][iso8601_string]) 84 | end 85 | 86 | def test_builtin_date_time_converter_iso8601_under_second_offset 87 | iso8601_string = "2018-01-14T22:25:19.1+09:00" 88 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") 89 | assert_equal(datetime, 90 | CSV::Converters[:date_time][iso8601_string]) 91 | end 92 | 93 | def test_builtin_date_time_converter_iso8601_offset 94 | iso8601_string = "2018-01-14T22:25:19+09:00" 95 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") 96 | assert_equal(datetime, 97 | CSV::Converters[:date_time][iso8601_string]) 98 | end 99 | 100 | def test_builtin_date_time_converter_iso8601_utc 101 | iso8601_string = "2018-01-14T22:25:19Z" 102 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19) 103 | assert_equal(datetime, 104 | CSV::Converters[:date_time][iso8601_string]) 105 | end 106 | 107 | def test_builtin_date_time_converter_rfc3339_minute 108 | rfc3339_string = "2018-01-14 22:25" 109 | datetime = DateTime.new(2018, 1, 14, 22, 25) 110 | assert_equal(datetime, 111 | CSV::Converters[:date_time][rfc3339_string]) 112 | end 113 | 114 | def test_builtin_date_time_converter_rfc3339_second 115 | rfc3339_string = "2018-01-14 22:25:19" 116 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19) 117 
| assert_equal(datetime, 118 | CSV::Converters[:date_time][rfc3339_string]) 119 | end 120 | 121 | def test_builtin_date_time_converter_rfc3339_under_second 122 | rfc3339_string = "2018-01-14 22:25:19.1" 123 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) 124 | assert_equal(datetime, 125 | CSV::Converters[:date_time][rfc3339_string]) 126 | end 127 | 128 | def test_builtin_date_time_converter_rfc3339_under_second_offset 129 | rfc3339_string = "2018-01-14 22:25:19.1+09:00" 130 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") 131 | assert_equal(datetime, 132 | CSV::Converters[:date_time][rfc3339_string]) 133 | end 134 | 135 | def test_builtin_date_time_converter_rfc3339_offset 136 | rfc3339_string = "2018-01-14 22:25:19+09:00" 137 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") 138 | assert_equal(datetime, 139 | CSV::Converters[:date_time][rfc3339_string]) 140 | end 141 | 142 | def test_builtin_date_time_converter_rfc3339_utc 143 | rfc3339_string = "2018-01-14 22:25:19Z" 144 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19) 145 | assert_equal(datetime, 146 | CSV::Converters[:date_time][rfc3339_string]) 147 | end 148 | 149 | def test_builtin_date_time_converter_rfc3339_tab_minute 150 | rfc3339_string = "2018-01-14\t22:25" 151 | datetime = DateTime.new(2018, 1, 14, 22, 25) 152 | assert_equal(datetime, 153 | CSV::Converters[:date_time][rfc3339_string]) 154 | end 155 | 156 | def test_builtin_date_time_converter_rfc3339_tab_second 157 | rfc3339_string = "2018-01-14\t22:25:19" 158 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19) 159 | assert_equal(datetime, 160 | CSV::Converters[:date_time][rfc3339_string]) 161 | end 162 | 163 | def test_builtin_date_time_converter_rfc3339_tab_under_second 164 | rfc3339_string = "2018-01-14\t22:25:19.1" 165 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) 166 | assert_equal(datetime, 167 | CSV::Converters[:date_time][rfc3339_string]) 168 | end 169 | 170 | def 
test_builtin_date_time_converter_rfc3339_tab_under_second_offset 171 | rfc3339_string = "2018-01-14\t22:25:19.1+09:00" 172 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") 173 | assert_equal(datetime, 174 | CSV::Converters[:date_time][rfc3339_string]) 175 | end 176 | 177 | def test_builtin_date_time_converter_rfc3339_tab_offset 178 | rfc3339_string = "2018-01-14\t22:25:19+09:00" 179 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") 180 | assert_equal(datetime, 181 | CSV::Converters[:date_time][rfc3339_string]) 182 | end 183 | 184 | def test_builtin_date_time_converter_rfc3339_tab_utc 185 | rfc3339_string = "2018-01-14\t22:25:19Z" 186 | datetime = DateTime.new(2018, 1, 14, 22, 25, 19) 187 | assert_equal(datetime, 188 | CSV::Converters[:date_time][rfc3339_string]) 189 | end 190 | 191 | def test_builtin_time_converter 192 | # does convert 193 | assert_instance_of(Time, 194 | CSV::Converters[:time][@win_safe_time_str]) 195 | 196 | # does not convert 197 | assert_instance_of(String, CSV::Converters[:time]["junk"]) 198 | end 199 | 200 | def test_builtin_time_converter_iso8601_date 201 | iso8601_string = "2018-01-14" 202 | time = Time.new(2018, 1, 14) 203 | assert_equal(time, 204 | CSV::Converters[:time][iso8601_string]) 205 | end 206 | 207 | def test_builtin_time_converter_iso8601_minute 208 | iso8601_string = "2018-01-14T22:25" 209 | time = Time.new(2018, 1, 14, 22, 25) 210 | assert_equal(time, 211 | CSV::Converters[:time][iso8601_string]) 212 | end 213 | 214 | def test_builtin_time_converter_iso8601_second 215 | iso8601_string = "2018-01-14T22:25:19" 216 | time = Time.new(2018, 1, 14, 22, 25, 19) 217 | assert_equal(time, 218 | CSV::Converters[:time][iso8601_string]) 219 | end 220 | 221 | def test_builtin_time_converter_iso8601_under_second 222 | iso8601_string = "2018-01-14T22:25:19.1" 223 | time = Time.new(2018, 1, 14, 22, 25, 19.1r) 224 | assert_equal(time, 225 | CSV::Converters[:time][iso8601_string]) 226 | end 227 | 228 | def 
test_builtin_time_converter_iso8601_under_second_offset 229 | iso8601_string = "2018-01-14T22:25:19.1+09:00" 230 | time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") 231 | assert_equal(time, 232 | CSV::Converters[:time][iso8601_string]) 233 | end 234 | 235 | def test_builtin_time_converter_iso8601_offset 236 | iso8601_string = "2018-01-14T22:25:19+09:00" 237 | time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") 238 | assert_equal(time, 239 | CSV::Converters[:time][iso8601_string]) 240 | end 241 | 242 | def test_builtin_time_converter_iso8601_utc 243 | iso8601_string = "2018-01-14T22:25:19Z" 244 | time = Time.utc(2018, 1, 14, 22, 25, 19) 245 | assert_equal(time, 246 | CSV::Converters[:time][iso8601_string]) 247 | end 248 | 249 | def test_builtin_time_converter_rfc3339_minute 250 | rfc3339_string = "2018-01-14 22:25" 251 | time = Time.new(2018, 1, 14, 22, 25) 252 | assert_equal(time, 253 | CSV::Converters[:time][rfc3339_string]) 254 | end 255 | 256 | def test_builtin_time_converter_rfc3339_second 257 | rfc3339_string = "2018-01-14 22:25:19" 258 | time = Time.new(2018, 1, 14, 22, 25, 19) 259 | assert_equal(time, 260 | CSV::Converters[:time][rfc3339_string]) 261 | end 262 | 263 | def test_builtin_time_converter_rfc3339_under_second 264 | rfc3339_string = "2018-01-14 22:25:19.1" 265 | time = Time.new(2018, 1, 14, 22, 25, 19.1r) 266 | assert_equal(time, 267 | CSV::Converters[:time][rfc3339_string]) 268 | end 269 | 270 | def test_builtin_time_converter_rfc3339_under_second_offset 271 | rfc3339_string = "2018-01-14 22:25:19.1+09:00" 272 | time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") 273 | assert_equal(time, 274 | CSV::Converters[:time][rfc3339_string]) 275 | end 276 | 277 | def test_builtin_time_converter_rfc3339_offset 278 | rfc3339_string = "2018-01-14 22:25:19+09:00" 279 | time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") 280 | assert_equal(time, 281 | CSV::Converters[:time][rfc3339_string]) 282 | end 283 | 284 | def test_builtin_time_converter_rfc3339_utc 
285 | rfc3339_string = "2018-01-14 22:25:19Z" 286 | time = Time.utc(2018, 1, 14, 22, 25, 19) 287 | assert_equal(time, 288 | CSV::Converters[:time][rfc3339_string]) 289 | end 290 | 291 | def test_builtin_time_converter_rfc3339_tab_minute 292 | rfc3339_string = "2018-01-14\t22:25" 293 | time = Time.new(2018, 1, 14, 22, 25) 294 | assert_equal(time, 295 | CSV::Converters[:time][rfc3339_string]) 296 | end 297 | 298 | def test_builtin_time_converter_rfc3339_tab_second 299 | rfc3339_string = "2018-01-14\t22:25:19" 300 | time = Time.new(2018, 1, 14, 22, 25, 19) 301 | assert_equal(time, 302 | CSV::Converters[:time][rfc3339_string]) 303 | end 304 | 305 | def test_builtin_time_converter_rfc3339_tab_under_second 306 | rfc3339_string = "2018-01-14\t22:25:19.1" 307 | time = Time.new(2018, 1, 14, 22, 25, 19.1r) 308 | assert_equal(time, 309 | CSV::Converters[:time][rfc3339_string]) 310 | end 311 | 312 | def test_builtin_time_converter_rfc3339_tab_under_second_offset 313 | rfc3339_string = "2018-01-14\t22:25:19.1+09:00" 314 | time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") 315 | assert_equal(time, 316 | CSV::Converters[:time][rfc3339_string]) 317 | end 318 | 319 | def test_builtin_time_converter_rfc3339_tab_offset 320 | rfc3339_string = "2018-01-14\t22:25:19+09:00" 321 | time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") 322 | assert_equal(time, 323 | CSV::Converters[:time][rfc3339_string]) 324 | end 325 | 326 | def test_builtin_time_converter_rfc3339_tab_utc 327 | rfc3339_string = "2018-01-14\t22:25:19Z" 328 | time = Time.utc(2018, 1, 14, 22, 25, 19) 329 | assert_equal(time, 330 | CSV::Converters[:time][rfc3339_string]) 331 | end 332 | end 333 | -------------------------------------------------------------------------------- /test/csv/test_features.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | begin 5 | require "zlib" 6 | rescue LoadError 7 | end 8 | 9 | 
require_relative "helper" 10 | require "tempfile" 11 | 12 | class TestCSVFeatures < Test::Unit::TestCase 13 | extend DifferentOFS 14 | 15 | TEST_CASES = [ [%Q{a,b}, ["a", "b"]], 16 | [%Q{a,"""b"""}, ["a", "\"b\""]], 17 | [%Q{a,"""b"}, ["a", "\"b"]], 18 | [%Q{a,"b"""}, ["a", "b\""]], 19 | [%Q{a,"\nb"""}, ["a", "\nb\""]], 20 | [%Q{a,"""\nb"}, ["a", "\"\nb"]], 21 | [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]], 22 | [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]], 23 | [%Q{a,,,}, ["a", nil, nil, nil]], 24 | [%Q{,}, [nil, nil]], 25 | [%Q{"",""}, ["", ""]], 26 | [%Q{""""}, ["\""]], 27 | [%Q{"""",""}, ["\"",""]], 28 | [%Q{,""}, [nil,""]], 29 | [%Q{,"\r"}, [nil,"\r"]], 30 | [%Q{"\r\n,"}, ["\r\n,"]], 31 | [%Q{"\r\n,",}, ["\r\n,", nil]] ] 32 | 33 | def setup 34 | super 35 | @sample_data = <<-CSV 36 | line,1,abc 37 | line,2,"def\nghi" 38 | 39 | line,4,jkl 40 | CSV 41 | @csv = CSV.new(@sample_data) 42 | end 43 | 44 | def test_col_sep 45 | [";", "\t"].each do |sep| 46 | TEST_CASES.each do |test_case| 47 | assert_equal( test_case.last.map { |t| t.tr(",", sep) unless t.nil? 
}, 48 | CSV.parse_line( test_case.first.tr(",", sep), 49 | col_sep: sep ) ) 50 | end 51 | end 52 | assert_equal([",,,", nil], CSV.parse_line(",,,;", col_sep: ";")) 53 | end 54 | 55 | def test_col_sep_nil 56 | assert_raise_with_message(ArgumentError, 57 | ":col_sep must be 1 or more characters: nil") do 58 | CSV.parse(@sample_data, col_sep: nil) 59 | end 60 | end 61 | 62 | def test_col_sep_empty 63 | assert_raise_with_message(ArgumentError, 64 | ":col_sep must be 1 or more characters: \"\"") do 65 | CSV.parse(@sample_data, col_sep: "") 66 | end 67 | end 68 | 69 | def test_row_sep 70 | error = assert_raise(CSV::MalformedCSVError) do 71 | CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n") 72 | end 73 | assert_equal("Unquoted fields do not allow new line <\"\\n\"> in line 1.", 74 | error.message) 75 | assert_equal( ["1", "2", "3\n", "4", "5"], 76 | CSV.parse_line(%Q{1,2,"3\n",4,5\r\n}, row_sep: "\r\n")) 77 | end 78 | 79 | def test_quote_char 80 | TEST_CASES.each do |test_case| 81 | assert_equal(test_case.last.map {|t| t.tr('"', "'") unless t.nil?}, 82 | CSV.parse_line(test_case.first.tr('"', "'"), 83 | quote_char: "'" )) 84 | end 85 | end 86 | 87 | def test_quote_char_special_regexp_char 88 | TEST_CASES.each do |test_case| 89 | assert_equal(test_case.last.map {|t| t.tr('"', "|") unless t.nil?}, 90 | CSV.parse_line(test_case.first.tr('"', "|"), 91 | quote_char: "|")) 92 | end 93 | end 94 | 95 | def test_quote_char_special_regexp_char_liberal_parsing 96 | TEST_CASES.each do |test_case| 97 | assert_equal(test_case.last.map {|t| t.tr('"', "|") unless t.nil?}, 98 | CSV.parse_line(test_case.first.tr('"', "|"), 99 | quote_char: "|", 100 | liberal_parsing: true)) 101 | end 102 | end 103 | 104 | def test_csv_char_readers 105 | %w[col_sep row_sep quote_char].each do |reader| 106 | csv = CSV.new("abc,def", reader.to_sym => "|") 107 | assert_equal("|", csv.send(reader)) 108 | end 109 | end 110 | 111 | def test_row_sep_auto_discovery 112 | ["\r\n", "\n", "\r"].each do |line_end| 
113 | data = "1,2,3#{line_end}4,5#{line_end}" 114 | discovered = CSV.new(data).row_sep 115 | assert_equal(line_end, discovered) 116 | end 117 | 118 | assert_equal("\n", CSV.new("\n\r\n\r").row_sep) 119 | 120 | assert_equal($/, CSV.new("").row_sep) 121 | 122 | assert_equal($/, CSV.new(STDERR).row_sep) 123 | end 124 | 125 | def test_line 126 | lines = [ 127 | %Q(\u{3000}abc,def\n), 128 | %Q(\u{3000}abc,"d\nef"\n), 129 | %Q(\u{3000}abc,"d\r\nef"\n), 130 | %Q(\u{3000}abc,"d\ref") 131 | ] 132 | csv = CSV.new(lines.join('')) 133 | lines.each do |line| 134 | csv.shift 135 | assert_equal(line, csv.line) 136 | end 137 | end 138 | 139 | def test_lineno 140 | assert_equal(5, @sample_data.lines.to_a.size) 141 | 142 | 4.times do |line_count| 143 | assert_equal(line_count, @csv.lineno) 144 | assert_not_nil(@csv.shift) 145 | assert_equal(line_count + 1, @csv.lineno) 146 | end 147 | assert_nil(@csv.shift) 148 | end 149 | 150 | def test_readline 151 | test_lineno 152 | 153 | @csv.rewind 154 | 155 | test_lineno 156 | end 157 | 158 | def test_unknown_options 159 | assert_raise_with_message(ArgumentError, /unknown keyword/) { 160 | CSV.new(@sample_data, unknown: :error) 161 | } 162 | assert_raise_with_message(ArgumentError, /unknown keyword/) { 163 | CSV.new(@sample_data, universal_newline: true) 164 | } 165 | end 166 | 167 | def test_skip_blanks 168 | assert_equal(4, @csv.to_a.size) 169 | 170 | @csv = CSV.new(@sample_data, skip_blanks: true) 171 | 172 | count = 0 173 | @csv.each do |row| 174 | count += 1 175 | assert_equal("line", row.first) 176 | end 177 | assert_equal(3, count) 178 | end 179 | 180 | def test_csv_behavior_readers 181 | %w[ unconverted_fields return_headers write_headers 182 | skip_blanks force_quotes ].each do |behavior| 183 | assert_not_predicate(CSV.new("abc,def"), "#{behavior}?", "Behavior defaulted to on.") 184 | csv = CSV.new("abc,def", behavior.to_sym => true) 185 | assert_predicate(csv, "#{behavior}?", "Behavior change now registered.") 186 | end 187 | end 
188 | 189 | def test_converters_reader 190 | # no change 191 | assert_equal( [:integer], 192 | CSV.new("abc,def", converters: [:integer]).converters ) 193 | 194 | # just one 195 | assert_equal( [:integer], 196 | CSV.new("abc,def", converters: :integer).converters ) 197 | 198 | # expanded 199 | assert_equal( [:integer, :float], 200 | CSV.new("abc,def", converters: :numeric).converters ) 201 | 202 | # custom 203 | csv = CSV.new("abc,def", converters: [:integer, lambda { }]) 204 | assert_equal(2, csv.converters.size) 205 | assert_equal(:integer, csv.converters.first) 206 | assert_instance_of(Proc, csv.converters.last) 207 | end 208 | 209 | def test_header_converters_reader 210 | # no change 211 | hc = :header_converters 212 | assert_equal([:downcase], CSV.new("abc,def", hc => [:downcase]).send(hc)) 213 | 214 | # just one 215 | assert_equal([:downcase], CSV.new("abc,def", hc => :downcase).send(hc)) 216 | 217 | # custom 218 | csv = CSV.new("abc,def", hc => [:symbol, lambda { }]) 219 | assert_equal(2, csv.send(hc).size) 220 | assert_equal(:symbol, csv.send(hc).first) 221 | assert_instance_of(Proc, csv.send(hc).last) 222 | end 223 | 224 | # reported by Kev Jackson 225 | def test_failing_to_escape_col_sep 226 | assert_nothing_raised(Exception) { CSV.new(String.new, col_sep: "|") } 227 | end 228 | 229 | # reported by Chris Roos 230 | def test_failing_to_reset_headers_in_rewind 231 | csv = CSV.new("forename,surname", headers: true, return_headers: true) 232 | csv.each {|row| assert_predicate row, :header_row?} 233 | csv.rewind 234 | csv.each {|row| assert_predicate row, :header_row?} 235 | end 236 | 237 | def test_gzip_reader 238 | zipped = nil 239 | assert_nothing_raised(NoMethodError) do 240 | zipped = CSV.new( 241 | Zlib::GzipReader.open( 242 | File.join(File.dirname(__FILE__), "line_endings.gz") 243 | ) 244 | ) 245 | end 246 | assert_equal("\r\n", zipped.row_sep) 247 | ensure 248 | zipped.close 249 | end if defined?(Zlib::GzipReader) 250 | 251 | def test_gzip_writer 252 
| Tempfile.create(%w"temp .gz") {|tempfile| 253 | tempfile.close 254 | file = tempfile.path 255 | zipped = nil 256 | assert_nothing_raised(NoMethodError) do 257 | zipped = CSV.new(Zlib::GzipWriter.open(file)) 258 | end 259 | zipped << %w[one two three] 260 | zipped << [1, 2, 3] 261 | zipped.close 262 | 263 | assert_include(Zlib::GzipReader.open(file) {|f| f.read}, 264 | $INPUT_RECORD_SEPARATOR, "@row_sep did not default") 265 | } 266 | end if defined?(Zlib::GzipWriter) 267 | 268 | def test_inspect_is_smart_about_io_types 269 | str = CSV.new("string,data").inspect 270 | assert_include(str, "io_type:StringIO", "IO type not detected.") 271 | 272 | str = CSV.new($stderr).inspect 273 | assert_include(str, "io_type:$stderr", "IO type not detected.") 274 | 275 | Tempfile.create(%w"temp .csv") {|tempfile| 276 | tempfile.close 277 | path = tempfile.path 278 | File.open(path, "w") { |csv| csv << "one,two,three\n1,2,3\n" } 279 | str = CSV.open(path) { |csv| csv.inspect } 280 | assert_include(str, "io_type:File", "IO type not detected.") 281 | } 282 | end 283 | 284 | def test_inspect_shows_key_attributes 285 | str = @csv.inspect 286 | %w[lineno col_sep row_sep quote_char].each do |attr_name| 287 | assert_match(/\b#{attr_name}:[^\s>]+/, str) 288 | end 289 | end 290 | 291 | def test_inspect_shows_headers_when_available 292 | csv = CSV.new("one,two,three\n1,2,3\n", headers: true) 293 | assert_include(csv.inspect, "headers:true", "Header hint not shown.") 294 | csv.shift # load headers 295 | assert_match(/headers:\[[^\]]+\]/, csv.inspect) 296 | end 297 | 298 | def test_inspect_encoding_is_ascii_compatible 299 | csv = CSV.new("one,two,three\n1,2,3\n".encode("UTF-16BE")) 300 | assert_send([Encoding, :compatible?, 301 | Encoding.find("US-ASCII"), csv.inspect.encoding], 302 | "inspect() was not ASCII compatible.") 303 | end 304 | 305 | def test_version 306 | assert_not_nil(CSV::VERSION) 307 | assert_instance_of(String, CSV::VERSION) 308 | assert_predicate(CSV::VERSION, :frozen?) 
309 | assert_match(/\A\d\.\d\.\d\z/, CSV::VERSION) 310 | end 311 | 312 | def test_table_nil_equality 313 | assert_nothing_raised(NoMethodError) { CSV.parse("test", headers: true) == nil } 314 | end 315 | 316 | # non-seekable input stream for testing https://github.com/ruby/csv/issues/44 317 | class DummyIO 318 | extend Forwardable 319 | def_delegators :@io, :gets, :read, :pos, :eof? # no seek or rewind! 320 | def initialize(data) 321 | @io = StringIO.new(data) 322 | end 323 | end 324 | 325 | def test_line_separator_autodetection_for_non_seekable_input_lf 326 | c = CSV.new(DummyIO.new("one,two,three\nfoo,bar,baz\n")) 327 | assert_equal [["one", "two", "three"], ["foo", "bar", "baz"]], c.each.to_a 328 | end 329 | 330 | def test_line_separator_autodetection_for_non_seekable_input_cr 331 | c = CSV.new(DummyIO.new("one,two,three\rfoo,bar,baz\r")) 332 | assert_equal [["one", "two", "three"], ["foo", "bar", "baz"]], c.each.to_a 333 | end 334 | 335 | def test_line_separator_autodetection_for_non_seekable_input_cr_lf 336 | c = CSV.new(DummyIO.new("one,two,three\r\nfoo,bar,baz\r\n")) 337 | assert_equal [["one", "two", "three"], ["foo", "bar", "baz"]], c.each.to_a 338 | end 339 | 340 | def test_line_separator_autodetection_for_non_seekable_input_1024_over_lf 341 | table = (1..10).map { |row| (1..200).map { |col| "row#{row}col#{col}" }.to_a }.to_a 342 | input = table.map { |line| line.join(",") }.join("\n") 343 | c = CSV.new(DummyIO.new(input)) 344 | assert_equal table, c.each.to_a 345 | end 346 | 347 | def test_line_separator_autodetection_for_non_seekable_input_1024_over_cr_lf 348 | table = (1..10).map { |row| (1..200).map { |col| "row#{row}col#{col}" }.to_a }.to_a 349 | input = table.map { |line| line.join(",") }.join("\r\n") 350 | c = CSV.new(DummyIO.new(input)) 351 | assert_equal table, c.each.to_a 352 | end 353 | 354 | def test_line_separator_autodetection_for_non_seekable_input_many_cr_only 355 | # input with lots of CRs (to make sure no bytes are lost due to 
look-ahead) 356 | c = CSV.new(DummyIO.new("foo\r" + "\r" * 9999 + "bar\r")) 357 | assert_equal [["foo"]] + [[]] * 9999 + [["bar"]], c.each.to_a 358 | end 359 | end 360 | -------------------------------------------------------------------------------- /test/csv/test_patterns.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative "helper" 4 | 5 | class TestCSVPatternMatching < Test::Unit::TestCase 6 | 7 | def test_hash 8 | case CSV::Row.new(%i{A B C}, [1, 2, 3]) 9 | in B: b, C: c 10 | assert_equal([2, 3], [b, c]) 11 | end 12 | end 13 | 14 | def test_hash_rest 15 | case CSV::Row.new(%i{A B C}, [1, 2, 3]) 16 | in B: b, **rest 17 | assert_equal([2, { A: 1, C: 3 }], [b, rest]) 18 | end 19 | end 20 | 21 | def test_array 22 | case CSV::Row.new(%i{A B C}, [1, 2, 3]) 23 | in *, matched 24 | assert_equal(3, matched) 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /test/csv/test_row.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "helper" 5 | 6 | class TestCSVRow < Test::Unit::TestCase 7 | extend DifferentOFS 8 | 9 | def setup 10 | super 11 | @row = CSV::Row.new(%w{A B C A A}, [1, 2, 3, 4]) 12 | end 13 | 14 | def test_initialize 15 | # basic 16 | row = CSV::Row.new(%w{A B C}, [1, 2, 3]) 17 | assert_not_nil(row) 18 | assert_instance_of(CSV::Row, row) 19 | assert_equal([["A", 1], ["B", 2], ["C", 3]], row.to_a) 20 | 21 | # missing headers 22 | row = CSV::Row.new(%w{A}, [1, 2, 3]) 23 | assert_not_nil(row) 24 | assert_instance_of(CSV::Row, row) 25 | assert_equal([["A", 1], [nil, 2], [nil, 3]], row.to_a) 26 | 27 | # missing fields 28 | row = CSV::Row.new(%w{A B C}, [1, 2]) 29 | assert_not_nil(row) 30 | assert_instance_of(CSV::Row, row) 31 | assert_equal([["A", 1], ["B", 2], ["C", nil]], row.to_a) 32 | end 33 
| 34 | def test_row_type 35 | # field rows 36 | row = CSV::Row.new(%w{A B C}, [1, 2, 3]) # implicit 37 | assert_not_predicate(row, :header_row?) 38 | assert_predicate(row, :field_row?) 39 | row = CSV::Row.new(%w{A B C}, [1, 2, 3], false) # explicit 40 | assert_not_predicate(row, :header_row?) 41 | assert_predicate(row, :field_row?) 42 | 43 | # header row 44 | row = CSV::Row.new(%w{A B C}, [1, 2, 3], true) 45 | assert_predicate(row, :header_row?) 46 | assert_not_predicate(row, :field_row?) 47 | end 48 | 49 | def test_headers 50 | assert_equal(%w{A B C A A}, @row.headers) 51 | end 52 | 53 | def test_field 54 | # by name 55 | assert_equal(2, @row.field("B")) 56 | assert_equal(2, @row["B"]) # alias 57 | 58 | # by index 59 | assert_equal(3, @row.field(2)) 60 | 61 | # by range 62 | assert_equal([2,3], @row.field(1..2)) 63 | 64 | # missing 65 | assert_nil(@row.field("Missing")) 66 | assert_nil(@row.field(10)) 67 | 68 | # minimum index 69 | assert_equal(1, @row.field("A")) 70 | assert_equal(1, @row.field("A", 0)) 71 | assert_equal(4, @row.field("A", 1)) 72 | assert_equal(4, @row.field("A", 2)) 73 | assert_equal(4, @row.field("A", 3)) 74 | assert_equal(nil, @row.field("A", 4)) 75 | assert_equal(nil, @row.field("A", 5)) 76 | end 77 | 78 | def test_fetch 79 | # only by name 80 | assert_equal(2, @row.fetch('B')) 81 | 82 | # missing header raises KeyError 83 | assert_raise KeyError do 84 | @row.fetch('foo') 85 | end 86 | 87 | # missing header yields itself to block 88 | assert_equal 'bar', @row.fetch('foo') { |header| 89 | header == 'foo' ? 'bar' : false } 90 | 91 | # missing header returns the given default value 92 | assert_equal 'bar', @row.fetch('foo', 'bar') 93 | 94 | # more than one vararg raises ArgumentError 95 | assert_raise ArgumentError do 96 | @row.fetch('foo', 'bar', 'baz') 97 | end 98 | end 99 | 100 | def test_has_key? 
101 | assert_equal(true, @row.has_key?('B')) 102 | assert_equal(false, @row.has_key?('foo')) 103 | 104 | # aliases 105 | assert_equal(true, @row.header?('B')) 106 | assert_equal(false, @row.header?('foo')) 107 | 108 | assert_equal(true, @row.include?('B')) 109 | assert_equal(false, @row.include?('foo')) 110 | 111 | assert_equal(true, @row.member?('B')) 112 | assert_equal(false, @row.member?('foo')) 113 | 114 | assert_equal(true, @row.key?('B')) 115 | assert_equal(false, @row.key?('foo')) 116 | end 117 | 118 | def test_set_field 119 | # set field by name 120 | assert_equal(100, @row["A"] = 100) 121 | 122 | # set field by index 123 | assert_equal(300, @row[3] = 300) 124 | 125 | # set field by name and minimum index 126 | assert_equal([:a, :b, :c], @row["A", 4] = [:a, :b, :c]) 127 | 128 | # verify the changes 129 | assert_equal( [ ["A", 100], 130 | ["B", 2], 131 | ["C", 3], 132 | ["A", 300], 133 | ["A", [:a, :b, :c]] ], @row.to_a ) 134 | 135 | # assigning an index past the end 136 | assert_equal("End", @row[10] = "End") 137 | assert_equal( [ ["A", 100], 138 | ["B", 2], 139 | ["C", 3], 140 | ["A", 300], 141 | ["A", [:a, :b, :c]], 142 | [nil, nil], 143 | [nil, nil], 144 | [nil, nil], 145 | [nil, nil], 146 | [nil, nil], 147 | [nil, "End"] ], @row.to_a ) 148 | 149 | # assigning a new field by header 150 | assert_equal("New", @row[:new] = "New") 151 | assert_equal( [ ["A", 100], 152 | ["B", 2], 153 | ["C", 3], 154 | ["A", 300], 155 | ["A", [:a, :b, :c]], 156 | [nil, nil], 157 | [nil, nil], 158 | [nil, nil], 159 | [nil, nil], 160 | [nil, nil], 161 | [nil, "End"], 162 | [:new, "New"] ], @row.to_a ) 163 | end 164 | 165 | def test_append 166 | # add a value 167 | assert_equal(@row, @row << "Value") 168 | assert_equal( [ ["A", 1], 169 | ["B", 2], 170 | ["C", 3], 171 | ["A", 4], 172 | ["A", nil], 173 | [nil, "Value"] ], @row.to_a ) 174 | 175 | # add a pair 176 | assert_equal(@row, @row << %w{Header Field}) 177 | assert_equal( [ ["A", 1], 178 | ["B", 2], 179 | ["C", 3], 180 | 
["A", 4], 181 | ["A", nil], 182 | [nil, "Value"], 183 | %w{Header Field} ], @row.to_a ) 184 | 185 | # a pair with Hash syntax 186 | assert_equal(@row, @row << {key: :value}) 187 | assert_equal( [ ["A", 1], 188 | ["B", 2], 189 | ["C", 3], 190 | ["A", 4], 191 | ["A", nil], 192 | [nil, "Value"], 193 | %w{Header Field}, 194 | [:key, :value] ], @row.to_a ) 195 | 196 | # multiple fields at once 197 | assert_equal(@row, @row.push(100, 200, [:last, 300])) 198 | assert_equal( [ ["A", 1], 199 | ["B", 2], 200 | ["C", 3], 201 | ["A", 4], 202 | ["A", nil], 203 | [nil, "Value"], 204 | %w{Header Field}, 205 | [:key, :value], 206 | [nil, 100], 207 | [nil, 200], 208 | [:last, 300] ], @row.to_a ) 209 | end 210 | 211 | def test_delete 212 | # by index 213 | assert_equal(["B", 2], @row.delete(1)) 214 | 215 | # by header 216 | assert_equal(["C", 3], @row.delete("C")) 217 | 218 | end 219 | 220 | def test_delete_if 221 | assert_equal(@row, @row.delete_if { |h, f| h == "A" and not f.nil? }) 222 | assert_equal([["B", 2], ["C", 3], ["A", nil]], @row.to_a) 223 | end 224 | 225 | def test_delete_if_without_block 226 | enum = @row.delete_if 227 | assert_instance_of(Enumerator, enum) 228 | assert_equal(@row.size, enum.size) 229 | 230 | assert_equal(@row, enum.each { |h, f| h == "A" and not f.nil? 
}) 231 | assert_equal([["B", 2], ["C", 3], ["A", nil]], @row.to_a) 232 | end 233 | 234 | def test_fields 235 | # all fields 236 | assert_equal([1, 2, 3, 4, nil], @row.fields) 237 | 238 | # by header 239 | assert_equal([1, 3], @row.fields("A", "C")) 240 | 241 | # by index 242 | assert_equal([2, 3, nil], @row.fields(1, 2, 10)) 243 | 244 | # by both 245 | assert_equal([2, 3, 4], @row.fields("B", "C", 3)) 246 | 247 | # with minimum indices 248 | assert_equal([2, 3, 4], @row.fields("B", "C", ["A", 3])) 249 | 250 | # by header range 251 | assert_equal([2, 3], @row.values_at("B".."C")) 252 | end 253 | 254 | def test_index 255 | # basic usage 256 | assert_equal(0, @row.index("A")) 257 | assert_equal(1, @row.index("B")) 258 | assert_equal(2, @row.index("C")) 259 | assert_equal(nil, @row.index("Z")) 260 | 261 | # with minimum index 262 | assert_equal(0, @row.index("A")) 263 | assert_equal(0, @row.index("A", 0)) 264 | assert_equal(3, @row.index("A", 1)) 265 | assert_equal(3, @row.index("A", 2)) 266 | assert_equal(3, @row.index("A", 3)) 267 | assert_equal(4, @row.index("A", 4)) 268 | assert_equal(nil, @row.index("A", 5)) 269 | end 270 | 271 | def test_queries 272 | # fields 273 | assert(@row.field?(4)) 274 | assert(@row.field?(nil)) 275 | assert(!@row.field?(10)) 276 | end 277 | 278 | def test_each 279 | # array style 280 | ary = @row.to_a 281 | @row.each do |pair| 282 | assert_equal(ary.first.first, pair.first) 283 | assert_equal(ary.shift.last, pair.last) 284 | end 285 | 286 | # hash style 287 | ary = @row.to_a 288 | @row.each do |header, field| 289 | assert_equal(ary.first.first, header) 290 | assert_equal(ary.shift.last, field) 291 | end 292 | 293 | # verify that we can chain the call 294 | assert_equal(@row, @row.each { }) 295 | 296 | # without block 297 | ary = @row.to_a 298 | enum = @row.each 299 | assert_instance_of(Enumerator, enum) 300 | assert_equal(@row.size, enum.size) 301 | enum.each do |pair| 302 | assert_equal(ary.first.first, pair.first) 303 | 
assert_equal(ary.shift.last, pair.last) 304 | end 305 | end 306 | 307 | def test_each_pair 308 | assert_equal([ 309 | ["A", 1], 310 | ["B", 2], 311 | ["C", 3], 312 | ["A", 4], 313 | ["A", nil], 314 | ], 315 | @row.each_pair.to_a) 316 | end 317 | 318 | def test_enumerable 319 | assert_equal( [["A", 1], ["A", 4], ["A", nil]], 320 | @row.select { |pair| pair.first == "A" } ) 321 | 322 | assert_equal(10, @row.inject(0) { |sum, (_, n)| sum + (n || 0) }) 323 | end 324 | 325 | def test_to_a 326 | row = CSV::Row.new(%w{A B C}, [1, 2, 3]).to_a 327 | assert_instance_of(Array, row) 328 | row.each do |pair| 329 | assert_instance_of(Array, pair) 330 | assert_equal(2, pair.size) 331 | end 332 | assert_equal([["A", 1], ["B", 2], ["C", 3]], row) 333 | end 334 | 335 | def test_to_hash 336 | hash = @row.to_hash 337 | assert_equal({"A" => @row["A"], "B" => @row["B"], "C" => @row["C"]}, hash) 338 | hash.keys.each_with_index do |string_key, h| 339 | assert_predicate(string_key, :frozen?) 340 | assert_same(string_key, @row.headers[h]) 341 | end 342 | end 343 | 344 | def test_to_csv 345 | # normal conversion 346 | assert_equal("1,2,3,4,\n", @row.to_csv) 347 | assert_equal("1,2,3,4,\n", @row.to_s) # alias 348 | 349 | # with options 350 | assert_equal( "1|2|3|4|\r\n", 351 | @row.to_csv(col_sep: "|", row_sep: "\r\n") ) 352 | end 353 | 354 | def test_array_delegation 355 | assert_not_empty(@row, "Row was empty.") 356 | 357 | assert_equal([@row.headers.size, @row.fields.size].max, @row.size) 358 | end 359 | 360 | def test_inspect_shows_header_field_pairs 361 | str = @row.inspect 362 | @row.each do |header, field| 363 | assert_include(str, "#{header.inspect}:#{field.inspect}", 364 | "Header field pair not found.") 365 | end 366 | end 367 | 368 | def test_inspect_encoding_is_ascii_compatible 369 | assert_send([Encoding, :compatible?, 370 | Encoding.find("US-ASCII"), 371 | @row.inspect.encoding], 372 | "inspect() was not ASCII compatible.") 373 | end 374 | 375 | def 
test_inspect_shows_symbol_headers_as_bare_attributes 376 | str = CSV::Row.new(@row.headers.map { |h| h.to_sym }, @row.fields).inspect 377 | @row.each do |header, field| 378 | assert_include(str, "#{header}:#{field.inspect}", 379 | "Header field pair not found.") 380 | end 381 | end 382 | 383 | def test_can_be_compared_with_other_classes 384 | assert_not_nil(CSV::Row.new([ ], [ ]), "The row was nil") 385 | end 386 | 387 | def test_can_be_compared_when_not_a_row 388 | r = @row == [] 389 | assert_equal false, r 390 | end 391 | 392 | def test_dig_by_index 393 | assert_equal(2, @row.dig(1)) 394 | 395 | assert_nil(@row.dig(100)) 396 | end 397 | 398 | def test_dig_by_header 399 | assert_equal(2, @row.dig("B")) 400 | 401 | assert_nil(@row.dig("Missing")) 402 | end 403 | 404 | def test_dig_cell 405 | row = CSV::Row.new(%w{A}, [["foo", ["bar", ["baz"]]]]) 406 | 407 | assert_equal("foo", row.dig(0, 0)) 408 | assert_equal("bar", row.dig(0, 1, 0)) 409 | 410 | assert_equal("foo", row.dig("A", 0)) 411 | assert_equal("bar", row.dig("A", 1, 0)) 412 | end 413 | 414 | def test_dig_cell_no_dig 415 | row = CSV::Row.new(%w{A}, ["foo"]) 416 | 417 | assert_raise(TypeError) do 418 | row.dig(0, 0) 419 | end 420 | assert_raise(TypeError) do 421 | row.dig("A", 0) 422 | end 423 | end 424 | 425 | def test_dup 426 | row = CSV::Row.new(["A"], ["foo"]) 427 | dupped_row = row.dup 428 | dupped_row["A"] = "bar" 429 | assert_equal(["foo", "bar"], 430 | [row["A"], dupped_row["A"]]) 431 | dupped_row.delete("A") 432 | assert_equal(["foo", nil], 433 | [row["A"], dupped_row["A"]]) 434 | end 435 | end 436 | -------------------------------------------------------------------------------- /test/csv/test_tsv.rb: -------------------------------------------------------------------------------- 1 | require_relative "helper" 2 | 3 | class TestTSV < Test::Unit::TestCase 4 | def test_default_separator 5 | tsv = CSV::TSV.new(String.new) 6 | assert_equal("\t", tsv.col_sep) 7 | end 8 | 9 | def test_override_separator 
10 | tsv = CSV::TSV.new(String.new, col_sep: ",") 11 | assert_equal(",", tsv.col_sep) 12 | end 13 | 14 | def test_read_tsv_data 15 | data = "a\tb\tc\n1\t2\t3" 16 | result = CSV::TSV.parse(data) 17 | assert_equal([["a", "b", "c"], ["1", "2", "3"]], result.to_a) 18 | end 19 | 20 | def test_write_tsv_data 21 | output = String.new 22 | CSV::TSV.generate(output) do |tsv| 23 | tsv << ["a", "b", "c"] 24 | tsv << ["1", "2", "3"] 25 | end 26 | assert_equal("a\tb\tc\n1\t2\t3\n", output) 27 | end 28 | 29 | def test_inheritance 30 | assert_kind_of(CSV, CSV::TSV.new(String.new)) 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /test/csv/write/test_converters.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | module TestCSVWriteConverters 7 | def test_one 8 | assert_equal(%Q[=a,=b,=c\n], 9 | generate_line(["a", "b", "c"], 10 | write_converters: ->(value) {"=" + value})) 11 | end 12 | 13 | def test_multiple 14 | assert_equal(%Q[=a_,=b_,=c_\n], 15 | generate_line(["a", "b", "c"], 16 | write_converters: [ 17 | ->(value) {"=" + value}, 18 | ->(value) {value + "_"}, 19 | ])) 20 | end 21 | 22 | def test_nil_value 23 | assert_equal(%Q[a,NaN,29\n], 24 | generate_line(["a", nil, 29], 25 | write_nil_value: "NaN")) 26 | end 27 | 28 | def test_empty_value 29 | assert_equal(%Q[a,,29\n], 30 | generate_line(["a", "", 29], 31 | write_empty_value: nil)) 32 | end 33 | end 34 | 35 | class TestCSVWriteConvertersGenerateLine < Test::Unit::TestCase 36 | include TestCSVWriteConverters 37 | extend DifferentOFS 38 | 39 | def generate_line(row, **kwargs) 40 | CSV.generate_line(row, **kwargs) 41 | end 42 | end 43 | 44 | class TestCSVWriteConvertersGenerate < Test::Unit::TestCase 45 | include TestCSVWriteConverters 46 | extend DifferentOFS 47 | 48 | def generate_line(row, **kwargs) 49 | CSV.generate(**kwargs) do 
|csv| 50 | csv << row 51 | end 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /test/csv/write/test_force_quotes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: false 2 | 3 | require_relative "../helper" 4 | 5 | module TestCSVWriteForceQuotes 6 | def test_default 7 | assert_equal(%Q[1,2,3#{$INPUT_RECORD_SEPARATOR}], 8 | generate_line(["1", "2", "3"])) 9 | end 10 | 11 | def test_true 12 | assert_equal(%Q["1","2","3"#{$INPUT_RECORD_SEPARATOR}], 13 | generate_line(["1", "2", "3"], 14 | force_quotes: true)) 15 | end 16 | 17 | def test_false 18 | assert_equal(%Q[1,2,3#{$INPUT_RECORD_SEPARATOR}], 19 | generate_line(["1", "2", "3"], 20 | force_quotes: false)) 21 | end 22 | 23 | def test_field_name 24 | assert_equal(%Q["1",2,"3"#{$INPUT_RECORD_SEPARATOR}], 25 | generate_line(["1", "2", "3"], 26 | headers: ["a", "b", "c"], 27 | force_quotes: ["a", :c])) 28 | end 29 | 30 | def test_field_name_without_headers 31 | force_quotes = ["a", "c"] 32 | error = assert_raise(ArgumentError) do 33 | generate_line(["1", "2", "3"], 34 | force_quotes: force_quotes) 35 | end 36 | assert_equal(":headers is required when you use field name " + 37 | "in :force_quotes: " + 38 | "#{force_quotes.first.inspect}: #{force_quotes.inspect}", 39 | error.message) 40 | end 41 | 42 | def test_field_index 43 | assert_equal(%Q["1",2,"3"#{$INPUT_RECORD_SEPARATOR}], 44 | generate_line(["1", "2", "3"], 45 | force_quotes: [0, 2])) 46 | end 47 | 48 | def test_field_unknown 49 | force_quotes = [1.1] 50 | error = assert_raise(ArgumentError) do 51 | generate_line(["1", "2", "3"], 52 | force_quotes: force_quotes) 53 | end 54 | assert_equal(":force_quotes element must be field index or field name: " + 55 | "#{force_quotes.first.inspect}: #{force_quotes.inspect}", 56 | error.message) 57 | end 58 | end 59 | 60 | class TestCSVWriteForceQuotesGenerateLine < Test::Unit::TestCase 61 | include 
TestCSVWriteForceQuotes 62 | extend DifferentOFS 63 | 64 | def generate_line(row, **kwargs) 65 | CSV.generate_line(row, **kwargs) 66 | end 67 | end 68 | 69 | class TestCSVWriteForceQuotesGenerate < Test::Unit::TestCase 70 | include TestCSVWriteForceQuotes 71 | extend DifferentOFS 72 | 73 | def generate_line(row, **kwargs) 74 | CSV.generate(**kwargs) do |csv| 75 | csv << row 76 | end 77 | end 78 | end 79 | -------------------------------------------------------------------------------- /test/csv/write/test_general.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | module TestCSVWriteGeneral 7 | include CSVHelper 8 | 9 | def test_tab 10 | assert_equal("\t#{$INPUT_RECORD_SEPARATOR}", 11 | generate_line(["\t"])) 12 | end 13 | 14 | def test_quote_character 15 | assert_equal(%Q[foo,"""",baz#{$INPUT_RECORD_SEPARATOR}], 16 | generate_line(["foo", %Q["], "baz"])) 17 | end 18 | 19 | def test_quote_character_double 20 | assert_equal(%Q[foo,"""""",baz#{$INPUT_RECORD_SEPARATOR}], 21 | generate_line(["foo", %Q[""], "baz"])) 22 | end 23 | 24 | def test_quote 25 | assert_equal(%Q[foo,"""bar""",baz#{$INPUT_RECORD_SEPARATOR}], 26 | generate_line(["foo", %Q["bar"], "baz"])) 27 | end 28 | 29 | def test_quote_lf 30 | assert_equal(%Q["""\n","""\n"#{$INPUT_RECORD_SEPARATOR}], 31 | generate_line([%Q["\n], %Q["\n]])) 32 | end 33 | 34 | def test_quote_cr 35 | assert_equal(%Q["""\r","""\r"#{$INPUT_RECORD_SEPARATOR}], 36 | generate_line([%Q["\r], %Q["\r]])) 37 | end 38 | 39 | def test_quote_last 40 | assert_equal(%Q[foo,"bar"""#{$INPUT_RECORD_SEPARATOR}], 41 | generate_line(["foo", %Q[bar"]])) 42 | end 43 | 44 | def test_quote_lf_last 45 | assert_equal(%Q[foo,"\nbar"""#{$INPUT_RECORD_SEPARATOR}], 46 | generate_line(["foo", %Q[\nbar"]])) 47 | end 48 | 49 | def test_quote_lf_value_lf 50 | assert_equal(%Q[foo,"""\nbar\n"""#{$INPUT_RECORD_SEPARATOR}], 51 | 
generate_line(["foo", %Q["\nbar\n"]])) 52 | end 53 | 54 | def test_quote_lf_value_lf_nil 55 | assert_equal(%Q[foo,"""\nbar\n""",#{$INPUT_RECORD_SEPARATOR}], 56 | generate_line(["foo", %Q["\nbar\n"], nil])) 57 | end 58 | 59 | def test_cr 60 | assert_equal(%Q[foo,"\r",baz#{$INPUT_RECORD_SEPARATOR}], 61 | generate_line(["foo", "\r", "baz"])) 62 | end 63 | 64 | def test_lf 65 | assert_equal(%Q[foo,"\n",baz#{$INPUT_RECORD_SEPARATOR}], 66 | generate_line(["foo", "\n", "baz"])) 67 | end 68 | 69 | def test_cr_lf 70 | assert_equal(%Q[foo,"\r\n",baz#{$INPUT_RECORD_SEPARATOR}], 71 | generate_line(["foo", "\r\n", "baz"])) 72 | end 73 | 74 | def test_cr_dot_lf 75 | assert_equal(%Q[foo,"\r.\n",baz#{$INPUT_RECORD_SEPARATOR}], 76 | generate_line(["foo", "\r.\n", "baz"])) 77 | end 78 | 79 | def test_cr_lf_cr 80 | assert_equal(%Q[foo,"\r\n\r",baz#{$INPUT_RECORD_SEPARATOR}], 81 | generate_line(["foo", "\r\n\r", "baz"])) 82 | end 83 | 84 | def test_cr_lf_lf 85 | assert_equal(%Q[foo,"\r\n\n",baz#{$INPUT_RECORD_SEPARATOR}], 86 | generate_line(["foo", "\r\n\n", "baz"])) 87 | end 88 | 89 | def test_cr_lf_comma 90 | assert_equal(%Q["\r\n,"#{$INPUT_RECORD_SEPARATOR}], 91 | generate_line(["\r\n,"])) 92 | end 93 | 94 | def test_cr_lf_comma_nil 95 | assert_equal(%Q["\r\n,",#{$INPUT_RECORD_SEPARATOR}], 96 | generate_line(["\r\n,", nil])) 97 | end 98 | 99 | def test_comma 100 | assert_equal(%Q[","#{$INPUT_RECORD_SEPARATOR}], 101 | generate_line([","])) 102 | end 103 | 104 | def test_comma_double 105 | assert_equal(%Q[",",","#{$INPUT_RECORD_SEPARATOR}], 106 | generate_line([",", ","])) 107 | end 108 | 109 | def test_comma_and_value 110 | assert_equal(%Q[foo,"foo,bar",baz#{$INPUT_RECORD_SEPARATOR}], 111 | generate_line(["foo", "foo,bar", "baz"])) 112 | end 113 | 114 | def test_one_element 115 | assert_equal(%Q[foo#{$INPUT_RECORD_SEPARATOR}], 116 | generate_line(["foo"])) 117 | end 118 | 119 | def test_nil_values_only 120 | assert_equal(%Q[,,#{$INPUT_RECORD_SEPARATOR}], 121 | generate_line([nil, 
nil, nil])) 122 | end 123 | 124 | def test_nil_double_only 125 | assert_equal(%Q[,#{$INPUT_RECORD_SEPARATOR}], 126 | generate_line([nil, nil])) 127 | end 128 | 129 | def test_nil_values 130 | assert_equal(%Q[foo,,,#{$INPUT_RECORD_SEPARATOR}], 131 | generate_line(["foo", nil, nil, nil])) 132 | end 133 | 134 | def test_nil_value_first 135 | assert_equal(%Q[,foo,baz#{$INPUT_RECORD_SEPARATOR}], 136 | generate_line([nil, "foo", "baz"])) 137 | end 138 | 139 | def test_nil_value_middle 140 | assert_equal(%Q[foo,,baz#{$INPUT_RECORD_SEPARATOR}], 141 | generate_line(["foo", nil, "baz"])) 142 | end 143 | 144 | def test_nil_value_last 145 | assert_equal(%Q[foo,baz,#{$INPUT_RECORD_SEPARATOR}], 146 | generate_line(["foo", "baz", nil])) 147 | end 148 | 149 | def test_nil_empty 150 | assert_equal(%Q[,""#{$INPUT_RECORD_SEPARATOR}], 151 | generate_line([nil, ""])) 152 | end 153 | 154 | def test_nil_cr 155 | assert_equal(%Q[,"\r"#{$INPUT_RECORD_SEPARATOR}], 156 | generate_line([nil, "\r"])) 157 | end 158 | 159 | def test_values 160 | assert_equal(%Q[foo,bar#{$INPUT_RECORD_SEPARATOR}], 161 | generate_line(["foo", "bar"])) 162 | end 163 | 164 | def test_semi_colon 165 | assert_equal(%Q[;#{$INPUT_RECORD_SEPARATOR}], 166 | generate_line([";"])) 167 | end 168 | 169 | def test_semi_colon_values 170 | assert_equal(%Q[;,;#{$INPUT_RECORD_SEPARATOR}], 171 | generate_line([";", ";"])) 172 | end 173 | 174 | def test_tab_values 175 | assert_equal(%Q[\t,\t#{$INPUT_RECORD_SEPARATOR}], 176 | generate_line(["\t", "\t"])) 177 | end 178 | 179 | def test_col_sep 180 | assert_equal(%Q[a;b;;c#{$INPUT_RECORD_SEPARATOR}], 181 | generate_line(["a", "b", nil, "c"], 182 | col_sep: ";")) 183 | assert_equal(%Q[a\tb\t\tc#{$INPUT_RECORD_SEPARATOR}], 184 | generate_line(["a", "b", nil, "c"], 185 | col_sep: "\t")) 186 | end 187 | 188 | def test_row_sep 189 | assert_equal(%Q[a,b,,c\r\n], 190 | generate_line(["a", "b", nil, "c"], 191 | row_sep: "\r\n")) 192 | end 193 | 194 | def test_force_quotes 195 | 
assert_equal(%Q["1","b","","already ""quoted"""#{$INPUT_RECORD_SEPARATOR}], 196 | generate_line([1, "b", nil, %Q{already "quoted"}], 197 | force_quotes: true)) 198 | end 199 | 200 | def test_encoding_utf8 201 | assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}], 202 | generate_line(["あ" , "い", "う"])) 203 | end 204 | 205 | def test_encoding_euc_jp 206 | row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")} 207 | assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"), 208 | generate_line(row)) 209 | end 210 | 211 | def test_encoding_with_default_internal 212 | with_default_internal(Encoding::UTF_8) do 213 | row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")} 214 | assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"), 215 | generate_line(row, encoding: Encoding::EUC_JP)) 216 | end 217 | end 218 | 219 | def test_with_default_internal 220 | with_default_internal(Encoding::UTF_8) do 221 | row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")} 222 | assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"), 223 | generate_line(row)) 224 | end 225 | end 226 | end 227 | 228 | class TestCSVWriteGeneralGenerateLine < Test::Unit::TestCase 229 | include TestCSVWriteGeneral 230 | extend DifferentOFS 231 | 232 | def generate_line(row, **kwargs) 233 | CSV.generate_line(row, **kwargs) 234 | end 235 | end 236 | 237 | class TestCSVWriteGeneralGenerate < Test::Unit::TestCase 238 | include TestCSVWriteGeneral 239 | extend DifferentOFS 240 | 241 | def generate_line(row, **kwargs) 242 | CSV.generate(**kwargs) do |csv| 243 | csv << row 244 | end 245 | end 246 | end 247 | -------------------------------------------------------------------------------- /test/csv/write/test_quote_empty.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: false 3 | 4 | require_relative "../helper" 5 | 6 | module TestCSVWriteQuoteEmpty 7 | def 
test_quote_empty_default 8 | assert_equal(%Q["""",""#{$INPUT_RECORD_SEPARATOR}], 9 | generate_line([%Q["], ""])) 10 | end 11 | 12 | def test_quote_empty_false 13 | assert_equal(%Q["""",#{$INPUT_RECORD_SEPARATOR}], 14 | generate_line([%Q["], ""], 15 | quote_empty: false)) 16 | end 17 | 18 | def test_empty_default 19 | assert_equal(%Q[foo,"",baz#{$INPUT_RECORD_SEPARATOR}], 20 | generate_line(["foo", "", "baz"])) 21 | end 22 | 23 | def test_empty_false 24 | assert_equal(%Q[foo,,baz#{$INPUT_RECORD_SEPARATOR}], 25 | generate_line(["foo", "", "baz"], 26 | quote_empty: false)) 27 | end 28 | 29 | def test_empty_only_default 30 | assert_equal(%Q[""#{$INPUT_RECORD_SEPARATOR}], 31 | generate_line([""])) 32 | end 33 | 34 | def test_empty_only_false 35 | assert_equal(%Q[#{$INPUT_RECORD_SEPARATOR}], 36 | generate_line([""], 37 | quote_empty: false)) 38 | end 39 | 40 | def test_empty_double_default 41 | assert_equal(%Q["",""#{$INPUT_RECORD_SEPARATOR}], 42 | generate_line(["", ""])) 43 | end 44 | 45 | def test_empty_double_false 46 | assert_equal(%Q[,#{$INPUT_RECORD_SEPARATOR}], 47 | generate_line(["", ""], 48 | quote_empty: false)) 49 | end 50 | end 51 | 52 | class TestCSVWriteQuoteEmptyGenerateLine < Test::Unit::TestCase 53 | include TestCSVWriteQuoteEmpty 54 | extend DifferentOFS 55 | 56 | def generate_line(row, **kwargs) 57 | CSV.generate_line(row, **kwargs) 58 | end 59 | end 60 | 61 | class TestCSVWriteQuoteEmptyGenerate < Test::Unit::TestCase 62 | include TestCSVWriteQuoteEmpty 63 | extend DifferentOFS 64 | 65 | def generate_line(row, **kwargs) 66 | CSV.generate(**kwargs) do |csv| 67 | csv << row 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /test/lib/with_different_ofs.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module DifferentOFS 4 | is_output_field_separator_deprecated = false 5 | verbose, $VERBOSE = $VERBOSE, true 6 | 
stderr, $stderr = $stderr, StringIO.new 7 | begin 8 | ofs, $, = $,, "-" 9 | is_output_field_separator_deprecated = (not $stderr.string.empty?) 10 | ensure 11 | $, = ofs 12 | $stderr = stderr 13 | $VERBOSE = verbose 14 | end 15 | 16 | unless is_output_field_separator_deprecated 17 | module WithDifferentOFS 18 | def setup 19 | super 20 | @ofs, $, = $,, "-" 21 | end 22 | 23 | def teardown 24 | $, = @ofs 25 | super 26 | end 27 | end 28 | 29 | def self.extended(klass) 30 | super(klass) 31 | klass.const_set(:DifferentOFS, Class.new(klass).class_eval {include WithDifferentOFS}) 32 | end 33 | end 34 | end 35 | --------------------------------------------------------------------------------