├── .gitignore ├── .rspec ├── .travis.yml ├── CHANGELOG.md ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── lib ├── tsv.rb └── tsv │ ├── row.rb │ ├── table.rb │ └── version.rb ├── spec ├── fixtures │ ├── broken.tsv │ ├── empty-trailing.tsv │ ├── empty.tsv │ └── example.tsv ├── lib │ ├── tsv │ │ ├── row_spec.rb │ │ └── table_spec.rb │ └── tsv_spec.rb ├── spec_helper.rb └── tsv_integration_spec.rb └── tsv.gemspec /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: ruby 3 | 4 | addons: 5 | code_climate: 6 | repo_token: 17abb3979e6abb0cee4069ec3e7aeee9c6e4fd8277b0899b4cd0900ac6030f98 7 | 8 | rvm: 9 | - '2.0' 10 | - '2.1' 11 | - '2.2' 12 | - '2.3.0' 13 | - rbx-2.5.1 14 | - jruby 15 | 16 | # Part of test suite - building gem and trying to require it via ruby -e 17 | # This way we check if gem is loadable and works without any problems in vanilla ruby 18 | script: > 19 | rake build&&gem install pkg/tsv*.gem&&ruby -e 'require "tsv"; TSV.parse_file("spec/fixtures/example.tsv").to_a'&&rake 20 | 21 | matrix: 22 | allow_failures: 23 | - rvm: rbx-2.5.1 24 | - rvm: jruby 25 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. 3 | 4 | ## [Unreleased][unreleased] 5 | 6 | ## [1.0.0] - 2015-02-14 7 | ### Changed 8 | - Reworked Cyclist into Table 9 | 10 | ## [0.0.3] - 2014-12-26 11 | ### Fixed 12 | - Fixed case with empty column contents in TSV 13 | 14 | ### Added 15 | - First version of CHANGELOG.md 16 | 17 | ## [0.0.2] - 2014-08-13 18 | ### Changed 19 | - Internal refactoring 20 | 21 | ### Added 22 | - Testing built gem integrity via CI 23 | 24 | Changelog format taken from [keep-a-changelog](https://github.com/olivierlacan/keep-a-changelog) 25 | 26 | [unreleased]: https://github.com/mimimi/ruby-tsv/compare/v1.0.0...master 27 | [1.0.0]: https://github.com/mimimi/ruby-tsv/compare/v0.0.3...v1.0.0 28 | [0.0.3]: https://github.com/mimimi/ruby-tsv/compare/v0.0.2...v0.0.3 29 | [0.0.2]: https://github.com/mimimi/ruby-tsv/compare/v0.0.1...v0.0.2 -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in tsv.gemspec 4 | gemspec 5 | 6 | group "test" do 7 | gem "codeclimate-test-reporter", require: nil 8 | gem "rake" 9 | gem "rspec", "~> 3.1.0" 10 | gem "pry" 11 | end 12 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Moron Activity 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tsv 2 | [![Build Status](https://travis-ci.org/mimimi/ruby-tsv.svg?branch=master)](https://travis-ci.org/mimimi/ruby-tsv) 3 | [![Code Climate](https://codeclimate.com/github/mimimi/ruby-tsv/badges/gpa.svg)](https://codeclimate.com/github/mimimi/ruby-tsv) 4 | [![Test Coverage](https://codeclimate.com/github/mimimi/ruby-tsv/badges/coverage.svg)](https://codeclimate.com/github/mimimi/ruby-tsv) 5 | 6 | A simple TSV parser, developed with aim of parsing a ~200Gb TSV dump. As such, no mode of operation, but enumerable is considered sane. Feel free to use `#to_a` on your supercomputer :) 7 | 8 | Does not (yet) provide TSV writing mechanism. Pull requests are welcome :) 9 | 10 | ## Installation 11 | 12 | Add this line to your application's Gemfile: 13 | 14 | gem 'tsv' 15 | 16 | And then execute: 17 | 18 | $ bundle 19 | 20 | Or install it yourself as: 21 | 22 | $ gem install tsv 23 | 24 | ## Usage 25 | 26 | ### High level interfaces 27 | 28 | #### TSV::parse 29 | 30 | `TSV.parse` accepts basically anything that can enumerate over lines, for example: 31 | 32 | * TSV as a whole string 33 | * any IO object - a TSV file pre-opened with `File.open`, `StringIO` buffer containing TSV data, etc 34 | 35 | It returns a lazy enumerator, yielding TSV::Row objects on demand. 36 | 37 | #### TSV::parse_file 38 | 39 | `TSV.parse_file` accepts path to TSV file, returning lazy enumerator, yielding TSV::Row objects on demand 40 | `TSV.parse_file` is also aliased as `[]`, allowing for `TSV[filename]` syntax 41 | 42 | #### TSV::Table 43 | 44 | While TSV specification requires headers, popular use doesn't necessarily adhere. In order to cope both `TSV::parse` and `TSV::parse_file` return `TSV::Table` object, that apart from acting as enumerator exposes two additional methods: `#with_headers` and `#without_headers`. Neither method preserves read position by design. 45 | 46 | #### TSV::Row 47 | 48 | By default TSV::Row behaves like an Array of strings, derived from TSV row. However this similarity is limited to Enumerable methods. In case a real array is needed, `#to_a` will behave as expected. 49 | Additionally TSV::Row contains header data, accessible via `#header` reader. 50 | 51 | In case a hash-like behaviour is required, field can be accessed with header string key. Alternatively, `#with_header` and `#to_h` will return hash representation for the row. 52 | 53 | ### Examples 54 | 55 | Getting first line from tsv file without headers: 56 | ```ruby 57 | TSV.parse_file("tsv.tsv").without_header.first 58 | ``` 59 | 60 | Mapping name fields from a file: 61 | ```ruby 62 | TSV["tsv.tsv"].map do |row| 63 | row['name'] 64 | end 65 | ``` 66 | 67 | Mapping last and first row elements: 68 | ```ruby 69 | TSV["tsv.tsv"].map do |row| 70 | [row[-1], row[1]] 71 | end 72 | ``` 73 | 74 | ### Nuances 75 | 76 | Range accessor is not implemented for initial version due to authors' lack of need. 77 | In addition, accessing tenth element in a row of five is considered an exception from TSV standpoint, which should be represented in range accessor. Such nuance, would it be implemented, will break expectations. Still, if need arises, pull or feature requests with accompanying reasoning (or even without one) are more than welcome. 78 | 79 | ## Contributing 80 | 81 | 1. Fork it 82 | 2. Create your feature branch (`git checkout -b my-new-feature`) 83 | 3. Commit your changes (`git commit -am 'Add some feature'`) 84 | 4. Push to the branch (`git push origin my-new-feature`) 85 | 5. Create new Pull Request 86 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require 'rspec/core/rake_task' 3 | 4 | RSpec::Core::RakeTask.new('spec') 5 | 6 | task :default => :spec 7 | -------------------------------------------------------------------------------- /lib/tsv.rb: -------------------------------------------------------------------------------- 1 | require 'forwardable' 2 | 3 | require "tsv/version" 4 | require "tsv/row" 5 | require "tsv/table" 6 | 7 | module TSV 8 | extend self 9 | 10 | def parse(content, opts = {}, &block) 11 | TSV::Table.new(content, opts, &block) 12 | end 13 | 14 | def parse_file(filename, opts = {}, &block) 15 | TSV::Table.new(File.new(filename, 'r'), opts, &block) 16 | end 17 | 18 | alias :[] :parse_file 19 | 20 | class ReadOnly < StandardError 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /lib/tsv/row.rb: -------------------------------------------------------------------------------- 1 | module TSV 2 | class Row 3 | extend Forwardable 4 | 5 | def_delegators :data, *Enumerable.instance_methods(false) 6 | 7 | attr_reader :header, :data 8 | 9 | def []=(key, value) 10 | raise TSV::ReadOnly.new('TSV data is read only. Export data to modify it.') 11 | end 12 | 13 | def [](key) 14 | if key.is_a? ::String 15 | raise UnknownKey unless header.include?(key) 16 | 17 | data[header.index(key)] 18 | elsif key.is_a? ::Numeric 19 | raise UnknownKey if data[key].nil? 20 | 21 | data[key] 22 | else 23 | raise InvalidKey.new 24 | end 25 | end 26 | 27 | def initialize(data, header) 28 | @data = data 29 | @header = header 30 | 31 | raise InputError.new("Row has #{@data.length} columns, but #{@header.length} columns expected") if @data.length != @header.length 32 | end 33 | 34 | def with_header 35 | Hash[header.zip(data)] 36 | end 37 | alias :to_h :with_header 38 | 39 | def ==(other) 40 | other.is_a?(self.class) and 41 | header == other.header and 42 | data == other.data 43 | end 44 | 45 | class InvalidKey < StandardError 46 | end 47 | 48 | class UnknownKey < StandardError 49 | end 50 | 51 | class InputError < StandardError 52 | end 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /lib/tsv/table.rb: -------------------------------------------------------------------------------- 1 | module TSV 2 | class Table 3 | extend Forwardable 4 | 5 | def_delegators :enumerator, *Enumerator.instance_methods(false) 6 | def_delegators :enumerator, *Enumerable.instance_methods(false) 7 | 8 | attr_accessor :source, :header 9 | 10 | def initialize(source, params = {}, &block) 11 | self.header = params.fetch(:header, true) 12 | self.source = source 13 | self.enumerator.each(&block) if block_given? 14 | end 15 | 16 | def with_header 17 | self.class.new(self.source, header: true) 18 | end 19 | 20 | def without_header 21 | self.class.new(self.source, header: false) 22 | end 23 | 24 | def enumerator 25 | @enumerator ||= ::Enumerator.new do |y| 26 | lines = data_enumerator 27 | 28 | first_line = generate_row_from begin 29 | lines.next 30 | rescue StopIteration => ex 31 | '' 32 | end 33 | 34 | local_header = if self.header 35 | first_line 36 | else 37 | lines.rewind 38 | generate_default_header_from first_line 39 | end 40 | 41 | loop do 42 | y << TSV::Row.new(generate_row_from(lines.next).freeze, local_header.freeze) 43 | end 44 | end 45 | end 46 | 47 | def data_enumerator 48 | source.each_line 49 | end 50 | 51 | protected 52 | 53 | def generate_row_from(str) 54 | str.to_s.chomp.split("\t", -1) 55 | end 56 | 57 | def generate_default_header_from(example_line) 58 | (0...example_line.length).to_a.map(&:to_s) 59 | end 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /lib/tsv/version.rb: -------------------------------------------------------------------------------- 1 | module TSV 2 | VERSION = "1.0.0" 3 | end 4 | -------------------------------------------------------------------------------- /spec/fixtures/broken.tsv: -------------------------------------------------------------------------------- 1 | first second third 2 | 0 1 2 3 | one -------------------------------------------------------------------------------- /spec/fixtures/empty-trailing.tsv: -------------------------------------------------------------------------------- 1 | first second third 2 | 0 1 2 3 | one 4 | -------------------------------------------------------------------------------- /spec/fixtures/empty.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mimimi/ruby-tsv/b8e1e3490017c9a5327405069856c7426b3605cd/spec/fixtures/empty.tsv -------------------------------------------------------------------------------- /spec/fixtures/example.tsv: -------------------------------------------------------------------------------- 1 | first second third 2 | 0 1 2 3 | one two three 4 | weird data s@mthin# else -------------------------------------------------------------------------------- /spec/lib/tsv/row_spec.rb: -------------------------------------------------------------------------------- 1 | require File.join(File.dirname(__FILE__), '..', '..', 'spec_helper.rb') 2 | 3 | describe TSV::Row do 4 | describe "::new" do 5 | it "sets header and data from params" do 6 | t = TSV::Row.new(['data'], ['header']) 7 | 8 | expect(t.header).to eq(['header']) 9 | expect(t.data).to eq(['data']) 10 | end 11 | 12 | context "when header and data length do not match" do 13 | it "raises TSV::Row::InputError" do 14 | expect { TSV::Row.new(['data'], ['header', 'footer']) }.to raise_error(TSV::Row::InputError) 15 | expect { TSV::Row.new(['data', 'not data'], ['header']) }.to raise_error(TSV::Row::InputError) 16 | end 17 | end 18 | end 19 | 20 | let(:header) { ['first', 'second', 'third'] } 21 | let(:data) { ['one', 'two', 'three'] } 22 | 23 | subject(:row) { TSV::Row.new(data, header) } 24 | 25 | describe "#[]" do 26 | describe "array interface compatibility" do 27 | context "when provided with element number" do 28 | it "returns requested element" do 29 | expect(subject[1]).to eq "two" 30 | end 31 | end 32 | 33 | context "when provided with negative offset" do 34 | it "returns requested element" do 35 | expect(subject[-1]).to eq "three" 36 | end 37 | end 38 | 39 | context "when provided with header name" do 40 | it "returns requested element" do 41 | expect(subject['third']).to eq "three" 42 | end 43 | end 44 | 45 | context "when provided with nil or symbol" do 46 | it "raises TSV::Row::InvalidKey" do 47 | expect { subject[nil] }.to raise_error(TSV::Row::InvalidKey) 48 | expect { subject[:something] }.to raise_error(TSV::Row::InvalidKey) 49 | end 50 | end 51 | 52 | context "when provided with unknown numeric key" do 53 | let(:cases) { [-(data.length + 1), data.length, 500, -500]} 54 | 55 | it "raises TSV::Row::UnknownKey" do 56 | cases.each do |item| 57 | expect { subject[item] }.to raise_error(TSV::Row::UnknownKey) 58 | end 59 | end 60 | end 61 | 62 | context "when provided with unknown string key" do 63 | it "raises TSV::Row::UnknownKey" do 64 | expect { subject['something'] }.to raise_error(TSV::Row::UnknownKey) 65 | end 66 | end 67 | end 68 | end 69 | 70 | describe "#[]=" do 71 | it "raises TSV::ReadOnly exception" do 72 | expect { subject['a'] = 123 }.to raise_error(TSV::ReadOnly, 'TSV data is read only. Export data to modify it.') 73 | end 74 | end 75 | 76 | describe "accessors" do 77 | describe "header" do 78 | it "does not have setter" do 79 | expect(subject).to_not respond_to(:"header=") 80 | end 81 | 82 | it "has getter" do 83 | expect(subject.header).to eq ['first', 'second', 'third'] 84 | end 85 | end 86 | 87 | describe "data" do 88 | it "does not have setter" do 89 | expect(subject).to_not respond_to(:"header=") 90 | end 91 | 92 | it "has getter" do 93 | expect(subject.data).to eq ['one', 'two', 'three'] 94 | end 95 | end 96 | end 97 | 98 | describe "iterators" do 99 | describe "Enumerable #methods (except #to_h, which we have a better implementation for)" do 100 | (Enumerable.instance_methods(false) - [:to_h]).each do |name| 101 | it "delegates #{name} to data array" do 102 | expect(subject.data).to receive(name) 103 | subject.send(name) 104 | end 105 | end 106 | end 107 | 108 | describe "#with_header" do 109 | subject { row.with_header } 110 | 111 | it "gathers header and data into hash" do 112 | expect(subject).to eq({ 113 | "first" => "one", 114 | "second" => "two", 115 | "third" => "three" 116 | }) 117 | end 118 | end 119 | 120 | describe "#to_h" do 121 | subject { row.to_h } 122 | 123 | it "gathers header and data into hash" do 124 | expect(subject).to eq({ 125 | "first" => "one", 126 | "second" => "two", 127 | "third" => "three" 128 | }) 129 | end 130 | end 131 | end 132 | 133 | describe "#==" do 134 | let(:other_header) { header } 135 | let(:other_data) { data } 136 | 137 | let(:other_row) { TSV::Row.new(other_data, other_header) } 138 | subject { row == other_row } 139 | 140 | context "when compared to TSV::Row" do 141 | context "when both objects' data and header are equal" do 142 | it { should be true } 143 | end 144 | 145 | context "when data attributes are not equal" do 146 | let(:other_data) { data.reverse } 147 | it { should be false } 148 | end 149 | 150 | context "when header attributes are not equal" do 151 | let(:other_header) { header.reverse } 152 | it { should be false } 153 | end 154 | 155 | context "when both objects' data and header are not equal" do 156 | let(:other_data) { data.reverse } 157 | let(:other_header) { header.reverse } 158 | it { should be false } 159 | end 160 | end 161 | 162 | context "when compared to something else than TSV::Row" do 163 | let(:other_row) { data } 164 | 165 | it { should be false } 166 | end 167 | end 168 | end 169 | -------------------------------------------------------------------------------- /spec/lib/tsv/table_spec.rb: -------------------------------------------------------------------------------- 1 | require File.join(File.dirname(__FILE__), '..', '..', 'spec_helper.rb') 2 | 3 | describe TSV::Table do 4 | let(:source) { IO.read(File.join(File.dirname(__FILE__), '..', '..', 'fixtures', filename)) } 5 | let(:filename) { 'example.tsv' } 6 | 7 | let(:header) { true } 8 | let(:parameters) { { header: header } } 9 | 10 | subject(:table) { TSV::Table.new(source, parameters) } 11 | 12 | describe "::new" do 13 | it "initializes header to true by default" do 14 | expect(subject.header).to be true 15 | end 16 | 17 | it "initializes source to given value" do 18 | expect(subject.source).to eq(source) 19 | end 20 | 21 | context "when block is given" do 22 | it "passes block to enumerator through each" do 23 | data = [] 24 | 25 | described_class.new(source) do |v| 26 | data << v 27 | end 28 | 29 | headers = %w{first second third} 30 | expect(data).to eq [ TSV::Row.new( ['0', '1', '2'], headers ), 31 | TSV::Row.new( ['one', 'two', 'three'], headers ), 32 | TSV::Row.new( ['weird data', 's@mthin#', 'else'], headers ) ] 33 | end 34 | end 35 | end 36 | 37 | describe "#enumerator" do 38 | it { expect(table.enumerator).to be_a_kind_of(Enumerator) } 39 | subject { table.enumerator.to_a } 40 | 41 | context "string is empty" do 42 | let(:filename) { 'empty.tsv' } 43 | 44 | it { should be_empty } 45 | end 46 | 47 | context "string is incorrect" do 48 | let(:filename) { 'broken.tsv' } 49 | 50 | it "should raise exception" do 51 | expect { subject }.to raise_error(TSV::Row::InputError) 52 | end 53 | end 54 | 55 | context "string is correct" do 56 | context "when requested without header" do 57 | let(:header) { false } 58 | let(:auto_header) { %w{0 1 2} } 59 | 60 | it "returns its content as array of arrays" do 61 | expect(subject).to eq [ TSV::Row.new( ['first', 'second', 'third'], auto_header ), 62 | TSV::Row.new( ['0', '1', '2'], auto_header ), 63 | TSV::Row.new( ['one', 'two', 'three'], auto_header ), 64 | TSV::Row.new( ['weird data', 's@mthin#', 'else'], auto_header ) ] 65 | end 66 | 67 | it "freezes data and header for TSV::Row" do 68 | subject.each do |i| 69 | expect(i.data).to be_frozen 70 | expect(i.header).to be_frozen 71 | end 72 | end 73 | end 74 | 75 | context "when requested with header" do 76 | let(:header) { true } 77 | 78 | it "returns its content as array of hashes" do 79 | headers = %w{first second third} 80 | expect(subject).to eq [ TSV::Row.new( ['0', '1', '2'], headers ), 81 | TSV::Row.new( ['one', 'two', 'three'], headers ), 82 | TSV::Row.new( ['weird data', 's@mthin#', 'else'], headers ) ] 83 | end 84 | 85 | it "freezes data and header for TSV::Row" do 86 | subject.each do |i| 87 | expect(i.data).to be_frozen 88 | expect(i.header).to be_frozen 89 | end 90 | end 91 | end 92 | end 93 | end 94 | 95 | describe "#with_header" do 96 | subject { table.with_header } 97 | 98 | it "returns a Table with header option set to true" do 99 | expect(subject.header).to be true 100 | end 101 | end 102 | 103 | describe "#without_header" do 104 | subject { table.without_header } 105 | 106 | it "returns a Table with header option set to false" do 107 | expect(subject.header).to be false 108 | end 109 | end 110 | 111 | describe "enumerator interfaces" do 112 | ( Enumerable.instance_methods(false) + Enumerator.instance_methods(false) ).each do |name| 113 | it "delegates #{name} to enumerator" do 114 | expect(table.enumerator).to receive(name) 115 | table.send(name) 116 | end 117 | end 118 | end 119 | end 120 | -------------------------------------------------------------------------------- /spec/lib/tsv_spec.rb: -------------------------------------------------------------------------------- 1 | require File.join(File.dirname(__FILE__), '..', 'spec_helper.rb') 2 | 3 | describe TSV do 4 | let(:filename) { 'example.tsv' } 5 | 6 | describe "#parse" do 7 | let(:header) { nil } 8 | let(:parameters) { { header: header } } 9 | 10 | context "given a string with content" do 11 | let(:content) { IO.read(File.join(File.dirname(__FILE__), '..', 'fixtures', filename)) } 12 | 13 | subject { TSV.parse(content, parameters) } 14 | 15 | it "returns Table initialized with given data" do 16 | expect(subject).to be_a TSV::Table 17 | expect(subject.source).to eq(content) 18 | end 19 | 20 | context "when block is given" do 21 | it "passes block to Table" do 22 | data = [] 23 | 24 | TSV.parse(content) do |i| 25 | data.push i 26 | end 27 | 28 | headers = %w{first second third} 29 | expect(data).to eq [ TSV::Row.new( ['0', '1', '2'], headers ), 30 | TSV::Row.new( ['one', 'two', 'three'], headers ), 31 | TSV::Row.new( ['weird data', 's@mthin#', 'else'], headers ) ] 32 | end 33 | end 34 | end 35 | 36 | context "given a opened IO object" do 37 | let(:content) { File.open(File.join(File.dirname(__FILE__), '..', 'fixtures', filename), 'r') } 38 | 39 | subject { TSV.parse(content, parameters) } 40 | 41 | it "returns Table initialized with given data" do 42 | expect(subject).to be_a TSV::Table 43 | expect(subject.source).to eq(content) 44 | end 45 | 46 | it "can properly parse file" do 47 | data = [] 48 | 49 | TSV.parse(content).each do |i| 50 | data.push i 51 | end 52 | 53 | headers = %w{first second third} 54 | expect(data).to eq [ TSV::Row.new( ['0', '1', '2'], headers ), 55 | TSV::Row.new( ['one', 'two', 'three'], headers ), 56 | TSV::Row.new( ['weird data', 's@mthin#', 'else'], headers ) ] 57 | end 58 | end 59 | end 60 | 61 | describe "#parse_file" do 62 | let(:tsv_path) { File.join(File.dirname(__FILE__), '..', 'fixtures', filename) } 63 | 64 | subject { TSV.parse_file tsv_path } 65 | 66 | context "when no block is given" do 67 | it "returns Table initialized with File object" do 68 | expect(subject).to be_a TSV::Table 69 | expect(subject.source).to be_kind_of(File) 70 | expect(subject.source.path).to eq(tsv_path) 71 | end 72 | end 73 | 74 | context "when block is given" do 75 | it "passes block to Table" do 76 | data = [] 77 | 78 | TSV.parse_file(tsv_path) do |i| 79 | data.push i 80 | end 81 | 82 | headers = %w{first second third} 83 | expect(data).to eq [ TSV::Row.new( ['0', '1', '2'], headers ), 84 | TSV::Row.new( ['one', 'two', 'three'], headers ), 85 | TSV::Row.new( ['weird data', 's@mthin#', 'else'], headers ) ] 86 | end 87 | end 88 | 89 | context "when accessing unavailable files" do 90 | subject { lambda { TSV.parse_file(tsv_path).to_a } } 91 | 92 | context "when file is not found" do 93 | let(:tsv_path) { "AManThatWasntThere.tsv" } 94 | 95 | it "returns FileNotFoundException" do 96 | expect(subject).to raise_error(Errno::ENOENT) 97 | end 98 | end 99 | end 100 | 101 | describe "intermediate file handle" do 102 | it "raises IOError on write attempt" do 103 | tempfile = Tempfile.new('tsv_test') 104 | handle = TSV.parse_file(tempfile.path).source 105 | 106 | expect{ handle.puts('test string please ignore') }.to raise_error(IOError, 'not opened for writing') 107 | end 108 | end 109 | 110 | end 111 | end 112 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'bundler/setup' 3 | 4 | require "codeclimate-test-reporter" 5 | CodeClimate::TestReporter.start 6 | 7 | require 'pry' 8 | require 'rspec' 9 | 10 | require 'tsv' 11 | 12 | # Disabling old rspec should syntax 13 | RSpec.configure do |config| 14 | config.expect_with :rspec do |c| 15 | c.syntax = :expect 16 | end 17 | 18 | config.raise_errors_for_deprecations! 19 | end 20 | 21 | Dir[File.expand_path(File.join(File.dirname(__FILE__),'support','**','*.rb'))].each {|f| require f} -------------------------------------------------------------------------------- /spec/tsv_integration_spec.rb: -------------------------------------------------------------------------------- 1 | require File.join(File.dirname(__FILE__), 'spec_helper.rb') 2 | 3 | describe TSV do 4 | let(:header) { nil } 5 | let(:tsv_path) { File.join(File.dirname(__FILE__), 'fixtures', filename) } 6 | let(:parameters) { { header: header } } 7 | 8 | describe "reading file" do 9 | subject { TSV.parse_file(tsv_path, parameters).to_a } 10 | 11 | context "when file is empty" do 12 | let(:filename) { 'empty.tsv' } 13 | 14 | context "when requested without header" do 15 | let(:header) { true } 16 | 17 | it { expect(subject).to be_empty } 18 | end 19 | 20 | context "when requested with header" do 21 | let(:header) { false } 22 | 23 | it { expect(subject).to be_empty } 24 | end 25 | end 26 | 27 | context "when file is invalid" do 28 | subject { lambda { TSV.parse_file(tsv_path, parameters).to_a } } 29 | let(:filename) { 'broken.tsv' } 30 | 31 | it "when file is broken" do 32 | expect(subject).to raise_error TSV::Row::InputError 33 | end 34 | end 35 | 36 | context "when file is valid" do 37 | let(:filename) { 'example.tsv' } 38 | 39 | context "when no block is passed" do 40 | let(:parameters) { Hash.new } 41 | 42 | it "returns its content as array of hashes" do 43 | headers = %w{first second third} 44 | expect(subject).to eq [ TSV::Row.new( ['0', '1', '2'], headers ), 45 | TSV::Row.new( ['one', 'two', 'three'], headers ), 46 | TSV::Row.new( ['weird data', 's@mthin#', 'else'], headers ) ] 47 | end 48 | end 49 | end 50 | 51 | context "when file includes empty trailing fields" do 52 | let(:filename) { 'empty-trailing.tsv' } 53 | 54 | context "when no block is passed" do 55 | let(:parameters) { Hash.new } 56 | 57 | it "returns its content as array of hashes" do 58 | headers = %w{first second third} 59 | expect(subject).to eq [ TSV::Row.new( ['0', '1', '2'], headers ), 60 | TSV::Row.new( ['one', '', ''], headers ) ] 61 | end 62 | end 63 | end 64 | end 65 | 66 | describe "reading from string" do 67 | subject { TSV.parse(IO.read(tsv_path), parameters).to_a } 68 | 69 | context "when string is empty" do 70 | let(:filename) { 'empty.tsv' } 71 | 72 | context "when requested without header" do 73 | let(:header) { true } 74 | 75 | it { expect(subject).to be_empty } 76 | end 77 | 78 | context "when requested with header" do 79 | let(:header) { false } 80 | 81 | it { expect(subject).to be_empty } 82 | end 83 | end 84 | 85 | context "when string is invalid" do 86 | subject { lambda { TSV.parse(IO.read(tsv_path), parameters).to_a } } 87 | let(:filename) { 'broken.tsv' } 88 | 89 | it "when file is broken" do 90 | expect(subject).to raise_error TSV::Row::InputError 91 | end 92 | end 93 | 94 | context "when string is valid" do 95 | let(:filename) { 'example.tsv' } 96 | 97 | context "when no block is passed" do 98 | let(:parameters) { Hash.new } 99 | 100 | it "returns its content as array of hashes" do 101 | headers = %w{first second third} 102 | expect(subject).to eq [ TSV::Row.new( ['0', '1', '2'], headers ), 103 | TSV::Row.new( ['one', 'two', 'three'], headers ), 104 | TSV::Row.new( ['weird data', 's@mthin#', 'else'], headers ) ] 105 | end 106 | end 107 | end 108 | end 109 | end 110 | -------------------------------------------------------------------------------- /tsv.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'tsv/version' 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "tsv" 8 | spec.version = TSV::VERSION 9 | spec.authors = ["Dmytro Soltys", "Alexander Rozumiy"] 10 | spec.email = ["soap@slotos.net", "brain-geek@yandex.ua"] 11 | spec.description = %q{Streamed TSV parser} 12 | spec.summary = %q{Provides a simple parser for standard compliant and not so (missing header line) TSV files} 13 | spec.homepage = "" 14 | spec.license = "MIT" 15 | 16 | spec.files = `git ls-files`.split($/) 17 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 18 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 19 | spec.require_paths = ["lib"] 20 | end 21 | --------------------------------------------------------------------------------