├── .gitignore ├── .rspec ├── .rspec_formatter.rb ├── .rubocop.yml ├── .travis.yml ├── .yardopts ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Gemfile ├── Guardfile ├── LICENSE.md ├── README.md ├── Rakefile ├── daru-io.gemspec ├── lib └── daru │ ├── io.rb │ └── io │ ├── base.rb │ ├── exporters.rb │ ├── exporters │ ├── avro.rb │ ├── base.rb │ ├── csv.rb │ ├── excel.rb │ ├── json.rb │ ├── r_data.rb │ ├── rds.rb │ └── sql.rb │ ├── importers.rb │ ├── importers │ ├── active_record.rb │ ├── avro.rb │ ├── base.rb │ ├── csv.rb │ ├── excel.rb │ ├── excelx.rb │ ├── html.rb │ ├── json.rb │ ├── log.rb │ ├── mongo.rb │ ├── plaintext.rb │ ├── r_data.rb │ ├── rds.rb │ ├── redis.rb │ └── sql.rb │ ├── link.rb │ └── version.rb └── spec ├── daru ├── io │ ├── exporters │ │ ├── avro_spec.rb │ │ ├── csv_spec.rb │ │ ├── excel_spec.rb │ │ ├── json_spec.rb │ │ ├── r_data_spec.rb │ │ ├── rds_spec.rb │ │ └── sql_spec.rb │ └── importers │ │ ├── active_record_spec.rb │ │ ├── avro_spec.rb │ │ ├── csv_spec.rb │ │ ├── excel_spec.rb │ │ ├── excelx_spec.rb │ │ ├── html_spec.rb │ │ ├── json_spec.rb │ │ ├── log_spec.rb │ │ ├── mongo_spec.rb │ │ ├── plaintext_spec.rb │ │ ├── r_data_spec.rb │ │ ├── rds_spec.rb │ │ ├── redis_spec.rb │ │ └── sql_spec.rb └── io_spec.rb ├── fixtures ├── README.md ├── avro │ ├── one_complex.avro │ ├── twitter.avro │ └── users.avro ├── csv │ ├── boolean_converter_test.csv │ ├── column_headers_only.csv │ ├── empty_rows_test.csv │ ├── matrix_test.csv │ ├── repeated_fields.csv │ ├── sales-funnel.csv │ ├── scientific_notation.csv │ └── stock_data.csv ├── excel │ └── test_xls.xls ├── excelx │ ├── LOBSTAHS_rt.windows.xlsx │ ├── Microcode.xlsx │ ├── Stock-counts-sheet.xlsx │ └── pivot.xlsx ├── html │ ├── eciresults.html │ ├── macau.html │ ├── moneycontrol.html │ ├── valid_markup.html │ ├── wiki_climate.html │ └── wiki_table_info.html ├── json │ ├── allsets.json │ ├── got.json │ ├── jsonvat.json │ ├── nasadata.json │ └── temp.json ├── log │ ├── apache.log │ ├── rails.log │ └── 
s3.log ├── plaintext │ ├── bank2.dat │ ├── empties.dat │ └── strings.dat ├── rdata │ ├── ACScounty.RData │ ├── FRED-cpi-house.RData │ ├── Filings-by-state.RData │ ├── Ownership.RData │ ├── case-shiller.RData │ ├── state-migration.RData │ └── zip-county.RData └── rds │ ├── RPPdataConverted.rds │ ├── bc_sites.rds │ ├── chicago.rds │ ├── healthexp.Rds │ ├── heights.RDS │ └── maacs_env.rds ├── spec_helper.rb └── support ├── custom_matchers.rb ├── shared_contexts.rb └── shared_examples.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | .bundle/ 3 | .yardoc/ 4 | Gemfile.lock 5 | /_yardoc/ 6 | *.DS_store 7 | .rspec_status 8 | coverage/ 9 | dump.rdb 10 | doc/ 11 | docs/ 12 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --require spec_helper 3 | -------------------------------------------------------------------------------- /.rspec_formatter.rb: -------------------------------------------------------------------------------- 1 | RSpec::Support.require_rspec_core 'formatters/base_text_formatter' 2 | 3 | class SimpleFormatter < RSpec::Core::Formatters::BaseTextFormatter 4 | RSpec::Core::Formatters.register self, 5 | :example_passed, :example_pending, :example_failed, :dump_pending, :dump_failures, :dump_summary 6 | 7 | def example_passed(message); end 8 | 9 | def example_pending(message); end 10 | 11 | def example_failed(message); end 12 | 13 | def dump_pending(message); end 14 | 15 | def dump_failures(message); end 16 | 17 | def dump_summary(message) 18 | colorizer = ::RSpec::Core::Formatters::ConsoleCodes 19 | 20 | output.puts "\nFinished in #{message.formatted_duration} " \ 21 | "(files took #{message.formatted_load_time} to load)\n" \ 22 | "#{message.colorized_totals_line(colorizer)}\n" 23 | end 24 | end 25 | 
-------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | require: rubocop-rspec 2 | 3 | AllCops: 4 | Include: 5 | - 'lib/**/*' 6 | Exclude: 7 | - 'vendor/**/*' 8 | - 'benchmarks/*' 9 | - 'profile/*' 10 | DisplayCopNames: true 11 | TargetRubyVersion: 2.1 12 | 13 | # Preferred codebase style --------------------------------------------- 14 | 15 | ### Layouts ------------------------------------------------------------ 16 | 17 | Layout/AlignParameters: 18 | EnforcedStyle: with_fixed_indentation 19 | 20 | Layout/ExtraSpacing: 21 | AllowForAlignment: true 22 | 23 | Layout/SpaceAfterComma: 24 | Enabled: false 25 | 26 | Layout/SpaceAroundEqualsInParameterDefault: 27 | EnforcedStyle: no_space 28 | 29 | Layout/SpaceAroundOperators: 30 | Enabled: false 31 | 32 | Layout/SpaceInsideBlockBraces: 33 | EnforcedStyle: space 34 | 35 | Layout/SpaceInsideHashLiteralBraces: 36 | EnforcedStyle: no_space 37 | 38 | ### Styles ------------------------------------------------------------- 39 | 40 | Style/AndOr: 41 | EnforcedStyle: conditionals 42 | 43 | Style/DoubleNegation: 44 | Enabled: false 45 | 46 | Style/EmptyCaseCondition: 47 | Enabled: false 48 | 49 | Style/EmptyElse: 50 | EnforcedStyle: empty 51 | 52 | Style/EmptyMethod: 53 | EnforcedStyle: compact 54 | 55 | Style/FormatString: 56 | EnforcedStyle: percent 57 | 58 | Style/ParallelAssignment: 59 | Enabled: false 60 | 61 | Style/SingleLineBlockParams: 62 | Enabled: false 63 | 64 | Style/PerlBackrefs: 65 | Enabled: false 66 | 67 | Style/Documentation: 68 | Enabled: false # TODO 69 | 70 | ### Metrics ------------------------------------------------------------ 71 | 72 | Metrics/AbcSize: 73 | Max: 20 74 | 75 | Metrics/BlockLength: 76 | Exclude: 77 | - 'spec/**/*' 78 | 79 | Metrics/ClassLength: 80 | Max: 200 81 | 82 | Metrics/CyclomaticComplexity: 83 | Max: 7 84 | 85 | Metrics/LineLength: 86 | Max: 120 87 | 88 | 
Metrics/MethodLength: 89 | Max: 15 90 | 91 | Metrics/ModuleLength: 92 | Max: 200 93 | 94 | Style/MultilineBlockChain: 95 | Enabled: false 96 | 97 | Metrics/ParameterLists: 98 | Max: 10 99 | 100 | ### RSpec -------------------------------------------------------------- 101 | 102 | RSpec/MessageSpies: 103 | EnforcedStyle: receive 104 | 105 | RSpec/NestedGroups: 106 | Max: 5 107 | 108 | RSpec/ContextWording: 109 | Enabled: false 110 | 111 | ### Security ----------------------------------------------------------- 112 | 113 | Security/Open: 114 | Enabled: false 115 | 116 | ### Naming ------------------------------------------------------------- 117 | 118 | Naming/FileName: 119 | Enabled: false 120 | 121 | Naming/MemoizedInstanceVariableName: 122 | Exclude: 123 | - 'lib/daru/io/exporters/excel.rb' 124 | 125 | Naming/UncommunicativeMethodParamName: 126 | AllowedNames: 127 | - 'db' 128 | 129 | ### Lint --------------------------------------------------------------- 130 | 131 | Lint/SplatKeywordArguments: 132 | Enabled: false 133 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: 2 | ruby 3 | 4 | rvm: 5 | - '2.1' 6 | - '2.2' 7 | - '2.3.0' 8 | - '2.4.0' 9 | 10 | script: 11 | - bundle exec rspec 12 | - bundle exec rubocop 13 | 14 | services: 15 | - redis-server 16 | - mongodb 17 | 18 | before_install: 19 | - redis-server --daemonize yes 20 | 21 | install: 22 | - gem install bundler 23 | - gem install rainbow -v '2.2.1' 24 | - sudo apt-get update -qq 25 | - sudo apt-get install -y libgsl0-dev r-base r-base-dev 26 | - sudo Rscript -e "install.packages(c('Rserve','irr'),,'http://cran.us.r-project.org')" 27 | - R CMD Rserve 28 | - export R_HOME=/usr/lib/R 29 | - gem install rsruby -- --with-R-dir=$R_HOME --with-R-include=/usr/share/R/include --with_cflags="-fPIC -g -O2 -fstack-protector --param=ssp-buffer-size=4 -Wformat -Wall 
-fno-strict-aliasing" 30 | - bundle install -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --markup markdown 2 | --markup-provider redcarpet -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective 
action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at athityakumar@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution guidelines 2 | 3 | First of all, thanks for thinking of contributing to this project. :smile: 4 | 5 | Before sending a Pull Request, please make sure that you're assigned the task on a GitHub issue. 6 | 7 | - If a relevant issue already exists, discuss on the issue and get it assigned to yourself on GitHub. 8 | - If no relevant issue exists, open a new issue and get it assigned to yourself on GitHub. 9 | 10 | Please proceed with a Pull Request only after you're assigned. It'd be sad if your Pull Request (and your hardwork) isn't accepted just because it isn't idealogically compatible. 11 | 12 | # Developing the gem 13 | 14 | 1. Install required dependencies. 15 | 16 | - For the Mongo Importer, install Mongo. 17 | - For the RData Importer, RData Exporter, RDS Importer or RDS Exporter, install R and set the R_HOME 18 | variable in your shell configuration: 19 | ```sh 20 | export R_HOME=/usr/lib/R # For Unix systems 21 | export R_HOME=/usr/local/Frameworks/R.framework/Resources # For Mac systems 22 | ``` 23 | - For the Redis Importer, install Redis and start the redis server by typing `redis-server` in another 24 | terminal window, before running the test suites. 25 | 26 | For any issue(s) related to installation steps, kindly refer to the configurations mentioned in the 27 | `.travis.yml` file. 28 | 29 | 2. Clone this repository and install all the required gem dependencies. 
30 | 31 | ```sh 32 | git clone https://github.com/athityakumar/daru-io.git 33 | cd daru-io 34 | gem install bundler 35 | bundle install 36 | ``` 37 | 38 | 3. Checkout to a different git branch (say, `adds-format-importer`). 39 | 40 | 4. Add any gem dependencies required for the Format Importer to the `:optional` group of the Gemfile. 41 | 42 | 5. Add code and YARD documentation to `lib/daru/io/importers/format.rb`, consistent with other IO modules. 43 | 44 | 6. Add tests to `spec/daru/io/importers/format_spec.rb`. Add any `.format` files required for importer in `spec/fixtures/format/` directory. 45 | 46 | 7. Run the rspec test-suite. 47 | ```sh 48 | # Runs test suite for all Importers & Exporters 49 | bundle exec rspec 50 | 51 | # Runs test-suite only for the newly added Format Importer 52 | bundle exec rspec spec/daru/io/importers/format_spec.rb 53 | ``` 54 | 55 | 8. Run the rubocop for static code quality comments. 56 | 57 | ```sh 58 | # Runs rubocop test for all Importer & Exporters 59 | bundle exec rubocop 60 | 61 | # Runs rubocop test only for the newly added Format Importer 62 | bundle exec rubocop lib/daru/io/importers/format.rb spec/daru/io/importers/format_spec.rb 63 | ``` 64 | 65 | 9. Send a Pull Request back to this repository. 
:tada: 66 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gemspec 3 | 4 | group :optional do 5 | gem 'activerecord', '~> 4.0' 6 | gem 'avro' 7 | gem 'dbd-sqlite3' 8 | gem 'dbi' 9 | gem 'jsonpath' 10 | gem 'mongo' 11 | gem 'nokogiri' 12 | gem 'redis' 13 | gem 'request-log-analyzer', '~> 1.13.4' 14 | gem 'roo', '~> 2.7.0' 15 | gem 'rsruby' 16 | gem 'snappy' 17 | gem 'spreadsheet', '~> 1.1.1' 18 | gem 'sqlite3' 19 | end 20 | 21 | gem 'saharspec', git: 'https://github.com/zverok/saharspec.git' 22 | -------------------------------------------------------------------------------- /Guardfile: -------------------------------------------------------------------------------- 1 | guard :rspec, cmd: 'rspec' do 2 | watch(%r{^spec/.+_spec\.rb$}) 3 | watch(%r{^lib/daru/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" } 4 | watch('spec/spec_helper.rb') { 'spec' } 5 | end 6 | 7 | # TODO: Run Rubocop in the background 8 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 [Athitya Kumar](https://github.com/athityakumar/) and [Ruby Science Foundation](https://github.com/SciRuby/). 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | 3 | require 'rubygems/tasks' 4 | Gem::Tasks.new 5 | 6 | require 'rspec/core/rake_task' 7 | RSpec::Core::RakeTask.new 8 | 9 | require 'rubocop/rake_task' 10 | RuboCop::RakeTask.new 11 | 12 | task default: %w[spec rubocop] 13 | -------------------------------------------------------------------------------- /daru-io.gemspec: -------------------------------------------------------------------------------- 1 | lib = File.expand_path('lib', __dir__) 2 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 3 | require 'daru/io/version' 4 | 5 | Daru::IO::DESCRIPTION = < 0.2.0' 24 | 25 | spec.add_development_dependency 'bundler', '~> 1.15' 26 | spec.add_development_dependency 'rake', '~> 10.0' 27 | spec.add_development_dependency 'redcarpet' 28 | spec.add_development_dependency 'rspec', '~> 3.0' 29 | spec.add_development_dependency 'rspec-its' 30 | spec.add_development_dependency 'rubocop', '>= 0.40.0' 31 | spec.add_development_dependency 'rubocop-rspec' 32 | spec.add_development_dependency 'rubygems-tasks' 33 | spec.add_development_dependency 'simplecov' 34 | spec.add_development_dependency 'webmock' 35 | spec.add_development_dependency 'yard' 36 | 37 | spec.add_development_dependency 'guard-rspec' if RUBY_VERSION >= '2.2.5' 38 | end 39 | -------------------------------------------------------------------------------- /lib/daru/io.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/version' 2 | require 'daru/io/importers' 3 | require 'daru/io/exporters' 4 | -------------------------------------------------------------------------------- /lib/daru/io/base.rb: -------------------------------------------------------------------------------- 1 | require 'daru' 2 | require 'daru/io/link' 3 | 4 | module Daru 5 | module 
IO 6 | # Base IO Class that contains generic helper methods, to be 7 | # used by other {Importers::Base} and {Exporters::Base} via inheritence 8 | class Base 9 | # Specifies and requires a gem, if the gem is present in the application 10 | # environment. Else, raises `LoadError` with meaningful message of which 11 | # dependency to install for which Daru-IO module. 12 | # 13 | # @param dependency [String] A dependency to specify with `gem` command 14 | # @param version [String] A version range to specify with `gem` command 15 | # @param requires [String] The gem name to be required, in case it's 16 | # different from the dependency name. For example, activerecord 17 | # dependency has to be required as `require 'active_record'` 18 | # @param callback [Class] The Daru-IO module which is being used currently. 19 | # Useful for throwing meaningful `LoadError` message. 20 | # 21 | # @example Requires with dependency 22 | # optional_gem 'avro' 23 | # #=> true 24 | # 25 | # @example Requires with version and requires 26 | # optional_gem 'activerecord', '~> 4.0', requires: 'active_record' 27 | # #=> true 28 | # 29 | # @example Raises error with meaningful message 30 | # df = Daru::DataFrame.from_json('path/to/file.json') 31 | # #=> LoadError: Please install the jsonpath gem, or add it to the 32 | # # Gemfile to use the Daru::IO::Importers::JSON module. 33 | def optional_gem(dependency, version=nil, requires: nil, 34 | callback: self.class.name) 35 | gem dependency, version 36 | require requires || dependency 37 | rescue LoadError 38 | version = version.nil? ? '' : " #{version} version" 39 | raise LoadError, 40 | "Please install the #{dependency} gem#{version}, "\ 41 | "or add it to the Gemfile to use the #{callback} module." 
42 | end 43 | end 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/daru/io/exporters.rb: -------------------------------------------------------------------------------- 1 | Dir["#{__dir__}/exporters/*.rb"].each { |file| require "daru/io#{file.gsub(__dir__, '')}" } 2 | -------------------------------------------------------------------------------- /lib/daru/io/exporters/avro.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/exporters/base' 2 | 3 | module Daru 4 | module IO 5 | module Exporters 6 | # Avro Exporter Class, that extends `to_avro_string` and `write_avro` methods to 7 | # `Daru::DataFrame` instance variables 8 | class Avro < Base 9 | Daru::DataFrame.register_io_module :to_avro_string, self 10 | Daru::DataFrame.register_io_module :write_avro, self 11 | 12 | # Initializes an Avro Exporter instance. 13 | # 14 | # @param dataframe [Daru::DataFrame] A dataframe to export 15 | # @param schema [Avro::Schema or Hash] The schema should contain details such as `:type`, 16 | # `:name` and `:fields` 17 | # 18 | # @return A `Daru::IO::Exporter::Avro` instance 19 | # 20 | # @example Initializing an Avro Exporter 21 | # schema = { 22 | # "type" => "record", 23 | # "name" => "User", 24 | # "fields" => [ 25 | # {"name" => "name", "type" => "string"}, 26 | # {"name" => "points", "type" => "int"}, 27 | # {"name"=> "winner", "type"=> "boolean", "default"=> "false"} 28 | # ] 29 | # } 30 | # 31 | # df = Daru::DataFrame.new( 32 | # [ 33 | # {"name"=> "Dany", "points"=> 100, "winner"=> true}, 34 | # {"name"=> "Jon", "points"=> 100, "winner"=> true}, 35 | # {"name"=> "Tyrion", "points"=> 100, "winner"=> true} 36 | # ] 37 | # ) 38 | # 39 | # #=> # 40 | # # name points winner 41 | # # 0 Dany 100 true 42 | # # 1 Jon 100 true 43 | # # 2 Tyrion 100 true 44 | # 45 | # instance = Daru::IO::Exporters::Avro.new(df, schema) 46 | def initialize(dataframe, schema=nil) 47 | 
optional_gem 'avro' 48 | require 'json' 49 | 50 | super(dataframe) 51 | @schema = schema 52 | end 53 | 54 | # Exports an Avro Exporter instance to a file-writable String. 55 | # 56 | # @return [String] A file-writable string 57 | # 58 | # @example Getting a file-writable string from Avro Exporter instance 59 | # instance.to_s 60 | # 61 | # #=> "Obj\u0001\u0004\u0014avro.codec\bnull\u0016avro.schema\xBC\u0002{\"type\":\"record\"..." 62 | def to_s 63 | super 64 | end 65 | 66 | # Exports an Avro Exporter instance to an avro file. 67 | # 68 | # @param path [String] Path of Avro file where the dataframe is to be saved 69 | # 70 | # @example Writing an Avro Exporter instance to an Avro file 71 | # instance.write('azor_ahai.avro') 72 | def write(path) 73 | @schema_obj = process_schema 74 | @writer = ::Avro::IO::DatumWriter.new(@schema_obj) 75 | @buffer = StringIO.new 76 | @writer = ::Avro::DataFile::Writer.new(@buffer, @writer, @schema_obj) 77 | @dataframe.each_row { |row| @writer << row.to_h } 78 | @writer.close 79 | 80 | File.open(path, 'w') { |file| file.write(@buffer.string) } 81 | end 82 | 83 | private 84 | 85 | def process_schema 86 | case @schema 87 | when ::Avro::Schema then @schema 88 | when String then ::Avro::Schema.parse(@schema) 89 | when Hash then ::Avro::Schema.parse(@schema.to_json) 90 | else raise ArgumentError, 'Invalid Avro Schema provided.' 91 | end 92 | end 93 | end 94 | end 95 | end 96 | end 97 | -------------------------------------------------------------------------------- /lib/daru/io/exporters/base.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/base' 2 | 3 | module Daru 4 | module IO 5 | module Exporters 6 | # Base Exporter Class that contains generic helper methods, to be 7 | # used by other Exporters via inheritence 8 | class Base < Daru::IO::Base 9 | # Checks whether the first argument given to any `Daru::IO::` module 10 | # is a `Daru::DataFrame`. 
Raises an error when it's not a `Daru::DataFrame`. 11 | # 12 | # @param dataframe [Daru::DataFrame] A DataFrame to initialize 13 | # 14 | # @example Stores the dataframe 15 | # df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b]) 16 | # Daru::IO::Exporters::Base.new(df) 17 | # 18 | # #=> # 19 | # # a b 20 | # # 0 1 3 21 | # # 1 2 4> 22 | # 23 | # @example Raises error when not a DataFrame 24 | # Daru::IO::Exporters::Base.new(nil) 25 | # 26 | # #=> ArgumentError: Expected first argument to be a Daru::DataFrame, received NilClass instead 27 | def initialize(dataframe) 28 | unless dataframe.is_a?(Daru::DataFrame) 29 | raise ArgumentError, 30 | 'Expected first argument to be a Daru::DataFrame, '\ 31 | "received #{dataframe.class} instead." 32 | end 33 | @dataframe = dataframe 34 | end 35 | 36 | # Exports an Exporter instance to a file-writable String. 37 | # 38 | # @return A file-writable `String` 39 | # 40 | # @example Getting a file-writable string from Avro Exporter instance 41 | # 42 | # instance = Daru::IO::Exporters::Format.new(opts) 43 | # instance.to_s #! 
same as df.to_format_string(opts) 44 | def to_s 45 | tempfile = Tempfile.new('tempfile') 46 | path = tempfile.path 47 | write(path) 48 | 49 | File.read(path) 50 | end 51 | end 52 | end 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /lib/daru/io/exporters/csv.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/exporters/base' 2 | 3 | module Daru 4 | module IO 5 | module Exporters 6 | # CSV Exporter Class, that extends `to_csv_string` and `write_csv` methods to 7 | # `Daru::DataFrame` instance variables 8 | class CSV < Base 9 | Daru::DataFrame.register_io_module :to_csv_string, self 10 | Daru::DataFrame.register_io_module :write_csv, self 11 | 12 | # Initializes a CSV Exporter instance 13 | # 14 | # @param dataframe [Daru::DataFrame] A dataframe to export 15 | # @param converters [Symbol] A type to convert the data in dataframe 16 | # @param compression [Symbol] Defaults to `:infer`, which decides depending on file format 17 | # like `.csv.gz`. For explicitly writing into a `.csv.gz` file, set 18 | # `:compression` as `:gzip`. 19 | # @param headers [Boolean] When set to `false`, the headers aren't written 20 | # to the CSV file 21 | # @param convert_comma [Boolean] When set to `true`, the decimal delimiter 22 | # for float values is a comma (,) rather than a dot (.). 23 | # @param options [Hash] CSV standard library options, to tweak other 24 | # default options of CSV gem. 
25 | # 26 | # @example Initializing a CSV Exporter Instance 27 | # df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b]) 28 | # 29 | # #=> # 30 | # # a b 31 | # # 0 1 3 32 | # # 1 2 4 33 | # 34 | # csv_instance = Daru::IO::Exporters::CSV.new(df, col_sep: ' ') 35 | # csv_gz_instance = Daru::IO::Exporters::CSV.new(df, col_sep: ' ', compression: :gzip) 36 | def initialize(dataframe, converters: :numeric, compression: :infer, 37 | headers: nil, convert_comma: nil, **options) 38 | require 'csv' 39 | 40 | super(dataframe) 41 | @headers = headers 42 | @compression = compression 43 | @convert_comma = convert_comma 44 | @options = options.merge converters: converters 45 | end 46 | 47 | # Exports a CSV Exporter instance to a file-writable String. 48 | # 49 | # @return [String] A file-writable string 50 | # 51 | # @example Getting a file-writable string from CSV Exporter instance 52 | # csv_instance.to_s 53 | # #=> "a b\n1 3\n2 4\n" 54 | # 55 | # csv_gz_instance.to_s 56 | # #=> "\u001F\x8B\b\u0000*D\xA4Y\u0000\u0003KTH\xE22T0\xE62R0\xE1\u0002\u0000\xF2\\\x96y\..." 57 | def to_s 58 | super 59 | end 60 | 61 | # Exports an Avro Exporter instance to a csv / csv.gz file. 62 | # 63 | # @param path [String] Path of the csv / csv.gz file where the dataframe is to be saved 64 | # 65 | # @example Writing an Avro Exporter instance to an Avro file 66 | # csv_instance.write('filename.csv') 67 | # csv_gz_instance.write('filename.csv.gz') 68 | def write(path) 69 | @path = path 70 | contents = process_dataframe 71 | 72 | if compression?(:gzip, '.csv.gz') 73 | require 'zlib' 74 | ::Zlib::GzipWriter.open(@path) do |gz| 75 | contents.each { |content| gz.write(content.to_csv(@options)) } 76 | gz.close 77 | end 78 | else 79 | csv = ::CSV.open(@path, 'w', @options) 80 | contents.each { |content| csv << content } 81 | csv.close 82 | end 83 | end 84 | 85 | private 86 | 87 | def compression?(algorithm, *formats) 88 | @compression == algorithm || formats.any? 
{ |f| @path.end_with?(f) } 89 | end 90 | 91 | def process_dataframe 92 | [].tap do |result| 93 | result << @dataframe.vectors.to_a unless @headers == false 94 | @dataframe.map_rows do |row| 95 | next result << row.to_a unless @convert_comma 96 | result << row.map(&:to_s).map { |v| v =~ /^\d+./ ? v.tr('.',',') : v } 97 | end 98 | end 99 | end 100 | end 101 | end 102 | end 103 | end 104 | -------------------------------------------------------------------------------- /lib/daru/io/exporters/excel.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/exporters/base' 2 | 3 | module Daru 4 | module IO 5 | module Exporters 6 | # Excel Exporter Class, that extends `to_excel_string` and `write_excel` methods to 7 | # `Daru::DataFrame` instance variables 8 | class Excel < Base 9 | Daru::DataFrame.register_io_module :to_excel_string, self 10 | Daru::DataFrame.register_io_module :write_excel, self 11 | 12 | # Initializes an Excel Exporter instance. 13 | # 14 | # @note For giving formatting options as hashes to the `:data`, `:index` or `header` 15 | # keyword argument(s), please have a look at the 16 | # {http://www.rubydoc.info/gems/ruby-spreadsheet/Spreadsheet/Font Spreadsheet::Font} 17 | # and 18 | # {http://www.rubydoc.info/gems/ruby-spreadsheet/Spreadsheet/Format Spreadsheet::Format} 19 | # pages. 20 | # 21 | # @param dataframe [Daru::DataFrame] A dataframe to export. Supports even dataframes 22 | # with multi-index. 23 | # @param header [Hash or Boolean] Defaults to true. When set to false or nil, 24 | # headers are not written. When given a hash of formatting options, 25 | # headers are written with the specific formatting. When set to true, 26 | # headers are written without any formatting. 27 | # @param data [Hash or Boolean] Defaults to true. When set to false or nil, 28 | # data values are not written. When given a hash of formatting options, 29 | # data values are written with the specific formatting. 
When set to true, 30 | # data values are written without any formatting. 31 | # @param index [Hash or Boolean] Defaults to true. When set to false or nil, 32 | # index values are not written. When given a hash of formatting options, 33 | # index values are written with the specific formatting. When set to true, 34 | # index values are written without any formatting. 35 | # 36 | # @example Initializing an Excel Exporter instance 37 | # df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b]) 38 | # 39 | # #=> # 40 | # # a b 41 | # # 0 1 3 42 | # # 1 2 4 43 | # 44 | # simple_instance = Daru::IO::Exporters::Excel.new(df) 45 | # formatted_instance = Daru::IO::Exporters::Excel.new( 46 | # df, 47 | # header: { color: :red, weight: :bold }, 48 | # index: false, 49 | # data: { color: :blue } 50 | # ) 51 | def initialize(dataframe, header: true, data: true, index: true) 52 | optional_gem 'spreadsheet', '~> 1.1.1' 53 | 54 | super(dataframe) 55 | @data = data 56 | @index = index 57 | @header = header 58 | end 59 | 60 | # Exports an Excel Exporter instance to a file-writable String. 61 | # 62 | # @return [String] A file-writable string 63 | # 64 | # @example Getting a file-writable string from Excel Exporter instance 65 | # simple_instance.to_s #! same as df.to_avro_string(schema) 66 | # 67 | # #=> "\xD0\xCF\u0011\u0871\u001A\xE1\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000..." 68 | # 69 | # formatted_instance.to_s 70 | # 71 | # #=> "\xD0\xCF\u0011\u0871\u001A\xE1\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000..." 72 | def to_s 73 | super 74 | end 75 | 76 | # Exports an Excel Exporter instance to an xls file. 
# Writes the dataframe into an .xls workbook at +path+.
#
# @param path [String] destination file path
def write(path)
  @book  = Spreadsheet::Workbook.new
  @sheet = @book.create_worksheet

  process_offsets
  write_headers

  @dataframe.each_row_with_index.with_index do |(row, index), pos|
    write_index(index, pos + @row_offset)
    write_data(row, pos + @row_offset)
  end

  @book.write(path)
end

private

# Row offset is 1 when a header row is emitted; column offset is the width
# of the index (0 when the index is suppressed).
def process_offsets
  @row_offset = @header ? 1 : 0
  @col_offset = 0 unless @index
  @col_offset ||= @dataframe.index.is_a?(Daru::MultiIndex) ? @dataframe.index.width : 1
end

# Emits the header row, padded with blanks over the index columns.
def write_headers
  formatting(
    0...@col_offset + @dataframe.ncols,
    0,
    [' '] * @col_offset + @dataframe.vectors.map(&:to_s),
    @header
  )
end

# Emits the index cell(s) for one spreadsheet row.
def write_index(idx, row)
  formatting(0...@col_offset, row, idx, @index)
end

# Emits the data cells for one spreadsheet row.
def write_data(row, idx)
  formatting(@col_offset...@col_offset + @dataframe.ncols, idx, row, @data)
end

# Appends +values+ to spreadsheet row +row+ and, when +format+ is a Hash,
# applies it as a Spreadsheet::Format over +col_range+. A falsy +format+
# writes nothing at all.
def formatting(col_range, row, values, format)
  return unless format

  cells =
    case values
    when Daru::Vector then values.to_a
    when Array        then values.map(&:to_s)
    else                   [values.to_s]
    end
  @sheet.row(row).concat(cells)

  return unless format.is_a?(Hash)
  col_range.each { |col| @sheet.row(row).set_format(col, Spreadsheet::Format.new(format)) }
end
# Initializes a RData Exporter instance.
#
# @note Unlike the single-dataframe exporters this takes no positional
#   dataframe — each keyword becomes an R variable name mapped to the
#   Daru::DataFrame to be saved under that name.
#
# @param options [Hash] R `data.frame` variable name => Daru::DataFrame pairs
def initialize(**options)
  optional_gem 'rsruby'

  @options = options
end

# Renders all configured dataframes as a file-writable RData String.
#
# @return [String] the RData file contents
def to_s
  super
end
# Writes every configured dataframe into a single RData file.
#
# @param path [String] destination path of the RData file
def write(path)
  @instance = RSRuby.instance
  # One batch of R assignment statements per dataframe (flat_map replaces
  # the previous map{}.flatten), followed by a single save() that bundles
  # all the variables into one file.
  @statements = @options.flat_map do |r_variable, dataframe|
    process_statements(r_variable, dataframe)
  end
  @statements << "save(#{@options.keys.map(&:to_s).join(', ')}, file='#{path}')"
  @statements.each { |statement| @instance.eval_R(statement) }
end
# Writes the dataframe into a RDS file at +path+.
#
# @param path [String] destination path of the RDS file
def write(path)
  @instance = RSRuby.instance
  @statements = process_statements(@r_variable, @dataframe)
  @statements << "saveRDS(#{@r_variable}, file='#{path}')"
  @statements.each { |statement| @instance.eval_R(statement) }
end

private

# Builds the R statements that declare each vector and assemble them into
# a data.frame bound to +r_variable+.
def process_statements(r_variable, dataframe)
  [
    *dataframe.map_vectors_with_index do |vector, i|
      "#{i} = c(#{vector.to_a.map { |val| convert_datatype(val) }.join(', ')})"
    end,
    "#{r_variable} = data.frame(#{dataframe.vectors.to_a.map(&:to_s).join(', ')})"
  ]
end

# Converts a Ruby value into its R literal representation.
#
# nil maps to NA and booleans to TRUE/FALSE (R has no lowercase `true`
# literal). Strings are single-quoted with embedded quotes and backslashes
# escaped, so values like "it's" no longer break the generated R code.
# Everything else (numerics) is emitted as-is.
def convert_datatype(value)
  case value
  when nil    then 'NA'
  when true   then 'TRUE'
  when false  then 'FALSE'
  when String then "'#{value.gsub(/['\\]/) { |c| "\\#{c}" }}'"
  else value
  end
end
# Inserts every row of the dataframe into the configured SQL table via a
# prepared statement.
#
# @return [true]
#
# @example Exporting into the given SQL table
#   instance.to
def to
  columns      = @dataframe.vectors.to_a
  placeholders = Array.new(columns.size, '?').join(',')
  statement    = @dbh.prepare(
    "INSERT INTO #{@table} (#{columns.join(',')}) VALUES (#{placeholders})"
  )
  @dataframe.each_row { |row| statement.execute(*row.to_a) }
  true
end
# Stores the relation whose records will populate the DataFrame.
#
# @param relation [ActiveRecord::Relation] relation to load from
#
# @return [Daru::IO::Importers::ActiveRecord] self, for chaining
def from(relation)
  @relation = relation
  self
end

# Imports a Daru::DataFrame from the stored relation.
#
# @param fields [Array<String, Symbol>] columns to load; when no fields are
#   given, every attribute of every record is loaded
#
# @return [Daru::DataFrame]
def call(*fields)
  @fields = fields.map(&:to_sym)

  # Without an explicit field list, materialise the whole records at once.
  if @fields.empty?
    return Daru::DataFrame.new(@relation.map { |record| record.attributes.symbolize_keys })
  end

  empty_vectors = @fields.map { |name| [name, Daru::Vector.new([], name: name)] }.to_h

  Daru::DataFrame.new(empty_vectors, order: @fields).tap do |df|
    # pluck fetches only the requested columns from the database.
    @relation.pluck(*@fields).each { |record| df.add_row(Array(record)) }
    df.update
  end
end
# Reads and decodes an Avro file into memory.
#
# @param path [String] path of the Avro file to import from
#
# @return [Daru::IO::Importers::Avro] self, for chaining
def read(path)
  @path   = path
  @buffer = StringIO.new(File.read(@path))
  @data   = ::Avro::DataFile::Reader.new(@buffer, ::Avro::IO::DatumReader.new).to_a
  self
end

# Builds a Daru::DataFrame from the decoded Avro records.
#
# @return [Daru::DataFrame]
def call
  Daru::DataFrame.new(@data)
end
# Infers the appropriate Daru::DataFrame construction from parsed
# key-value pairs.
#
# @param keys [Array] keys of the given key-value pairs
# @param vals [Array] values of the given key-value pairs
#
# @return [Daru::DataFrame] built from rows when the values are plain
#   Arrays, from flattened Hashes when the values contain Hashes, and with
#   +keys+ as index when the values are Hashes themselves
def self.guess_parse(keys, vals)
  sample = vals.first
  if sample.is_a?(Array)
    if sample.first.is_a?(Hash)
      Daru::DataFrame.new(vals.flatten)
    else
      Daru::DataFrame.rows(vals.transpose, order: keys)
    end
  elsif sample.is_a?(Hash)
    Daru::DataFrame.new(vals.flatten, index: keys)
  end
end

# Adds the `from` class method to inheriting Importers: instantiates the
# importer and delegates to its instance-level `from`.
def self.from(relation)
  new.from(relation)
end
# Adds the `read` class method to inheriting Importers: instantiates the
# importer and delegates to its instance-level `read`.
def self.read(path)
  new.read(path)
end

# Custom CSV cell converters merged with the stdlib CSV ones.
# :boolean turns "true"/"false" (any case, surrounding blanks ignored)
# into real booleans and leaves every other value untouched.
CONVERTERS = {
  boolean: lambda { |field, _field_info|
    case field.downcase.strip
    when 'true'  then true
    when 'false' then false
    else field
    end
  }
}.freeze

# Loads the standard libraries the CSV Importer relies on.
def initialize
  require 'csv'
  require 'open-uri'
  require 'zlib'
end

# Reads data from a csv / csv.gz file (or URL).
#
# @param path [String] path or URL of the csv / csv.gz source
#
# @return [Daru::IO::Importers::CSV] self, for chaining
def read(path)
  @path      = path
  @file_data = open(@path)
  self
end
56 | # @param clone [Boolean] Have a look at `:clone` option 57 | # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize) 58 | # @param index [Array or Daru::Index or Daru::MultiIndex] Have a look at 59 | # `:index` option 60 | # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize) 61 | # @param order [Array or Daru::Index or Daru::MultiIndex] Have a look at 62 | # `:order` option 63 | # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize) 64 | # @param name [String] Have a look at `:name` option 65 | # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize) 66 | # @param options [Hash] CSV standard library options such as `:col_sep` 67 | # (defaults to `','`), `:converters` (defaults to `:numeric`), 68 | # `:header_converters` (defaults to `:symbol`). 69 | # 70 | # @return [Daru::DataFrame] 71 | # 72 | # @example Calling with csv options 73 | # df = instance.call(col_sep: ' ', headers: true) 74 | # 75 | # #=> # 76 | # # image_reso mls true_trans 77 | # # 0 6.55779 0 -0.2362347 78 | # # 1 2.14746 0 -0.1539447 79 | # # 2 8.31104 0 0.3832846, 80 | # # 3 3.47872 0 0.3832846, 81 | # # 4 4.16725 0 -0.2362347 82 | # # 5 5.79983 0 -0.2362347 83 | # # 6 1.9058 0 -0.895577, 84 | # # 7 1.9058 0 -0.2362347 85 | # # 8 4.11806 0 -0.895577, 86 | # # 9 6.26622 0 -0.2362347 87 | # # 10 2.57805 0 -0.1539447 88 | # # 11 4.76151 0 -0.2362347 89 | # # 12 7.11002 0 -0.895577, 90 | # # 13 5.40811 0 -0.2362347 91 | # # 14 8.19567 0 -0.1539447 92 | # # ... ... ... ... 
# Imports a Daru::DataFrame from the CSV source read earlier.
#
# @return [Daru::DataFrame]
def call(headers: nil, skiprows: 0, compression: :infer,
         clone: nil, index: nil, order: nil, name: nil, **options)
  init_opts(headers: headers, skiprows: skiprows, compression: compression,
            clone: clone, index: index, order: order, name: name, **options)
  process_compression

  # When :headers is not given, repeated column names are detected and
  # recoded before the DataFrame is built.
  columns_hash =
    if @headers
      hash_with_headers
    else
      hash_without_headers.tap { |hash| @daru_options[:order] = hash.keys }
    end

  Daru::DataFrame.new(columns_hash, @daru_options)
end

private

# True when the requested compression matches +algorithm+, or the path
# carries one of the associated file extensions.
def compression?(algorithm, *formats)
  @compression == algorithm || formats.any? { |fmt| @path.end_with?(fmt) }
end

# column name => values, using the CSV's own header row.
def hash_with_headers
  ::CSV
    .parse(@file_data, @options)
    .tap { |csv| yield csv if block_given? }
    .by_col
    .map { |col_name, values| [col_name, values.nil? ? [] : values[@skiprows..-1]] }
    .to_h
end

# column name => values, recoding repeated names from the first row.
def hash_without_headers
  rows =
    ::CSV
      .parse(@file_data, @options)
      .tap { |csv| yield csv if block_given? }
      .to_a
  column_names = ArrayHelper.recode_repeated(rows.shift)
  columns      = rows[@skiprows..-1].transpose
  column_names
    .each_with_index
    .map { |header, i| [header, columns[i] || []] }
    .to_h
end

# Normalises all keyword options into instance variables and resolves the
# converter symbols.
def init_opts(headers: nil, skiprows: 0, compression: :infer,
              clone: nil, index: nil, order: nil, name: nil, **options)
  @headers      = headers
  @skiprows     = skiprows
  @compression  = compression
  @daru_options = {clone: clone, index: index, order: order, name: name}
  @options      = {
    col_sep: ',', converters: [:numeric], header_converters: :symbol,
    headers: @headers, skip_blanks: true
  }.merge(options)

  # Resolve each converter: stdlib CSV converters first, then the custom
  # ones declared in CONVERTERS, else pass the entry through untouched.
  @options[:converters] = @options[:converters].flat_map do |conv|
    ::CSV::Converters[conv] || CONVERTERS[conv] || conv
  end
end

# Transparently unzips gzipped sources before parsing.
def process_compression
  @file_data = ::Zlib::GzipReader.new(@file_data).read if compression?(:gzip, '.csv.gz')
end
# Opens an Excel (.xls) workbook for importing.
#
# @param path [String] path of the Excel file
#
# @return [Daru::IO::Importers::Excel] self, for chaining
def read(path)
  @file_data = Spreadsheet.open(path)
  self
end
# Imports a Daru::DataFrame from the worksheet of an Excel Importer
# instance.
#
# @param worksheet_id [Integer] zero-based worksheet to import (default 0)
# @param headers [Boolean] when true (default) the first row supplies the
#   column order; when false, columns are labelled 0..n-1 and the first
#   row is kept as data
#
# @return [Daru::DataFrame]
def call(worksheet_id: 0, headers: true)
  worksheet = @file_data.worksheet(worksheet_id)
  # Keep the column labels in their own variable instead of reassigning
  # `headers`: the original shadowed the parameter with an Array, which is
  # always truthy, so the first data row was dropped even for
  # `headers: false`.
  columns = if headers
              ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)
            else
              (0..worksheet.row(0).to_a.size - 1).to_a
            end

  df = Daru::DataFrame.new({})
  columns.each_with_index do |label, i|
    column = worksheet.column(i).to_a
    column.delete_at(0) if headers # drop the header cell only when present
    df[label] = column
  end

  df
end
# Opens an xlsx workbook (local path or remote URL) for importing.
#
# @param path [String] local / remote path of the xlsx file
#
# @return [Daru::IO::Importers::Excelx] self, for chaining
def read(path)
  @file_data = Roo::Excelx.new(path)
  self
end
# Imports a Daru::DataFrame from the sheet read earlier.
#
# @param sheet [Integer, String] sheet to import from
# @param skiprows [Integer] rows to drop from the top
# @param skipcols [Integer] columns to drop from the left
# @param order [Boolean] when true (default), the first remaining row
#   becomes the DataFrame order
# @param index [Boolean] when true, the first remaining column becomes the
#   DataFrame index
#
# @return [Daru::DataFrame]
def call(sheet: 0, skiprows: 0, skipcols: 0, order: true, index: false)
  @order = order
  @index = index
  worksheet = @file_data.sheet(sheet)
  @data  = strip_html_tags(skip_data(worksheet.to_a, skiprows, skipcols))
  @index = process_index
  @order = process_order
  @data  = process_data
  # Apply the positional-order fallback AFTER trimming: the previous eager
  # default made @order truthy before process_data ran, so `order: false`
  # wrongly dropped the first row, and the default range was computed from
  # the untrimmed column count.
  @order ||= (0..@data.first.length - 1)

  Daru::DataFrame.rows(@data, order: @order, index: @index)
end

private

# Drops the header row and/or index column from the payload, depending on
# which of them were extracted.
def process_data
  return skip_data(@data, 1, 1) if @order && @index
  return skip_data(@data, 1, 0) if @order
  return skip_data(@data, 0, 1) if @index
  @data
end

# Extracts the first column as index (minus the header cell when an order
# row is also being extracted); nil when no index is wanted.
def process_index
  return nil unless @index
  @index = @data.transpose.first
  @index = skip_data(@index, 1) if @order
  @index
end

# Extracts the first row as order (minus the index cell when an index
# column is also being extracted); nil when no order is wanted.
def process_order
  return nil unless @order
  @order = @data.first
  @order = skip_data(@order, 1) if @index
  @order
end

# Returns +data+ without its first +rows+ rows and, when +cols+ is given,
# without the first +cols+ entries of each remaining row.
def skip_data(data, rows, cols=nil)
  return data[rows..-1].map { |row| row[cols..-1] } unless cols.nil?
  data[rows..-1]
end

# Removes inline HTML tags (e.g. from rich-text cells) from every String
# cell.
def strip_html_tags(data)
  data.map do |row|
    row.map do |ele|
      next ele unless ele.is_a?(String)
      ele.gsub(/<[^>]+>/, '')
    end
  end
end
Have a look at `:index` option, at 43 | # {http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize 44 | # Daru::DataFrame#initialize} 45 | # @param order [Array or Daru::Index or Daru::MultiIndex] If given, it 46 | # overrides the parsed order. Have a look at `:order` option 47 | # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize) 48 | # @param name [String] As `name` of the imported `Daru::DataFrame` isn't 49 | # parsed automatically by the module, users can set the name attribute to 50 | # their `Daru::DataFrame` manually, through this option. 51 | # 52 | # See `:name` option 53 | # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize) 54 | # 55 | # @return [Array] 56 | # 57 | # @example Importing with matching tables 58 | # list_of_dfs = instance.call(match: 'Sun Pharma') 59 | # list_of_dfs.count 60 | # #=> 4 61 | # 62 | # df = list_of_dfs.first 63 | # 64 | # # As the website keeps changing everyday, the output might not be exactly 65 | # # the same as the one obtained below. Nevertheless, a Daru::DataFrame 66 | # # should be obtained (as long as 'Sun Pharma' is there on the website). 
67 | # 68 | # #=> 69 | # # Company Price Change Value (Rs 70 | # # 0 Sun Pharma 502.60 -65.05 2,117.87 71 | # # 1 Reliance 1356.90 19.60 745.10 72 | # # 2 Tech Mahin 379.45 -49.70 650.22 73 | # # 3 ITC 315.85 6.75 621.12 74 | # # 4 HDFC 1598.85 50.95 553.91 75 | def call(match: nil, order: nil, index: nil, name: nil) 76 | @match = match 77 | @options = {name: name, index: index, order: order} 78 | 79 | @file_data 80 | .search('table') 81 | .map { |table| parse_table(table) } 82 | .compact 83 | .keep_if { |table| satisfy_dimension(table) && search(table) } 84 | .map { |table| decide_values(table, @options) } 85 | .map { |table| table_to_dataframe(table) } 86 | end 87 | 88 | private 89 | 90 | # Allows user to override the scraped order / index / data 91 | def decide_values(scraped_val, user_val) 92 | scraped_val.merge(user_val) { |_key, scraped, user| user || scraped } 93 | end 94 | 95 | # Splits headers (all th tags) into order and index. Wherein, 96 | # Order : All tags on first proper row of HTML table 97 | # index : All tags on first proper column of HTML table 98 | def parse_hash(headers, size, headers_size) 99 | headers_index = headers.find_index { |x| x.count == headers_size } 100 | order = headers[headers_index] 101 | order_index = order.count - size 102 | order = order[order_index..-1] 103 | indice = headers[headers_index+1..-1].flatten 104 | indice = nil if indice.to_a.empty? 105 | [order, indice] 106 | end 107 | 108 | def parse_table(table) 109 | headers, headers_size = scrape_tag(table,'th') 110 | data, size = scrape_tag(table, 'td') 111 | data = data.keep_if { |x| x.count == size } 112 | order, indice = parse_hash(headers, size, headers_size) if headers_size >= size 113 | return unless (indice.nil? || indice.count == data.count) && !order.nil? 
&& order.count>0 114 | {data: data.compact, index: indice, order: order} 115 | end 116 | 117 | def scrape_tag(table, tag) 118 | arr = table.search('tr').map { |row| row.search(tag).map { |val| val.text.strip } } 119 | size = arr.map(&:count).max 120 | [arr, size] 121 | end 122 | 123 | def satisfy_dimension(table) 124 | return false if @options[:order] && table[:data].first.size != @options[:order].size 125 | return false if @options[:index] && table[:data].size != @options[:index].size 126 | true 127 | end 128 | 129 | def search(table) 130 | @match.nil? ? true : table.to_s.include?(@match) 131 | end 132 | 133 | def table_to_dataframe(table) 134 | Daru::DataFrame.rows( 135 | table[:data], 136 | index: table[:index], 137 | order: table[:order], 138 | name: table[:name] 139 | ) 140 | end 141 | end 142 | end 143 | end 144 | end 145 | -------------------------------------------------------------------------------- /lib/daru/io/importers/json.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/importers/base' 2 | 3 | module Daru 4 | module IO 5 | module Importers 6 | # JSON Importer Class, that extends `from_json` and `read_json` methods 7 | # to `Daru::DataFrame` 8 | class JSON < Base 9 | Daru::DataFrame.register_io_module :from_json, self 10 | Daru::DataFrame.register_io_module :read_json, self 11 | 12 | # Checks for required gem dependencies of JSON Importer 13 | def initialize 14 | require 'open-uri' 15 | require 'json' 16 | optional_gem 'jsonpath' 17 | end 18 | 19 | # Reads data from a json file / remote json response 20 | # 21 | # @!method self.read(path) 22 | # 23 | # @param path [String] Local / Remote path to json file, where the dataframe is to be imported 24 | # from. 
25 | # 26 | # @return [Daru::IO::Importers::JSON] 27 | # 28 | # @example Reading from simply nested remote json response 29 | # url = 'https://data.nasa.gov/resource/2vr3-k9wn.json' 30 | # simple_read_instance = Daru::IO::Importers::JSON.read(url) 31 | # 32 | # @example Reading from complexy nested remote json response 33 | # url = 'http://api.tvmaze.com/singlesearch/shows?q=game-of-thrones&embed=episodes' 34 | # complex_read_instance = Daru::IO::Importers::JSON.read(url) 35 | def read(path) 36 | @file_data = ::JSON.parse(open(path).read) 37 | @json = @file_data 38 | self 39 | end 40 | 41 | # Loads from a Ruby structure of Hashes and / or Arrays 42 | # 43 | # @!method self.from(instance) 44 | # 45 | # @param instance [Hash or Array] A simple / complexly nested JSON structure 46 | # 47 | # @return [Daru::IO::Importers::JSON] 48 | # 49 | # @example Loading from Ruby Hash of Arrays 50 | # from_instance = Daru::IO::Importers::JSON.from({x: [1,4], y: [2,5], z: [3, 6]}) 51 | def from(instance) 52 | @file_data = instance 53 | @json = @file_data.is_a?(String) ? ::JSON.parse(@file_data) : @file_data 54 | self 55 | end 56 | 57 | # Imports a `Daru::DataFrame` from a JSON Importer instance 58 | # 59 | # @param columns [Array] JSON-path slectors to select specific fields 60 | # from the JSON input. 61 | # @param order [String or Array] Either a JSON-path selector string, or 62 | # an array containing the order of the `Daru::DataFrame`. 63 | # @param index [String or Array] Either a JSON-path selector string, or 64 | # an array containing the order of the `Daru::DataFrame`. 65 | # @param named_columns [Hash] JSON-path slectors to select specific fields 66 | # from the JSON input. 67 | # 68 | # @return [Daru::DataFrame] 69 | # 70 | # @note For more information on using JSON-path selectors, have a look at 71 | # the explanations {http://www.rubydoc.info/gems/jsonpath/0.5.8 here} 72 | # and {http://goessner.net/articles/JsonPath/ here}. 
73 | # 74 | # @example Importing without jsonpath selectors 75 | # df = simple_read_instance.call 76 | # 77 | # #=> # 78 | # # designation discovery_ h_mag i_deg moid_au orbit_clas period_yr ... 79 | # # 0 419880 (20 2011-01-07 19.7 9.65 0.035 Apollo 4.06 ... 80 | # # 1 419624 (20 2010-09-17 20.5 14.52 0.028 Apollo 1 ... 81 | # # 2 414772 (20 2010-07-28 19 23.11 0.333 Apollo 1.31 ... 82 | # # ... ... ... ... ... ... ... ... ... 83 | # 84 | # @example Importing with jsonpath selectors 85 | # df = complex_read_instance.call( 86 | # "$.._embedded..episodes..name", 87 | # "$.._embedded..episodes..season", 88 | # "$.._embedded..episodes..number", 89 | # index: (10..70).to_a, 90 | # RunTime: "$.._embedded..episodes..runtime" 91 | # ) 92 | # 93 | # #=> # 94 | # # name season number RunTime 95 | # # 10 Winter is 1 1 60 96 | # # 11 The Kingsr 1 2 60 97 | # # 12 Lord Snow 1 3 60 98 | # # ... ... ... ... ... 99 | # 100 | # @example Importing from `from` method 101 | # df = from_instance.call 102 | # 103 | # #=> # 104 | # # x y z 105 | # # 0 1 2 3 106 | # # 1 4 5 6 107 | def call(*columns, order: nil, index: nil, **named_columns) 108 | init_opts(*columns, order: order, index: index, **named_columns) 109 | @data = fetch_data 110 | @index = at_jsonpath(@index) 111 | @order = at_jsonpath(@order) 112 | @order ||= Array.new(@columns.count) { |x| x } + @named_columns.keys 113 | 114 | Daru::DataFrame.new(@data, order: @order, index: @index) 115 | end 116 | 117 | private 118 | 119 | def at_jsonpath(jsonpath) 120 | jsonpath.is_a?(String) ? JsonPath.on(@json, jsonpath) : jsonpath 121 | end 122 | 123 | def fetch_data 124 | return @json if @columns.empty? && @named_columns.empty? 125 | 126 | # If only one unnamed column is provided without any named_columns, 127 | # entire dataset is assumed to reside in that JSON-path. 128 | return at_jsonpath(@columns.first) if @columns.size == 1 && @named_columns.empty? 
129 | data_columns = @columns + @named_columns.values 130 | data_columns.map { |col| at_jsonpath(col) } 131 | end 132 | 133 | def init_opts(*columns, order: nil, index: nil, **named_columns) 134 | @columns = columns 135 | @order = order 136 | @index = index 137 | @named_columns = named_columns 138 | 139 | validate_params 140 | end 141 | 142 | def validate_params 143 | return if @order.nil? || @named_columns.empty? 144 | 145 | raise ArgumentError, 146 | 'Do not pass on order and named columns together, at the same '\ 147 | 'function call. Please use only order or only named_columns.' 148 | end 149 | end 150 | end 151 | end 152 | end 153 | -------------------------------------------------------------------------------- /lib/daru/io/importers/log.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/importers/base' 2 | 3 | module Daru 4 | module IO 5 | module Importers 6 | # Log Importer Class, that extends `read_rails_log` method 7 | # to `Daru::DataFrame` 8 | class Log < Base 9 | Daru::DataFrame.register_io_module :read_rails_log, self 10 | 11 | def initialize 12 | optional_gem 'request-log-analyzer', '~> 1.13.4', requires: 'request_log_analyzer' 13 | end 14 | 15 | ORDERS = { 16 | rails3: %i[method path ip timestamp line_type lineno source 17 | controller action format params rendered_file 18 | partial_duration status duration view db].freeze, 19 | 20 | apache: %i[remote_host remote_logname user timestamp http_method 21 | path http_version http_status bytes_sent referer 22 | user_agent line_type lineno source].freeze, 23 | 24 | amazon_s3: %i[bucket_owner bucket timestamp remote_ip requester request_id operation 25 | key request_uri http_status error_code bytes_sent object_size total_time 26 | turnaround_time referer user_agent line_type lineno source].freeze 27 | }.freeze 28 | 29 | RENAME_FIELDS = { 30 | path: :resource_path 31 | }.freeze 32 | 33 | # Reads data from a log file 34 | # 35 | # @!method self.read(path, 
format: :rails3) 36 | # 37 | # @param path [String] Path to log file, where the dataframe is to be 38 | # imported from. 39 | # 40 | # @param format [Symbol] Format of log file, which can be :rails3, :apache or :amazon_s3 41 | # default format set to :rails3 42 | # 43 | # @return [Daru::IO::Importers::Log] 44 | # 45 | # @example Reading from rails log file 46 | # instance = Daru::IO::Importers::Log.read("rails_test.log") 47 | # 48 | # @example Reading from apache log file 49 | # instance = Daru::IO::Importers::Log.new.read("apache_test.log", format: :apache) 50 | # 51 | # @example Reading from amazon s3 log file 52 | # instance = Daru::IO::Importers::Log.new.read("amazon_s3_test.log", format: :amazon_s3) 53 | def read(path, format: :rails3) 54 | @format = format 55 | @file_data = RequestLogAnalyzer::Source::LogParser 56 | .new(RequestLogAnalyzer::FileFormat.load(@format), source_files: path) 57 | .map do |request| 58 | ORDERS 59 | .fetch(@format) 60 | .map { |attr| request.attributes.include?(attr) ? request.attributes[attr] : nil } 61 | end 62 | self 63 | end 64 | 65 | # Imports a `Daru::DataFrame` from a Log Importer instance and log file 66 | # 67 | # @return [Daru::DataFrame] 68 | # 69 | # @example Reading from a log file 70 | # df = instance.call 71 | # 72 | # => # 73 | # # method resource_path ip timestamp line_type lineno source contr... 74 | # # 0 GET / 127.0.0.1 2018022607 completed 5 /home/roh Rails... 75 | # # 1 GET / 127.0.0.1 2018022716 completed 12 /home/roh Rails... 76 | # # ... ... ... ... ... ... ... ... ... 
77 | def call 78 | Daru::DataFrame.rows(@file_data, order: ORDERS.fetch(@format) 79 | .map { |attr| RENAME_FIELDS.fetch(attr, attr) }) 80 | end 81 | end 82 | end 83 | end 84 | end 85 | -------------------------------------------------------------------------------- /lib/daru/io/importers/mongo.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/importers/json' 2 | 3 | module Daru 4 | module IO 5 | module Importers 6 | # Mongo Importer Class, that extends `from_mongo` method to `Daru::DataFrame` 7 | class Mongo < JSON 8 | Daru::DataFrame.register_io_module :from_mongo, self 9 | 10 | # Checks for required gem dependencies of Mongo Importer 11 | def initialize 12 | super 13 | optional_gem 'mongo' 14 | end 15 | 16 | # Loads data from a given connection 17 | # 18 | # @!method self.from(connection) 19 | # 20 | # @param connection [String or Hash or Mongo::Client] Contains details 21 | # about a Mongo database / hosts to connect. 22 | # 23 | # @return [Daru::IO::Importers::Mongo] 24 | # 25 | # @example Loading from a connection string 26 | # instance_1 = Daru::IO::Importers::Mongo.from('mongodb://127.0.0.1:27017/test') 27 | # 28 | # @example Loading from a connection hash 29 | # instance_2 = Daru::IO::Importers::Mongo.from({ hosts: ['127.0.0.1:27017'], database: 'test' }) 30 | # 31 | # @example Loading from a Mongo::Client connection 32 | # instance_3 = Daru::IO::Importers::Mongo.from(Mongo::Client.new ['127.0.0.1:27017'], database: 'test') 33 | def from(connection) 34 | @client = get_client(connection) 35 | self 36 | end 37 | 38 | # Imports a `Daru::DataFrame` from a Mongo Importer instance. 39 | # 40 | # @param collection [String or Symbol] A specific collection in the 41 | # Mongo database, to import as `Daru::DataFrame`. 42 | # @param columns [Array] JSON-path slectors to select specific fields 43 | # from the JSON input. 
44 | # @param order [String or Array] Either a JSON-path selector string, or 45 | # an array containing the order of the `Daru::DataFrame`. DO NOT 46 | # provide both `order` and `named_columns` at the same time. 47 | # @param index [String or Array] Either a JSON-path selector string, or 48 | # an array containing the order of the `Daru::DataFrame`. 49 | # @param filter [Hash] Filters and chooses Mongo documents that match 50 | # the given `filter` from the collection. 51 | # @param limit [Interger] Limits the number of Mongo documents to be 52 | # parsed from the collection. 53 | # @param skip [Integer] Skips `skip` number of documents from the Mongo 54 | # collection. 55 | # @param named_columns [Hash] JSON-path selectors to select specific 56 | # fields from the JSON input. DO NOT provide both `order` and 57 | # `named_columns` at the same time. 58 | # 59 | # @note 60 | # - For more information on using JSON-path selectors, have a look at 61 | # the explanations {http://www.rubydoc.info/gems/jsonpath/0.5.8 here} 62 | # and {http://goessner.net/articles/JsonPath/ here}. 63 | # - The Mongo gem faces `Argument Error : expected Proc Argument` 64 | # issue due to the bug in MRI Ruby 2.4.0 mentioned 65 | # {https://bugs.ruby-lang.org/issues/13107 here}. This seems to have 66 | # been fixed in Ruby 2.4.1 onwards. Hence, please avoid using this 67 | # Mongo Importer in Ruby version 2.4.0. 
68 | # 69 | # @return [Daru::DataFrame] 70 | # 71 | # @example Importing without jsonpath selectors 72 | # # The below 'cars' collection can be recreated in a Mongo shell with - 73 | # # db.cars.drop() 74 | # # db.cars.insert({name: "Audi", price: 52642}) 75 | # # db.cars.insert({name: "Mercedes", price: 57127}) 76 | # # db.cars.insert({name: "Volvo", price: 29000}) 77 | # 78 | # df = instance.call('cars') 79 | # 80 | # #=> # 81 | # # _id name price 82 | # # 0 5948d0bfcd Audi 52642.0 83 | # # 1 5948d0c6cd Mercedes 57127.0 84 | # # 2 5948d0cecd Volvo 29000.0 85 | # 86 | # @example Importing with jsonpath selectors 87 | # # The below 'cars' collection can be recreated in a Mongo shell with - 88 | # # db.cars.drop() 89 | # # db.cars.insert({name: "Audi", price: 52642, star: { fuel: 9.8, cost: 8.6, seats: 9.9, sound: 9.3 }}) 90 | # # db.cars.insert({name: "Mercedes", price: 57127, star: { fuel: 9.3, cost: 8.9, seats: 8.4, sound: 9.1 }}) 91 | # # db.cars.insert({name: "Volvo", price: 29000, star: { fuel: 7.8, cost: 9.9, seats: 8.2, sound: 8.9 }}) 92 | # 93 | # df = instance.call( 94 | # 'cars', 95 | # '$.._id', 96 | # '$..name', 97 | # '$..price', 98 | # '$..star..fuel', 99 | # '$..star..cost' 100 | # ) 101 | # 102 | # #=> # 103 | # # _id name price fuel cost 104 | # # 0 5948d40b50 Audi 52642.0 9.8 8.6 105 | # # 1 5948d42850 Mercedes 57127.0 9.3 8.9 106 | # # 2 5948d44350 Volvo 29000.0 7.8 9.9 107 | def call(collection, *columns, order: nil, index: nil, 108 | filter: nil, limit: nil, skip: nil, **named_columns) 109 | @json = ::JSON.parse( 110 | @client[collection.to_sym] 111 | .find(filter, skip: skip, limit: limit) 112 | .to_json 113 | ) 114 | 115 | super(*columns, order: order, index: index, **named_columns) 116 | end 117 | 118 | private 119 | 120 | def get_client(connection) 121 | case connection 122 | when ::Mongo::Client 123 | connection 124 | when Hash 125 | hosts = connection.delete :hosts 126 | ::Mongo::Client.new(hosts, connection) 127 | when String 128 | 
::Mongo::Client.new(connection) 129 | else 130 | raise ArgumentError, 131 | "Expected #{connection} to be either a Mongo instance, "\ 132 | 'Mongo connection Hash, or Mongo connection URL String. '\ 133 | "Received #{connection.class} instead." 134 | end 135 | end 136 | end 137 | end 138 | end 139 | end 140 | -------------------------------------------------------------------------------- /lib/daru/io/importers/plaintext.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/importers/base' 2 | 3 | module Daru 4 | module IO 5 | module Importers 6 | # Plaintext Importer Class, that extends `read_plaintext` method to 7 | # `Daru::DataFrame` 8 | class Plaintext < Base 9 | Daru::DataFrame.register_io_module :read_plaintext, self 10 | 11 | # Checks for required gem dependencies of Plaintext Importer 12 | def initialize; end 13 | 14 | # Reads data from a plaintext (.dat) file 15 | # 16 | # @!method self.read(path) 17 | # 18 | # @param path [String] Path to plaintext file, where the dataframe is to be 19 | # imported from. 20 | # 21 | # @return [Daru::IO::Importers::Plaintext] 22 | # 23 | # @example Reading from plaintext file 24 | # instance = Daru::IO::Importers::Plaintext.read("bank2.dat") 25 | def read(path) 26 | @file_data = File.read(path).split("\n").map do |line| 27 | row = process_row(line.strip.split(/\s+/),['']) 28 | next if row == ["\x1A"] 29 | row 30 | end 31 | self 32 | end 33 | 34 | # Imports `Daru::DataFrame` from a Plaintext Importer instance 35 | # 36 | # @param fields [Array] An array of vectors. 
37 | # 38 | # @return [Daru::DataFrame] 39 | # 40 | # @example Initializing with fields 41 | # df = instance.call([:v1, :v2, :v3, :v4, :v5, :v6]) 42 | # 43 | # #=> # 44 | # # v1 v2 v3 v4 v5 v6 45 | # # 0 214.8 131.0 131.1 9.0 9.7 141.0 46 | # # 1 214.6 129.7 129.7 8.1 9.5 141.7 47 | # # 2 214.8 129.7 129.7 8.7 9.6 142.2 48 | # # 3 214.8 129.7 129.6 7.5 10.4 142.0 49 | # # 4 215.0 129.6 129.7 10.4 7.7 141.8 50 | # # 5 215.7 130.8 130.5 9.0 10.1 141.4 51 | # # 6 215.5 129.5 129.7 7.9 9.6 141.6 52 | # # 7 214.5 129.6 129.2 7.2 10.7 141.7 53 | # # 8 214.9 129.4 129.7 8.2 11.0 141.9 54 | # # 9 215.2 130.4 130.3 9.2 10.0 140.7 55 | # # 10 215.3 130.4 130.3 7.9 11.7 141.8 56 | # # 11 215.1 129.5 129.6 7.7 10.5 142.2 57 | # # 12 215.2 130.8 129.6 7.9 10.8 141.4 58 | # # 13 214.7 129.7 129.7 7.7 10.9 141.7 59 | # # 14 215.1 129.9 129.7 7.7 10.8 141.8 60 | # #... ... ... ... ... ... ... 61 | def call(fields) 62 | Daru::DataFrame.rows(@file_data, order: fields) 63 | end 64 | 65 | private 66 | 67 | INT_PATTERN = /^[-+]?\d+$/ 68 | FLOAT_PATTERN = /^[-+]?\d+[,.]?\d*(e-?\d+)?$/ 69 | 70 | def process_row(row,empty) 71 | row.to_a.map do |c| 72 | if empty.include?(c) 73 | # FIXME: As far as I can guess, it will never work. 74 | # It is called only inside `from_plaintext`, and there 75 | # data is splitted by `\s+` -- there is no chance that 76 | # "empty" (currently just '') will be between data?.. 
77 | nil 78 | else 79 | try_string_to_number(c) 80 | end 81 | end 82 | end 83 | 84 | def try_string_to_number(str) 85 | case str 86 | when INT_PATTERN 87 | str.to_i 88 | when FLOAT_PATTERN 89 | str.tr(',', '.').to_f 90 | else 91 | str 92 | end 93 | end 94 | end 95 | end 96 | end 97 | end 98 | -------------------------------------------------------------------------------- /lib/daru/io/importers/r_data.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/importers/rds' 2 | 3 | module Daru 4 | module IO 5 | module Importers 6 | # RData Importer Class, that extends `read_rdata` method to `Daru::DataFrame` 7 | # 8 | # @see Daru::IO::Importers::RDS For .rds format 9 | class RData < RDS 10 | Daru::DataFrame.register_io_module :read_rdata, self 11 | 12 | # Checks for required gem dependencies of RData Importer 13 | def initialize 14 | super 15 | end 16 | 17 | # Reads data from a Rdata file 18 | # 19 | # @!method self.read(path) 20 | # 21 | # @param path [String] Path to RData file, where the dataframe is to be imported from. 22 | # 23 | # @return [Daru::IO::Importers::RData] 24 | # 25 | # @example Reading from rdata file 26 | # instance = Daru::IO::Importers::RData.read('ACScounty.RData') 27 | def read(path) 28 | @instance = RSRuby.instance 29 | @instance.eval_R("load('#{path}')") 30 | self 31 | end 32 | 33 | # Imports a `Daru::DataFrame` from a RData Importer instance and rdata file 34 | # 35 | # @param variable [String] The variable to be imported from the 36 | # variables stored in the RData file. Please note that the R 37 | # variable to be imported from the RData file should be a 38 | # `data.frame` 39 | # 40 | # @return [Daru::DataFrame] 41 | # 42 | # @example Importing a particular variable 43 | # df = instance.call("ACS3") 44 | # 45 | # #=> # 46 | # # Abbreviati FIPS Non.US State cnty females.di ... 47 | # # 0 AL 1001 14.7 alabama autauga 13.8 ... 48 | # # 1 AL 1003 13.5 alabama baldwin 14.1 ... 
49 | # # 2 AL 1005 20.1 alabama barbour 16.1 ... 50 | # # 3 AL 1009 18.0 alabama blount 13.7 ... 51 | # # 4 AL 1015 18.6 alabama calhoun 12.9 ... 52 | # # ... ... ... ... ... ... ... ... 53 | def call(variable) 54 | @variable = variable.to_s 55 | 56 | validate_params 57 | 58 | process_dataframe(@instance.send(@variable.to_sym)) 59 | end 60 | 61 | private 62 | 63 | def validate_params 64 | valid_r_dataframe_variables = @instance.eval_R('Filter(function(x) is.data.frame(get(x)) , ls())') 65 | return if valid_r_dataframe_variables.include?(@variable) 66 | 67 | variable_type = @instance.eval_R("typeof(#{@variable})") 68 | raise ArgumentError, "Expected the given R variable (#{@variable}) to be a data.frame, got a "\ 69 | "#{variable_type} instead." 70 | end 71 | end 72 | end 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /lib/daru/io/importers/rds.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/importers/base' 2 | 3 | module Daru 4 | module IO 5 | module Importers 6 | # RDS Importer Class, that extends `read_rds` method to `Daru::DataFrame` 7 | # 8 | # @see Daru::IO::Importers::RData For .Rdata format 9 | class RDS < Base 10 | Daru::DataFrame.register_io_module :read_rds, self 11 | 12 | # Checks for required gem dependencies of RDS Importer 13 | def initialize 14 | optional_gem 'rsruby' 15 | end 16 | 17 | # Reads data from a rds file 18 | # 19 | # @!method self.read(path) 20 | # 21 | # @param path [String] Path to rds file, where the dataframe is to be 22 | # imported from. 
23 | # 24 | # @return [Daru::IO::Importers::RDS] 25 | # 26 | # @example Reading from rds file 27 | # instance = Daru::IO::Importers::RDS.read('bc_sites.rds') 28 | def read(path) 29 | @instance = RSRuby.instance.eval_R("readRDS('#{path}')") 30 | self 31 | end 32 | 33 | # Imports a `Daru::DataFrame` from a RDS Importer instance and rds file 34 | # 35 | # @return [Daru::DataFrame] 36 | # 37 | # @example Reading from a RDS file 38 | # df = instance.call 39 | # 40 | # #=> # 41 | # # area descriptio epa_reach format_ver latitude location location_c ... 42 | # # 0 016 GSPTN NaN 4.1 49.5 THOR IS 2MS22016 T ... 43 | # # 1 012 CSPT NaN 4.1 50.6167 MITC BY 2MN26012 M ... 44 | # # ... ... ... ... ... ... ... ... ... 45 | def call 46 | process_dataframe(@instance) 47 | end 48 | 49 | private 50 | 51 | def process_dataframe(data) 52 | data = data.map { |key, values| [key.to_sym, values.map { |val| convert_datatype(val) }] }.to_h 53 | Daru::DataFrame.new(data) 54 | end 55 | 56 | def convert_datatype(value) 57 | case value.to_s 58 | when 'NaN' then nil 59 | when value.to_f.to_s then value.to_f 60 | when value.to_i.to_s then value.to_i 61 | else value 62 | end 63 | end 64 | end 65 | end 66 | end 67 | end 68 | -------------------------------------------------------------------------------- /lib/daru/io/importers/redis.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/importers/base' 2 | 3 | module Daru 4 | module IO 5 | module Importers 6 | # Redis Importer Class, that extends `from_redis` method to `Daru::DataFrame` 7 | class Redis < Base 8 | Daru::DataFrame.register_io_module :from_redis, self 9 | 10 | # Checks for required gem dependencies of Redis Importer 11 | def initialize 12 | require 'json' 13 | optional_gem 'redis' 14 | end 15 | 16 | # Loads data from a given connection 17 | # 18 | # @!method self.from(connection) 19 | # 20 | # @param connection [Hash or Redis Instance] Either a Hash of *Redis* configurations, 21 | # 
or an existing *Redis* instance. For the hash configurations, have a 22 | # look at 23 | # [Redis#initialize](http://www.rubydoc.info/github/redis/redis-rb/Redis:initialize). 24 | # 25 | # @return [Daru::IO::Importers::Redis] 26 | # 27 | # @example Loading from a hash 28 | # instance = Daru::IO::Importers::Redis.from({url: "redis://:[password]@[hostname]:[port]/[db]"}) 29 | # 30 | # @example Loading from a Redis connection 31 | # instance = Daru::IO::Importers::Redis.from(Redis.new({url: "redis://:[password]@[hostname]:[port]/[db]"})) 32 | def from(connection={}) 33 | @client = get_client(connection) 34 | self 35 | end 36 | 37 | # Imports a `Daru::DataFrame` from a Redis Importer instance 38 | # 39 | # @param keys [Array] Redis key(s) from whom, the `Daru::DataFrame` 40 | # should be constructed. If no keys are given, all keys in the *Redis* 41 | # connection will be used. 42 | # @param match [String] A pattern to get matching keys. 43 | # @param count [Integer] Number of matching keys to be obtained. Defaults to 44 | # nil, to collect ALL matching keys. 
45 | # 46 | # @return [Daru::DataFrame] 47 | # 48 | # @example Importing with no options 49 | # # Say, the Redis connection has this setup 50 | # # Key "10001" => { "name" => "Tyrion", "age" => 32 }.to_json 51 | # # Key "10002" => { "name" => "Jamie", "age" => 37 }.to_json 52 | # # Key "10003" => { "name" => "Cersei", "age" => 37 }.to_json 53 | # # Key "10004" => { "name" => "Joffrey", "age" => 19 }.to_json 54 | # 55 | # df = instance.call 56 | # 57 | # #=> 58 | # # name age 59 | # # 10001 Tyrion 32 60 | # # 10002 Jamie 37 61 | # # 10003 Cersei 37 62 | # # 10004 Joffrey 19 63 | # 64 | # @example Importing with keys 65 | # # Say, the Redis connection has this setup 66 | # # Key "10001" => { "name" => "Tyrion", "age" => 32 }.to_json 67 | # # Key "10002" => { "name" => "Jamie", "age" => 37 }.to_json 68 | # # Key "10003" => { "name" => "Cersei", "age" => 37 }.to_json 69 | # # Key "10004" => { "name" => "Joffrey", "age" => 19 }.to_json 70 | # 71 | # df = instance.call("10001", "10002") 72 | # 73 | # #=> 74 | # # name age 75 | # # 10001 Tyrion 32 76 | # # 10002 Jamie 37 77 | # 78 | # @example Importing with query for matching keys and count 79 | # # Say, the Redis connection has this setup 80 | # # Key "key:1" => { "name" => "name1", "age" => "age1" }.to_json 81 | # # Key "key:2" => { "name" => "name2", "age" => "age2" }.to_json 82 | # # Key "key:3" => { "name" => "name3", "age" => "age3" }.to_json 83 | # # ... 84 | # # Key "key:2000" => { "name" => "name2000", "age" => "age2000" }.to_json 85 | # 86 | # df = instance.call(match: "key:1*", count: 200) 87 | # 88 | # #=> # 89 | # # name age 90 | # # key:1927 name1927 age1927 91 | # # key:1759 name1759 age1759 92 | # # key:1703 name1703 age1703 93 | # # key:1640 name1640 age1640 94 | # # ... ... ... 
95 | def call(*keys, match: nil, count: nil) 96 | @match = match 97 | @count = count 98 | @keys = keys 99 | @keys = choose_keys(*@keys).map(&:to_sym) 100 | 101 | vals = @keys.map { |key| ::JSON.parse(@client.get(key), symbolize_names: true) } 102 | Base.guess_parse(@keys, vals) 103 | end 104 | 105 | private 106 | 107 | def choose_keys(*keys) 108 | return keys.to_a unless keys.empty? 109 | 110 | cursor = nil 111 | # Loop to iterate through paginated results of Redis#scan. 112 | until cursor == '0' || (!@count.nil? && keys.count > (@count-1)) 113 | cursor, chunk = @client.scan(cursor, match: @match, count: @count) 114 | keys.concat(chunk).uniq! 115 | end 116 | return keys[0..-1] if @count.nil? 117 | keys[0..@count-1] 118 | end 119 | 120 | def get_client(connection) 121 | case connection 122 | when ::Redis 123 | connection 124 | when Hash 125 | ::Redis.new connection 126 | else 127 | raise ArgumentError, "Expected '#{connection}' to be either "\ 128 | 'a Hash or an initialized Redis instance, '\ 129 | "but received #{connection.class} instead." 
130 | end 131 | end 132 | end 133 | end 134 | end 135 | end 136 | -------------------------------------------------------------------------------- /lib/daru/io/importers/sql.rb: -------------------------------------------------------------------------------- 1 | require 'daru/io/importers/base' 2 | 3 | module Daru 4 | module IO 5 | module Importers 6 | # SQL Importer Class, that extends `from_sql` and `read_sql` methods to `Daru::DataFrame` 7 | class SQL < Base 8 | Daru::DataFrame.register_io_module :from_sql, self 9 | Daru::DataFrame.register_io_module :read_sql, self 10 | 11 | # Checks for required gem dependencies of SQL Importer 12 | def initialize 13 | optional_gem 'dbd-sqlite3', requires: 'dbd/SQLite3' 14 | optional_gem 'activerecord', '~> 4.0', requires: 'active_record' 15 | optional_gem 'dbi' 16 | optional_gem 'sqlite3' 17 | end 18 | 19 | # Loads from a DBI connection 20 | # 21 | # @!method self.from(dbh) 22 | # 23 | # @param dbh [DBI::DatabaseHandle] A DBI connection. 24 | # 25 | # @return [Daru::IO::Importers::SQL] 26 | # 27 | # @example Importing from a DBI connection 28 | # instance = Daru::IO::Importers::SQL.from(DBI.connect("DBI:Mysql:database:localhost", "user", "password")) 29 | def from(dbh) 30 | @dbh = dbh 31 | self 32 | end 33 | 34 | # Reads from a sqlite.db file 35 | # 36 | # @!method self.read(path) 37 | # 38 | # @param path [String] Path to a SQlite3 database file. 39 | # 40 | # @return [Daru::IO::Importers::SQL] 41 | # 42 | # @example Reading from a sqlite.db file 43 | # instance = Daru::IO::Importers::SQL.read('path/to/sqlite.db') 44 | def read(path) 45 | @dbh = attempt_sqlite3_connection(path) if Pathname(path).exist? 
46 | self 47 | end 48 | 49 | # Imports a `Daru::DataFrame` from SQL Importer instance 50 | # 51 | # @param query [String] The query to be executed 52 | # 53 | # @return [Daru::DataFrame] 54 | # 55 | # @example Importing with a SQL query 56 | # df = instance.call("SELECT * FROM test") 57 | # 58 | # #=> # 59 | # # id name age 60 | # # 0 1 Homer 20 61 | # # 1 2 Marge 30 62 | def call(query) 63 | @query = query 64 | @conn, @adapter = choose_adapter(@dbh, @query) 65 | df_hash = result_hash 66 | Daru::DataFrame.new(df_hash).tap(&:update) 67 | end 68 | 69 | private 70 | 71 | def attempt_sqlite3_connection(db) 72 | DBI.connect("DBI:SQLite3:#{db}") 73 | rescue SQLite3::NotADatabaseException 74 | raise ArgumentError, "Expected #{db} to point to a SQLite3 database" 75 | end 76 | 77 | def choose_adapter(db, query) 78 | query = String.try_convert(query) or 79 | raise ArgumentError, "Query must be a string, #{query.class} received" 80 | 81 | case db 82 | when DBI::DatabaseHandle 83 | [db, :dbi] 84 | when ::ActiveRecord::ConnectionAdapters::AbstractAdapter 85 | [db, :activerecord] 86 | else 87 | raise ArgumentError, "Unknown database adapter type #{db.class}" 88 | end 89 | end 90 | 91 | def column_names 92 | case @adapter 93 | when :dbi 94 | result.column_names 95 | when :activerecord 96 | result.columns 97 | end 98 | end 99 | 100 | def result 101 | case @adapter 102 | when :dbi 103 | @conn.execute(@query) 104 | when :activerecord 105 | @conn.exec_query(@query) 106 | end 107 | end 108 | 109 | def result_hash 110 | column_names 111 | .map(&:to_sym) 112 | .zip(rows.transpose) 113 | .to_h 114 | end 115 | 116 | def rows 117 | case @adapter 118 | when :dbi 119 | result.to_a.map(&:to_a) 120 | when :activerecord 121 | result.cast_values 122 | end 123 | end 124 | end 125 | end 126 | end 127 | end 128 | -------------------------------------------------------------------------------- /lib/daru/io/link.rb: -------------------------------------------------------------------------------- 1 | 
module Daru 2 | class DataFrame 3 | class << self 4 | # Links `Daru::DataFrame` Import / Export methods to corresponding 5 | # `Daru::IO` Importer / Exporter classes. Here is the list of linkages: 6 | # 7 | # #### Importers 8 | # 9 | # | `Daru::DataFrame` method | `Daru::IO::Importers` class | 10 | # | ----------------------------------- | -----------------------------------------| 11 | # | `Daru::DataFrame.from_activerecord` | {Daru::IO::Importers::ActiveRecord#from} | 12 | # | `Daru::DataFrame.read_avro` | {Daru::IO::Importers::Avro#read} | 13 | # | `Daru::DataFrame.read_csv` | {Daru::IO::Importers::CSV#read} | 14 | # | `Daru::DataFrame.read_excel` | {Daru::IO::Importers::Excel#read}, | 15 | # | | {Daru::IO::Importers::Excelx#read} | 16 | # | `Daru::DataFrame.read_html` | {Daru::IO::Importers::HTML#read} | 17 | # | `Daru::DataFrame.from_json` | {Daru::IO::Importers::JSON#from} | 18 | # | `Daru::DataFrame.read_json` | {Daru::IO::Importers::JSON#read} | 19 | # | `Daru::DataFrame.from_mongo` | {Daru::IO::Importers::Mongo#from} | 20 | # | `Daru::DataFrame.read_plaintext` | {Daru::IO::Importers::Plaintext#read} | 21 | # | `Daru::DataFrame.read_rails_log` | {Daru::IO::Importers::RailsLog#read} | 22 | # | `Daru::DataFrame.read_rdata` | {Daru::IO::Importers::RData#read} | 23 | # | `Daru::DataFrame.read_rds` | {Daru::IO::Importers::RDS#read} | 24 | # | `Daru::DataFrame.from_redis` | {Daru::IO::Importers::Redis#from} | 25 | # | `Daru::DataFrame.from_sql` | {Daru::IO::Importers::SQL#from} | 26 | # 27 | # #### Exporters 28 | # 29 | # | `Daru::DataFrame` instance method | `Daru::IO::Exporters` class | 30 | # | --------------------------------- | -----------------------------------| 31 | # | `Daru::DataFrame.to_avro_string` | {Daru::IO::Exporters::Avro#to_s} | 32 | # | `Daru::DataFrame.write_avro` | {Daru::IO::Exporters::Avro#write} | 33 | # | `Daru::DataFrame.to_csv_string` | {Daru::IO::Exporters::CSV#to_s} | 34 | # | `Daru::DataFrame.write_csv` | 
{Daru::IO::Exporters::CSV#write} | 35 | # | `Daru::DataFrame.to_excel_string` | {Daru::IO::Exporters::Excel#to_s} | 36 | # | `Daru::DataFrame.write_excel` | {Daru::IO::Exporters::Excel#write} | 37 | # | `Daru::DataFrame.to_json` | {Daru::IO::Exporters::JSON#to} | 38 | # | `Daru::DataFrame.to_json_string` | {Daru::IO::Exporters::JSON#to_s} | 39 | # | `Daru::DataFrame.write_json` | {Daru::IO::Exporters::JSON#write} | 40 | # | `Daru::DataFrame.to_rds_string` | {Daru::IO::Exporters::RDS#to_s} | 41 | # | `Daru::DataFrame.write_rds` | {Daru::IO::Exporters::RDS#write} | 42 | # | `Daru::DataFrame.to_sql` | {Daru::IO::Exporters::SQL#to} | 43 | # 44 | # @param function [Symbol] Function name to be monkey-patched into +Daru::DataFrame+ 45 | # @param instance [Class] The Daru-IO class to be linked to monkey-patched function 46 | # 47 | # @return A `Daru::DataFrame` class method in case of Importer, and instance 48 | # variable method in case of Exporter. 49 | def register_io_module(function, instance=nil, &block) 50 | return define_singleton_method(function, &block) if block_given? 51 | 52 | case function.to_s 53 | when /\Ato_.*_string\Z/, /\Ato_/, /\Awrite_/ then register_exporter(function, instance) 54 | when /\Afrom_/, /\Aread_/ then register_importer(function, instance) 55 | else raise ArgumentError, 'Invalid function name given to monkey-patch into Daru::DataFrame.'
56 | end 57 | end 58 | 59 | private 60 | 61 | def register_exporter(function, instance) 62 | define_method(function) do |*args, &io_block| 63 | case function.to_s 64 | when /\Ato_.*_string\Z/ then instance.new(self, *args, &io_block).to_s 65 | when /\Ato_/ then instance.new(self, *args, &io_block).to 66 | when /Awrite_/ then instance.new(self, *args[1..-1], &io_block).write(*args[0]) 67 | end 68 | end 69 | end 70 | 71 | def register_importer(function, instance) 72 | define_singleton_method(function) do |*args, &io_block| 73 | case function.to_s 74 | when /\Afrom_/ then instance.new.from(*args[0]).call(*args[1..-1], &io_block) 75 | when /\Aread_/ then instance.new.read(*args[0]).call(*args[1..-1], &io_block) 76 | end 77 | end 78 | end 79 | end 80 | end 81 | end 82 | -------------------------------------------------------------------------------- /lib/daru/io/version.rb: -------------------------------------------------------------------------------- 1 | module Daru 2 | module IO 3 | VERSION = '0.1.0'.freeze 4 | end 5 | end 6 | -------------------------------------------------------------------------------- /spec/daru/io/exporters/avro_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Exporters::Avro do 2 | subject do 3 | Daru::DataFrame.new( 4 | ::Avro::DataFile::Reader.new( 5 | StringIO.new(File.read(tempfile.path)), 6 | ::Avro::IO::DatumReader.new 7 | ).to_a 8 | ) 9 | end 10 | 11 | include_context 'exporter setup' 12 | 13 | let(:filename) { 'test.avro' } 14 | 15 | before { described_class.new(df, schema).write(tempfile.path) } 16 | 17 | context 'writes DataFrame to an Avro file' do 18 | context 'when schema is Hash' do 19 | let(:schema) do 20 | { 21 | 'type' => 'record', 22 | 'name' => 'test', 23 | 'fields' => [ 24 | {'name' => 'a', 'type' => 'int'}, 25 | {'name' => 'b', 'type' => 'int'}, 26 | {'name' => 'c', 'type' => %w[int string]}, 27 | {'name' => 'd', 'type' => %w[int string null]} 28 | ] 29 
| } 30 | end 31 | 32 | it_behaves_like 'exact daru dataframe', 33 | ncols: 4, 34 | nrows: 5, 35 | order: %w[a b c d], 36 | data: [ 37 | [1,2,3,4,5], 38 | [11,22,33,44,55], 39 | ['a', 'g', 4, 5,'addadf'], 40 | [nil, 23, 4,'a','ff'] 41 | ] 42 | end 43 | 44 | context 'when schema is Avro::Schema' do 45 | let(:schema) do 46 | ::Avro::Schema.parse( 47 | { 48 | 'type' => 'record', 49 | 'name' => 'test', 50 | 'fields' => [ 51 | {'name' => 'a', 'type' => 'int'}, 52 | {'name' => 'b', 'type' => 'int'}, 53 | {'name' => 'c', 'type' => %w[int string]}, 54 | {'name' => 'd', 'type' => %w[int string null]} 55 | ] 56 | }.to_json 57 | ) 58 | end 59 | 60 | it_behaves_like 'exact daru dataframe', 61 | ncols: 4, 62 | nrows: 5, 63 | order: %w[a b c d], 64 | data: [ 65 | [1,2,3,4,5], 66 | [11,22,33,44,55], 67 | ['a', 'g', 4, 5,'addadf'], 68 | [nil, 23, 4,'a','ff'] 69 | ] 70 | end 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /spec/daru/io/exporters/csv_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Exporters::CSV do 2 | subject { File.open(tempfile.path, &:readline).chomp.split(',', -1) } 3 | 4 | include_context 'exporter setup' 5 | 6 | let(:filename) { 'test.csv' } 7 | 8 | before { described_class.new(df, opts).write(tempfile.path) } 9 | 10 | context 'writes DataFrame to a CSV file' do 11 | subject { Daru::DataFrame.rows content[1..-1].map { |x| x.map { |y| convert(y) } }, order: content[0] } 12 | 13 | let(:opts) { {} } 14 | let(:content) { CSV.read(tempfile.path) } 15 | 16 | it_behaves_like 'exact daru dataframe', 17 | ncols: 4, 18 | nrows: 5, 19 | order: %w[a b c d], 20 | data: [ 21 | [1,2,3,4,5], 22 | [11,22,33,44,55], 23 | ['a', 'g', 4, 5,'addadf'], 24 | [nil, 23, 4,'a','ff'] 25 | ] 26 | end 27 | 28 | context 'writes headers unless headers=false' do 29 | it { is_expected.to be_an(Array) } 30 | it { is_expected.to eq(df.vectors.to_a) } 31 | end 32 | 33 | 
context 'does not write headers when headers=false' do 34 | let(:headers) { false } 35 | let(:opts) { {headers: headers} } 36 | 37 | it { is_expected.to be_an(Array) } 38 | it { is_expected.to eq(df.head(1).map { |v| (v.first || '').to_s }) } 39 | end 40 | 41 | context 'writes convert_comma only on float values' do 42 | subject { CSV.read(tempfile.path, col_sep: ';') } 43 | 44 | let(:df) { Daru::DataFrame.new('a' => [1, 4.4, nil, 'Sr. Arthur']) } 45 | let(:opts) { {convert_comma: true, col_sep: ';'} } 46 | 47 | it { is_expected.to eq([['a'], ['1'], ['4,4'], [''], ['Sr. Arthur']]) } 48 | end 49 | 50 | context 'writes into .csv.gz format' do 51 | subject { Zlib::GzipReader.new(open(tempfile.path)).read.split("\n") } 52 | 53 | let(:opts) { {compression: :gzip} } 54 | let(:filename) { 'test.csv.gz' } 55 | 56 | it { is_expected.to be_an(Array).and all be_a(String) } 57 | it { is_expected.to eq(['a,b,c,d', '1,11,a,', '2,22,g,23', '3,33,4,4', '4,44,5,a', '5,55,addadf,ff']) } 58 | end 59 | 60 | context 'writes into .csv.gz format with only order' do 61 | subject { Zlib::GzipReader.new(open(tempfile.path)).read.split("\n") } 62 | 63 | let(:df) { Daru::DataFrame.new('a' => [], 'b' => [], 'c' => [], 'd' => []) } 64 | let(:opts) { {compression: :gzip} } 65 | let(:filename) { 'test.csv.gz' } 66 | 67 | it { is_expected.to be_an(Array).and all be_a(String) } 68 | it { is_expected.to eq(%w[a,b,c,d]) } 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /spec/daru/io/exporters/excel_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Exporters::Excel do 2 | include_context 'exporter setup' 3 | 4 | let(:filename) { 'test_write.xls' } 5 | let(:content) { Spreadsheet.open tempfile.path } 6 | let(:opts) { {header: {color: :blue}, data: {color: :red}, index: {color: :green}} } 7 | 8 | before { described_class.new(df, **opts).write(tempfile.path) } 9 | 10 | context 
'writes to excel spreadsheet' do 11 | subject do 12 | Daru::DataFrame.rows( 13 | Spreadsheet.open(tempfile.path).worksheet(0).rows[1..-1].map(&:to_a), 14 | order: Spreadsheet.open(tempfile.path).worksheet(0).rows[0].to_a 15 | ) 16 | end 17 | 18 | let(:opts) { {index: false} } 19 | 20 | it_behaves_like 'exact daru dataframe', 21 | ncols: 4, 22 | nrows: 5, 23 | order: %w[a b c d], 24 | data: [ 25 | [1,2,3,4,5], 26 | [11,22,33,44,55], 27 | ['a', 'g', 4, 5,'addadf'], 28 | [nil, 23, 4,'a','ff'] 29 | ] 30 | end 31 | 32 | context 'writes to excel spreadsheet with header formatting' do 33 | subject { Spreadsheet.open(tempfile.path).worksheet(0).rows[0].format(0).font.color } 34 | 35 | it { is_expected.to eq(:blue) } 36 | end 37 | 38 | context 'writes to excel spreadsheet with index formatting' do 39 | subject { Spreadsheet.open(tempfile.path).worksheet(0).rows[1].format(0).font.color } 40 | 41 | it { is_expected.to eq(:green) } 42 | end 43 | 44 | context 'writes to excel spreadsheet with data formatting' do 45 | subject { Spreadsheet.open(tempfile.path).worksheet(0).rows[1].format(1).font.color } 46 | 47 | it { is_expected.to eq(:red) } 48 | end 49 | 50 | context 'writes to excel spreadsheet with multi-index' do 51 | subject { Spreadsheet.open(tempfile.path).worksheet(0).rows } 52 | 53 | let(:df) do 54 | Daru::DataFrame.new( 55 | [[1,2],[3,4]], 56 | order: %i[x y], 57 | index: [%i[a b c], %i[d e f]] 58 | ) 59 | end 60 | 61 | it { is_expected.to eq([[' ', ' ', ' ', 'x', 'y'], ['a', 'b', 'c', 1, 3], ['d', 'e', 'f', 2, 4]]) } 62 | end 63 | end 64 | -------------------------------------------------------------------------------- /spec/daru/io/exporters/json_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Exporters::JSON do 2 | include_context 'exporter setup' 3 | 4 | subject { JSON.parse(File.read(tempfile.path)) } 5 | 6 | let(:df) do 7 | Daru::DataFrame.new( 8 | [ 9 | {name: 'Jon Snow', age: 18, sex: 
'Male'}, 10 | {name: 'Rhaegar Targaryen', age: 54, sex: 'Male'}, 11 | {name: 'Lyanna Stark', age: 36, sex: 'Female'} 12 | ], 13 | order: %i[name age sex], 14 | index: %i[child dad mom] 15 | ) 16 | end 17 | let(:opts) { {} } 18 | let(:orient) { :records } 19 | let(:pretty) { true } 20 | let(:filename) { 'test.json' } 21 | 22 | before { described_class.new(df, pretty: pretty, orient: orient, **opts).write(tempfile.path) } 23 | 24 | context 'writes DataFrame with default jsonpath options' do 25 | it { is_expected.to be_an(Array).and all be_a(Hash) } 26 | its(:count) { is_expected.to eq(3) } 27 | its('first.keys') { is_expected.to match_array(%w[name age sex]) } 28 | end 29 | 30 | context 'writes DataFrame with orient: :values' do 31 | let(:orient) { :values } 32 | 33 | it { is_expected.to be_an(Array).and all be_an(Array) } 34 | its(:count) { is_expected.to eq(3) } 35 | its(:first) { is_expected.to eq(['Jon Snow', 'Rhaegar Targaryen', 'Lyanna Stark']) } 36 | end 37 | 38 | context 'writes DataFrame with orient: :split' do 39 | let(:orient) { :split } 40 | 41 | it { is_expected.to be_a(Hash).and all be_an(Array) } 42 | its(:count) { is_expected.to eq(3) } 43 | its(:keys) { is_expected.to eq(%w[vectors index data]) } 44 | end 45 | 46 | context 'writes DataFrame with orient: :index' do 47 | let(:orient) { :index } 48 | 49 | it { is_expected.to be_an(Array).and all be_a(Hash) } 50 | its(:count) { is_expected.to eq(3) } 51 | its(:first) { is_expected.to eq('child' => {'sex' => 'Male', 'age' => 18, 'name' => 'Jon Snow'}) } 52 | end 53 | 54 | context 'writes DataFrame with nested jsonpath options' do 55 | let(:opts) { {name: '$.person.name', age: '$.person.age', sex: '$.gender', index: '$.relation'} } 56 | 57 | it { is_expected.to be_an(Array).and all be_a(Hash) } 58 | its(:first) do 59 | is_expected.to eq( 60 | 'gender' => 'Male', 61 | 'relation' => 'child', 62 | 'person' => {'age' => 18, 'name' => 'Jon Snow'} 63 | ) 64 | end 65 | end 66 | 67 | context 'writes DataFrame with 
dynamic jsonpath options' do 68 | let(:opts) { {age: '$.{index}.{name}.age', sex: '$.{index}.{name}.gender'} } 69 | 70 | it { is_expected.to be_an(Array).and all be_a(Hash) } 71 | its(:first) { is_expected.to eq('child' => {'Jon Snow' => {'age' => 18, 'gender' => 'Male'}}) } 72 | end 73 | 74 | context 'writes DataFrame with block manipulation' do 75 | before do 76 | described_class.new(df, orient: orient, pretty: pretty) do |json| 77 | json.map { |j| [j.keys.first, j.values.first] }.to_h 78 | end.write(tempfile.path) 79 | end 80 | 81 | let(:orient) { :index } 82 | 83 | it { is_expected.to be_a(Hash) } 84 | its(:keys) { is_expected.to eq(%w[child dad mom]) } 85 | its('values.first') { is_expected.to eq('sex' => 'Male', 'age' => 18, 'name' => 'Jon Snow') } 86 | end 87 | end 88 | -------------------------------------------------------------------------------- /spec/daru/io/exporters/r_data_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Exporters::RData do 2 | subject { Daru::DataFrame.new(instance.send(variables[0].to_sym)) } 3 | 4 | include_context 'exporter setup' 5 | 6 | let(:instance) { RSRuby.instance } 7 | let(:filename) { 'test.RData' } 8 | let(:variables) { instance.eval_R("load('#{tempfile.path}')") } 9 | 10 | before { described_class.new(**opts).write(tempfile.path) } 11 | 12 | context 'writes DataFrame to a RData file' do 13 | let(:opts) { {:'first.df' => df, :'last.df' => df} } 14 | 15 | it_behaves_like 'exact daru dataframe', 16 | ncols: 4, 17 | nrows: 5, 18 | order: %w[a b c d], 19 | data: [ 20 | [1.0,2.0,3.0,4.0,5.0], 21 | [11.0,22.0,33.0,44.0,55.0], 22 | %w[a g 4 5 addadf], 23 | %w[NA 23 4 a ff] 24 | ] 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /spec/daru/io/exporters/rds_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Exporters::RDS do 2 | subject { 
Daru::DataFrame.new(RSRuby.instance.eval_R("readRDS('#{tempfile.path}')")) } 3 | 4 | include_context 'exporter setup' 5 | 6 | let(:variable) { 'test.dataframe' } 7 | let(:filename) { 'test.rds' } 8 | 9 | before { described_class.new(df, variable).write(tempfile.path) } 10 | 11 | context 'writes DataFrame to a RDS file' do 12 | it_behaves_like 'exact daru dataframe', 13 | ncols: 4, 14 | nrows: 5, 15 | order: %w[a b c d], 16 | data: [ 17 | [1.0,2.0,3.0,4.0,5.0], 18 | [11.0,22.0,33.0,44.0,55.0], 19 | %w[a g 4 5 addadf], 20 | %w[NA 23 4 a ff] 21 | ] 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /spec/daru/io/exporters/sql_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Exporters::SQL do 2 | include_context 'exporter setup' 3 | 4 | let(:dbh) { double } 5 | let(:table) { 'test' } 6 | let(:prepared_query) { double } 7 | 8 | it 'writes to an SQL table' do 9 | expect(dbh).to receive(:prepare) 10 | .with("INSERT INTO #{table} (a,b,c,d) VALUES (?,?,?,?)") 11 | .and_return(prepared_query) 12 | 13 | df.each_row { |r| allow(prepared_query).to receive(:execute).and_return(*r.to_a) } 14 | 15 | described_class.new(df, dbh, table).to 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /spec/daru/io/importers/active_record_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::ActiveRecord do 2 | include_context 'sqlite3 database setup' 3 | context 'without specifying field names' do 4 | let(:fields) { [] } 5 | 6 | it_behaves_like 'exact daru dataframe', 7 | ncols: 3, 8 | nrows: 2, 9 | order: %i[id name age], 10 | data: [[1,2],%w[Homer Marge],[20, 30]] 11 | 12 | its('id.to_a.first') { is_expected.to eq(1) } 13 | end 14 | 15 | context 'with specifying field names in parameters' do 16 | let(:fields) { %I[name age] } 17 | 18 | 
it_behaves_like 'exact daru dataframe', 19 | ncols: 2, 20 | nrows: 2, 21 | order: %i[name age], 22 | data: [%w[Homer Marge],[20, 30]] 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /spec/daru/io/importers/avro_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::Avro do 2 | subject { described_class.read(path).call } 3 | 4 | let(:path) { '' } 5 | 6 | context 'on complex numbers avro file' do 7 | let(:path) { 'spec/fixtures/avro/one_complex.avro' } 8 | 9 | it_behaves_like 'exact daru dataframe', 10 | ncols: 2, 11 | nrows: 1, 12 | order: %w[re im], 13 | data: [[100],[200]] 14 | end 15 | 16 | context 'on twitter avro file' do 17 | let(:path) { 'spec/fixtures/avro/twitter.avro' } 18 | 19 | it_behaves_like 'exact daru dataframe', 20 | ncols: 3, 21 | nrows: 10, 22 | order: %w[username tweet timestamp], 23 | data: [ 24 | %w[miguno BlizzardCS DarkTemplar VoidRay VoidRay DarkTemplar Immortal Immortal VoidRay DarkTemplar], 25 | [ 26 | 'Rock: Nerf paper, scissors is fine.', 27 | 'Works as intended. Terran is IMBA.', 28 | 'From the shadows I come!', 29 | 'Prismatic core online!', 30 | 'Fire at will, commander.', 31 | 'I am the blade of Shakuras!', 32 | 'I return to serve!', 33 | 'En Taro Adun!', 34 | 'There is no greater void than the one between your ears.', 35 | 'I strike from the shadows!' 
36 | ], 37 | [ 38 | 136_615_068_1, 136_615_448_1, 136_615_468_1, 136_616_000_0, 136_616_001_0, 39 | 136_617_468_1, 136_617_568_1, 136_617_628_3, 136_617_630_0, 136_618_468_1 40 | ] 41 | ] 42 | end 43 | 44 | context 'on users avro file' do 45 | let(:path) { 'spec/fixtures/avro/users.avro' } 46 | 47 | it_behaves_like 'exact daru dataframe', 48 | ncols: 3, 49 | nrows: 2, 50 | order: %w[name favorite_color favorite_numbers], 51 | data: [%w[Alyssa Ben],[nil, 'red'],[[3,9,15,20], []]] 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /spec/daru/io/importers/csv_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::CSV do 2 | ALL_CSV_FILES = %w[ 3 | boolean_converter_test matrix_test repeated_fields scientific_notation 4 | sales-funnel column_headers_only empty_rows_test 5 | ].freeze 6 | 7 | before do 8 | ALL_CSV_FILES.each do |file| 9 | WebMock 10 | .stub_request(:get,"http://dummy-remote-url/#{file}.csv") 11 | .to_return(status: 200, body: File.read("spec/fixtures/csv/#{file}.csv")) 12 | WebMock.disable_net_connect!(allow: /dummy-remote-url/) 13 | end 14 | end 15 | 16 | subject { described_class.new.read(path).call(opts) } 17 | 18 | let(:path) { 'spec/fixtures/csv/matrix_test.csv' } 19 | let(:opts) { {col_sep: ' ', headers: true} } 20 | 21 | context 'loads from a CSV file' do 22 | let('subject.vectors') { %I[image_resolution mls true_transform].to_index } 23 | 24 | it_behaves_like 'exact daru dataframe', 25 | ncols: 3, 26 | nrows: 99, 27 | order: %i[image_resolution true_transform mls], 28 | :'image_resolution.first' => 6.55779, 29 | :'true_transform.first' => '-0.2362347,0.6308649,0.7390552,0,0.6523478'\ 30 | ',-0.4607318,0.6018043,0,0.7201635,0.6242881'\ 31 | ',-0.3027024,4262.65,0,0,0,1' 32 | end 33 | 34 | context 'works properly for repeated headers' do 35 | let(:path) { 'spec/fixtures/csv/repeated_fields.csv' } 36 | let(:opts) { 
{header_converters: :symbol} } 37 | 38 | it_behaves_like 'exact daru dataframe', 39 | ncols: 7, 40 | nrows: 6, 41 | order: %w[id name_1 age_1 city a1 name_2 age_2], 42 | age_2: Daru::Vector.new([3, 4, 5, 6, nil, 8]) 43 | end 44 | 45 | context 'accepts scientific notation as float' do 46 | let(:path) { 'spec/fixtures/csv/scientific_notation.csv' } 47 | let(:opts) { {order: %w[x y]} } 48 | let(:df) { subject } 49 | 50 | it_behaves_like 'exact daru dataframe', 51 | ncols: 2, 52 | nrows: 3, 53 | order: %w[x y] 54 | 55 | # @note If a better syntax is possible without naming the subject, 56 | # feel free to suggest / adopt it. 57 | # 58 | # Signed off by @athityakumar on 31/05/2017 at 10:25PM 59 | it 'checks for float accuracy' do 60 | y = [9.629587310436753e+127, 1.9341543147883677e+129, 3.88485279048245e+130] 61 | y.zip(df['y']).each do |y_expected, y_ds| 62 | expect(y_ds).to be_within(0.001).of(y_expected) 63 | end 64 | end 65 | end 66 | 67 | context 'follows the order of columns given in CSV' do 68 | let(:path) { 'spec/fixtures/csv/sales-funnel.csv' } 69 | let(:opts) { {} } 70 | 71 | it_behaves_like 'exact daru dataframe', 72 | ncols: 8, 73 | nrows: 17, 74 | order: %w[Account Name Rep Manager Product Quantity Price Status] 75 | end 76 | 77 | context 'parses empty dataframe from CSV with only headers' do 78 | let(:path) { 'spec/fixtures/csv/column_headers_only.csv' } 79 | let(:opts) { {} } 80 | 81 | it_behaves_like 'exact daru dataframe', 82 | ncols: 3, 83 | nrows: 0, 84 | order: %w[col0 col1 col2] 85 | end 86 | 87 | context 'skips rows from CSV files with headers option' do 88 | let(:path) { 'spec/fixtures/csv/sales-funnel.csv' } 89 | let(:opts) { {skiprows: 8, headers: true} } 90 | 91 | it_behaves_like 'exact daru dataframe', 92 | ncols: 8, 93 | nrows: 9, 94 | order: %i[account name rep manager product quantity price status] 95 | end 96 | 97 | context 'skips rows from CSV files without headers option' do 98 | let(:path) { 'spec/fixtures/csv/sales-funnel.csv' } 99 | 
let(:opts) { {skiprows: 8} } 100 | 101 | it_behaves_like 'exact daru dataframe', 102 | ncols: 8, 103 | nrows: 9, 104 | order: %w[Account Name Rep Manager Product Quantity Price Status] 105 | end 106 | 107 | context 'checks for boolean converter' do 108 | let(:path) { 'spec/fixtures/csv/boolean_converter_test.csv' } 109 | let(:opts) { {converters: [:boolean]} } 110 | 111 | it_behaves_like 'exact daru dataframe', 112 | ncols: 22, 113 | nrows: 4 114 | its('Domestic.to_a') { is_expected.to all be_boolean } 115 | end 116 | 117 | context 'checks for skip_blanks option to skip empty rows' do 118 | let(:path) { 'spec/fixtures/csv/empty_rows_test.csv' } 119 | 120 | it_behaves_like 'exact daru dataframe', 121 | ncols: 3, 122 | nrows: 13 123 | end 124 | 125 | context 'checks for equal parsing of csv and csv.gz files' do 126 | ALL_CSV_FILES.each do |file| 127 | before { Zlib::GzipWriter.open(path) { |gz| gz.write File.read(csv_path) } } 128 | 129 | let(:csv_path) { "spec/fixtures/csv/#{file}.csv" } 130 | let(:tempfile) { Tempfile.new("#{file}.csv.gz") } 131 | let(:csv) { described_class.read(csv_path).call } 132 | let(:path) { tempfile.path } 133 | let(:opts) { {compression: :gzip} } 134 | 135 | it_behaves_like 'a daru dataframe' 136 | it { is_expected.to eq(csv) } 137 | end 138 | end 139 | 140 | context 'checks for equal parsing of local CSV files and remote CSV files' do 141 | ALL_CSV_FILES.each do |file| 142 | let(:local) { described_class.read("spec/fixtures/csv/#{file}.csv").call } 143 | let(:path) { "http://dummy-remote-url/#{file}.csv" } 144 | let(:opts) { {} } 145 | 146 | it_behaves_like 'a daru dataframe' 147 | it { is_expected.to eq(local) } 148 | end 149 | end 150 | end 151 | -------------------------------------------------------------------------------- /spec/daru/io/importers/excel_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::Excel do 2 | context 'loads from excel spreadsheet' do 
3 | subject { described_class.read(path).call } 4 | 5 | let(:path) { 'spec/fixtures/excel/test_xls.xls' } 6 | 7 | it_behaves_like 'exact daru dataframe', 8 | ncols: 5, 9 | nrows: 6, 10 | order: %i[id name age city a1], 11 | data: [ 12 | (1..6).to_a, 13 | %w[Alex Claude Peter Franz George Fernand], 14 | [20, 23, 25, nil, 5.5, nil], 15 | ['New York', 'London', 'London', 'Paris', 'Tome', nil], 16 | ['a,b', 'b,c', 'a', nil, 'a,b,c', nil] 17 | ], 18 | :'age.to_a.last' => nil 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /spec/daru/io/importers/excelx_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::Excelx do 2 | subject { described_class.read(path).call(opts) } 3 | 4 | let(:opts) { {} } 5 | 6 | context 'when sheet is not specified' do 7 | let(:path) { 'spec/fixtures/excelx/Microcode.xlsx' } 8 | 9 | it_behaves_like 'exact daru dataframe', 10 | ncols: 32, 11 | nrows: 37, 12 | index: (0..36).to_a, 13 | :'State.first' => 'FETCH0' 14 | end 15 | 16 | context 'when sheet name is given' do 17 | let(:path) { 'spec/fixtures/excelx/LOBSTAHS_rt.windows.xlsx' } 18 | let(:opts) { {sheet: 'LOBSTAHS_rt.windows'} } 19 | 20 | it_behaves_like 'exact daru dataframe', 21 | ncols: 3, 22 | nrows: 93, 23 | order: %w[lipid_class rt_win_max rt_win_min], 24 | index: (0..92).to_a, 25 | :'lipid_class.first' => 'DGCC' 26 | end 27 | 28 | context 'when sheet contains nil elements' do 29 | let(:path) { 'spec/fixtures/excelx/Stock-counts-sheet.xlsx' } 30 | let(:opts) { {sheet: 2} } 31 | 32 | it_behaves_like 'exact daru dataframe', 33 | ncols: 7, 34 | nrows: 15, 35 | order: [ 36 | 'Status','Stock count number','Item code','New','Description', 37 | 'Stock count date','Offset G/L Inventory' 38 | ], 39 | index: (0..14).to_a, 40 | :'Item code.first' => nil, 41 | :'Stock count number.first' => 1 42 | end 43 | 44 | context 'when skipping rows and columns' do 45 | let(:path) 
{ 'spec/fixtures/excelx/pivot.xlsx' } 46 | let(:opts) { {sheet: 'Data1', skiprows: 2, skipcols: 1} } 47 | 48 | it_behaves_like 'exact daru dataframe', 49 | ncols: 9, 50 | nrows: 2155, 51 | index: (0..2154).to_a, 52 | :'Unit Price.first' => 14 53 | end 54 | 55 | before do 56 | %w[LOBSTAHS_rt.windows Microcode Stock-counts-sheet].each do |file| 57 | WebMock 58 | .stub_request(:get,"http://dummy-remote-url/#{file}.xlsx") 59 | .to_return(status: 200, body: File.read("spec/fixtures/excelx/#{file}.xlsx")) 60 | WebMock.disable_net_connect!(allow: /dummy-remote-url/) 61 | end 62 | end 63 | 64 | context 'checks for equal parsing of local XLSX files and remote XLSX files' do 65 | %w[LOBSTAHS_rt.windows Microcode Stock-counts-sheet].each do |file| 66 | let(:local) { described_class.read("spec/fixtures/excelx/#{file}.xlsx").call } 67 | let(:path) { "http://dummy-remote-url/#{file}.xlsx" } 68 | 69 | it { is_expected.to be_an(Daru::DataFrame) } 70 | it { is_expected.to eq(local) } 71 | end 72 | end 73 | end 74 | -------------------------------------------------------------------------------- /spec/daru/io/importers/html_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::HTML do 2 | subject { described_class.read(path).call(opts)[df_index] } 3 | 4 | let(:opts) { {} } 5 | let(:df_index) { 0 } 6 | 7 | context 'in wiki info table' do 8 | let(:path) { 'spec/fixtures/html/wiki_table_info.html' } 9 | let(:order) { %w[FName LName Age] } 10 | let(:index) { %w[One Two Three Four Five Six Seven] } 11 | let(:name) { 'Wikipedia Information Table' } 12 | 13 | context 'returns default dataframe' do 14 | it_behaves_like 'exact daru dataframe', 15 | ncols: 3, 16 | nrows: 7, 17 | order: ['First name','Last name','Age'], 18 | data: [ 19 | %w[Tinu Blaszczyk Lily Olatunkboh Adrienne Axelia Jon-Kabat], 20 | %w[Elejogun Kostrzewski McGarrett Chijiaku Anthoula Athanasios Zinn], 21 | %w[14 25 16 22 22 22 22] 22 | ] 23 | end 
24 | 25 | context 'returns user-modified dataframe' do 26 | let(:opts) { {order: order, index: index, name: name} } 27 | 28 | it_behaves_like 'exact daru dataframe', 29 | ncols: 3, 30 | nrows: 7, 31 | order: %w[FName LName Age], 32 | index: %w[One Two Three Four Five Six Seven], 33 | name: 'Wikipedia Information Table', 34 | data: [ 35 | %w[Tinu Blaszczyk Lily Olatunkboh Adrienne Axelia Jon-Kabat], 36 | %w[Elejogun Kostrzewski McGarrett Chijiaku Anthoula Athanasios Zinn], 37 | %w[14 25 16 22 22 22 22] 38 | ] 39 | end 40 | end 41 | 42 | context 'in wiki climate data' do 43 | let(:path) { 'spec/fixtures/html/wiki_climate.html' } 44 | 45 | context 'returns default dataframe' do 46 | it_behaves_like 'exact daru dataframe', 47 | ncols: 13, 48 | nrows: 10, 49 | order: %w[Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec Year], 50 | index: [ 51 | 'Record high °C (°F)', 'Average high °C (°F)', 'Daily mean °C (°F)', 52 | 'Average low °C (°F)', 'Record low °C (°F)', 53 | 'Average rainfall mm (inches)', 'Average rainy days', 54 | 'Average relative humidity (%)', 'Mean monthly sunshine hours', 55 | 'Mean daily sunshine hours' 56 | ] 57 | end 58 | end 59 | 60 | context 'with valid html table markups' do 61 | let(:path) { 'spec/fixtures/html/valid_markup.html' } 62 | let(:index) { %w[W X Y Z] } 63 | let(:name) { 'Small HTML table with index' } 64 | 65 | context 'returns user-modified dataframe' do 66 | let(:opts) { {index: index, name: name} } 67 | 68 | it_behaves_like 'exact daru dataframe', 69 | ncols: 2, 70 | nrows: 4, 71 | order: %w[a b], 72 | index: %w[W X Y Z], 73 | name: 'Small HTML table with index', 74 | data: [%w[6 4 9 7], %w[7 0 4 0]] 75 | end 76 | end 77 | 78 | context 'in year-wise passengers figure' do 79 | let(:path) { 'spec/fixtures/html/macau.html' } 80 | let(:match) { '2001' } 81 | let(:name) { 'Year-wise Passengers Figure' } 82 | 83 | context 'returns matching dataframes with index' do 84 | let(:opts) { {match: match, name: name} } 85 | 86 | it_behaves_like 
'exact daru dataframe', 87 | ncols: 6, 88 | nrows: 13, 89 | order: %w[2001 2000 1999 1998 1997 1996], 90 | index: %w[January February March April May June July August September October November December Total], 91 | name: 'Year-wise Passengers Figure', 92 | data: [ 93 | ['265,603','184,381','161,264','161,432','117,984',''], 94 | ['249,259','264,066','209,569','168,777','150,772',''], 95 | ['312,319','226,483','186,965','172,060','149,795',''], 96 | ['351,793','296,541','237,449','180,241','179,049',''], 97 | ['338,692','288,949','230,691','172,391','189,925',''], 98 | ['332,630','271,181','231,328','157,519','175,402',''], 99 | ['344,658','304,276','243,534','205,595','173,103',''], 100 | ['360,899','300,418','257,616','241,140','178,118',''], 101 | ['291,817','280,803','210,885','183,954','163,385',''], 102 | ['327,232','298,873','231,251','205,726','176,879',''], 103 | ['315,538','265,528','228,637','181,677','146,804',''], 104 | ['314,866','257,929','210,922','183,975','151,362',''], 105 | ['3,805,306','3,239,428','2,640,111','2,214,487','1,952,578','0'] 106 | ].transpose 107 | end 108 | end 109 | 110 | context 'in share market data' do 111 | let(:path) { 'spec/fixtures/html/moneycontrol.html' } 112 | let(:match) { 'Sun Pharma' } 113 | let(:index) { %w[Alpha Beta Gamma Delta Misc] } 114 | let(:name) { 'Share Market Analysis' } 115 | 116 | context 'returns matching dataframes' do 117 | let(:opts) { {match: match} } 118 | 119 | it_behaves_like 'exact daru dataframe', 120 | ncols: 4, 121 | nrows: 5, 122 | order: ['Company','Price','Change','Value (Rs Cr.)'], 123 | data: [ 124 | ['Sun Pharma','502.60','-65.05','2,117.87'], 125 | ['Reliance','1356.90','19.60','745.10'], 126 | ['Tech Mahindra','379.45','-49.70','650.22'], 127 | ['ITC','315.85','6.75','621.12'], 128 | ['HDFC','1598.85','50.95','553.91'] 129 | ].transpose 130 | end 131 | 132 | context 'returns user-modified matching dataframes' do 133 | let(:opts) { {match: match, index: index, name: name} } 134 | 135 
| it_behaves_like 'exact daru dataframe', 136 | ncols: 4, 137 | nrows: 5, 138 | order: ['Company','Price','Change','Value (Rs Cr.)'], 139 | index: %w[Alpha Beta Gamma Delta Misc], 140 | name: 'Share Market Analysis', 141 | data: [ 142 | ['Sun Pharma','502.60','-65.05','2,117.87'], 143 | ['Reliance','1356.90','19.60','745.10'], 144 | ['Tech Mahindra','379.45','-49.70','650.22'], 145 | ['ITC','315.85','6.75','621.12'], 146 | ['HDFC','1598.85','50.95','553.91'] 147 | ].transpose 148 | end 149 | end 150 | 151 | context 'in election results data' do 152 | let(:path) { 'spec/fixtures/html/eciresults.html' } 153 | 154 | context 'returns default dataframes' do 155 | it_behaves_like 'exact daru dataframe', 156 | ncols: 2, 157 | nrows: 19, 158 | order: ['PartyName', 'Votes Wise(%)'] 159 | end 160 | end 161 | end 162 | -------------------------------------------------------------------------------- /spec/daru/io/importers/json_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::JSON do 2 | subject { described_class.read(path).call(*columns, order: order, index: index, **named_columns) } 3 | 4 | let(:path) { '' } 5 | let(:index) { nil } 6 | let(:order) { nil } 7 | let(:columns) { nil } 8 | let(:named_columns) { {} } 9 | 10 | context 'on simple json file' do 11 | context 'in NASA data' do 12 | let(:path) { 'spec/fixtures/json/nasadata.json' } 13 | 14 | context 'without xpath (simple json)' do 15 | it_behaves_like 'exact daru dataframe', 16 | ncols: 10, 17 | nrows: 202, 18 | order: %w[designation discovery_date h_mag i_deg moid_au orbit_class period_yr pha q_au_1 q_au_2] 19 | end 20 | end 21 | end 22 | 23 | it_behaves_like 'importer with json-path option' 24 | 25 | context 'parses json response' do 26 | subject { described_class.from(instance).call(*columns, order: order, index: index, **named_columns) } 27 | 28 | let(:instance) { ::JSON.parse(File.read('spec/fixtures/json/nasadata.json')) } 29 | 30 | 
it_behaves_like 'exact daru dataframe', 31 | ncols: 10, 32 | nrows: 202, 33 | order: %w[designation discovery_date h_mag i_deg moid_au orbit_class period_yr pha q_au_1 q_au_2] 34 | end 35 | 36 | context 'parses json string' do 37 | subject { described_class.from(instance).call(*columns, order: order, index: index, **named_columns) } 38 | 39 | let(:instance) { File.read('spec/fixtures/json/nasadata.json') } 40 | 41 | it_behaves_like 'exact daru dataframe', 42 | ncols: 10, 43 | nrows: 202, 44 | order: %w[designation discovery_date h_mag i_deg moid_au orbit_class period_yr pha q_au_1 q_au_2] 45 | end 46 | 47 | context 'parses remote and local file similarly' do 48 | let(:local_path) { 'spec/fixtures/json/nasadata.json' } 49 | let(:path) { 'http://dummy-remote-url/nasadata.json' } 50 | 51 | before do 52 | WebMock 53 | .stub_request(:get, path) 54 | .to_return(status: 200, body: File.read(local_path)) 55 | WebMock.disable_net_connect!(allow: /dummy-remote-url/) 56 | end 57 | 58 | it_behaves_like 'exact daru dataframe', 59 | ncols: 10, 60 | nrows: 202, 61 | order: %w[designation discovery_date h_mag i_deg moid_au orbit_class period_yr pha q_au_1 q_au_2] 62 | end 63 | 64 | context 'raises error for invalid argument' do # rubocop:disable RSpec/EmptyExampleGroup 65 | context 'json input is invalid' do # rubocop:disable RSpec/EmptyExampleGroup 66 | subject { described_class.from([]).call(*columns, order: order, index: index, **named_columns) } 67 | 68 | let(:order) { %i[a b] } 69 | let(:named_columns) { {x: 1, y: 2} } 70 | 71 | its_call { is_expected.to raise_error(ArgumentError) } 72 | end 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /spec/daru/io/importers/log_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::Log do 2 | context 'parsing rails log' do 3 | subject { described_class.read(path,format: :rails3).call } 4 | 5 | let(:path)
{ 'spec/fixtures/log/rails.log' } 6 | 7 | it_behaves_like 'exact daru dataframe', 8 | ncols: 17, 9 | nrows: 1, 10 | order: %i[method resource_path ip timestamp line_type lineno 11 | source controller action format params rendered_file 12 | partial_duration status duration view db], 13 | :'timestamp.to_a' => [20_180_312_174_118], 14 | :'duration.to_a' => [0.097] 15 | end 16 | 17 | context 'parsing apache log' do 18 | subject { described_class.read(path,format: :apache).call } 19 | 20 | let(:path) { 'spec/fixtures/log/apache.log' } 21 | 22 | it_behaves_like 'exact daru dataframe', 23 | ncols: 14, 24 | nrows: 1, 25 | order: %i[remote_host remote_logname user timestamp http_method 26 | resource_path http_version http_status bytes_sent 27 | referer user_agent line_type lineno source], 28 | :'timestamp.to_a' => [20_161_207_103_443], 29 | :'bytes_sent.to_a' => [571] 30 | end 31 | 32 | context 'parsing amazon_s3 log' do 33 | subject { described_class.read(path,format: :amazon_s3).call } 34 | 35 | let(:path) { 'spec/fixtures/log/s3.log' } 36 | 37 | it_behaves_like 'exact daru dataframe', 38 | ncols: 20, 39 | nrows: 1, 40 | order: %i[bucket_owner bucket timestamp remote_ip requester request_id operation 41 | key request_uri http_status error_code bytes_sent object_size total_time 42 | turnaround_time referer user_agent line_type lineno source], 43 | :'timestamp.to_a' => [20_150_612_054_010], 44 | :'turnaround_time.to_a' => [0.019] 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /spec/daru/io/importers/mongo_spec.rb: -------------------------------------------------------------------------------- 1 | unless RUBY_VERSION == '2.4.0' 2 | ::Mongo::Logger.logger.level = ::Logger::FATAL 3 | 4 | # @note 5 | # 6 | # Mongo gem faces the 'wrong argument type, Expected Proc' bug prelavent in 7 | # Ruby 2.4.0, whose origin is in MRI 2.4.0. This seems to have been fixed 8 | # in Ruby 2.4.1 release. 
9 | # 10 | # Signed off by @athityakumar on 19/06/2017 at 11:55PM IST. 11 | RSpec.describe Daru::IO::Importers::Mongo do 12 | subject do 13 | described_class.from(connection).call( 14 | collection, 15 | *columns, 16 | order: order, 17 | index: index, 18 | filter: filter, 19 | skip: skip, 20 | limit: limit, 21 | **named_columns 22 | ) 23 | end 24 | 25 | let(:connection) { ::Mongo::Client.new('mongodb://127.0.0.1:27017/test') } 26 | let(:collection) { path.split('json').last.tr('/.','').to_sym } 27 | let(:index) { nil } 28 | let(:order) { nil } 29 | let(:skip) { nil } 30 | let(:limit) { nil } 31 | let(:filter) { nil } 32 | let(:columns) { nil } 33 | let(:named_columns) { {} } 34 | 35 | def store(path) 36 | collection = path.split('json').last.tr('/.','').to_sym 37 | documents = ::JSON.parse(File.read(path)) 38 | if documents.is_a?(Array) 39 | connection[collection].insert_many(documents) 40 | else 41 | connection[collection].insert_one(documents) 42 | end 43 | end 44 | 45 | before { store path } 46 | after { connection[collection].drop } 47 | 48 | context 'on simple json file' do 49 | context 'in NASA data' do 50 | let(:path) { 'spec/fixtures/json/nasadata.json' } 51 | let(:vector) do 52 | %w[_id designation discovery_date h_mag i_deg moid_au orbit_class period_yr pha q_au_1 q_au_2] 53 | end 54 | 55 | context 'without xpath (simple json)' do 56 | it_behaves_like 'exact daru dataframe', 57 | ncols: 11, 58 | nrows: 202 59 | its('vectors.to_a') { is_expected.to match_array(vector) } 60 | end 61 | 62 | context 'fetches paginated results - first page' do 63 | let(:limit) { 30 } 64 | let(:nrows) { 30 } 65 | 66 | it_behaves_like 'exact daru dataframe', 67 | ncols: 11, 68 | nrows: 30, 69 | index: (0..29).to_a 70 | its('vectors.to_a') { is_expected.to match_array(vector) } 71 | end 72 | 73 | context 'fetches paginated results - last page' do 74 | let(:skip) { 180 } 75 | let(:limit) { 30 } 76 | 77 | it_behaves_like 'exact daru dataframe', 78 | ncols: 11, 79 | nrows: 22 80 | 
its('vectors.to_a') { is_expected.to match_array(vector) } 81 | end 82 | 83 | context 'fetches results with filter' do 84 | let(:filter) { {pha: :N} } 85 | let(:limit) { 200 } 86 | 87 | it_behaves_like 'exact daru dataframe', 88 | ncols: 11, 89 | nrows: 151 90 | its('vectors.to_a') { is_expected.to match_array(vector) } 91 | end 92 | 93 | context 'fetches results with filter and pagination' do 94 | let(:filter) { {pha: :N} } 95 | let(:limit) { 100 } 96 | 97 | it_behaves_like 'exact daru dataframe', 98 | ncols: 11, 99 | nrows: 100 100 | its('vectors.to_a') { is_expected.to match_array(vector) } 101 | end 102 | end 103 | end 104 | 105 | it_behaves_like 'importer with json-path option' 106 | end 107 | end 108 | -------------------------------------------------------------------------------- /spec/daru/io/importers/plaintext_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::Plaintext do 2 | subject { described_class.read(path).call(vectors) } 3 | 4 | let(:vectors) { %i[v1 v2 v3] } 5 | 6 | context 'reads data from plain text files' do 7 | let(:path) { 'spec/fixtures/plaintext/bank2.dat' } 8 | let(:vectors) { %i[v1 v2 v3 v4 v5 v6] } 9 | 10 | it_behaves_like 'exact daru dataframe', 11 | ncols: 6, 12 | nrows: 200, 13 | order: %i[v1 v2 v3 v4 v5 v6] 14 | end 15 | 16 | context 'understands empty fields', skip: 'See FIXME note at importers/plainext.rb#L33-L36' do 17 | let(:path) { 'spec/fixtures/plaintext/empties.dat' } 18 | 19 | it_behaves_like 'exact daru dataframe', 20 | ncols: 5, 21 | nrows: 6, 22 | :'row[1].to_a' => [4, nil, 6] 23 | end 24 | 25 | context 'understands non-numeric fields' do 26 | let(:path) { 'spec/fixtures/plaintext/strings.dat' } 27 | 28 | it_behaves_like 'exact daru dataframe', 29 | ncols: 3, 30 | nrows: 2, 31 | :'v1.to_a' => %w[test foo] 32 | end 33 | end 34 | -------------------------------------------------------------------------------- 
/spec/daru/io/importers/r_data_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::RData do 2 | subject { described_class.read(path).call(variable) } 3 | 4 | let(:variable) { nil } 5 | 6 | context 'reads data from ACScounty file' do 7 | let(:path) { 'spec/fixtures/rdata/ACScounty.RData' } 8 | let(:variable) { 'ACS3' } 9 | 10 | it_behaves_like 'exact daru dataframe', 11 | ncols: 30, 12 | nrows: 1629, 13 | index: (0..1628).to_a, 14 | order: %i[ 15 | State Abbreviation FIPS 16 | males.married males.diorced females.married females.divorced 17 | perc.HS+ Non.US perc.disability 18 | race-white race-black race-am.ind race-asian 19 | ind-agric ind-construc ind-manufact ind-wholesale ind-retail ind-transport ind-information 20 | ind-finance ind-scientific ind-educational ind-arts ind-other.industry ind-public.admin 21 | median.earnings perc.no.health.insurance cnty 22 | ] 23 | end 24 | 25 | context 'reads data from Filings-by-state file' do 26 | let(:path) { 'spec/fixtures/rdata/Filings-by-state.RData' } 27 | let(:variable) { 'bk.rates' } 28 | 29 | it_behaves_like 'exact daru dataframe', 30 | ncols: 5, 31 | nrows: 1755, 32 | index: (0..1754).to_a, 33 | order: %i[ 34 | State.Code Total.Filings Business.Filings Non.Business.Filings year 35 | ] 36 | end 37 | 38 | context 'reads data from Ownership file' do 39 | let(:path) { 'spec/fixtures/rdata/Ownership.RData' } 40 | let(:variable) { 'ownership.state.qtr' } 41 | 42 | it_behaves_like 'exact daru dataframe', 43 | ncols: 4, 44 | nrows: 1632, 45 | index: (0..1631).to_a, 46 | order: %i[ 47 | State Date own.rate se 48 | ] 49 | end 50 | 51 | context 'when not a data.frame variable in RData file' do # rubocop:disable RSpec/EmptyExampleGroup 52 | { 53 | 'spec/fixtures/rdata/FRED-cpi-house.RData' => 'cpi.house', 54 | 'spec/fixtures/rdata/case-shiller.RData' => 'case.shiller', 55 | 'spec/fixtures/rdata/state-migration.RData' => 'state.migration', 56 | 
'spec/fixtures/rdata/zip-county.RData' => 'zip' 57 | }.each do |path, variable| 58 | let(:path) { path } 59 | let(:variable) { variable } 60 | 61 | its_call { is_expected.to raise_error(ArgumentError) } 62 | end 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /spec/daru/io/importers/rds_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::RDS do 2 | subject { described_class.read(path).call } 3 | 4 | context 'reads data from bc_sites RDS file' do 5 | let(:path) { 'spec/fixtures/rds/bc_sites.rds' } 6 | 7 | it_behaves_like 'exact daru dataframe', 8 | ncols: 25, 9 | nrows: 1113, 10 | index: (0..1112).to_a, 11 | order: %i[ 12 | state_or_province water_type sector region area location sub_location 13 | record_code format_version submission_date reporting_agency 14 | location_code location_type name latitude longitude psc_basin 15 | psc_region epa_reach description rmis_region rmis_basin rmis_latitude 16 | rmis_longitude record_origin 17 | ] 18 | end 19 | 20 | context 'reads data from chicago RDS file' do 21 | let(:path) { 'spec/fixtures/rds/chicago.rds' } 22 | 23 | it_behaves_like 'exact daru dataframe', 24 | ncols: 8, 25 | nrows: 6940, 26 | index: (0..6939).to_a, 27 | order: %i[ 28 | city tmpd dptp date pm25tmean2 pm10tmean2 o3tmean2 no2tmean2 29 | ] 30 | end 31 | 32 | context 'reads data from healthexp RDS file' do 33 | let(:path) { 'spec/fixtures/rds/healthexp.Rds' } 34 | 35 | it_behaves_like 'exact daru dataframe', 36 | ncols: 6, 37 | nrows: 3030, 38 | index: (0..3029).to_a, 39 | order: %i[ 40 | Country Region Year Population Life.Expectancy Health.Expenditure 41 | ] 42 | end 43 | 44 | context 'reads data from heights RDS file' do 45 | let(:path) { 'spec/fixtures/rds/heights.RDS' } 46 | 47 | it_behaves_like 'exact daru dataframe', 48 | ncols: 10, 49 | nrows: 3988, 50 | index: (0..3987).to_a, 51 | order: %i[ 52 | id income height weight 
sex race education asvab sat_math bdate 53 | ] 54 | end 55 | 56 | context 'reads data from maacs_env RDS file' do 57 | let(:path) { 'spec/fixtures/rds/maacs_env.rds' } 58 | 59 | it_behaves_like 'exact daru dataframe', 60 | ncols: 27, 61 | nrows: 750, 62 | index: (0..749).to_a, 63 | order: %i[ 64 | MxNum VisitNum pm25 no2 airnic coarse 65 | duBedWeight duBdRmWeight duKitchWeight 66 | duBedBlaG duBdRmBlaG duKitchBlaG 67 | duBedMusM duBdRmMusM duKitchMusM airmus 68 | duBedFelD duBdRmFelD duKitchFelD airFelD 69 | duBedCanF duBdRmCanF duKitchCanF airCanF 70 | duBedDerF duBdRmDerF duKitchDerF 71 | ] 72 | end 73 | 74 | context 'reads data from RPPdataConverted RDS file' do 75 | let(:path) { 'spec/fixtures/rds/RPPdataConverted.rds' } 76 | 77 | it_behaves_like 'exact daru dataframe', 78 | ncols: 138, 79 | nrows: 168, 80 | index: (0..167).to_a, 81 | order: %i[ 82 | Local.ID Study.Title.O Authors.O Journal.O Volume.O Issue.O Pages.O 83 | Project.URL Descriptors.O Number.of.Authors.O Number.of.Authors.R 84 | 1st.author.O Citation.Count.1st.author.O Institution.1st.author.O 85 | Senior.author.O Citation.count.senior.author.O 86 | Institution.senior.author.O 1st.author.R Citation.count.1st.author.R 87 | Institution.1st.author.R Senior.author.R Citation.count.senior.author.R 88 | Institution.senior.author.R Citation.count.paper.O 89 | Institution.prestige.1st.author.O Institution.prestige.senior.author.O 90 | Institution.prestige.1st.author.R Institution.prestige.senior.author.R 91 | Number.of.Studies.O Discipline.O Number.of.research.sites.O 92 | Secondary.data.O Methodology.expertise.required.O 93 | Opportunity.for.expectancy.bias.O Opportunity.for.lack.of.diligence.O 94 | Surprising.result.O Exciting.result.O 95 | Internal.conceptual.replications.O Successful.conceptual.replications.O 96 | Internal.direct.replications.O Successful.direct.replications.O 97 | Feasibility.O Status.R Completion.R Secondary.R Contact.Researcher.R 98 | Study.claim.date.R Analysis.completion.date.R 
Coder.s.email.R 99 | Replicated.study.number.R Test.statistic.O N.O Reported.P.value.O 100 | Calculated.P.value.O Tails.O Type.of.analysis.O.56 Type.of.effect.O 101 | Description.of.effect.O Effect.size.O Actual.Power.O 80.power 90.power 102 | 95.power Collect.materials.from.authors Planned.Sample Planned.Power 103 | Original.Author.s.Assessment OSC.reviewer.O Test.statistic.R N.R 104 | P.value.R Direction.R Tails.R Type.of.analysis.R.74 Type.of.effect.R 105 | Replicate.R Power.R Effect.Size.R OSC.reviewer.R Notes.R 106 | Project.audit.complete.R R.check.location.R Degree.R 107 | Year.of.highest.degree.R Current.position.R Domain.expertise.R 108 | Method.expertise.R Total.publications.R Peer.reviewed.articles.R 109 | Citations.R Implementation.quality.R Data.collection.quality.R 110 | Replication.similarity.R Differences.R Effect.similarity.R 111 | Findings.similarity.R Difficulty.of.implimentation.R 112 | Surprise.of.outcome.R Dummy Number.of.Research.sites.R 113 | Secondary.data.R Area.of.expertise.R T.N.O T.Test.Statistic.O 114 | T.TestComparison.O T.df1.O T.df2.O T.Test.value.O T.p.comparison.O 115 | T.pval.O T.pval.recalc.O T.pval.USE.O T.sign.O.113 T.r.O T.N.R 116 | T.Test.Statistic.R T.Test.Comparison.R T.df1.R T.df2.R T.Test.value.R 117 | T.p.comparison.R T.pval.R T.pval.recalc.R T.pval.USE.R T.sign.R.125 118 | T.r.R T.Comparison.effects.R.O Type.of.analysis.O.128 119 | Type.of.analysis.R.129 V130 T.sign.O.131 T.sign.R.132 T.O.larger 120 | T.N.O.for.tables T.N.R.for.tables Meta.analytic.estimate.Fz 121 | O.within.CI.R Meta.analysis.significant 122 | ] 123 | end 124 | end 125 | -------------------------------------------------------------------------------- /spec/daru/io/importers/redis_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec::Matchers.define :belong_to do |expected| 2 | match { |actual| (expected.to_a.uniq - actual.to_a.uniq).empty? 
} 3 | end 4 | 5 | RSpec::Matchers.define :match_unordered_data do |expected| 6 | match do |actual| 7 | actual = actual.to_a.map { |x| x.data.to_a }.flatten.uniq 8 | expected.map!(&:values) unless expected.first.is_a? Array 9 | expected = expected.flatten.uniq 10 | (expected - actual).empty? 11 | end 12 | end 13 | 14 | RSpec.shared_examples 'unordered daru dataframe' do |data: nil, nrows: nil, ncols: nil, order: nil, index: nil, name: nil, **opts| # rubocop:disable Metrics/LineLength 15 | it_behaves_like 'a daru dataframe', 16 | name: name, 17 | nrows: nrows, 18 | ncols: ncols, 19 | **opts 20 | 21 | its(:data) { is_expected.to match_unordered_data(data) } if data 22 | its(:index) { is_expected.to belong_to(index.to_index) } if index 23 | its(:vectors) { is_expected.to belong_to(order.to_index) } if order 24 | end 25 | 26 | # @note 27 | # 28 | # Custom matchers +belong_to+ and +match_unordered_data+ have been used, 29 | # as Redis doesn't necessarily insert keys in the given order. Due 30 | # to this, some rows and columns might be jumbled, and there is no 31 | # way to expect for an exact match while testing on RSpec. Rather, 32 | # the DataFrame is tested to have the same data, in *ANY* order.
33 | # 34 | # Signed off by @athityakumar on 08/16/2017 at 10:00PM IST 35 | RSpec.describe Daru::IO::Importers::Redis do 36 | subject { described_class.from(connection).call(*keys, match: pattern, count: count) } 37 | 38 | let(:keys) { [] } 39 | let(:count) { nil } 40 | let(:pattern) { nil } 41 | let(:connection) { Redis.new(port: 6379) } 42 | 43 | before { index.each_with_index { |k,i| store(k, data[i]) } } 44 | 45 | def store(key, value) 46 | connection.set key, value.to_json 47 | end 48 | 49 | after { connection.flushdb } 50 | 51 | context 'on array of keys having hashes' do 52 | let(:index) { %i[10001 10002 10003 10004] } 53 | let(:data) do 54 | [ 55 | {name: 'Tyrion', age: 32}, 56 | {name: 'Jamie', age: 37}, 57 | {name: 'Cersei', age: 37}, 58 | {name: 'Joffrey', age: 19} 59 | ] 60 | end 61 | 62 | context 'without key options' do 63 | it_behaves_like 'unordered daru dataframe', 64 | nrows: 4, 65 | ncols: 2, 66 | index: %i[10001 10002 10003 10004], 67 | order: %i[name age], 68 | data: [ 69 | ['Tyrion', 32], 70 | ['Jamie', 37], 71 | ['Cersei', 37], 72 | ['Joffrey', 19] 73 | ] 74 | end 75 | 76 | context 'with key options' do 77 | let(:keys) { index[0..1] } 78 | 79 | it_behaves_like 'unordered daru dataframe', 80 | nrows: 2, 81 | ncols: 2, 82 | index: %i[10001 10002], 83 | order: %i[name age], 84 | data: [ 85 | ['Tyrion', 32], 86 | ['Jamie', 37] 87 | ] 88 | end 89 | end 90 | 91 | context 'on keys having array of hashes' do 92 | let(:index) { %i[10001 10003] } 93 | let(:data) do 94 | [ 95 | [{name: 'Tyrion', age: 32},{name: 'Jamie', age: 37}], 96 | [{name: 'Cersei', age: 37},{name: 'Joffrey', age: 19}] 97 | ] 98 | end 99 | 100 | context 'without key options' do 101 | it_behaves_like 'unordered daru dataframe', 102 | nrows: 4, 103 | ncols: 2, 104 | index: (0..3).to_a, 105 | order: %i[name age], 106 | data: [ 107 | ['Tyrion', 32], 108 | ['Jamie', 37], 109 | ['Cersei', 37], 110 | ['Joffrey', 19] 111 | ] 112 | end 113 | 114 | context 'with key options' do 115 | 
let(:keys) { index[0..0] } 116 | 117 | it_behaves_like 'unordered daru dataframe', 118 | nrows: 2, 119 | ncols: 2, 120 | index: (0..1).to_a, 121 | order: %i[name age], 122 | data: [ 123 | ['Tyrion', 32], 124 | ['Jamie', 37] 125 | ] 126 | end 127 | end 128 | 129 | context 'on hash keys having arrays' do 130 | let(:index) { %i[age living name] } 131 | let(:data) do 132 | [ 133 | [32,37,37,19], 134 | [true, true, true, false], 135 | %w[Tyrion Jamie Cersei Joffrey] 136 | ] 137 | end 138 | 139 | context 'without key options' do 140 | it_behaves_like 'unordered daru dataframe', 141 | nrows: 4, 142 | ncols: 3, 143 | index: (0..3).to_a, 144 | order: %i[name age living], 145 | data: [ 146 | [32, true, 'Tyrion'], 147 | [37, true, 'Jamie'], 148 | [37, true, 'Cersei'], 149 | [19, false, 'Joffrey'] 150 | ] 151 | end 152 | 153 | context 'with key options' do 154 | let(:keys) { index[0..1] } 155 | 156 | it_behaves_like 'unordered daru dataframe', 157 | nrows: 4, 158 | ncols: 2, 159 | index: (0..3).to_a 160 | end 161 | end 162 | 163 | context 'on timestamps' do 164 | let(:index) { %i[090620171216 090620171218 090620171222 100620171225] } 165 | let(:data) do 166 | [ 167 | {name: 'Tyrion', age: 32}, 168 | {name: 'Jamie', age: 37}, 169 | {name: 'Cersei', age: 37}, 170 | {name: 'Joffrey', age: 19} 171 | ] 172 | end 173 | 174 | context 'gets keys with pattern match and count' do 175 | let(:count) { 3 } 176 | let(:pattern) { '09062017*' } 177 | 178 | it_behaves_like 'unordered daru dataframe', 179 | nrows: 3, 180 | ncols: 2, 181 | index: %i[090620171216 090620171218 090620171222], 182 | order: %i[name age], 183 | data: [ 184 | ['Tyrion', 32], 185 | ['Jamie', 37], 186 | ['Cersei', 37] 187 | ] 188 | end 189 | 190 | context 'gets keys without pattern and count' do 191 | it_behaves_like 'unordered daru dataframe', 192 | nrows: 4, 193 | ncols: 2, 194 | index: %i[090620171216 090620171218 090620171222], 195 | order: %i[name age], 196 | data: [ 197 | ['Tyrion', 32], 198 | ['Jamie', 37], 199 | 
['Cersei', 37], 200 | ['Joffrey', 19] 201 | ] 202 | end 203 | 204 | context 'gets keys with pattern match' do 205 | let(:pattern) { '09062017*' } 206 | 207 | it_behaves_like 'unordered daru dataframe', 208 | nrows: 3, 209 | ncols: 2, 210 | index: %i[090620171216 090620171218 090620171222], 211 | order: %i[name age], 212 | data: [ 213 | ['Tyrion', 32], 214 | ['Jamie', 37], 215 | ['Cersei', 37] 216 | ] 217 | end 218 | end 219 | 220 | context 'on dummy data of paginated keys' do 221 | let(:data) { Array.new(2000) { |i| {a: "a#{i}", b: "b#{i}"} } } 222 | let(:index) { Array.new(2000) { |i| "key#{i}".to_sym } } 223 | let(:pattern) { 'key1*' } 224 | 225 | context 'parses only 1st page by default' do 226 | let(:count) { 400 } 227 | 228 | it_behaves_like 'unordered daru dataframe', 229 | nrows: 400, 230 | ncols: 2, 231 | order: %i[a b] 232 | end 233 | 234 | context 'parses entire pagination' do 235 | let(:count) { nil } 236 | 237 | it_behaves_like 'unordered daru dataframe', 238 | nrows: 1111, 239 | ncols: 2, 240 | order: %i[a b] 241 | end 242 | end 243 | end 244 | -------------------------------------------------------------------------------- /spec/daru/io/importers/sql_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO::Importers::SQL do 2 | include_context 'sqlite3 database setup' 3 | 4 | subject { described_class.from(source).call(query) } 5 | 6 | let(:query) { 'select * from accounts' } 7 | let(:source) { ActiveRecord::Base.connection } 8 | 9 | context 'with a database handler of DBI' do 10 | subject { described_class.from(db).call(query) } 11 | 12 | let(:db) { DBI.connect("DBI:SQLite3:#{db_name}") } 13 | 14 | it_behaves_like 'exact daru dataframe', 15 | ncols: 3, 16 | nrows: 2, 17 | order: %i[id name age], 18 | data: [[1,2],%w[Homer Marge],[20, 30]] 19 | end 20 | 21 | context 'with a database connection of ActiveRecord' do 22 | subject { described_class.from(connection).call(query) } 23 | 24 | 
let(:connection) { Daru::IO::Rspec::Account.connection } 25 | 26 | before { Daru::IO::Rspec::Account.establish_connection "sqlite3:#{db_name}" } 27 | 28 | it_behaves_like 'exact daru dataframe', 29 | ncols: 3, 30 | nrows: 2, 31 | order: %i[id name age], 32 | data: [[1,2],%w[Homer Marge],[20, 30]] 33 | end 34 | 35 | before { ActiveRecord::Base.establish_connection("sqlite3:#{db_name}") } 36 | 37 | context 'with DBI::DatabaseHandle' do 38 | let(:source) { DBI.connect("DBI:SQLite3:#{db_name}") } 39 | 40 | it_behaves_like 'exact daru dataframe', 41 | ncols: 3, 42 | nrows: 2, 43 | order: %i[id name age], 44 | data: [[1,2],%w[Homer Marge],[20, 30]] 45 | end 46 | 47 | context 'with ActiveRecord::Connection' do 48 | it_behaves_like 'exact daru dataframe', 49 | ncols: 3, 50 | nrows: 2, 51 | order: %i[id name age], 52 | data: [[1,2],%w[Homer Marge],[20, 30]] 53 | end 54 | 55 | context 'with path to sqlite3 file' do 56 | subject { described_class.read(source).call(query) } 57 | 58 | let(:source) { db_name } 59 | 60 | it_behaves_like 'exact daru dataframe', 61 | ncols: 3, 62 | nrows: 2, 63 | order: %i[id name age], 64 | data: [[1,2],%w[Homer Marge],[20, 30]] 65 | end 66 | 67 | context 'raises error for invalid arguments' do # rubocop:disable RSpec/EmptyExampleGroup 68 | let(:query) { Object.new } 69 | let(:source) { 'spec/fixtures/plaintext/bank2.dat' } 70 | 71 | its_call { is_expected.to raise_error(ArgumentError) } 72 | end 73 | end 74 | -------------------------------------------------------------------------------- /spec/daru/io_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Daru::IO do 2 | it 'has a version number' do 3 | expect(described_class::VERSION).not_to be nil 4 | end 5 | end 6 | -------------------------------------------------------------------------------- /spec/fixtures/README.md: -------------------------------------------------------------------------------- 1 | # FIXTURE FILES 2 | 3 | ### Avro 4 
| 5 | - `one_complex.avro` : Downloaded from [here](https://github.com/GoogleCloudPlatform/google-cloud-dotnet/blob/master/apis/Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2.IntegrationTests/one_complex.avro). Contains schema for Imaginary number representation. 6 | - `twitter.avro` : Downloaded from [here](https://github.com/miguno/avro-hadoop-starter/blob/master/src/test/resources/avro/twitter.avro). An AVRO schema with simple fields for Twitter users like `username`, `timestamp`, `tweet`. 7 | - `users.avro` : Downloaded from [here](https://github.com/apache/spark/blob/master/examples/src/main/resources/users.avro). A generic AVRO schema with fields like `name`, `favorite_color` and `favorite_numbers`. 8 | 9 | ### Excelx 10 | 11 | - `LOBSTAHS_rt.windows.xlsx` : Downloaded from [here](https://github.com/vanmooylipidomics/LOBSTAHS/blob/master/inst/doc/xlsx/LOBSTAHS_rt.windows.xlsx). Contains data about Lipid and Oxylipin Biomarker Screening Through Adduct Hierarchy Sequences (LOBSTAHS). Contains two sheets called `LOBSTAHS_rt.windows` and `Notes`. 12 | - `Microcode.xlsx` : Downloaded from [here](https://github.com/tkim371/CS2200/blob/master/proj2/project2/Microcode.xlsx). 13 | - `Stock-counts-sheet.xlsx` : Downloaded from [here](https://www.exact.com/uk/images/downloads/getting-started-excel-sheets/Stock-counts-sheet.xlsx). Contains data about stocks. Helps in ensuring that HTML tags of cell attributes are stripped off, while constructing the DataFrame. 14 | - `pivot.xlsx` : Downloaded from [here](myy.haaga-helia.fi/~taaak/r/pivot.xlsx). Contains pivot tables, which require `:skiprows` and `:skipcols` functionality. 15 | 16 | ### JSON 17 | 18 | - `allsets.json` : An ultra-truncated version of the huge zip file available [here](http://mtgjson.com/json/AllSets.json.zip). Contains nested hashes.
19 | - `got.json` : API response for the popular Game of Thrones series, by TV-Maze website found [here](http://api.tvmaze.com/singlesearch/shows?q=game-of-thrones&embed=episodes). A fairly small data source with nested hashes. 20 | - `jsonvat.json` : Nested hash response from [this website](http://jsonvat.com/). A fairly small data source. 21 | - `nasadata.json` : JSON response of Array of hashes, from [NASA website](https://data.nasa.gov/resource/2vr3-k9wn.json). Not complexly nested, but helps to select few keys of the hash without using json-xpath. 22 | - `temp.json` : Time-series response for Average temperature data, from [this website](https://www.ncdc.noaa.gov/cag/time-series/us/110/00/tavg/ytd/12/1895-2016.json?base_prd=true&begbaseyear=1901&endbaseyear=2000). A fairly small data source, with relatively simple nesting - to directly get a dataframe from the response field. 23 | 24 | ### RData 25 | 26 | - `ACScounty.RData` : Downloaded from [here](https://github.com/floswald/Rdata/blob/master/out/ACScounty.RData). Contains two datasets - `ACS3` and `ACS5`. 27 | - `case-shiller.RData` : Downloaded from [here](https://github.com/floswald/Rdata/blob/master/out/case-shiller.RData). Contains a single dataset called `case.shiller`. 28 | - `Filings-by-state.rdata` : Downloaded from [here](https://github.com/floswald/Rdata/blob/master/out/Filings-by-state.RData). Contains a single dataset called `bk.rates`. 29 | - `Ownership.rdata` : Downloaded from [here](https://github.com/floswald/Rdata/blob/master/out/Ownership.RData). Contains two datasets - `ownership.state.qtr` and `ownership.age`. 30 | - `state-migration.rdata` : Downloaded from [here](https://github.com/floswald/Rdata/blob/master/out/state-migration.RData). Contains a single dataset called `state.migration`. 31 | - `zip-county.rdata` : Downloaded from [here](https://github.com/floswald/Rdata/blob/master/out/zip-county.RData). Contains a single dataset called `m`. 
32 | 33 | ### RDS 34 | 35 | - `bc_sites.rds` : Contains data about geological attributes. Can potentially create `<1113*25> Daru::DataFrame`. Downloaded from [here](https://github.com/eriqande/rep-res-course/blob/master/data/bc_sites.rds) 36 | - `chicago.rds` : Contains data about city and temperatures. Can potentially create a `<6940*8> Daru::DataFrame`. Downloaded from [here](https://github.com/DataScienceSpecialization/courses/blob/master/03_GettingData/dplyr/chicago.rds) 37 | - `healthexp.rds` : Contains data comparing health expectancies among various countries. Can potentially create a `<3030*6> Daru::DataFrame`. Downloaded from [here](https://github.com/jcheng5/googleCharts/blob/master/inst/examples/bubble/healthexp.Rds) 38 | - `heights.rds` : Contains data as individual-wise, with attributes such as income, education, height, weight, etc. Can potentially create a `<3988*10> Daru::DataFrame`. Downloaded from [here](https://github.com/hadley/r4ds/blob/master/data/heights.RDS) 39 | - `maacs_env.rds` : Contains data about Marine Air Command and Control System (MAACS) Environment. Can potentially create a `<750*27> Daru::DataFrame`. Downloaded from [here](https://github.com/DataScienceSpecialization/courses/blob/master/04_ExploratoryAnalysis/PlottingLattice/maacs_env.rds) 40 | - `RPPdataConverted.rds`: Contains data about author, citations and more of such fields. A fairly large dataset, which can potentially create a `<168*138> Daru::DataFrame`. Downloaded from [here](https://github.com/CenterForOpenScience/rpp/blob/master/data_allformats/RPPdataConverted.rds) 41 | 42 | 43 | ### NOTE FOR FUTURE MAINTAINERS 44 | 45 | If you're having difficulty in finding fixtures files for a certain format, search in google for a specific filetype and keyword. The search url usually comes in a format like https://www.google.co.in/search?q=filetype:{filetype}+{keyword}. 
For example, https://www.google.co.in/search?q=filetype:avro+github 46 | 47 | Go through the search results, to check if the file is feasible (not too large) to be added as a fixture file to this repository. -------------------------------------------------------------------------------- /spec/fixtures/avro/one_complex.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/avro/one_complex.avro -------------------------------------------------------------------------------- /spec/fixtures/avro/twitter.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/avro/twitter.avro -------------------------------------------------------------------------------- /spec/fixtures/avro/users.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/avro/users.avro -------------------------------------------------------------------------------- /spec/fixtures/csv/boolean_converter_test.csv: -------------------------------------------------------------------------------- 1 | ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location 2 | 8517337,HV194652,03/12/2012 02:00:00 PM,027XX S HAMLIN AVE,1152,DECEPTIVE PRACTICE,ILLEGAL USE CASH CARD,ATM (AUTOMATIC TELLER MACHINE),false,true,1031,010,22,30,11,1151482,1885517,2012,02/04/2016 06:33:39 AM,41.841738053,-87.719605942,"(41.841738053, -87.719605942)" 3 | 8517338,HV194241,03/06/2012 10:49:00 PM,102XX S VERNON AVE,0917,MOTOR VEHICLE THEFT,"CYCLE, SCOOTER, BIKE 
W-VIN",STREET,false,false,0511,005,9,49,07,1181052,1837191,2012,02/04/2016 06:33:39 AM,41.708495677,-87.612580474,"(41.708495677, -87.612580474)" 4 | 8517339,HV194563,02/01/2012 08:15:00 AM,003XX W 108TH ST,0460,BATTERY,SIMPLE,"SCHOOL, PRIVATE, BUILDING",false,false,0513,005,34,49,08B,1176016,1833309,2012,02/04/2016 06:33:39 AM,41.6979571,-87.631138505,"(41.6979571, -87.631138505)" 5 | 8517340,HV194531,03/12/2012 05:50:00 PM,089XX S CARPENTER ST,0560,ASSAULT,SIMPLE,STREET,false,false,2222,022,21,73,08A,1170886,1845421,2012,02/04/2016 06:33:39 AM,41.731307475,-87.649569675,"(41.731307475, -87.649569675)" 6 | -------------------------------------------------------------------------------- /spec/fixtures/csv/column_headers_only.csv: -------------------------------------------------------------------------------- 1 | col0,col1,col2 2 | -------------------------------------------------------------------------------- /spec/fixtures/csv/empty_rows_test.csv: -------------------------------------------------------------------------------- 1 | image_resolution true_transform mls 2 | 6.55779 -0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4262.65,0,0,0,1 0 3 | 2.14746 -0.1539447,-0.2832125,-0.9466212,0,-0.1585857,0.9527035,-0.2592422,0,0.9752699,0.1102116,-0.1915772,1278.86,0,0,0,1 0 4 | 8.31104 0.3832846,0.8818775,-0.2745634,0,0.8566163,-0.4505714,-0.2513841,0,-0.3454004,-0.1388438,-0.9281277,5448.81,0,0,0,1 0 5 | 3.47872 0.3832846,0.8818775,-0.2745634,0,0.8566163,-0.4505714,-0.2513841,0,-0.3454004,-0.1388438,-0.9281277,2179.52,0,0,0,1 0 6 | 4.16725 -0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,2645.34,0,0,0,1 0 7 | 8 | 9 | 5.79983 -0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,3749.86,0,0,0,1 0 10 | 1.9058 -0.895577,-0.4178617,0.152753,0,-0.1004795,-0.1445008,-0.9843898,0,0.4334117,-0.8969454,0.08742505,1115.36,0,0,0,1 0 11 | 
1.9058 -0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,1115.36,0,0,0,1 0 12 | 4.11806 -0.895577,-0.4178617,0.152753,0,-0.1004795,-0.1445008,-0.9843898,0,0.4334117,-0.8969454,0.08742505,2612.06,0,0,0,1 0 13 | 6.26622 -0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,4065.39,0,0,0,1 0 14 | 2.57805 -0.1539447,-0.2832125,-0.9466212,0,-0.1585857,0.9527035,-0.2592422,0,0.9752699,0.1102116,-0.1915772,1570.18,0,0,0,1 0 15 | 4.76151 -0.2362347,0.6308649,0.7390552,0,0.6523478,-0.4607318,0.6018043,0,0.7201635,0.6242881,-0.3027024,3047.39,0,0,0,1 0 16 | 7.11002 -0.895577,-0.4178617,0.152753,0,-0.1004795,-0.1445008,-0.9843898,0,0.4334117,-0.8969454,0.08742505,4636.26,0,0,0,1 0 17 | -------------------------------------------------------------------------------- /spec/fixtures/csv/repeated_fields.csv: -------------------------------------------------------------------------------- 1 | "id","name","age","city","a1","name","age" 2 | 1,"Alex",20,"New York","a,b","a",3 3 | 2,"Claude",23,"London","b,c","b",4 4 | 3,"Peter",25,"London","a","c",5 5 | 4,"Franz",27,"Paris",,"d",6 6 | 5,"George","5,5","Tome","a,b,c","f", 7 | 6,"Fernand",20,"London","c,b","f",8 8 | -------------------------------------------------------------------------------- /spec/fixtures/csv/sales-funnel.csv: -------------------------------------------------------------------------------- 1 | Account,Name,Rep,Manager,Product,Quantity,Price,Status 2 | 714466,Trantow-Barrows,Craig Booker,Debra Henley,CPU,1,30000,presented 3 | 714466,Trantow-Barrows,Craig Booker,Debra Henley,Software,1,10000,presented 4 | 714466,Trantow-Barrows,Craig Booker,Debra Henley,Maintenance,2,5000,pending 5 | 737550,"Fritsch, Russel and Anderson",Craig Booker,Debra Henley,CPU,1,35000,declined 6 | 146832,Kiehn-Spinka,Daniel Hilton,Debra Henley,CPU,2,65000,won 7 | 218895,Kulas Inc,Daniel Hilton,Debra Henley,CPU,2,40000,pending 8 | 218895,Kulas Inc,Daniel 
Hilton,Debra Henley,Software,1,10000,presented 9 | 412290,Jerde-Hilpert,John Smith,Debra Henley,Maintenance,2,5000,pending 10 | 740150,Barton LLC,John Smith,Debra Henley,CPU,1,35000,declined 11 | 141962,Herman LLC,Cedric Moss,Fred Anderson,CPU,2,65000,won 12 | 163416,Purdy-Kunde,Cedric Moss,Fred Anderson,CPU,1,30000,presented 13 | 239344,Stokes LLC,Cedric Moss,Fred Anderson,Maintenance,1,5000,pending 14 | 239344,Stokes LLC,Cedric Moss,Fred Anderson,Software,1,10000,presented 15 | 307599,"Kassulke, Ondricka and Metz",Wendy Yule,Fred Anderson,Maintenance,3,7000,won 16 | 688981,Keeling LLC,Wendy Yule,Fred Anderson,CPU,5,100000,won 17 | 729833,Koepp Ltd,Wendy Yule,Fred Anderson,CPU,2,65000,declined 18 | 729833,Koepp Ltd,Wendy Yule,Fred Anderson,Monitor,2,5000,presented 19 | -------------------------------------------------------------------------------- /spec/fixtures/csv/scientific_notation.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 1,9.629587310436753e+127 3 | 2,1.9341543147883677e+129 4 | 3,3.88485279048245e+130 5 | -------------------------------------------------------------------------------- /spec/fixtures/csv/stock_data.csv: -------------------------------------------------------------------------------- 1 | 17.66 2 | 17.65 3 | 17.68 4 | 17.66 5 | 17.68 6 | 17.67 7 | 17.68 8 | 17.68 9 | 17.67 10 | 17.67 11 | 17.68 12 | 17.71 13 | 17.74 14 | 17.72 15 | 17.73 16 | 17.76 17 | 17.74 18 | 17.69 19 | 17.69 20 | 17.67 21 | 17.66 22 | 17.67 23 | 17.69 24 | 17.69 25 | 17.68 26 | 17.65 27 | 17.65 28 | 17.64 29 | 17.63 30 | 17.64 31 | 17.67 32 | 17.68 33 | 17.7 34 | 17.68 35 | 17.69 36 | 17.69 37 | 17.72 38 | 17.71 39 | 17.71 40 | 17.71 41 | 17.69 42 | 17.69 43 | 17.71 44 | 17.72 45 | 17.71 46 | 17.68 47 | 17.68 48 | 17.68 49 | 17.69 50 | 17.68 51 | 17.68 52 | 17.69 53 | 17.67 54 | 17.69 55 | 17.71 56 | 17.7 57 | 17.7 58 | 17.71 59 | 17.73 60 | 17.74 61 | 17.74 62 | 17.74 63 | 17.76 64 | 17.77 65 | 17.55 66 | 17.55 67 
| 17.5 68 | 17.46 69 | 17.49 70 | 17.54 71 | 17.51 72 | 17.54 73 | 17.57 74 | 17.54 75 | 17.52 76 | 17.53 77 | 17.56 78 | 17.55 79 | 17.55 80 | 17.54 81 | 17.55 82 | 17.55 83 | 17.55 84 | 17.54 85 | 17.52 86 | 17.53 87 | 17.51 88 | 17.52 89 | 17.5 90 | 17.5 91 | 17.5 92 | 17.49 93 | 17.46 94 | 17.47 95 | 17.48 96 | 17.45 97 | 17.41 98 | 17.39 99 | 17.38 100 | 17.43 101 | 17.44 102 | 17.43 103 | 17.43 104 | 17.46 105 | 17.46 106 | 17.47 107 | 17.47 108 | 17.45 109 | 17.48 110 | 17.49 111 | 17.5 112 | 17.49 113 | 17.48 114 | 17.49 115 | 17.47 116 | 17.47 117 | 17.44 118 | 17.44 119 | 17.43 120 | 17.45 121 | 17.42 122 | 17.43 123 | 17.43 124 | 17.44 125 | 17.44 126 | 17.43 127 | 17.41 128 | 17.41 129 | 17.38 130 | 17.38 131 | 17.37 132 | 17.37 133 | 17.37 134 | 17.3 135 | 17.28 136 | 17.27 137 | 17.19 138 | 16.41 139 | 16.44 140 | 16.48 141 | 16.53 142 | 16.51 143 | 16.57 144 | 16.54 145 | 16.59 146 | 16.64 147 | 16.6 148 | 16.65 149 | 16.69 150 | 16.69 151 | 16.68 152 | 16.64 153 | 16.65 154 | 16.66 155 | 16.64 156 | 16.61 157 | 16.65 158 | 16.67 159 | 16.66 160 | 16.65 161 | 16.61 162 | 16.59 163 | 16.57 164 | 16.55 165 | 16.55 166 | 16.57 167 | 16.54 168 | 16.6 169 | 16.62 170 | 16.6 171 | 16.59 172 | 16.61 173 | 16.66 174 | 16.69 175 | 16.67 176 | 16.65 177 | 16.66 178 | 16.65 179 | 16.65 180 | 16.68 181 | 16.68 182 | 16.67 183 | 16.64 184 | 16.73 185 | 16.76 186 | 16.75 187 | 16.79 188 | 16.8 189 | 16.77 190 | 16.74 191 | 16.76 192 | 16.83 193 | 16.84 194 | 16.82 195 | 16.89 196 | 16.93 197 | 16.94 198 | 16.9 199 | 16.92 200 | 16.88 201 | 16.85 202 | 16.87 203 | 16.8 204 | 16.79 205 | 16.85 206 | 16.85 207 | 16.8 208 | 16.82 209 | 16.85 210 | 16.9 211 | 16.86 212 | 16.79 213 | 16.75 214 | 16.78 215 | 17.06 216 | 17.05 217 | 17.04 218 | 17.02 219 | 17.01 220 | 17.02 221 | 17.05 222 | 17.07 223 | 17.08 224 | 17.09 225 | 17.1 226 | 17.11 227 | 17.09 228 | 17.1 229 | 17.1 230 | 17.12 231 | 17.17 232 | 17.16 233 | 17.17 234 | 17.18 235 | 17.18 236 | 17.18 237 | 17.17 
238 | 17.15 239 | 17.14 240 | 17.13 241 | 17.14 242 | 17.13 243 | 17.12 244 | 17.12 245 | 17.09 246 | 17.09 247 | 17.11 248 | 17.06 249 | 17.07 250 | 17.06 251 | 17.07 252 | 17.06 253 | 17.09 254 | 17.05 255 | 17.04 256 | 17.04 257 | 16.99 258 | 17 259 | 17.03 260 | 17 261 | 16.97 262 | 16.96 263 | 16.98 264 | 16.98 265 | 16.98 266 | 17.03 267 | 17 268 | 17 269 | 17 270 | 17.02 271 | 17 272 | 17.02 273 | 17.01 274 | 17.02 275 | 17.03 276 | 17.03 277 | 17.01 278 | 17.03 279 | 17.03 280 | 17.03 281 | 17.01 282 | 17.03 283 | 17.05 284 | 17.05 285 | 17.08 286 | 17.04 287 | 17.01 288 | 17.03 289 | 17.02 290 | 17.03 291 | 17.04 292 | 17.05 293 | 17.37 294 | 17.35 295 | 17.34 296 | 17.32 297 | 17.29 298 | 17.29 299 | 17.22 300 | 17.26 301 | 17.3 302 | 17.34 303 | 17.33 304 | 17.39 305 | 17.4 306 | 17.39 307 | 17.48 308 | 17.5 309 | 17.47 310 | 17.43 311 | 17.4 312 | 17.42 313 | 17.46 314 | 17.48 315 | 17.48 316 | 17.46 317 | 17.46 318 | 17.45 319 | 17.43 320 | 17.44 321 | 17.48 322 | 17.43 323 | 17.45 324 | 17.47 325 | 17.46 326 | 17.46 327 | 17.48 328 | 17.48 329 | 17.48 330 | 17.46 331 | 17.5 332 | 17.55 333 | 17.58 334 | 17.57 335 | 17.56 336 | 17.59 337 | 17.61 338 | 17.62 339 | 17.63 340 | 17.62 341 | 17.61 342 | 17.61 343 | 17.62 344 | 17.64 345 | 17.65 346 | 17.61 347 | 17.62 348 | 17.66 349 | 17.65 350 | 17.64 351 | 17.63 352 | 17.64 353 | 17.64 354 | 17.64 355 | 17.63 356 | 17.61 357 | 17.61 358 | 17.62 359 | 17.63 360 | 17.64 361 | 17.65 362 | 17.66 363 | 17.68 364 | 17.69 365 | 17.69 366 | 17.69 367 | 17.66 368 | 17.69 369 | 17.69 370 | 17.62 371 | 17.68 372 | 17.64 373 | 17.65 374 | 17.61 375 | 17.52 376 | 17.56 377 | 17.55 378 | 17.55 379 | 17.48 380 | 17.45 381 | 17.46 382 | 17.46 383 | 17.44 384 | 17.47 385 | 17.5 386 | 17.49 387 | 17.5 388 | 17.53 389 | 17.53 390 | 17.54 391 | 17.51 392 | 17.51 393 | 17.53 394 | 17.53 395 | 17.53 396 | 17.55 397 | 17.55 398 | 17.54 399 | 17.56 400 | 17.59 401 | 17.57 402 | 17.58 403 | 17.58 404 | 17.57 405 | 17.59 406 | 
17.57 407 | 17.55 408 | 17.51 409 | 17.51 410 | 17.52 411 | 17.52 412 | 17.53 413 | 17.55 414 | 17.59 415 | 17.61 416 | 17.61 417 | 17.6 418 | 17.6 419 | 17.62 420 | 17.65 421 | 17.62 422 | 17.6 423 | 17.6 424 | 17.62 425 | 17.61 426 | 17.62 427 | 17.63 428 | 17.64 429 | 17.65 430 | 17.61 431 | 17.62 432 | 17.64 433 | 17.63 434 | 17.62 435 | 17.6 436 | 17.57 437 | 17.57 438 | 17.6 439 | 17.59 440 | 17.6 441 | 17.61 442 | 17.61 443 | 17.63 444 | 17.63 445 | 17.59 446 | 17.58 447 | 17.76 448 | 17.79 449 | 17.76 450 | 17.73 451 | 17.74 452 | 17.73 453 | 17.67 454 | 17.66 455 | 17.66 456 | 17.64 457 | 17.63 458 | 17.62 459 | 17.61 460 | 17.6 461 | 17.61 462 | 17.61 463 | 17.6 464 | 17.6 465 | 17.64 466 | 17.65 467 | 17.65 468 | 17.63 469 | 17.61 470 | 17.6 471 | 17.63 472 | 17.63 473 | 17.62 474 | 17.63 475 | 17.64 476 | 17.62 477 | 17.63 478 | 17.65 479 | 17.64 480 | 17.6 481 | 17.59 482 | 17.59 483 | 17.58 484 | 17.58 485 | 17.6 486 | 17.6 487 | 17.6 488 | 17.6 489 | 17.6 490 | 17.58 491 | 17.59 492 | 17.6 493 | 17.6 494 | 17.6 495 | 17.59 496 | 17.59 497 | 17.58 498 | 17.58 499 | 17.65 500 | 17.65 501 | -------------------------------------------------------------------------------- /spec/fixtures/excel/test_xls.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/excel/test_xls.xls -------------------------------------------------------------------------------- /spec/fixtures/excelx/LOBSTAHS_rt.windows.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/excelx/LOBSTAHS_rt.windows.xlsx -------------------------------------------------------------------------------- /spec/fixtures/excelx/Microcode.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/excelx/Microcode.xlsx -------------------------------------------------------------------------------- /spec/fixtures/excelx/Stock-counts-sheet.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/excelx/Stock-counts-sheet.xlsx -------------------------------------------------------------------------------- /spec/fixtures/excelx/pivot.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/excelx/pivot.xlsx -------------------------------------------------------------------------------- /spec/fixtures/html/valid_markup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 |
ab
067
140
294
370
40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
ab
067
140
61 | 62 | 63 | -------------------------------------------------------------------------------- /spec/fixtures/json/jsonvat.json: -------------------------------------------------------------------------------- 1 | {"details":"http://github.com/adamcooke/vat-rates","version":"bd48c1","rates":[{"name":"Germany","code":"DE","country_code":"DE","periods":[{"effective_from":"0000-01-01","rates":{"reduced":7.0,"standard":19.0}}]},{"name":"Poland","code":"PL","country_code":"PL","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":5.0,"reduced2":8.0,"standard":23.0}}]},{"name":"Hungary","code":"HU","country_code":"HU","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":5.0,"reduced2":18.0,"standard":27.0}}]},{"name":"Slovenia","code":"SI","country_code":"SI","periods":[{"effective_from":"0000-01-01","rates":{"reduced":9.5,"standard":22.0}}]},{"name":"Slovakia","code":"SK","country_code":"SK","periods":[{"effective_from":"0000-01-01","rates":{"reduced":10.0,"standard":20.0}}]},{"name":"Portugal","code":"PT","country_code":"PT","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":6.0,"reduced2":13.0,"standard":23.0,"parking":13.0}}]},{"name":"France","code":"FR","country_code":"FR","periods":[{"effective_from":"0000-01-01","rates":{"super_reduced":2.1,"reduced1":5.5,"reduced2":10.0,"standard":20.0}}]},{"name":"Denmark","code":"DK","country_code":"DK","periods":[{"effective_from":"0000-01-01","rates":{"standard":25.0}}]},{"name":"Romania","code":"RO","country_code":"RO","periods":[{"effective_from":"2017-01-01","rates":{"reduced1":5.0,"reduced2":9.0,"standard":19.0}},{"effective_from":"2016-01-01","rates":{"reduced1":5.0,"reduced2":9.0,"standard":20.0}},{"effective_from":"0000-01-01","rates":{"reduced1":5.0,"reduced2":9.0,"standard":24.0}}]},{"name":"United 
Kingdom","code":"UK","country_code":"GB","periods":[{"effective_from":"2011-01-04","rates":{"standard":20.0,"reduced":5.0}}]},{"name":"Sweden","code":"SE","country_code":"SE","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":6.0,"reduced2":12.0,"standard":25.0}}]},{"name":"Croatia","code":"HR","country_code":"HR","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":5.0,"reduced2":13.0,"standard":25.0}}]},{"name":"Finland","code":"FI","country_code":"FI","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":10.0,"reduced2":14.0,"standard":24.0}}]},{"name":"Netherlands","code":"NL","country_code":"NL","periods":[{"effective_from":"2012-10-01","rates":{"reduced":6.0,"standard":21.0}},{"effective_from":"0000-01-01","rates":{"reduced":6.0,"standard":19.0}}]},{"name":"Luxembourg","code":"LU","country_code":"LU","periods":[{"effective_from":"2016-01-01","rates":{"super_reduced":3.0,"reduced1":8.0,"standard":17.0,"parking":13.0}},{"effective_from":"2015-01-01","rates":{"super_reduced":3.0,"reduced1":8.0,"reduced2":14.0,"standard":17.0,"parking":12.0}},{"effective_from":"0000-01-01","rates":{"super_reduced":3.0,"reduced1":6.0,"reduced2":12.0,"standard":15.0,"parking":12.0}}]},{"name":"Belgium","code":"BE","country_code":"BE","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":6.0,"reduced2":12.0,"standard":21.0,"parking":12.0}}]},{"name":"Spain","code":"ES","country_code":"ES","periods":[{"effective_from":"0000-01-01","rates":{"super_reduced":4.0,"reduced":10.0,"standard":21.0}}]},{"name":"Lithuania","code":"LT","country_code":"LT","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":5.0,"reduced2":9.0,"standard":21.0}}]},{"name":"Greece","code":"EL","country_code":"GR","periods":[{"effective_from":"2016-06-01","rates":{"reduced1":6.0,"reduced2":13.5,"standard":24.0}},{"effective_from":"2016-01-01","rates":{"reduced1":6.0,"reduced2":13.5,"standard":23.0}},{"effective_from":"0000-01-01","rates":{"reduced1":6.5,"reduced2":13.0,"s
tandard":23.0}}]},{"name":"Latvia","code":"LV","country_code":"LV","periods":[{"effective_from":"0000-01-01","rates":{"reduced":12.0,"standard":21.0}}]},{"name":"Czech Republic","code":"CZ","country_code":"CZ","periods":[{"effective_from":"0000-01-01","rates":{"reduced":15.0,"standard":21.0}}]},{"name":"Malta","code":"MT","country_code":"MT","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":5.0,"reduced2":7.0,"standard":18.0}}]},{"name":"Italy","code":"IT","country_code":"IT","periods":[{"effective_from":"0000-01-01","rates":{"super_reduced":4.0,"reduced":10.0,"standard":22.0}}]},{"name":"Austria","code":"AT","country_code":"AT","periods":[{"effective_from":"2016-01-01","rates":{"reduced1":10.0,"reduced2":13.0,"standard":20.0,"parking":13.0}},{"effective_from":"0000-01-01","rates":{"reduced":10.0,"standard":20.0,"parking":12.0}}]},{"name":"Estonia","code":"EE","country_code":"EE","periods":[{"effective_from":"0000-01-01","rates":{"reduced":9.0,"standard":20.0}}]},{"name":"Bulgaria","code":"BG","country_code":"BG","periods":[{"effective_from":"0000-01-01","rates":{"reduced":9.0,"standard":20.0}}]},{"name":"Cyprus","code":"CY","country_code":"CY","periods":[{"effective_from":"0000-01-01","rates":{"reduced1":5.0,"reduced2":9.0,"standard":19.0}}]},{"name":"Ireland","code":"IE","country_code":"IE","periods":[{"effective_from":"0000-01-01","rates":{"super_reduced":4.8,"reduced1":9.0,"reduced2":13.5,"standard":23.0,"parking":13.5}}]}]} -------------------------------------------------------------------------------- /spec/fixtures/json/temp.json: -------------------------------------------------------------------------------- 1 | {"description":{"title":"Contiguous U.S., Average Temperature, January-December","units":"Degrees 
Fahrenheit","base_period":"1901-2000","missing":-9999},"data":{"189512":{"value":"50.34","anomaly":"-1.68"},"189612":{"value":"51.99","anomaly":"-0.03"},"189712":{"value":"51.56","anomaly":"-0.46"},"189812":{"value":"51.43","anomaly":"-0.59"},"189912":{"value":"51.01","anomaly":"-1.01"},"190012":{"value":"52.77","anomaly":"0.75"},"190112":{"value":"51.87","anomaly":"-0.15"},"190212":{"value":"51.59","anomaly":"-0.43"},"190312":{"value":"50.62","anomaly":"-1.40"},"190412":{"value":"51.16","anomaly":"-0.86"},"190512":{"value":"51.00","anomaly":"-1.02"},"190612":{"value":"51.73","anomaly":"-0.29"},"190712":{"value":"51.48","anomaly":"-0.54"},"190812":{"value":"52.08","anomaly":"0.06"},"190912":{"value":"51.43","anomaly":"-0.59"},"191012":{"value":"52.42","anomaly":"0.40"},"191112":{"value":"52.03","anomaly":"0.01"},"191212":{"value":"50.23","anomaly":"-1.79"},"191312":{"value":"51.54","anomaly":"-0.48"},"191412":{"value":"51.84","anomaly":"-0.18"},"191512":{"value":"51.45","anomaly":"-0.57"},"191612":{"value":"50.85","anomaly":"-1.17"},"191712":{"value":"50.06","anomaly":"-1.96"},"191812":{"value":"51.87","anomaly":"-0.15"},"191912":{"value":"51.55","anomaly":"-0.47"},"192012":{"value":"51.07","anomaly":"-0.95"},"192112":{"value":"53.80","anomaly":"1.78"},"192212":{"value":"52.03","anomaly":"0.01"},"192312":{"value":"51.64","anomaly":"-0.38"},"192412":{"value":"50.59","anomaly":"-1.43"},"192512":{"value":"52.52","anomaly":"0.50"},"192612":{"value":"51.95","anomaly":"-0.07"},"192712":{"value":"52.15","anomaly":"0.13"},"192812":{"value":"51.92","anomaly":"-0.10"},"192912":{"value":"50.85","anomaly":"-1.17"},"193012":{"value":"51.98","anomaly":"-0.04"},"193112":{"value":"53.54","anomaly":"1.52"},"193212":{"value":"51.73","anomaly":"-0.29"},"193312":{"value":"52.99","anomaly":"0.97"},"193412":{"value":"54.10","anomaly":"2.08"},"193512":{"value":"51.90","anomaly":"-0.12"},"193612":{"value":"52.15","anomaly":"0.13"},"193712":{"value":"51.55","anomaly":"-0.47"},"193812":{"val
ue":"53.18","anomaly":"1.16"},"193912":{"value":"53.26","anomaly":"1.24"},"194012":{"value":"51.89","anomaly":"-0.13"},"194112":{"value":"52.66","anomaly":"0.64"},"194212":{"value":"51.84","anomaly":"-0.18"},"194312":{"value":"52.07","anomaly":"0.05"},"194412":{"value":"51.83","anomaly":"-0.19"},"194512":{"value":"51.75","anomaly":"-0.27"},"194612":{"value":"52.95","anomaly":"0.93"},"194712":{"value":"51.92","anomaly":"-0.10"},"194812":{"value":"51.61","anomaly":"-0.41"},"194912":{"value":"52.02","anomaly":"0.00"},"195012":{"value":"51.39","anomaly":"-0.63"},"195112":{"value":"51.12","anomaly":"-0.90"},"195212":{"value":"52.27","anomaly":"0.25"},"195312":{"value":"53.37","anomaly":"1.35"},"195412":{"value":"53.33","anomaly":"1.31"},"195512":{"value":"51.69","anomaly":"-0.33"},"195612":{"value":"52.34","anomaly":"0.32"},"195712":{"value":"52.04","anomaly":"0.02"},"195812":{"value":"51.93","anomaly":"-0.09"},"195912":{"value":"52.11","anomaly":"0.09"},"196012":{"value":"51.44","anomaly":"-0.58"},"196112":{"value":"51.87","anomaly":"-0.15"},"196212":{"value":"51.90","anomaly":"-0.12"},"196312":{"value":"52.26","anomaly":"0.24"},"196412":{"value":"51.67","anomaly":"-0.35"},"196512":{"value":"51.69","anomaly":"-0.33"},"196612":{"value":"51.49","anomaly":"-0.53"},"196712":{"value":"51.76","anomaly":"-0.26"},"196812":{"value":"51.32","anomaly":"-0.70"},"196912":{"value":"51.50","anomaly":"-0.52"},"197012":{"value":"51.61","anomaly":"-0.41"},"197112":{"value":"51.66","anomaly":"-0.36"},"197212":{"value":"51.37","anomaly":"-0.65"},"197312":{"value":"52.29","anomaly":"0.27"},"197412":{"value":"52.26","anomaly":"0.24"},"197512":{"value":"51.50","anomaly":"-0.52"},"197612":{"value":"51.47","anomaly":"-0.55"},"197712":{"value":"52.55","anomaly":"0.53"},"197812":{"value":"51.05","anomaly":"-0.97"},"197912":{"value":"50.88","anomaly":"-1.14"},"198012":{"value":"52.39","anomaly":"0.37"},"198112":{"value":"53.12","anomaly":"1.10"},"198212":{"value":"51.35","anomaly":"-0.67"},"198312
":{"value":"51.88","anomaly":"-0.14"},"198412":{"value":"51.98","anomaly":"-0.04"},"198512":{"value":"51.30","anomaly":"-0.72"},"198612":{"value":"53.32","anomaly":"1.30"},"198712":{"value":"53.33","anomaly":"1.31"},"198812":{"value":"52.63","anomaly":"0.61"},"198912":{"value":"51.84","anomaly":"-0.18"},"199012":{"value":"53.51","anomaly":"1.49"},"199112":{"value":"53.16","anomaly":"1.14"},"199212":{"value":"52.60","anomaly":"0.58"},"199312":{"value":"51.26","anomaly":"-0.76"},"199412":{"value":"52.87","anomaly":"0.85"},"199512":{"value":"52.65","anomaly":"0.63"},"199612":{"value":"51.89","anomaly":"-0.13"},"199712":{"value":"52.20","anomaly":"0.18"},"199812":{"value":"54.23","anomaly":"2.21"},"199912":{"value":"53.88","anomaly":"1.86"},"200012":{"value":"53.27","anomaly":"1.25"},"200112":{"value":"53.70","anomaly":"1.68"},"200212":{"value":"53.21","anomaly":"1.19"},"200312":{"value":"53.26","anomaly":"1.24"},"200412":{"value":"53.10","anomaly":"1.08"},"200512":{"value":"53.64","anomaly":"1.62"},"200612":{"value":"54.25","anomaly":"2.23"},"200712":{"value":"53.65","anomaly":"1.63"},"200812":{"value":"52.29","anomaly":"0.27"},"200912":{"value":"52.39","anomaly":"0.37"},"201012":{"value":"52.98","anomaly":"0.96"},"201112":{"value":"53.18","anomaly":"1.16"},"201212":{"value":"55.28","anomaly":"3.26"},"201312":{"value":"52.43","anomaly":"0.41"},"201412":{"value":"52.54","anomaly":"0.52"},"201512":{"value":"54.40","anomaly":"2.38"},"201612":{"value":"54.91","anomaly":"2.89"}}} -------------------------------------------------------------------------------- /spec/fixtures/log/apache.log: -------------------------------------------------------------------------------- 1 | 77.179.66.156 - - [07/Dec/2016:10:34:43 +0100] "GET /favicon.ico HTTP/1.1" 404 571 "http://localhost:8080/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36" 2 | 
-------------------------------------------------------------------------------- /spec/fixtures/log/rails.log: -------------------------------------------------------------------------------- 1 | Started GET "/articles/9" for 127.0.0.1 at 2018-03-12 17:41:18 +0530 2 | Processing by ArticlesController#show as HTML 3 | Parameters: {"id"=>"9"} 4 | Article Load (1.4ms) SELECT "articles".* FROM "articles" WHERE "articles"."id" = ? LIMIT ? [["id", 9], ["LIMIT", 1]] 5 | Rendering articles/show.html.erb within layouts/application 6 | Rendered articles/show.html.erb within layouts/application (2.9ms) 7 | Completed 200 OK in 97ms (Views: 50.6ms | ActiveRecord: 1.4ms) 8 | -------------------------------------------------------------------------------- /spec/fixtures/log/s3.log: -------------------------------------------------------------------------------- 1 | 8aefdcbf18ef13fe9f82be73697a945f423e4299c995e9e96bb619975db40bd6 drat [12/Jun/2015:05:40:10 +0000] 208.54.5.245 - 40D2FE0D1C76A065 WEBSITE.GET.OBJECT file "GET /file HTTP/1.1" 200 - 5 5 20 19 "-" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0" - 2 | -------------------------------------------------------------------------------- /spec/fixtures/plaintext/empties.dat: -------------------------------------------------------------------------------- 1 | 1 2 3 2 | 4 6 3 | -------------------------------------------------------------------------------- /spec/fixtures/plaintext/strings.dat: -------------------------------------------------------------------------------- 1 | test 1 2 2 | foo 3 4 3 | -------------------------------------------------------------------------------- /spec/fixtures/rdata/ACScounty.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rdata/ACScounty.RData 
-------------------------------------------------------------------------------- /spec/fixtures/rdata/FRED-cpi-house.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rdata/FRED-cpi-house.RData -------------------------------------------------------------------------------- /spec/fixtures/rdata/Filings-by-state.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rdata/Filings-by-state.RData -------------------------------------------------------------------------------- /spec/fixtures/rdata/Ownership.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rdata/Ownership.RData -------------------------------------------------------------------------------- /spec/fixtures/rdata/case-shiller.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rdata/case-shiller.RData -------------------------------------------------------------------------------- /spec/fixtures/rdata/state-migration.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rdata/state-migration.RData -------------------------------------------------------------------------------- /spec/fixtures/rdata/zip-county.RData: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rdata/zip-county.RData -------------------------------------------------------------------------------- /spec/fixtures/rds/RPPdataConverted.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rds/RPPdataConverted.rds -------------------------------------------------------------------------------- /spec/fixtures/rds/bc_sites.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rds/bc_sites.rds -------------------------------------------------------------------------------- /spec/fixtures/rds/chicago.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rds/chicago.rds -------------------------------------------------------------------------------- /spec/fixtures/rds/healthexp.Rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rds/healthexp.Rds -------------------------------------------------------------------------------- /spec/fixtures/rds/heights.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rds/heights.RDS -------------------------------------------------------------------------------- /spec/fixtures/rds/maacs_env.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SciRuby/daru-io/dce4be1089a6c9902d91a857e0cd43cf1f9832bc/spec/fixtures/rds/maacs_env.rds -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'simplecov' 2 | SimpleCov.start do 3 | add_filter 'spec' 4 | # minimum_coverage_by_file 95 5 | end 6 | 7 | require 'bundler/setup' 8 | 9 | require 'rspec' 10 | require 'rspec/its' 11 | require 'webmock/rspec' 12 | require 'saharspec/its/call' 13 | 14 | require 'tempfile' 15 | require 'open-uri' 16 | 17 | require 'dbd/SQLite3' 18 | require 'active_record' 19 | require 'redis' 20 | require 'dbi' 21 | require 'jsonpath' 22 | require 'nokogiri' 23 | require 'mongo' 24 | require 'spreadsheet' 25 | require 'sqlite3' 26 | 27 | require 'daru/io' 28 | 29 | require_relative 'support/shared_contexts' 30 | require_relative 'support/shared_examples' 31 | require_relative 'support/custom_matchers' 32 | 33 | RSpec::Expectations.configuration.warn_about_potential_false_positives = false 34 | 35 | RSpec.configure do |config| 36 | # Enable flags like --only-failures and --next-failure 37 | config.example_status_persistence_file_path = '.rspec_status' 38 | 39 | # Disable RSpec exposing methods globally on `Module` and `main` 40 | config.disable_monkey_patching! 41 | 42 | config.expect_with :rspec do |c| 43 | c.syntax = :expect 44 | end 45 | end 46 | 47 | class String 48 | # allows to pretty test agains multiline strings: 49 | # %Q{ 50 | # |test 51 | # |me 52 | # }.unindent # => 53 | # "test 54 | # me" 55 | def unindent 56 | gsub(/\n\s+?\|/, "\n") # for all lines looking like "|" -- remove this. 
57 | .gsub(/\|\n/, "\n") # allow to write trailing space not removed by editor 58 | .gsub(/^\n|\n\s+$/, '') # remove empty strings before and after 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /spec/support/custom_matchers.rb: -------------------------------------------------------------------------------- 1 | RSpec::Matchers.define :be_boolean do 2 | match do |actual| 3 | [true, false].include?(actual) 4 | end 5 | end 6 | 7 | RSpec::Matchers.define :ordered_data do |expected| 8 | match do |actual| 9 | actual = actual.to_a.map { |x| x.data.to_a } 10 | actual == expected 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /spec/support/shared_contexts.rb: -------------------------------------------------------------------------------- 1 | RSpec.shared_context 'sqlite3 database setup' do 2 | module Daru 3 | module IO 4 | module Rspec 5 | class Account < ActiveRecord::Base 6 | self.table_name = 'accounts' 7 | end 8 | end 9 | end 10 | end 11 | 12 | before do 13 | FileUtils.rm(db_name) if File.file?(db_name) 14 | SQLite3::Database.new(db_name).tap do |db| 15 | db.execute 'create table accounts(id integer, name varchar, age integer, primary key(id))' 16 | db.execute "insert into accounts values(1, 'Homer', 20)" 17 | db.execute "insert into accounts values(2, 'Marge', 30)" 18 | end 19 | Daru::IO::Rspec::Account.establish_connection "sqlite3:#{db_name}" 20 | end 21 | 22 | subject { Daru::IO::Importers::ActiveRecord.from(relation).call(*fields) } 23 | 24 | let(:db_name) { 'daru_test' } 25 | let(:relation) { Daru::IO::Rspec::Account.all } 26 | 27 | after { FileUtils.rm(db_name) } 28 | end 29 | 30 | RSpec.shared_context 'exporter setup' do 31 | let(:tempfile) { Tempfile.new(filename) } 32 | let(:opts) { {} } 33 | let(:df) do 34 | Daru::DataFrame.new( 35 | 'a' => [1,2,3,4,5], 36 | 'b' => [11,22,33,44,55], 37 | 'c' => ['a', 'g', 4, 5,'addadf'], 38 | 'd' => [nil, 23, 
4,'a','ff'] 39 | ) 40 | end 41 | 42 | def convert(input) 43 | if input.to_i.to_s == input # Integer in string 44 | input.to_i 45 | elsif input.to_f.to_s == input # Float in string 46 | input.to_f 47 | elsif input == 'nil' # nil in string 48 | nil 49 | else 50 | input # Just string 51 | end 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /spec/support/shared_examples.rb: -------------------------------------------------------------------------------- 1 | RSpec.shared_examples 'a daru dataframe' do |name: nil, nrows: nil, ncols: nil, **opts| 2 | it { is_expected.to be_a(Daru::DataFrame) } 3 | 4 | its(:name) { is_expected.to eq(name) } if name 5 | its(:ncols) { is_expected.to eq(ncols) } if ncols 6 | its(:nrows) { is_expected.to eq(nrows) } if nrows 7 | 8 | opts.each { |key, value| its(key.to_sym) { is_expected.to eq(value) } } 9 | end 10 | 11 | RSpec.shared_examples 'exact daru dataframe' do |dataframe: nil, data: nil, nrows: nil, ncols: nil, order: nil, index: nil, name: nil, **opts| # rubocop:disable Metrics/LineLength 12 | it_behaves_like 'a daru dataframe', 13 | name: name, 14 | nrows: nrows, 15 | ncols: ncols, 16 | **opts 17 | 18 | it { is_expected.to eq(dataframe) } if dataframe 19 | its(:data) { is_expected.to ordered_data(data) } if data 20 | its(:index) { is_expected.to eq(index.to_index) } if index 21 | its(:vectors) { is_expected.to eq(order.to_index) } if order 22 | end 23 | 24 | RSpec.shared_examples 'importer with json-path option' do 25 | context 'in temperature data' do 26 | let(:path) { 'spec/fixtures/json/temp.json' } 27 | 28 | context 'with only jsonpath columns' do 29 | let(:columns) { %w[value anomaly].map { |x| '$..data..'+x } } 30 | 31 | it_behaves_like 'exact daru dataframe', 32 | ncols: 2, 33 | nrows: 122, 34 | order: (0..1).to_a 35 | end 36 | 37 | context 'with only jsonpath named columns' do 38 | let(:named_columns) { {Anom: '$..data..anomaly', Val: '$..data..value'} } 39 | 40 | 
it_behaves_like 'exact daru dataframe', 41 | ncols: 2, 42 | nrows: 122, 43 | order: %i[Anom Val] 44 | end 45 | 46 | context 'with both jsonpath columns and named columns' do 47 | let(:columns) { %w[$..data..anomaly] } 48 | let(:named_columns) { {Val: '$..data..value'} } 49 | 50 | it_behaves_like 'exact daru dataframe', 51 | ncols: 2, 52 | nrows: 122, 53 | order: [0, :Val] 54 | end 55 | end 56 | 57 | context 'in tv series data' do 58 | let(:path) { 'spec/fixtures/json/got.json' } 59 | 60 | context 'with jsonpath columns' do 61 | let(:columns) do 62 | %w[name season number runtime] 63 | .map { |x| '$.._embedded..episodes..' + x } 64 | end 65 | 66 | it_behaves_like 'exact daru dataframe', 67 | ncols: 4, 68 | nrows: 61, 69 | order: (0..3).to_a 70 | end 71 | 72 | context 'with jsonpath named columns' do 73 | let(:named_columns) do 74 | { 75 | Name: '$.._embedded..episodes..name', 76 | Season: '$.._embedded..episodes..season', 77 | Number: '$.._embedded..episodes..number', 78 | Runtime: '$.._embedded..episodes..runtime' 79 | } 80 | end 81 | 82 | it_behaves_like 'exact daru dataframe', 83 | ncols: 4, 84 | nrows: 61, 85 | order: %i[Name Season Number Runtime] 86 | end 87 | 88 | context 'with jsonpath columns' do 89 | let(:columns) { %w[$.._embedded..episodes..name $.._embedded..episodes..season] } 90 | let(:named_columns) do 91 | { 92 | Number: '$.._embedded..episodes..number', 93 | Runtime: '$.._embedded..episodes..runtime' 94 | } 95 | end 96 | 97 | it_behaves_like 'exact daru dataframe', 98 | ncols: 4, 99 | nrows: 61, 100 | order: [0, 1, :Number, :Runtime] 101 | end 102 | end 103 | 104 | context 'on allsets data' do 105 | let(:path) { 'spec/fixtures/json/allsets.json' } 106 | 107 | context 'with jsonpath columns' do 108 | let(:columns) { %w[artist cmc mciNumber].map { |x| '$..LEA..cards..' 
+ x } } 109 | let(:index) { '$..LEA..cards..multiverseid' } 110 | 111 | it_behaves_like 'exact daru dataframe', 112 | ncols: 3, 113 | nrows: 18, 114 | order: (0..2).to_a, 115 | index: [94, 95, 96, 48, 232, 1, 233, 140, 49, 279, 234, 2, 280, 235, 141, 142, 50, 3] 116 | end 117 | end 118 | 119 | context 'on VAT data' do 120 | let(:path) { 'spec/fixtures/json/jsonvat.json' } 121 | 122 | context 'with jsonpath columns' do 123 | let(:columns) { %w[name periods].map { |x| '$..rates..'+x } } 124 | let(:index) { '$..rates..code' } 125 | 126 | it_behaves_like 'exact daru dataframe', 127 | ncols: 2, 128 | nrows: 28, 129 | order: [0, 1], 130 | index: %w[DE PL HU SI SK PT FR DK RO UK SE HR FI NL LU BE ES LT EL LV CZ MT IT AT EE BG CY IE] 131 | end 132 | end 133 | end 134 | --------------------------------------------------------------------------------