├── .gitignore ├── Gemfile ├── Gemfile.lock ├── MIT-LICENSE ├── README.md ├── Rakefile ├── lib ├── generators │ └── install_generator.rb ├── rimportor.rb ├── rimportor │ ├── active_record │ │ ├── adapter │ │ │ └── mysql2.rb │ │ ├── import.rb │ │ └── sql_builder.rb │ ├── error │ │ ├── bulk_validation.rb │ │ └── invalid_adapter.rb │ ├── plugin.rb │ ├── util │ │ └── connection.rb │ └── version.rb └── templates │ └── rimportor.rb └── rimportor.gemspec /.gitignore: -------------------------------------------------------------------------------- 1 | .bundle/ 2 | log/*.log 3 | pkg/ 4 | spec/test_app 5 | .ruby-gemset 6 | .ruby-version 7 | .idea -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Declare your gem's dependencies in rimportor.gemspec. 4 | # Bundler will treat runtime dependencies like base dependencies, and 5 | # development dependencies will be added by default to the :development group. 6 | gemspec 7 | 8 | # Declare any dependencies that are still in development here instead of in 9 | # your gemspec. These might include edge Rails or gems from your path or 10 | # Git. Remember to move these dependencies to your gemspec before releasing 11 | # your gem to rubygems.org. 12 | 13 | # To use a debugger 14 | # gem 'byebug', group: [:development, :test] 15 | 16 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: . 
3 | specs: 4 | rimportor (0.3) 5 | parallel (~> 1.5, >= 1.5.0) 6 | rails (~> 4.0, >= 4.0.0) 7 | 8 | GEM 9 | remote: https://rubygems.org/ 10 | specs: 11 | actionmailer (4.2.4) 12 | actionpack (= 4.2.4) 13 | actionview (= 4.2.4) 14 | activejob (= 4.2.4) 15 | mail (~> 2.5, >= 2.5.4) 16 | rails-dom-testing (~> 1.0, >= 1.0.5) 17 | actionpack (4.2.4) 18 | actionview (= 4.2.4) 19 | activesupport (= 4.2.4) 20 | rack (~> 1.6) 21 | rack-test (~> 0.6.2) 22 | rails-dom-testing (~> 1.0, >= 1.0.5) 23 | rails-html-sanitizer (~> 1.0, >= 1.0.2) 24 | actionview (4.2.4) 25 | activesupport (= 4.2.4) 26 | builder (~> 3.1) 27 | erubis (~> 2.7.0) 28 | rails-dom-testing (~> 1.0, >= 1.0.5) 29 | rails-html-sanitizer (~> 1.0, >= 1.0.2) 30 | activejob (4.2.4) 31 | activesupport (= 4.2.4) 32 | globalid (>= 0.3.0) 33 | activemodel (4.2.4) 34 | activesupport (= 4.2.4) 35 | builder (~> 3.1) 36 | activerecord (4.2.4) 37 | activemodel (= 4.2.4) 38 | activesupport (= 4.2.4) 39 | arel (~> 6.0) 40 | activesupport (4.2.4) 41 | i18n (~> 0.7) 42 | json (~> 1.7, >= 1.7.7) 43 | minitest (~> 5.1) 44 | thread_safe (~> 0.3, >= 0.3.4) 45 | tzinfo (~> 1.1) 46 | arel (6.0.3) 47 | builder (3.2.2) 48 | diff-lcs (1.2.5) 49 | erubis (2.7.0) 50 | globalid (0.3.6) 51 | activesupport (>= 4.1.0) 52 | i18n (0.7.0) 53 | json (1.8.3) 54 | loofah (2.0.3) 55 | nokogiri (>= 1.5.9) 56 | mail (2.6.3) 57 | mime-types (>= 1.16, < 3) 58 | mime-types (2.6.2) 59 | mini_portile (0.6.2) 60 | minitest (5.8.2) 61 | nokogiri (1.6.6.2) 62 | mini_portile (~> 0.6.0) 63 | parallel (1.6.1) 64 | rack (1.6.4) 65 | rack-test (0.6.3) 66 | rack (>= 1.0) 67 | rails (4.2.4) 68 | actionmailer (= 4.2.4) 69 | actionpack (= 4.2.4) 70 | actionview (= 4.2.4) 71 | activejob (= 4.2.4) 72 | activemodel (= 4.2.4) 73 | activerecord (= 4.2.4) 74 | activesupport (= 4.2.4) 75 | bundler (>= 1.3.0, < 2.0) 76 | railties (= 4.2.4) 77 | sprockets-rails 78 | rails-deprecated_sanitizer (1.0.3) 79 | activesupport (>= 4.2.0.alpha) 80 | rails-dom-testing (1.0.7) 81 | 
activesupport (>= 4.2.0.beta, < 5.0) 82 | nokogiri (~> 1.6.0) 83 | rails-deprecated_sanitizer (>= 1.0.1) 84 | rails-html-sanitizer (1.0.2) 85 | loofah (~> 2.0) 86 | railties (4.2.4) 87 | actionpack (= 4.2.4) 88 | activesupport (= 4.2.4) 89 | rake (>= 0.8.7) 90 | thor (>= 0.18.1, < 2.0) 91 | rake (10.4.2) 92 | rspec-core (3.0.4) 93 | rspec-support (~> 3.0.0) 94 | rspec-expectations (3.0.4) 95 | diff-lcs (>= 1.2.0, < 2.0) 96 | rspec-support (~> 3.0.0) 97 | rspec-mocks (3.0.4) 98 | rspec-support (~> 3.0.0) 99 | rspec-rails (3.0.2) 100 | actionpack (>= 3.0) 101 | activesupport (>= 3.0) 102 | railties (>= 3.0) 103 | rspec-core (~> 3.0.0) 104 | rspec-expectations (~> 3.0.0) 105 | rspec-mocks (~> 3.0.0) 106 | rspec-support (~> 3.0.0) 107 | rspec-support (3.0.4) 108 | sprockets (3.4.0) 109 | rack (> 1, < 3) 110 | sprockets-rails (2.3.3) 111 | actionpack (>= 3.0) 112 | activesupport (>= 3.0) 113 | sprockets (>= 2.8, < 4.0) 114 | sqlite3 (1.3.11) 115 | thor (0.19.1) 116 | thread_safe (0.3.5) 117 | tzinfo (1.2.2) 118 | thread_safe (~> 0.1) 119 | 120 | PLATFORMS 121 | ruby 122 | 123 | DEPENDENCIES 124 | rimportor! 
125 | rspec-rails (~> 3.0.0, >= 3.0.0) 126 | sqlite3 127 | 128 | BUNDLED WITH 129 | 1.10.6 130 | -------------------------------------------------------------------------------- /MIT-LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2015 Erwin Schens 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rimportor 2 | 3 | Rimportor is a new and modern bulk importing gem. 4 | It utilizes arel under the hood to generate insert statements. 5 | By working directly on the model Rimportor is able to execute callbacks and validate the records before inserting them into the database - **which is missing in most importing gems**. 
6 | 7 | ### Features 8 | - Import in batches 9 | - Validation of the bulk 10 | - Callback execution 11 | 12 | ## Installation 13 | 14 | Add this line to your application's Gemfile: 15 | 16 | ```ruby 17 | gem 'rimportor', '~> 0.3' 18 | ``` 19 | 20 | And then execute: 21 | 22 | $ bundle 23 | 24 | Or install it yourself as: 25 | 26 | $ gem install rimportor 27 | 28 | ## Usage 29 | 30 | Rimportor adds an additional class method called `rimport` to every ActiveRecord model. This method takes a collection of the records you want to persist. 31 | Let me give you an example. 32 | ```ruby 33 | users = [] 34 | 1000.times.each { users << User.new(some_params) } 35 | User.rimport users # Imports your collection as a bulk insert to your database 36 | ``` 37 | But wait... what about validations and callbacks of my bulk? 38 | Rimportor got you! Just add some configuration options for your rimport. 39 | Let me show you what I mean. 40 | ```ruby 41 | users = [] 42 | 1000.times.each { users << User.new(some_params) } 43 | 44 | # true if bulk valid and imported else false 45 | User.rimport users, before_callbacks: true, 46 | after_callbacks: true, 47 | validate_bulk: true 48 | ``` 49 | The rimport method returns true if your bulk is valid and all callbacks are executed. 50 | If an error occurs Rimportor won't insert your bulk in the database. 51 | 52 | And what if I want to insert my records in batches? Rimportor got your back on that too. 53 | ```ruby 54 | users = [] 55 | 1000.times.each { users << User.new(some_params) } 56 | 57 | # Rimportor will insert the 1000 records in 10 chunks of 100 records each 58 | User.rimport users, batch_size: 100 59 | ``` 60 | 61 | ## Supported Databases 62 | 63 | - MySQL 64 | 65 | ## Benchmarks 66 | 67 | The benchmarks below were done with MySQL 5.6.26 on Mac OS X 10.11.1; tests were run against the InnoDB engine. 
68 | The following ActiveRecord model was used for this benchmark: 69 | ```ruby 70 | # == Schema Information 71 | # 72 | # Table name: test_dummies 73 | # 74 | # id :integer not null, primary key 75 | # lorem :string(255) 76 | # lorem2 :string(255) 77 | # created_at :datetime 78 | # updated_at :datetime 79 | # 80 | class TestDummy < ActiveRecord::Base 81 | validates_presence_of :lorem 82 | end 83 | ``` 84 | The following statement was used for importing the records: 85 | ```ruby 86 | TestDummy.rimport test_dummies, validate_bulk: true, batch_size: 5000 87 | ``` 88 | All times are displayed in seconds. Every record was validated, and a batch size of 5000 together with 4 threads was used. 89 | 90 | ## [![](http://i.imgur.com/kJJWImi.png)](http://qurasoft.de) 91 | 92 | ## Development 93 | 94 | After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment. 95 | 96 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 97 | 98 | ## Contributing 99 | 100 | 1. Fork it 101 | 2. Create your feature branch (`git checkout -b my-new-feature`) 102 | 3. Commit your changes (`git commit -am 'Add some feature'`) 103 | 4. Push to the branch (`git push origin my-new-feature`) 104 | 5. 
# Create a new Pull Request
# --------------------------------------------------------------------------------
# /Rakefile:
# --------------------------------------------------------------------------------
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# Builds the RDoc documentation into ./rdoc from the README and every file under lib/.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'Rimportor'
  rdoc.options << '--line-numbers'
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Registers Bundler's gem tasks (the README refers to `rake install` and `rake release`).
Bundler::GemHelper.install_tasks

# --------------------------------------------------------------------------------
# /lib/generators/install_generator.rb:
# --------------------------------------------------------------------------------
require 'rails/generators'

module Rimportor
  # Rails generator that installs the Rimportor initializer into the host application.
  class InstallGenerator < ::Rails::Generators::Base
    # Templates are resolved relative to the directory containing this file.
    source_root(File.expand_path(File.dirname(__FILE__)))

    # Copies lib/templates/rimportor.rb to config/initializers/rimportor.rb.
    def copy_initializer
      copy_file '../templates/rimportor.rb', 'config/initializers/rimportor.rb'
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/rimportor.rb:
# --------------------------------------------------------------------------------
require 'rimportor/active_record/sql_builder'
require 'rimportor/active_record/import'
require 'rimportor/plugin'
require 'rimportor/error/bulk_validation'
require 'rimportor/error/invalid_adapter'
require 'rimportor/active_record/adapter/mysql2'
require 'rimportor/util/connection'
require 'generators/install_generator'

# Top-level namespace of the gem; holds the global configuration.
module Rimportor
  class << self
    attr_writer :configuration

    # Returns the current configuration, creating one with default values on
    # first access. Without the lazy default, `Rimportor.configuration.threads`
    # (read by Import#initialize) fails with NoMethodError whenever the host
    # application never called `Rimportor.configure`.
    # @return [Rimportor::Configuration]
    def configuration
      @configuration ||= Configuration.new
    end
  end

  # Yields the configuration object so it can be customised, e.g. from an initializer.
  # @yieldparam configuration [Rimportor::Configuration]
  # @return [Object, nil] the block's result, or nil when no block is given
  def self.configure
    yield(configuration) if block_given?
  end

  # Settings container for the gem.
  class Configuration
    # Number of threads handed to Parallel for validations, callbacks and statement building.
    attr_accessor :threads

    def initialize
      @threads = 4
    end
  end
end

ActiveRecord::Base.send :include, Rimportor::Plugin

# --------------------------------------------------------------------------------
# /lib/rimportor/active_record/adapter/mysql2.rb:
# --------------------------------------------------------------------------------
module Rimportor
  module ActiveRecord
    module Adapter
      # Executes bulk insert statements through the ActiveRecord connection pool
      # and guards against statements larger than the server accepts.
      class Mysql2
        # Raised when a bulk statement exceeds the server's max_allowed_packet.
        StatementTooBig = Class.new(StandardError)

        # Returns maximum number of bytes that the server will accept for a query.
        # The value is queried once and cached for the lifetime of this adapter instance.
        # @return [Integer] number of maximum allowed packet size
        def max_allowed_packet
          @max_allowed_packet ||= exec_in_pool do |connection|
            result = connection.execute("SHOW VARIABLES like 'max_allowed_packet';")
            # The result is read via fetch_row when available, otherwise via first.
            val = result.respond_to?(:fetch_row) ? result.fetch_row[1] : result.first[1]
            val.to_i
          end
        end

        # Yields a connection provided by Rimportor::Util::Connection.in_pool.
        # @yieldparam connection the connection handed out by the pool
        # @return [Object] whatever the pool helper returns for the block
        def exec_in_pool
          ::Rimportor::Util::Connection.in_pool do |connection|
            yield(connection)
          end
        end

        # Checks if the given statement is too big for the database insert.
        # The limit is expressed in bytes, so the byte size (not the character
        # count) of the statement is compared.
        # @param statement [String] complete SQL statement
        # @return [TrueClass, FalseClass] true if the statement size is too big for the database else false
        def statement_too_big?(statement)
          statement.bytesize > max_allowed_packet
        end

        # Assembles and executes one bulk insert.
        # @param import_statement [Array] pair of [full insert statement of the
        #   first record, array of value tuples of the remaining records]
        # @raise [StatementTooBig] if the assembled statement exceeds max_allowed_packet
        def exec_insert(import_statement)
          insert_statement, value_statements = import_statement
          # Joining once avoids a trailing comma (invalid SQL) when the batch
          # contains a single record and value_statements is empty.
          statement = [insert_statement, *value_statements].join(',')
          if statement_too_big?(statement)
            # Raising instead of printing lets the import report failure rather
            # than silently dropping the batch.
            raise StatementTooBig,
                  "Statement too big (#{statement.bytesize} bytes, max_allowed_packet is " \
                  "#{max_allowed_packet} bytes). Try a smaller batch_size."
          end
          exec_statement(statement)
        end

        # Executes a raw SQL statement on a pooled connection.
        # @param statement [String]
        def exec_statement(statement)
          exec_in_pool { |connection| connection.execute(statement) }
        end
      end
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/rimportor/active_record/import.rb:
# --------------------------------------------------------------------------------
require 'parallel'

module Rimportor
  module ActiveRecord
    # Runs one bulk import: optional validation, optional callbacks and the
    # batched insert statements.
    class Import
      # Batch size used when the caller does not pass :batch_size.
      DEFAULT_BATCH_SIZE = 1000

      # @param bulk [Enumerable] records to import
      # @param adapter [#exec_insert, #exec_in_pool] database adapter
      # @param opts [Hash] :before_callbacks, :after_callbacks, :validate_bulk
      #   (all coerced to booleans) and :batch_size (defaults to 1000)
      def initialize(bulk, adapter, opts = {})
        @bulk = bulk
        @adapter = adapter
        @before_callbacks = !!opts[:before_callbacks]
        @after_callbacks = !!opts[:after_callbacks]
        @validate_bulk = !!opts[:validate_bulk]
        @batch_size = opts[:batch_size] || DEFAULT_BATCH_SIZE
        @threads = ::Rimportor.configuration.threads
      end

      # Runs execute_callbacks(element, :before) for every record, in threads.
      def run_before_callbacks
        run_callbacks_for(:before)
      end

      # Runs execute_callbacks(element, :after) for every record, in threads.
      def run_after_callbacks
        run_callbacks_for(:after)
      end

      # Validates every record in threads.
      # @raise [Rimportor::Error::BulkValidation] if at least one record is invalid
      def run_validations
        bulk_valid = ::Parallel.map(@bulk, in_threads: @threads, &:valid?).all?
        raise ::Rimportor::Error::BulkValidation, 'Your bulk is not valid' unless bulk_valid
      end

      # Runs the record's :save callback chain around a block that returns
      # false (:before) or true (:after).
      # NOTE(review): presumably the false block result is what keeps the after
      # callbacks from firing in the :before case — confirm against the
      # targeted Rails version.
      # @param element [Object] record responding to run_callbacks
      # @param before_or_after [Symbol] :before or :after; anything else is a no-op
      def execute_callbacks(element, before_or_after)
        case before_or_after
        when :before
          element.run_callbacks(:save) { false }
        when :after
          element.run_callbacks(:save) { true }
        end
      end

      # Builds the pieces of one bulk insert for the given batch.
      # @param batch [Array] non-empty slice of the bulk
      # @return [Array] [full insert statement for the first record,
      #   array of value tuples for the remaining records]
      def import_statement(batch)
        insert_statement = SqlBuilder.new(batch.first).full_insert_statement
        value_statements = ::Parallel.map(batch.drop(1), in_threads: @threads) do |element|
          # Strip the VALUES keyword so only the value tuple remains.
          @adapter.exec_in_pool { SqlBuilder.new(element).partial_insert_statement.gsub('VALUES', '') }
        end
        [insert_statement, value_statements]
      end

      # Executes the whole import.
      # Any StandardError raised along the way is printed and reported as false.
      # @return [Boolean] true on success, false if an error occurred
      def exec_statement
        run_validations if @validate_bulk
        run_before_callbacks if @before_callbacks
        @bulk.each_slice(@batch_size) do |batch|
          @adapter.exec_insert(import_statement(batch))
        end
        run_after_callbacks if @after_callbacks
        true
      rescue => e
        puts "Error importing the bulk. Reason #{e.message}"
        false
      end

      private

      # Shared implementation of run_before_callbacks / run_after_callbacks.
      def run_callbacks_for(phase)
        ::Parallel.map(@bulk, in_threads: @threads) do |element|
          execute_callbacks(element, phase)
        end
      end
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/rimportor/active_record/sql_builder.rb:
# --------------------------------------------------------------------------------
module Rimportor
  module ActiveRecord
    # Generates insert SQL for a single model instance via Arel.
    class SqlBuilder
      # Stores the model and stamps its timestamp attributes right away.
      # @param model [Object] record to generate SQL for
      def initialize(model)
        @model = model
        set_timestamps
      end

      # @return [String] SQL of the insert manager after the record's values were added
      def full_insert_statement
        insert_manager.tap do |im|
          im.insert(arel_for_create)
        end.to_sql
      end

      # @return [String] SQL of whatever InsertManager#insert returns for the
      #   record's values (Import strips the VALUES keyword from it)
      def partial_insert_statement
        insert_manager.insert(arel_for_create).to_sql
      end

      # Attribute/value pairs for all attribute names of the model, obtained
      # from the model's own arel_attributes_with_values_for_create (called via send).
      def arel_for_create
        @model.send(:arel_attributes_with_values_for_create, @model.attribute_names)
      end

      # @return a fresh insert manager for the model's table
      def insert_manager
        @model.class.arel_table.create_insert
      end

      # Sets created_at and updated_at (where present) to the same instant.
      def set_timestamps
        now = Time.zone.now
        set_created_at(now)
        set_updated_at(now)
      end

      # Sets created_at if the model has such an attribute.
      # @param time [Time] timestamp to write; defaults to the current time
      def set_created_at(time = Time.zone.now)
        @model.created_at = time if @model.respond_to? :created_at
      end

      # Sets updated_at if the model has such an attribute.
      # @param time [Time] timestamp to write; defaults to the current time
      def set_updated_at(time = Time.zone.now)
        @model.updated_at = time if @model.respond_to? :updated_at
      end
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/rimportor/error/bulk_validation.rb:
# --------------------------------------------------------------------------------
module Rimportor
  module Error
    # Raised by Import#run_validations when the bulk contains an invalid record.
    class BulkValidation < StandardError
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/rimportor/error/invalid_adapter.rb:
# --------------------------------------------------------------------------------
module Rimportor
  module Error
    # Raised by Plugin.load_adapter when no adapter can be instantiated.
    class InvalidAdapter < StandardError
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/rimportor/plugin.rb:
# --------------------------------------------------------------------------------
module Rimportor
  # Concern mixed into ActiveRecord::Base; adds the `rimport` class method.
  module Plugin
    extend ActiveSupport::Concern

    module ClassMethods
      # Imports the given records as a bulk insert.
      # @param records [Enumerable] records to persist
      # @param options [Hash] forwarded to Rimportor::ActiveRecord::Import
      # @return [Boolean] result of Import#exec_statement
      def rimport(records, options = {})
        ::Rimportor::ActiveRecord::Import.new(records, current_adapter, options).exec_statement
      end

      # Adapter instance matching the :adapter entry of ActiveRecord::Base.connection_config.
      def current_adapter
        load_adapter(::ActiveRecord::Base.connection_config[:adapter])
      end

      # Instantiates the adapter class named after the camelized adapter name.
      # @param adapter_name [String, Symbol] e.g. "mysql2"
      # @raise [Rimportor::Error::InvalidAdapter] if the adapter cannot be loaded
      def load_adapter(adapter_name)
        # inherit = false restricts the lookup to the adapter namespace so an
        # unrelated top-level constant of the same name is never picked up.
        ::Rimportor::ActiveRecord::Adapter.const_get(adapter_name.to_s.camelize, false).new
      rescue => e
        raise ::Rimportor::Error::InvalidAdapter, "Invalid adapter. Reason #{e}"
      end
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/rimportor/util/connection.rb:
# --------------------------------------------------------------------------------
module Rimportor
  module Util
    # Thin wrapper around the ActiveRecord::Base connection pool.
    class Connection
      # Yields a connection obtained through connection_pool.with_connection.
      # @yieldparam connection the connection supplied by the pool
      def self.in_pool
        ::ActiveRecord::Base.connection_pool.with_connection do |connection|
          yield(connection)
        end
      end
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/rimportor/version.rb:
# --------------------------------------------------------------------------------
module Rimportor
  VERSION = "0.3"
end

# --------------------------------------------------------------------------------
# /lib/templates/rimportor.rb:
# --------------------------------------------------------------------------------
Rimportor.configure do |config|
  # Configure how many threads rimportor should use for importing.
  # Consider that rimportor will use threads not only for building the statement
  # but also for running validations for your bulk.
  # The default is 4 threads.
  # config.threads = 4
end

# --------------------------------------------------------------------------------
# /rimportor.gemspec:
# --------------------------------------------------------------------------------
$:.push File.expand_path("../lib", __FILE__)

# Maintain your gem's version:
require "rimportor/version"

# Describe your gem and declare its dependencies:
Gem::Specification.new do |s|
  s.name = "rimportor"
  s.version = Rimportor::VERSION
  s.authors = ['Erwin Schens']
  s.email = ['erwin.schens@qurasoft.de']
  s.homepage = 'https://github.com/ndea/rimportor'
  s.summary = 'Fast, modern and concurrent bulk import for ruby on rails.'
  s.description = 'Fast, modern and concurrent bulk import for ruby on rails.'
  s.license = 'MIT'

  s.files = Dir['{app,config,db,lib}/**/*', 'MIT-LICENSE', 'Rakefile', 'README.md']

  s.add_dependency 'rails', '~> 4.0', '>= 4.0.0'
  s.add_dependency 'parallel', '~> 1.5', '>= 1.5.0'

  s.add_development_dependency 'sqlite3'
  s.add_development_dependency 'rspec-rails', '~> 3.0.0', '>= 3.0.0'
end
# --------------------------------------------------------------------------------