├── test-app ├── .gitignore ├── Gemfile ├── run.sh └── app.rb ├── .document ├── Gemfile ├── lib ├── postgresql_cursor │ ├── version.rb │ ├── active_record │ │ ├── connection_adapters │ │ │ └── postgresql_type_map.rb │ │ ├── relation │ │ │ └── cursor_iterators.rb │ │ └── sql_cursor.rb │ └── cursor.rb └── postgresql_cursor.rb ├── gemfiles ├── activerecord_5.gemfile ├── activerecord_6.gemfile └── activerecord_4.gemfile ├── Appraisals ├── .gitignore ├── Rakefile ├── test ├── helper.rb └── test_postgresql_cursor.rb ├── .travis.yml ├── LICENSE ├── postgresql_cursor.gemspec └── README.md /test-app/.gitignore: -------------------------------------------------------------------------------- 1 | Gemfile.lock 2 | -------------------------------------------------------------------------------- /.document: -------------------------------------------------------------------------------- 1 | README.rdoc 2 | lib/**/*.rb 3 | bin/* 4 | features/**/*.feature 5 | LICENSE 6 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in postgresql_cursor.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /lib/postgresql_cursor/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module PostgresqlCursor 4 | VERSION = '0.6.9' 5 | end 6 | -------------------------------------------------------------------------------- /gemfiles/activerecord_5.gemfile: -------------------------------------------------------------------------------- 1 | # This file was generated by Appraisal 2 | 3 | source "https://rubygems.org" 4 | 5 | gem "activerecord", "5.2.3" 6 | 7 | gemspec path: "../" 8 | -------------------------------------------------------------------------------- 
/gemfiles/activerecord_6.gemfile: -------------------------------------------------------------------------------- 1 | # This file was generated by Appraisal 2 | 3 | source "https://rubygems.org" 4 | 5 | gem "activerecord", "6.0.0" 6 | 7 | gemspec path: "../" 8 | -------------------------------------------------------------------------------- /gemfiles/activerecord_4.gemfile: -------------------------------------------------------------------------------- 1 | # This file was generated by Appraisal 2 | 3 | source "https://rubygems.org" 4 | 5 | gem "activerecord", "4.2.11.1" 6 | gem "pg", "~> 0.15" 7 | 8 | gemspec path: "../" 9 | -------------------------------------------------------------------------------- /test-app/Gemfile: -------------------------------------------------------------------------------- 1 | # A sample Gemfile 2 | source "https://rubygems.org" 3 | 4 | gem "activerecord", "~> 7.0.4.1" 5 | # gem 'activerecord', '~> 6.1.7.1' 6 | 7 | gem "pg" 8 | gem "postgresql_cursor", path: "../" 9 | -------------------------------------------------------------------------------- /Appraisals: -------------------------------------------------------------------------------- 1 | appraise "activerecord-4" do 2 | gem "activerecord", "4.2.11.1" 3 | gem "pg", "~> 0.15" 4 | end 5 | 6 | appraise "activerecord-5" do 7 | gem "activerecord", "5.2.3" 8 | end 9 | 10 | appraise "activerecord-6" do 11 | gem "activerecord", "6.0.0" 12 | end 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## MAC OS 2 | .DS_Store 3 | 4 | ## TEXTMATE 5 | *.tmproj 6 | tmtags 7 | 8 | ## EMACS 9 | *~ 10 | \#* 11 | .\#* 12 | 13 | ## VIM 14 | *.swp 15 | 16 | ## IntelliJ/Rubymine 17 | .idea 18 | *.iml 19 | 20 | ## PROJECT::GENERAL 21 | coverage 22 | rdoc 23 | pkg 24 | 25 | ## PROJECT::SPECIFIC 26 | Gemfile.lock 27 | 
-------------------------------------------------------------------------------- /test-app/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #bundle install 3 | if [ "$1" = "irb" ]; then 4 | bundle exec irb -Ilib -r postgresql_cursor 5 | elif [ "$1" = "setup" ]; then 6 | createdb postgresql_cursor_test 7 | echo "create table products ( id serial);" | psql postgresql_cursor_test 8 | else 9 | bundle exec ruby app.rb 10 | fi 11 | -------------------------------------------------------------------------------- /lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map.rb: -------------------------------------------------------------------------------- 1 | # lib/postgresql_cursor/active_record/connection_adapters/postgresql_type_map 2 | module PostgreSQLCursor 3 | module ActiveRecord 4 | module ConnectionAdapters 5 | module PostgreSQLTypeMap 6 | # Returns the private "type_map" needed for the cursor operation 7 | def get_type_map # :nodoc: 8 | if ::ActiveRecord::VERSION::MAJOR == 4 && ::ActiveRecord::VERSION::MINOR == 0 9 | ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::OID::TYPE_MAP 10 | else 11 | type_map 12 | end 13 | end 14 | end 15 | end 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "bundler/setup" 3 | require 'rake/testtask' 4 | 5 | task :default => :test 6 | 7 | desc "Run the Test Suite, toot suite" 8 | task :test do 9 | sh "ruby test/test_*" 10 | end 11 | 12 | desc "Open and IRB Console with the gem and test-app loaded" 13 | task :console do 14 | sh "bundle exec irb -Ilib -I . 
-r pg -r postgresql_cursor -r test-app/app" 15 | #require 'irb' 16 | #ARGV.clear 17 | #IRB.start 18 | end 19 | 20 | desc "Setup testing database and table" 21 | task :setup do 22 | sh %q(createdb postgresql_cursor_test) 23 | sh %Q 24 | sh %Q 25 | end 26 | -------------------------------------------------------------------------------- /lib/postgresql_cursor.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'postgresql_cursor/version' 4 | require 'active_support' 5 | 6 | ActiveSupport.on_load :active_record do 7 | require 'postgresql_cursor/cursor' 8 | require 'postgresql_cursor/active_record/relation/cursor_iterators' 9 | require 'postgresql_cursor/active_record/sql_cursor' 10 | require 'postgresql_cursor/active_record/connection_adapters/postgresql_type_map' 11 | 12 | # ActiveRecord 4.x 13 | require 'active_record/connection_adapters/postgresql_adapter' 14 | ActiveRecord::Base.extend(PostgreSQLCursor::ActiveRecord::SqlCursor) 15 | ActiveRecord::Relation.send(:include, PostgreSQLCursor::ActiveRecord::Relation::CursorIterators) 16 | ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.send(:include, PostgreSQLCursor::ActiveRecord::ConnectionAdapters::PostgreSQLTypeMap) 17 | end 18 | -------------------------------------------------------------------------------- /test/helper.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 2 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 3 | require 'rubygems' 4 | require 'minitest' 5 | require 'active_record' 6 | require 'postgresql_cursor' 7 | 8 | ActiveRecord::Base.establish_connection(adapter: 'postgresql', 9 | database: ENV['TEST_DATABASE'] || 'postgresql_cursor_test', 10 | username: ENV['TEST_USER'] || ENV['USER'] || 'postgresql_cursor') 11 | 12 | class Product < ActiveRecord::Base 13 | has_many :prices 14 | 15 | # create table products (id 
serial primary key, data varchar); 16 | # create table prices (id serial primary key, product_id integer references products(id)); 17 | def self.generate(max=1_000) 18 | max.times do |i| 19 | connection.execute("insert into products values (#{i+1})") 20 | end 21 | end 22 | end 23 | 24 | class Price < ActiveRecord::Base 25 | belongs_to :product 26 | end 27 | 28 | Product.destroy_all 29 | Product.generate(1000) 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 2.6.5 4 | - 2.7.1 5 | before_install: 6 | - sudo apt-get update 7 | - sudo apt-get --yes remove postgresql\* 8 | - sudo apt-get install -y postgresql-12 postgresql-client-12 9 | - sudo cp /etc/postgresql/{9.6,12}/main/pg_hba.conf 10 | - sudo service postgresql restart 12 11 | gemfile: 12 | - gemfiles/activerecord_4.gemfile 13 | - gemfiles/activerecord_5.gemfile 14 | - gemfiles/activerecord_6.gemfile 15 | matrix: 16 | exclude: 17 | - rvm: 2.7.1 18 | gemfile: gemfiles/activerecord_4.gemfile 19 | services: 20 | - postgresql 21 | before_script: 22 | - psql -c 'create database postgresql_cursor_test;' -U postgres 23 | - psql -c 'CREATE ROLE travis SUPERUSER LOGIN CREATEDB;' -U postgres 24 | - psql -c 'create table products ( id serial primary key, data varchar);' -U postgres -d postgresql_cursor_test 25 | - psql -c 'create table prices ( id serial primary key, data varchar, product_id integer);' -U postgres -d postgresql_cursor_test 26 | addons: 27 | postgresql: '12.3' 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 Allen Fair 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the 
Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /test-app/app.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | ################################################################################ 3 | # To run this "app", do a "rake setup" first. 4 | # To work with this app, run the "rake console" task, which loads this file. 
5 | ################################################################################ 6 | require 'rubygems' 7 | require 'bundler/setup' 8 | require 'pg' 9 | require 'active_record' 10 | require 'postgresql_cursor' 11 | 12 | ActiveRecord::Base.establish_connection( adapter: 'postgresql', 13 | database: ENV['TEST_DATABASE'] || 'postgresql_cursor_test', 14 | username: ENV['TEST_USER'] || ENV['USER'] || 'postgresql_cursor') 15 | 16 | class Product < ActiveRecord::Base 17 | def self.generate(max=1_000) 18 | Product.destroy_all 19 | max.times do |i| 20 | connection.execute("insert into products values (#{i})") 21 | end 22 | end 23 | 24 | def tests 25 | Product.where("id>0").each_row(block_size:100) { |r| p r["id"] } # Hash 26 | Product.where("id>0").each_instance(block_size:100) { |r| p r.id } # Instance 27 | end 28 | end 29 | 30 | #Product.generate 31 | -------------------------------------------------------------------------------- /postgresql_cursor.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | lib = File.expand_path("lib", __dir__) 4 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 5 | require "postgresql_cursor/version" 6 | 7 | Gem::Specification.new do |spec| 8 | spec.name = "postgresql_cursor" 9 | spec.version = PostgresqlCursor::VERSION 10 | spec.authors = ["Allen Fair"] 11 | spec.email = ["allen.fair@gmail.com"] 12 | spec.summary = <<-SUMMARY 13 | ActiveRecord PostgreSQL Adapter extension for using a cursor to return a 14 | large result set 15 | SUMMARY 16 | spec.description = <<-DESCRIPTION 17 | PostgreSQL Cursor is an extension to the ActiveRecord PostgreSQLAdapter for 18 | very large result sets. It provides a cursor open/fetch/close interface to 19 | access data without loading all rows into memory, and instead loads the result 20 | rows in 'chunks' (default of 1_000 rows), buffers them, and returns the rows 21 | one at a time. 
22 | DESCRIPTION 23 | spec.homepage = "http://github.com/afair/postgresql_cursor" 24 | spec.license = "MIT" 25 | 26 | spec.files = `git ls-files -z`.split("\x0") 27 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 28 | spec.require_paths = ["lib"] 29 | 30 | spec.add_dependency "activerecord", ">= 6.0" 31 | # spec.add_development_dependency "activerecord", "~> 6.1" 32 | # spec.add_development_dependency "activerecord", "~> 7.0" 33 | 34 | # PG dependency held back for Jruby. See pg_jruby or jdbc-postgres-adapter gems. 35 | spec.add_development_dependency "pg" unless RUBY_PLATFORM == "java" 36 | spec.add_development_dependency "irb" 37 | spec.add_development_dependency "minitest" 38 | spec.add_development_dependency "rake" 39 | spec.add_development_dependency "appraisal" 40 | end 41 | -------------------------------------------------------------------------------- /lib/postgresql_cursor/active_record/relation/cursor_iterators.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Defines extension to ActiveRecord/AREL to use this library 4 | module PostgreSQLCursor 5 | module ActiveRecord 6 | module Relation 7 | module CursorIterators 8 | 9 | # Public: Executes the query, returning each row as a hash 10 | # to the given block. 11 | # 12 | # options - Hash to control 13 | # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) 14 | # block_size: 1..n - The number of rows to fetch per db block fetch 15 | # while: value - Exits loop when block does not return this value. 16 | # until: value - Exits loop when block returns this value. 17 | # cursor_name: string - Allows you to name your cursor. 
18 | # 19 | # Example: 20 | # Post.where(user_id:123).each_row { |hash| Post.process(hash) } 21 | # Post.each_row.map {|r| r["id"].to_i } 22 | # 23 | # Returns the number of rows yielded to the block 24 | def each_row(options={}, &block) 25 | options = {:connection => self.connection}.merge(options) 26 | cursor = PostgreSQLCursor::Cursor.new(to_unprepared_sql, options) 27 | return cursor.each_row(&block) if block_given? 28 | cursor 29 | end 30 | alias :each_hash :each_row 31 | 32 | # Public: Like each_row, but returns an instantiated model object to the block 33 | # 34 | # Paramaters: same as each_row 35 | # 36 | # Example: 37 | # Post.where(user_id:123).each_instance { |post| post.process } 38 | # Post.where(user_id:123).each_instance.map { |post| post.process } 39 | # 40 | # Returns the number of rows yielded to the block 41 | def each_instance(options={}, &block) 42 | options = {:connection => self.connection}.merge(options) 43 | cursor = PostgreSQLCursor::Cursor.new(to_unprepared_sql, options) 44 | return cursor.each_instance(self, &block) if block_given? 45 | cursor.iterate_type(self) 46 | end 47 | 48 | # Public: Executes the query, yielding each batch of up to block_size 49 | # rows where each row is a hash to the given block. 50 | # 51 | # Parameters: same as each_row 52 | # 53 | # Example: 54 | # Post.where(user_id:123).each_row_batch do |batch| 55 | # Post.process_batch(batch) 56 | # end 57 | # Post.each_row_batch.map { |batch| Post.transform_batch(batch) } 58 | # 59 | # Returns the number of rows yielded to the block 60 | def each_row_batch(options={}, &block) 61 | options = {:connection => self.connection}.merge(options) 62 | cursor = PostgreSQLCursor::Cursor.new(to_unprepared_sql, options) 63 | return cursor.each_row_batch(&block) if block_given? 
64 | cursor.iterate_batched 65 | end 66 | alias :each_hash_batch :each_row_batch 67 | 68 | # Public: Like each_row, but yields an array of instantiated model 69 | # objects to the block 70 | # 71 | # Parameters: same as each_row 72 | # 73 | # Example: 74 | # Post.where(user_id:123).each_instance_batch do |batch| 75 | # Post.process_batch(batch) 76 | # end 77 | # Post.where(user_id:123).each_instance_batch.map do |batch| 78 | # Post.transform_batch(batch) 79 | # end 80 | # 81 | # Returns the number of rows yielded to the block 82 | def each_instance_batch(options={}, &block) 83 | options = {:connection => self.connection}.merge(options) 84 | cursor = PostgreSQLCursor::Cursor.new(to_unprepared_sql, options) 85 | return cursor.each_instance_batch(self, &block) if block_given? 86 | cursor.iterate_type(self).iterate_batched 87 | end 88 | 89 | # Plucks the column names from the rows, and return them in an array 90 | def pluck_rows(*cols) 91 | options = cols.last.is_a?(Hash) ? cols.pop : {} 92 | options[:connection] = self.connection 93 | self.each_row(options).pluck(*cols) 94 | end 95 | alias :pluck_row :pluck_rows 96 | 97 | # Plucks the column names from the instances, and return them in an array 98 | def pluck_instances(*cols) 99 | options = cols.last.is_a?(Hash) ? cols.pop : {} 100 | options[:connection] = self.connection 101 | self.each_instance(options).pluck(*cols) 102 | end 103 | alias :pluck_instance :pluck_instances 104 | 105 | private 106 | 107 | # Returns sql string like #to_sql, but with bind parameters interpolated. 108 | # ActiveRecord sets up query as prepared statements with bind variables. 109 | # Cursors will prepare statements regardless. 
110 | def to_unprepared_sql 111 | if self.connection.respond_to?(:unprepared_statement) 112 | self.connection.unprepared_statement do 113 | to_sql 114 | end 115 | else 116 | to_sql 117 | end 118 | end 119 | 120 | end 121 | end 122 | end 123 | end 124 | -------------------------------------------------------------------------------- /lib/postgresql_cursor/active_record/sql_cursor.rb: -------------------------------------------------------------------------------- 1 | module PostgreSQLCursor 2 | module ActiveRecord 3 | module SqlCursor 4 | # Public: Executes the query, returning each row as a hash 5 | # to the given block. 6 | # 7 | # options - Hash to control 8 | # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) 9 | # block_size: 1..n - The number of rows to fetch per db block fetch 10 | # while: value - Exits loop when block does not return this value. 11 | # until: value - Exits loop when block returns this value. 12 | # with_hold: boolean - Allows the query to remain open across commit points. 
13 | # 14 | # Example: 15 | # Post.each_row { |hash| Post.process(hash) } 16 | # 17 | # Returns the number of rows yielded to the block 18 | def each_row(options={}, &block) 19 | options = {:connection => self.connection}.merge(options) 20 | all.each_row(options, &block) 21 | end 22 | alias :each_hash :each_row 23 | 24 | # Public: Like each_row, but returns an instantiated model object to the block 25 | # 26 | # Paramaters: same as each_row 27 | # 28 | # Example: 29 | # Post.each_instance { |post| post.process } 30 | # 31 | # Returns the number of rows yielded to the block 32 | def each_instance(options={}, &block) 33 | options = {:connection => self.connection}.merge(options) 34 | all.each_instance(options, &block) 35 | end 36 | 37 | # Public: Returns each row as a hash to the given block 38 | 39 | # sql - Full SQL statement, variables interpolated 40 | # options - Hash to control 41 | # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) 42 | # block_size: 1..n - The number of rows to fetch per db block fetch 43 | # while: value - Exits loop when block does not return this value. 44 | # until: value - Exits loop when block returns this value. 45 | # with_hold: boolean - Allows the query to remain open across commit points. 46 | # 47 | # Example: 48 | # Post.each_row_by_sql("select * from posts") { |hash| Post.process(hash) } 49 | # Post.each_row_by_sql("select * from posts").count 50 | # 51 | # Returns the number of rows yielded to the block 52 | def each_row_by_sql(sql, options={}, &block) 53 | options = {:connection => self.connection}.merge(options) 54 | cursor = PostgreSQLCursor::Cursor.new(sql, options) 55 | return cursor.each_row(&block) if block_given? 
56 | cursor 57 | end 58 | alias :each_hash_by_sql :each_row_by_sql 59 | 60 | # Public: Returns each row as a model instance to the given block 61 | # As this instantiates a model object, it is slower than each_row_by_sql 62 | # 63 | # Paramaters: see each_row_by_sql 64 | # 65 | # Example: 66 | # Post.each_instance_by_sql("select * from posts") { |post| post.process } 67 | # Post.each_instance_by_sql("select * from posts").count 68 | # 69 | # Returns the number of rows yielded to the block 70 | def each_instance_by_sql(sql, options={}, &block) 71 | options = {:connection => self.connection}.merge(options) 72 | cursor = PostgreSQLCursor::Cursor.new(sql, options) 73 | return cursor.each_instance(self, &block) if block_given? 74 | cursor.iterate_type(self) 75 | end 76 | 77 | # Public: Executes the query, yielding an array of up to block_size rows 78 | # where each row is a hash to the given block. 79 | # 80 | # Parameters: same as each_row 81 | # 82 | # Example: 83 | # Post.each_row_batch { |batch| Post.process_batch(batch) } 84 | # 85 | # Returns the number of rows yielded to the block 86 | def each_row_batch(options={}, &block) 87 | options = {:connection => self.connection}.merge(options) 88 | all.each_row_batch(options, &block) 89 | end 90 | alias :each_hash_batch :each_row_batch 91 | 92 | # Public: Like each_row_batch, but yields an array of instantiated model 93 | # objects to the block 94 | # 95 | # Parameters: same as each_row 96 | # 97 | # Example: 98 | # Post.each_instance_batch { |batch| Post.process_batch(batch) } 99 | # 100 | # Returns the number of rows yielded to the block 101 | def each_instance_batch(options={}, &block) 102 | options = {:connection => self.connection}.merge(options) 103 | all.each_instance_batch(options, &block) 104 | end 105 | 106 | # Public: Yields each batch of up to block_size rows as an array of rows 107 | # where each row as a hash to the given block 108 | # 109 | # Parameters: see each_row_by_sql 110 | # 111 | # Example: 112 | # 
Post.each_row_batch_by_sql("select * from posts") do |batch| 113 | # Post.process_batch(batch) 114 | # end 115 | # Post.each_row_batch_by_sql("select * from posts").map do |batch| 116 | # Post.transform_batch(batch) 117 | # end 118 | # 119 | # Returns the number of rows yielded to the block 120 | def each_row_batch_by_sql(sql, options={}, &block) 121 | options = {:connection => self.connection}.merge(options) 122 | cursor = PostgreSQLCursor::Cursor.new(sql, options) 123 | return cursor.each_row_batch(&block) if block_given? 124 | cursor.iterate_batched 125 | end 126 | alias :each_hash_batch_by_sql :each_row_batch_by_sql 127 | 128 | # Public: Yields each batch up to block_size of rows as model instances 129 | # to the given block 130 | # 131 | # As this instantiates a model object, it is slower than each_row_batch_by_sql 132 | # 133 | # Paramaters: see each_row_by_sql 134 | # 135 | # Example: 136 | # Post.each_instance_batch_by_sql("select * from posts") do |batch| 137 | # Post.process_batch(batch) 138 | # end 139 | # Post.each_instance_batch_by_sql("select * from posts").map do |batch| 140 | # Post.transform_batch(batch) 141 | # end 142 | # 143 | # Returns the number of rows yielded to the block 144 | def each_instance_batch_by_sql(sql, options={}, &block) 145 | options = {:connection => self.connection}.merge(options) 146 | cursor = PostgreSQLCursor::Cursor.new(sql, options) 147 | return cursor.each_instance_batch(self, &block) if block_given? 148 | cursor.iterate_type(self).iterate_batched 149 | end 150 | 151 | # Returns an array of the given column names. Use if you need cursors and don't expect 152 | # this to comsume too much memory. Values are strings. Like ActiveRecord's pluck. 153 | def pluck_rows(*cols) 154 | options = cols.last.is_a?(Hash) ? cols.pop : {} 155 | all.each_row(options).pluck(*cols) 156 | end 157 | alias :pluck_row :pluck_rows 158 | 159 | # Returns an array of the given column names. 
Use if you need cursors and don't expect 160 | # this to comsume too much memory. Values are instance types. Like ActiveRecord's pluck. 161 | def pluck_instances(*cols) 162 | options = cols.last.is_a?(Hash) ? cols.pop : {} 163 | all.each_instance(options).pluck(*cols) 164 | end 165 | alias :pluck_instance :pluck_instances 166 | end 167 | end 168 | end 169 | -------------------------------------------------------------------------------- /test/test_postgresql_cursor.rb: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Before running test, set up the test db & table with: 3 | # rake setup 4 | # or create the database manually if your environment doesn't permit 5 | ################################################################################ 6 | require_relative "helper" 7 | require "minitest/autorun" 8 | require "minitest/pride" 9 | 10 | class TestPostgresqlCursor < Minitest::Test 11 | def test_each 12 | c = PostgreSQLCursor::Cursor.new("select * from products order by 1") 13 | nn = 0 14 | n = c.each { nn += 1 } 15 | assert_equal nn, n 16 | end 17 | 18 | def test_each_batch 19 | c = PostgreSQLCursor::Cursor.new("select * from products order by 1") 20 | nn = 0 21 | n = c.each_batch { |b| nn += 1 } 22 | assert_equal nn, n 23 | end 24 | 25 | def test_enumerables 26 | assert_equal true, PostgreSQLCursor::Cursor.new("select * from products order by 1").any? 27 | assert_equal false, PostgreSQLCursor::Cursor.new("select * from products where id<0").any? 
28 | end 29 | 30 | def test_each_while_until 31 | c = PostgreSQLCursor::Cursor.new("select * from products order by 1", until: true) 32 | n = c.each { |r| r["id"].to_i > 100 } 33 | assert_equal 101, n 34 | 35 | c = PostgreSQLCursor::Cursor.new("select * from products order by 1", while: true) 36 | n = c.each { |r| r["id"].to_i < 100 } 37 | assert_equal 100, n 38 | end 39 | 40 | def test_each_batch_while_until 41 | c = PostgreSQLCursor::Cursor.new("select * from products order by id asc", until: true, block_size: 50) 42 | n = c.each_batch { |b| b.last["id"].to_i > 100 } 43 | assert_equal 3, n 44 | 45 | c = PostgreSQLCursor::Cursor.new("select * from products order by id asc", while: true, block_size: 50) 46 | n = c.each_batch { |b| b.last["id"].to_i < 100 } 47 | assert_equal 2, n 48 | end 49 | 50 | def test_each_array 51 | c = PostgreSQLCursor::Cursor.new("select * from products where id = 1") 52 | c.each_array do |ary| 53 | assert_equal Array, ary.class 54 | assert_equal 1, ary[0].to_i 55 | end 56 | end 57 | 58 | def test_each_array_batch 59 | c = PostgreSQLCursor::Cursor.new("select * from products where id = 1") 60 | c.each_array_batch do |b| 61 | assert_equal 1, b.size 62 | ary = b.first 63 | assert_equal Array, ary.class 64 | assert_equal 1, ary[0].to_i 65 | end 66 | end 67 | 68 | def test_relation 69 | nn = 0 70 | Product.where("id>0").each_row { |r| nn += 1 } 71 | assert_equal 1000, nn 72 | end 73 | 74 | def test_relation_batch 75 | nn = 0 76 | row = nil 77 | Product.where("id>0").each_row_batch(block_size: 100) { |b| 78 | row = b.last 79 | nn += 1 80 | } 81 | assert_equal 10, nn 82 | assert_equal Hash, row.class 83 | 84 | nn = 0 85 | row = nil 86 | Product.where("id>0").each_instance_batch(block_size: 100) { |b| 87 | row = b.last 88 | nn += 1 89 | } 90 | assert_equal 10, nn 91 | assert_equal Product, row.class 92 | end 93 | 94 | def test_activerecord 95 | nn = 0 96 | row = nil 97 | Product.each_row_by_sql("select * from products") { |r| 98 | row = r 99 | nn 
+= 1 100 | } 101 | assert_equal 1000, nn 102 | assert_equal Hash, row.class 103 | 104 | nn = 0 105 | Product.each_instance_by_sql("select * from products") { |r| 106 | row = r 107 | nn += 1 108 | } 109 | assert_equal 1000, nn 110 | assert_equal Product, row.class 111 | end 112 | 113 | def test_activerecord_batch 114 | nn = 0 115 | row = nil 116 | Product.each_row_batch_by_sql("select * from products", block_size: 100) { |b| 117 | row = b.last 118 | nn += 1 119 | } 120 | assert_equal 10, nn 121 | assert_equal Hash, row.class 122 | 123 | nn = 0 124 | Product.each_instance_batch_by_sql("select * from products", block_size: 100) { |b| 125 | row = b.last 126 | nn += 1 127 | } 128 | assert_equal 10, nn 129 | assert_equal Product, row.class 130 | end 131 | 132 | def test_exception 133 | Product.each_row_by_sql("select * from products") do |r| 134 | raise "Oops" 135 | end 136 | rescue => e 137 | assert_equal e.message, "Oops" 138 | end 139 | 140 | def test_batch_exception 141 | Product.each_row_batch_by_sql("select * from products") do |r| 142 | raise "Oops" 143 | end 144 | rescue => e 145 | assert_equal e.message, "Oops" 146 | end 147 | 148 | def test_exception_in_failed_transaction 149 | Product.each_row_by_sql("select * from products") do |r| 150 | Product.connection.execute("select kaboom") 151 | end 152 | rescue => e 153 | assert_match(/PG::(InFailedSqlTransaction|UndefinedColumn)/, e.message) 154 | end 155 | 156 | def test_batch_exception_in_failed_transaction 157 | Product.each_row_batch_by_sql("select * from products") do |r| 158 | Product.connection.execute("select kaboom") 159 | end 160 | rescue => e 161 | assert_match(/PG::(InFailedSqlTransaction|UndefinedColumn)/, e.message) 162 | end 163 | 164 | def test_cursor 165 | cursor = Product.all.each_row 166 | assert cursor.respond_to?(:each) 167 | r = cursor.map { |row| row["id"] } 168 | assert_equal 1000, r.size 169 | cursor = Product.each_row_by_sql("select * from products") 170 | assert cursor.respond_to?(:each) 
171 | r = cursor.map { |row| row["id"] } 172 | assert_equal 1000, r.size 173 | end 174 | 175 | def test_batched_cursor 176 | cursor = Product.all.each_row_batch(block_size: 100) 177 | assert cursor.respond_to?(:each) 178 | b = cursor.map { |batch| batch.map { |r| r["id"] } } 179 | assert_equal 10, b.size 180 | cursor = Product.each_row_batch_by_sql("select * from products", block_size: 100) 181 | assert cursor.respond_to?(:each) 182 | b = cursor.map { |batch| batch.map { |r| r["id"] } } 183 | assert_equal 10, b.size 184 | end 185 | 186 | def test_pluck 187 | r = Product.pluck_rows(:id) 188 | assert_equal 1000, r.size 189 | r = Product.all.pluck_instances(:id) 190 | assert_equal 1000, r.size 191 | assert_equal Integer, r.first.class 192 | end 193 | 194 | def test_with_hold 195 | items = 0 196 | Product.where("id < 4").each_instance(with_hold: true, block_size: 1) do |row| 197 | Product.transaction do 198 | row.update(data: Time.now.to_f.to_s) 199 | items += 1 200 | end 201 | end 202 | assert_equal 3, items 203 | end 204 | 205 | def test_fetch_symbolize_keys 206 | Product.transaction do 207 | # cursor = PostgreSQLCursor::Cursor.new("select * from products order by 1") 208 | cursor = Product.all.each_row 209 | r = cursor.fetch 210 | assert r.has_key?("id") 211 | r = cursor.fetch(symbolize_keys: true) 212 | assert r.has_key?(:id) 213 | cursor.close 214 | end 215 | end 216 | 217 | def test_bad_sql 218 | ActiveRecord::Base.each_row_by_sql("select * from bad_table") {} 219 | raise "Did Not Raise Expected Exception" 220 | rescue => e 221 | assert_match(/bad_table/, e.message) 222 | end 223 | 224 | def test_relation_association_is_not_loaded 225 | cursor = Product.first.prices.each_instance 226 | refute cursor.instance_variable_get(:@type).loaded? 
227 | end 228 | 229 | def test_size 230 | r = Product.each_instance 231 | assert_equal -1, r.size 232 | r = Product.each_hash 233 | assert_equal -1, r.size 234 | r = Product.each_row 235 | assert_equal -1, r.size 236 | end 237 | end 238 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PostgreSQLCursor for handling large Result Sets 2 | 3 | [![Gem Version](https://badge.fury.io/rb/postgresql_cursor.svg)](http://badge.fury.io/rb/postgresql_cursor) 4 | 5 | PostgreSQLCursor extends ActiveRecord to allow for efficient processing of queries 6 | returning a large number of rows, and allows you to sort your result set. 7 | 8 | In PostgreSQL, a 9 | [cursor](http://www.postgresql.org/docs/9.4/static/plpgsql-cursors.html) 10 | runs a query, from which you fetch a block of 11 | (say 1000) rows, process them, and continue fetching until the result 12 | set is exhausted. By fetching a smaller chunk of data, this reduces the 13 | amount of memory your application uses and prevents the potential crash 14 | of running out of memory. 15 | 16 | Supports Rails/ActiveRecord v3.1 (v3.2 recommended) higher (including 17 | v5.0) and Ruby 1.9 and higher. Not all features work in ActiveRecord v3.1. 18 | Support for this gem will only be for officially supported versions of 19 | ActiveRecord and Ruby; others can try older versions of the gem. 20 | 21 | ## Using Cursors 22 | 23 | PostgreSQLCursor was developed to take advantage of PostgreSQL's cursors. Cursors allow the program 24 | to declare a cursor to run a given query returning "chunks" of rows to the application program while 25 | retaining the position of the full result set in the database. This overcomes all the disadvantages 26 | of using find_each and find_in_batches. 27 | 28 | Also, with PostgreSQL, you have on option to have raw hashes of the row returned instead of the 29 | instantiated models. 
An informal benchmark showed that returning instances is a factor of 4 times 30 | slower than returning hashes. If you can work with the data in this form, you will find better 31 | performance. 32 | 33 | With PostgreSQL, you can work with cursors as follows: 34 | 35 | ```ruby 36 | Product.where("id>0").order("name").each_row { |hash| Product.process(hash) } 37 | 38 | Product.where("id>0").each_instance { |product| product.process! } 39 | Product.where("id>0").each_instance(block_size:100_000) { |product| product.process } 40 | 41 | Product.each_row { |hash| Product.process(hash) } 42 | Product.each_instance { |product| product.process } 43 | 44 | Product.each_row_by_sql("select * from products") { |hash| Product.process(hash) } 45 | Product.each_instance_by_sql("select * from products") { |product| product.process } 46 | ``` 47 | 48 | Cursors must be run in a transaction if you need to fetch each row yourself: 49 | 50 | ```ruby 51 | Product.transaction do 52 | cursor = Product.all.each_row 53 | row = cursor.fetch #=> {"id"=>"1"} 54 | row = cursor.fetch(symbolize_keys:true) #=> {:id =>"2"} 55 | cursor.close 56 | end 57 | ``` 58 | 59 | All these methods take an options hash to control things more: 60 | 61 | block_size:n The number of rows to fetch from the database each time (default 1000) 62 | while:value Continue looping as long as the block returns this value 63 | until:value Continue looping until the block returns this value 64 | connection:conn Use this connection instead of the current Product connection 65 | fraction:float A value to set for the cursor_tuple_fraction variable. 66 | PostgreSQL uses 0.1 (optimize for 10% of result set) 67 | This library uses 1.0 (Optimize for 100% of the result set) 68 | Do not override this value unless you understand it. 69 | with_hold:boolean Keep the cursor "open" even after a commit. 70 | cursor_name:string Give your cursor a name. 71 | 72 | Notes: 73 | 74 | * Use cursors *only* for large result sets. 
They have more overhead with the database 75 | than ActiveRecord selecting all matching records. 76 | * Aliases each_hash and each_hash_by_sql are provided for each_row and each_row_by_sql 77 | if you prefer to express what types are being returned. 78 | 79 | ### PostgreSQLCursor is an Enumerable 80 | 81 | If you do not pass in a block, the cursor is returned, which mixes in the Enumerable 82 | library. With that, you can pass it around, or chain in the awesome enumerable things 83 | like `map` and `reduce`. Furthermore, the cursors already act as `lazy`, but you can 84 | also chain in `lazy` when you want to keep the memory footprint small for the rest of the process. 85 | 86 | ```ruby 87 | Product.each_row.map {|r| r["id"].to_i } #=> [1, 2, 3, ...] 88 | Product.each_instance.map {|r| r.id }.each {|id| p id } #=> [1, 2, 3, ...] 89 | Product.each_instance.lazy.inject(0) {|sum,r| sum + r.quantity } #=> 499500 90 | ``` 91 | 92 | ### PostgreSQLCursor and collection rendering 93 | 94 | You can render a cursor collection by passing the enumeration as the collection attribute. 95 | 96 | ```ruby 97 | render partial: "some_partial", collection: Product.each_instance 98 | render partial: "some_partial", collection: Product.each_row 99 | render partial: "some_partial", collection: Product.each_hash 100 | ``` 101 | 102 | ### Hashes vs. Instances 103 | 104 | The each_row method returns the Hash of strings for speed (as this allows you to process a lot of rows). 105 | Hashes are returned with String values, and you must take care of any type conversion. 106 | 107 | When you use each_instance, ActiveRecord lazily casts these strings into 108 | Ruby types (Time, Fixnum, etc.) only when you read the attribute. 109 | 110 | If you find you need the types cast for your attributes, consider using each_instance 111 | instead. ActiveRecord's read casting algorithm will only cast the values you need and 112 | has become more efficient over time. 
113 | 114 | ### Select and Pluck 115 | 116 | To limit the columns returned to just those you need, use the `.select(:id, :name)` 117 | query method. 118 | 119 | ```ruby 120 | Product.select(:id, :name).each_row { |product| product.process } 121 | ``` 122 | 123 | Pluck is a great alternative to using a cursor. It does not instantiate 124 | the row, and builds an array of result values, and translates the values into ruby 125 | values (numbers, Timestamps, etc.). Using the cursor would still allow you to lazy 126 | load them in batches for very large sets. 127 | 128 | You can also use `pluck_rows` or `pluck_instances` if the results 129 | won't eat up too much memory. 130 | 131 | ```ruby 132 | Product.newly_arrived.pluck(:id) #=> [1, 2, 3, ...] 133 | Product.newly_arrived.each_row { |hash| } 134 | Product.select(:id).each_row.map {|r| r["id"].to_i } # cursor instead of pluck 135 | Product.pluck_rows(:id) #=> ["1", "2", ...] 136 | Product.pluck_instances(:id, :quantity) #=> [[1, 503], [2, 932], ...] 137 | ``` 138 | 139 | ### Associations and Eager Loading 140 | 141 | ActiveRecord performs some magic when eager-loading associated rows. It 142 | will usually not join the tables, and prefers to load the data in 143 | separate queries. 144 | 145 | This library hooks onto the `to_sql` feature of the query builder. As a 146 | result, it can't do the join if ActiveRecord decided not to join, nor 147 | can it construct the association objects eagerly. 148 | 149 | ## Locking and Updating Each Row (FOR UPDATE Queries) 150 | 151 | When you use the AREL `lock` method, a "FOR UPDATE" clause is added to 152 | the query. This causes the block of rows returned from each FETCH 153 | operation (see the `block_size` option) to be locked for you to update. 154 | The lock is released on those rows once the block is exhausted and the 155 | next FETCH or CLOSE statement is executed. 
156 | 157 | This example will run through a large table and potentially update each 158 | row, locking only a set of rows at a time to allow concurrent use. 159 | 160 | ```ruby 161 | Product.lock.each_instance(block_size:100) do |p| 162 | p.update(price: p.price * 1.05) 163 | end 164 | ``` 165 | 166 | Also, pay attention to the `block_size` you request. Locking large 167 | blocks of rows for an extended time can cause deadlocks or other 168 | performance issues in your application. On a busy table, or if the 169 | processing of each row consumes a lot of time or resources, try a 170 | `block_size` <= 10. 171 | 172 | See the [PostgreSQL Select Documentation](https://www.postgresql.org/docs/current/static/sql-select.html) 173 | for more information and limitations when using "FOR UPDATE" locking. 174 | 175 | ## Background: Why PostgreSQL Cursors? 176 | 177 | ActiveRecord is designed and optimized for web performance. In a web transaction, only a "page" of 178 | around 20 rows is returned to the user. When you do this: 179 | 180 | ```ruby 181 | Product.where("id>0").each { |product| product.process } 182 | ``` 183 | 184 | The database returns all matching result set rows to ActiveRecord, which instantiates each row with 185 | the data returned. This function returns an array of all these rows to the caller. 186 | 187 | Asynchronous, Background, or Offline processing may require processing a large amount of data. 188 | When there is a very large number of rows, this requires a lot more memory to hold the data. Ruby 189 | does not return that memory after processing the array, and this causes your process to "bloat". If you 190 | don't have enough memory, it will cause an exception. 
191 | 192 | ### ActiveRecord.find_each and find_in_batches 193 | 194 | To solve this problem, ActiveRecord gives us two alternative methods that work in "chunks" of your data: 195 | 196 | ```ruby 197 | Product.where("id>0").find_each { |model| Product.process } 198 | 199 | Product.where("id>0").find_in_batches do |batch| 200 | batch.each { |model| Product.process } 201 | end 202 | ``` 203 | 204 | Optionally, you can specify a :batch_size option as the size of the "chunk", and defaults to 1000. 205 | 206 | There are drawbacks with these methods: 207 | 208 | * You cannot specify the order, it will be ordered by the primary key (usually id) 209 | * The primary key must be numeric 210 | * The query is rerun for each chunk (1000 rows), starting at the next id sequence. 211 | * You cannot use overly complex queries as that will be rerun and incur more overhead. 212 | 213 | ### How it works 214 | 215 | Under the covers, the library calls the PostgreSQL cursor operations 216 | with the pseudo-code: 217 | 218 | SET cursor_tuple_fraction TO 1.0; 219 | DECLARE cursor_1 CURSOR WITH HOLD FOR select * from widgets; 220 | loop 221 | rows = FETCH 100 FROM cursor_1; 222 | rows.each {|row| yield row} 223 | until rows.size < 100; 224 | CLOSE cursor_1; 225 | 226 | ## Meta 227 | ### Author 228 | Allen Fair, [@allenfair](https://twitter.com/allenfair), [github://afair](https://github.com/afair) 229 | 230 | ### Note on Patches/Pull Requests 231 | 232 | * Fork the project. 233 | * Make your feature addition or bug fix. 234 | * Add tests for it. This is important so I don't break it in a 235 | future version unintentionally. 236 | * Commit, do not mess with rakefile, version, or history. 237 | (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull) 238 | * Send me a pull request. Bonus points for topic branches. 
239 | 240 | ### Code of Conduct 241 | 242 | This project adheres to the [Open Code of Conduct](http://todogroup.org/opencodeofconduct/#postgresql_cursor/2016@allenfair.com). 243 | By participating, you are expected to honor this code. 244 | 245 | ### Copyright 246 | 247 | Copyright (c) 2010-2017 Allen Fair. See (MIT) LICENSE for details. 248 | -------------------------------------------------------------------------------- /lib/postgresql_cursor/cursor.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | ################################################################################ 4 | # PostgreSQLCursor: library class provides postgresql cursor for large result 5 | # set processing. Requires ActiveRecord, but can be adapted to other DBI/ORM libraries. 6 | # If you don't use AR, this assumes #connection and #instantiate methods are available. 7 | # 8 | # options - Hash to control operation and loop breaks 9 | # connection: instance - ActiveRecord connection to use 10 | # fraction: 0.1..1.0 - The cursor_tuple_fraction (default 1.0) 11 | # block_size: 1..n - The number of rows to fetch per db block fetch 12 | # while: value - Exits loop when block does not return this value. 13 | # until: value - Exits loop when block returns this value. 14 | # with_hold: boolean - Allows the query to remain open across commit points. 15 | # cursor_name: string - Allows you to name your cursor. 16 | # 17 | # Examples: 18 | # PostgreSQLCursor::Cursor.new("select ...").each { |hash| ... } 19 | # ActiveRecordModel.where(...).each_row { |hash| ... } 20 | # ActiveRecordModel.each_row_by_sql("select ...") { |hash| ... } 21 | # ActiveRecordModel.each_instance_by_sql("select ...") { |model| ... 
# }
#

module PostgreSQLCursor
  # Enumerable wrapper around a server-side PostgreSQL cursor. Rows are pulled
  # from the database in blocks (FETCH n) and handed to the caller one row or
  # one batch at a time, as Hashes, Arrays, or instantiated models.
  class Cursor
    include Enumerable
    attr_reader :sql, :options, :connection, :count, :result

    # Public: Start a new PostgreSQL cursor query
    # sql     - The SQL statement with interpolated values
    # options - hash of processing controls
    #   while: value         - Exits loop when block does not return this value.
    #   until: value         - Exits loop when block returns this value.
    #   fraction: 0.1..1.0   - The cursor_tuple_fraction (default 1.0)
    #   block_size: 1..n     - The number of rows to fetch per db block fetch
    #                          Defaults to 1000
    #   with_hold: boolean   - Allows the query to remain open across commit points.
    #   cursor_name: string  - Name to declare the cursor with (random by default).
    #   connection: instance - Connection to use; defaults to
    #                          ActiveRecord::Base.connection.
    #   instances: boolean   - When truthy, #each iterates with each_instance.
    #   symbolize_keys: bool - When truthy, row hashes get symbol keys.
    #
    # Examples
    #
    #   PostgreSQLCursor::Cursor.new("select ....")
    #
    # Returns the cursor object when called with new.
    def initialize(sql, options = {})
      @sql = sql
      @options = options
      @connection = @options.fetch(:connection) { ::ActiveRecord::Base.connection }
      @count = 0
      @iterate = options[:instances] ? :each_instance : :each_row
      @batched = false
    end

    # Specify the type to instantiate, or reset to return a Hash.
    #
    # Explicitly check for type class to prevent calling equality
    # operator on active record relation, which will load it.
    #
    # type - nil or Hash for row hashes, Array for row arrays, anything else
    #        is kept as the type to instantiate.
    #
    # Returns self so calls can be chained.
    def iterate_type(type = nil)
      if type.nil? || (type.instance_of?(Class) && type == Hash)
        @iterate = :each_row
      elsif type.instance_of?(Class) && type == Array
        @iterate = :each_array
      else
        @iterate = :each_instance
        @type = type
      end
      self
    end

    # Switches #each between yielding single rows (false) and whole fetched
    # blocks (true). Returns self so calls can be chained.
    def iterate_batched(batched = true)
      @batched = batched
      self
    end

    # ActiveRecord call #size when rendering a collection
    # Define it and return some dummy value
    def size
      -1
    end

    # Public: Yields each row (or each batch, when batched) of the result set
    # to the passed block, in the form selected by #iterate_type.
    #
    # Row hashes have String keys unless the :symbolize_keys option is set.
    #
    # Returns the count of rows (or batches) processed, or an Enumerator when
    # no block is given.
    def each(&block)
      # Without a block the iterators below would open the cursor and then
      # fail calling nil; follow the Enumerable convention instead.
      return enum_for(:each) unless block

      if @iterate == :each_row
        @batched ? each_row_batch(&block) : each_row(&block)
      elsif @iterate == :each_array
        @batched ? each_array_batch(&block) : each_array(&block)
      else
        @batched ? each_instance_batch(@type, &block) : each_instance(@type, &block)
      end
    end

    # Yields each row as a Hash (symbol keys when :symbolize_keys is set).
    # Returns the number of rows yielded.
    def each_row(&block)
      each_tuple do |row|
        row = row.symbolize_keys if @options[:symbolize_keys]
        block.call(row)
      end
    end

    # Yields each row as an Array of column values.
    # Returns the number of rows yielded.
    def each_array(&block)
      # fetch_block consults @iterate to decide the row format, so force it
      # for the duration of this iteration and restore it afterwards.
      old_iterate = @iterate
      @iterate = :each_array
      begin
        rv = each_tuple do |row|
          block.call(row)
        end
      ensure
        @iterate = old_iterate
      end
      rv
    end

    # Yields each row instantiated through klass (defaults to the type given
    # to #iterate_type). Returns the number of rows yielded.
    def each_instance(klass = nil, &block)
      klass ||= @type
      each_tuple do |row|
        block.call(instantiate_row(klass, row))
      end
    end

    # Yields each fetched block as an Array of row Hashes.
    # Returns the number of batches yielded.
    def each_row_batch(&block)
      each_batch do |batch|
        batch.map!(&:symbolize_keys) if @options[:symbolize_keys]
        block.call(batch)
      end
    end

    # Yields each fetched block as an Array of row Arrays.
    # Returns the number of batches yielded.
    def each_array_batch(&block)
      # See each_array: the row format is chosen by fetch_block via @iterate.
      old_iterate = @iterate
      @iterate = :each_array
      begin
        rv = each_batch do |batch|
          block.call(batch)
        end
      ensure
        @iterate = old_iterate
      end
      rv
    end

    # Yields each fetched block as an Array of instances built through klass.
    # Returns the number of batches yielded.
    def each_instance_batch(klass = nil, &block)
      klass ||= @type
      each_batch do |batch|
        block.call(batch.map { |row| instantiate_row(klass, row) })
      end
    end

    # Returns an array of columns plucked from the result rows.
    # Experimental function, as this could still use too much memory
    # and negate the purpose of this library.
    # Should this return a lazy enumerator instead?
    #
    # cols - column names; a trailing Hash is treated as extra cursor options
    #        (its :class entry, if any, is passed to #iterate_type).
    #
    # Returns an Array of values when one column is requested, otherwise an
    # Array of per-row value Arrays.
    def pluck(*cols)
      options = cols.last.is_a?(Hash) ? cols.pop : {}
      # Build a new hash rather than merge! so the options hash handed to
      # #initialize by the caller is not modified behind their back.
      @options = @options.merge(options).merge(symbolize_keys: true)
      iterate_type(options[:class]) if options[:class]
      cols = cols.map(&:to_sym)
      result = []

      each do |row|
        row = row.symbolize_keys if row.is_a?(Hash)
        result << cols.map { |c| row[c] }
      end

      # Unwrap only the one-element row wrappers; an unbounded flatten would
      # also tear apart values that are themselves arrays.
      result.flatten!(1) if cols.size == 1
      result
    end

    # Opens the cursor, yields every row to the block, and closes the cursor.
    # Stops early on the :until / :while options. Returns the row count.
    def each_tuple(&block) # :nodoc:
      has_do_until = @options.key?(:until)
      has_do_while = @options.key?(:while)
      @count = 0
      @column_types = nil
      # Forget any buffer left by a previous iteration so the ensure below
      # only closes a cursor that this call actually declared.
      @block = nil
      with_optional_transaction do
        open
        while (row = fetch)
          break if row.empty?
          @count += 1
          rc = block.call(row)
          break if has_do_until && rc == @options[:until]
          break if has_do_while && rc != @options[:while]
        end
      ensure
        close if @block && connection.active?
      end
      @count
    end

    # Opens the cursor, yields every fetched block to the block, and closes
    # the cursor. Stops early on the :until / :while options.
    # Returns the number of batches yielded.
    def each_batch(&block) # :nodoc:
      has_do_until = @options.key?(:until)
      has_do_while = @options.key?(:while)
      @count = 0
      @column_types = nil
      # See each_tuple: only close a cursor declared by this call.
      @block = nil
      with_optional_transaction do
        open
        while (batch = fetch_block)
          break if batch.empty?
          @count += 1
          rc = block.call(batch)
          break if has_do_until && rc == @options[:until]
          break if has_do_while && rc != @options[:while]
        end
      ensure
        close if @block && connection.active?
      end
      @count
    end

    # Returns the row unchanged.
    def cast_types(row)
      row
    end

    # Builds (and memoizes) a Hash of column name => type object for the
    # current result, looked up through the connection.
    # Returns nil on ActiveRecord versions before 4.
    def column_types
      return nil if ::ActiveRecord::VERSION::MAJOR < 4
      return @column_types if @column_types

      types = {}
      fields = @result.fields
      fields.each_with_index do |fname, i|
        ftype = @result.ftype(i)
        fmod = @result.fmod(i)

        # From @netrusov 2023-01-18. This is the same call used in the PostgreSQL Adapter
        types[fname] = @connection.send(:get_oid_type, ftype, fmod, fname)

        # # From @simi 2023-01-18 (Works as well, used old calling method)
        # types[fname] = @connection.get_type_map.fetch(ftype)
      end

      @column_types = types
    end

    # Public: Opens (actually, "declares") the cursor. Call this before fetching
    def open
      set_cursor_tuple_fraction
      @cursor = @options[:cursor_name] || "cursor_#{SecureRandom.uuid.delete("-")}"
      hold = @options[:with_hold] ? "with hold " : ""
      @result = @connection.execute("declare #{@cursor} no scroll cursor #{hold}for #{@sql}")
      @block = []
    end

    # Public: Returns the next row from the cursor, or nil at the end of the
    # results. Opens the cursor first if it has not been opened yet.
    #
    # options - symbolize_keys: true returns the row with symbol keys.
    #
    # Returns a row as a hash of {'colname'=>value,...}
    def fetch(options = {})
      open unless @block
      fetch_block if @block.size == 0
      row = @block.shift
      row = row.symbolize_keys if row && options[:symbolize_keys]
      row
    end

    # Private: Fetches the next block of rows into @block
    #
    # block_size - rows to fetch; defaults to the :block_size option, or 1000.
    #
    # Returns the Array of fetched rows (empty at the end of the results).
    def fetch_block(block_size = nil)
      block_size ||= @block_size ||= @options.fetch(:block_size, 1000)
      @result = @connection.execute("fetch #{block_size} from #{@cursor}")

      @block = if @iterate == :each_array
        @result.each_row.to_a
      else
        @result.to_a
      end
    end

    # Public: Closes the cursor
    def close
      @connection.execute("close #{@cursor}")
    end

    # Private: Open transaction unless with_hold option, specified
    def with_optional_transaction
      if @options[:with_hold]
        yield
      else
        @connection.transaction { yield }
      end
    end

    # Private: Sets the PostgreSQL cursor_tuple_fraction value so the planner
    # assumes that fraction of the rows will be fetched.
    # This is a value between 0.1 and 1.0 (PostgreSQL defaults to 0.1, this library defaults to 1.0)
    # used to determine the expected fraction (percent) of result rows returned to the caller.
    # This value determines the access path by the query planner.
    #
    # frac - value to set; defaults to the :fraction option, or 1.0.
    #
    # The SET is always issued: previously the default 1.0 was never sent and
    # an explicit :fraction option was overridden with 1.0.
    #
    # Returns the Float that was set. Raises ArgumentError/TypeError when the
    # value is not numeric (it is interpolated into SQL).
    def set_cursor_tuple_fraction(frac = nil)
      frac = Float(frac || @options[:fraction] || 1.0)
      @cursor_tuple_fraction = frac
      @result = @connection.execute("set cursor_tuple_fraction to #{frac}")
      frac
    end

    private

    # Builds one model from a result row; ActiveRecord 4+ also receives the
    # column type map so attributes can be cast.
    def instantiate_row(klass, row)
      return klass.send(:instantiate, row) if ::ActiveRecord::VERSION::MAJOR < 4

      @column_types ||= column_types
      klass.send(:instantiate, row, @column_types)
    end
  end
end