├── .codecov.yaml ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .rspec ├── CHANGELOG.md ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── bin ├── console └── setup ├── lib ├── piperator.rb └── piperator │ ├── builder.rb │ ├── io.rb │ ├── pipeline.rb │ └── version.rb ├── piperator.gemspec └── spec ├── piperator ├── io_spec.rb └── pipeline_spec.rb ├── piperator_spec.rb └── spec_helper.rb /.codecov.yaml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | only_pulls: true 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | name: >- 8 | Test (${{ matrix.os }} ${{ matrix.ruby }}) 9 | runs-on: ${{ matrix.os }}-latest 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | os: [ubuntu, macos, windows] 14 | ruby: 15 | - 2.3 16 | - 2.4 17 | - 2.5 18 | - 2.6 19 | - 2.7 20 | - "3.0" 21 | - "3.1" 22 | - "3.2" 23 | - jruby 24 | - truffleruby 25 | exclude: 26 | - os: windows 27 | ruby: jruby 28 | - os: windows 29 | ruby: truffleruby 30 | steps: 31 | - uses: actions/checkout@v2 32 | - uses: ruby/setup-ruby@v1 33 | with: 34 | ruby-version: ${{ matrix.ruby }} 35 | bundler-cache: true 36 | - run: bundle exec rake 37 | - uses: codecov/codecov-action@v2 38 | if: ${{ matrix.os == 'ubuntu' && matrix.ruby == '3.0' }} 39 | with: 40 | file: ./coverage/coverage.xml 41 | fail_ci_if_error: true 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | 11 | # rspec failure tracking 12 | .rspec_status 13 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --format documentation 2 | --color 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 1.1.0 (6 December 2019) 2 | 3 | - add `#pos` to Piperator::IO to get current position 4 | 5 | ## 1.0.0. (13 October 2019) 6 | 7 | - add `Piperator.build` to build pipelines with DSL 8 | 9 | ## 0.3.0 (13 July 2017) 10 | 11 | - remove implicit wrapping to callable from `Pipeline.pipe` 12 | - add `Pipeline.wrap` to wrap a value as callable 13 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in piperator.gemspec 4 | gemspec 5 | 6 | if RUBY_VERSION >= '2.5' 7 | gem 'simplecov', '~> 0.21.2', group: [:development] 8 | gem 'simplecov-cobertura', '~> 2.1.0', group: [:development] 9 | end 10 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Ville Lautanala 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Piperator 2 | 3 | Pipelines for streaming large collections. The pipeline enables composition of streaming pipelines with lazy enumerables. 4 | 5 | The library is heavily inspired by [Elixir pipe operator](https://elixirschool.com/en/lessons/basics/pipe-operator/) and [Node.js Stream](https://nodejs.org/api/stream.html). 6 | 7 | 8 | 9 | **Table of Contents** 10 | 11 | - [Installation](#installation) 12 | - [Usage](#usage) 13 | - [Pipelines](#pipelines) 14 | - [Enumerators as IO objects](#enumerators-as-io-objects) 15 | - [Development](#development) 16 | - [Related Projects](#related-projects) 17 | - [Contributing](#contributing) 18 | - [License](#license) 19 | 20 | 21 | 22 | ## Installation 23 | 24 | Piperator is distributed as a ruby gem and can be installed with 25 | 26 | ``` 27 | $ gem install piperator 28 | ``` 29 | 30 | ## Usage 31 | 32 | Start by requiring the gem 33 | 34 | ```ruby 35 | require 'piperator' 36 | ``` 37 | 38 | ### Pipelines 39 | 40 | As an appetizer, here's a pipeline that triples all input values and then sums the values. 41 | 42 | ```ruby 43 | Piperator. 44 | pipe(->(values) { values.lazy.map { |i| i * 3 } }). 45 | pipe(->(values) { values.sum }). 46 | call([1, 2, 3]) 47 | # => 18 48 | ``` 49 | 50 | The same could also be achieved using DSL instead of method chaining: 51 | 52 | ```ruby 53 | Piperator.build do 54 | pipe(->(values) { values.lazy.map { |i| i * 3 } }) 55 | pipe(->(values) { values.sum }) 56 | end.call([1, 2, 3]) 57 | ``` 58 | 59 | If desired, the input enumerable can also be given as the first element of the pipeline using `Piperator.wrap`. 60 | 61 | ```ruby 62 | Piperator. 63 | wrap([1, 2, 3]). 64 | pipe(->(values) { values.lazy.map { |i| i * 3 } }). 65 | pipe(->(values) { values.sum }). 66 | call 67 | # => 18 68 | ``` 69 | 70 | Have reasons to defer constructing a pipe? Evaluate it lazily: 71 | 72 | ```ruby 73 | summing = ->(values) { values.sum } 74 | Piperator.build 75 | pipe(->(values) { values.lazy.map { |i| i * 3 } }) 76 | lazy do 77 | summing 78 | end 79 | end.call([1, 2, 3]) 80 | ``` 81 | 82 | There is, of course, a much more idiomatic alternative in Ruby: 83 | 84 | ```ruby 85 | [1, 2, 3].map { |i| i * 3 }.sum 86 | ``` 87 | 88 | So why bother? 89 | 90 | To run code before the stream processing start and after processing has ended. Let's use the same pattern to calculate the decompressed length of a GZip file fetched over HTTP with streaming. 91 | 92 | ```ruby 93 | require 'piperator' 94 | require 'uri' 95 | require 'em-http-request' 96 | require 'net/http' 97 | 98 | module HTTPFetch 99 | def self.call(url) 100 | uri = URI(url) 101 | Enumerator.new do |yielder| 102 | Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http| 103 | request = Net::HTTP::Get.new(uri.request_uri) 104 | http.request request do |response| 105 | response.read_body { |chunk| yielder << chunk } 106 | end 107 | end 108 | end 109 | end 110 | end 111 | 112 | module GZipDecoder 113 | def self.call(enumerable) 114 | Enumerator.new do |yielder| 115 | decoder = EventMachine::HttpDecoders::GZip.new do |chunk| 116 | yielder << chunk 117 | end 118 | 119 | enumerable.each { |chunk| decoder << chunk } 120 | yielder << decoder.finalize.to_s 121 | end 122 | end 123 | end 124 | 125 | length = proc do |enumerable| 126 | enumerable.lazy.reduce(0) { |aggregate, chunk| aggregate + chunk.length } 127 | end 128 | 129 | Piperator. 130 | pipe(HTTPFetch). 131 | pipe(GZipDecoder). 132 | pipe(length). 133 | call('http://ftp.gnu.org/gnu/gzip/gzip-1.2.4.tar.gz') 134 | ``` 135 | 136 | At no point is it necessary to keep the full response or decompressed content in memory. This is a huge win when the file sizes grow beyond the 780kB seen in the example. 137 | 138 | Pipelines themselves respond to `#call`. This enables using pipelines as pipes in other pipelines. 139 | 140 | ```ruby 141 | append_end = proc do |enumerator| 142 | Enumerator.new do |yielder| 143 | enumerator.each { |item| yielder << item } 144 | yielder << 'end' 145 | end 146 | end 147 | 148 | prepend_start = proc do |enumerator| 149 | Enumerator.new do |yielder| 150 | yielder << 'start' 151 | enumerator.each { |item| yielder << item } 152 | end 153 | end 154 | 155 | double = ->(enumerator) { enumerator.lazy.map { |i| i * 2 } } 156 | 157 | prepend_append = Piperator.pipe(prepend_start).pipe(append_end) 158 | Piperator.pipe(double).pipe(prepend_append).call([1, 2, 3]).to_a 159 | # => ['start', 2, 4, 6, 'end'] 160 | ``` 161 | 162 | ### Enumerators as IO objects 163 | 164 | Piperator also provides a helper class that allows `Enumerator`s to be used as 165 | IO objects. This is useful to provide integration with libraries that work only 166 | with IO objects such as [Nokogiri](http://www.nokogiri.org) or 167 | [Oj](https://github.com/ohler55/oj). 168 | 169 | An example pipe that would yield all XML node in a document read in streams: 170 | 171 | ```ruby 172 | 173 | require 'nokogiri' 174 | streaming_xml = lambda do |enumerable| 175 | Enumerator.new do |yielder| 176 | io = Piperator::IO.new(enumerable.each) 177 | reader = Nokogiri::XML::Reader(io) 178 | reader.each { |node| yielder << node } 179 | end 180 | end 181 | ``` 182 | 183 | In real-world scenarios, the pipe would need to filter the nodes. Passing every 184 | single XML node forward is not that useful. 185 | 186 | ## Development 187 | 188 | After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. 189 | 190 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 191 | 192 | ## Related Projects 193 | 194 | * [D★Stream](https://github.com/ddfreyne/d-stream) - Set of extensions for writing stream-processing code. 195 | * [ddbuffer](https://github.com/ddfreyne/ddbuffer) - Buffer enumerables/enumerators. 196 | * [Down::ChunkedIO](https://github.com/janko-m/down/blob/master/lib/down/chunked_io.rb) - A similar IO class as Piperator::IO. 197 | 198 | ## Contributing 199 | 200 | Bug reports and pull requests are welcome on GitHub at https://github.com/lautis/piperator. 201 | 202 | ## License 203 | 204 | The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT). 205 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/gem_tasks' 2 | require 'rspec/core/rake_task' 3 | 4 | RSpec::Core::RakeTask.new(:spec) 5 | 6 | task default: :spec 7 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'bundler/setup' 4 | require 'piperator' 5 | 6 | # You can add fixtures and/or initialization code here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 8 | 9 | # (If you use this, don't forget to add pry to your Gemfile!) 10 | # require "pry" 11 | # Pry.start 12 | 13 | require 'irb' 14 | IRB.start(__FILE__) 15 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /lib/piperator.rb: -------------------------------------------------------------------------------- 1 | require 'piperator/version' 2 | require 'piperator/pipeline' 3 | require 'piperator/io' 4 | require 'piperator/builder' 5 | 6 | # Top-level shortcuts 7 | module Piperator 8 | # Build a new pipeline using DSL. This enables easy control of the pipeline 9 | # stucture. 10 | # 11 | # Piperator.build do 12 | # wrap [1, 2, 3] 13 | # pipe(-> (enumerable) { enumerable.map { |i| i + 1 } }) 14 | # end 15 | # # => Pipeline that returns [2, 3, 4] called 16 | # 17 | # # Alternatively, the Builder is also given as argument to the block 18 | # Piperator.build do |p| 19 | # p.wrap [1, 2, 3] 20 | # p.pipe(-> (enumerable) { enumerable.map { |i| i + 1 } }) 21 | # end 22 | # # This is similar, but allows access to instance variables. 23 | # 24 | # @return [Pipeline] Pipeline containing defined steps 25 | def self.build(&block) 26 | return Pipeline.new unless block_given? 27 | 28 | Builder.new(block&.binding).tap do |builder| 29 | if block.arity.positive? 30 | yield builder 31 | else 32 | builder.instance_eval(&block) 33 | end 34 | end.to_pipeline 35 | end 36 | 37 | # Build a new pipeline from a callable or an enumerable object 38 | # 39 | # @see Piperator::Pipeline.pipe 40 | # @param enumerable An object responding to call(enumerable) 41 | # @return [Pipeline] A pipeline containing only the callable 42 | def self.pipe(enumerable) 43 | Pipeline.pipe(enumerable) 44 | end 45 | 46 | # Build a new pipeline from a from a non-callable, i.e. string, array, etc. 47 | # 48 | # @see Piperator::Pipeline.wrap 49 | # @param value A raw value which will be passed through the pipeline 50 | # @return [Pipeline] A pipeline containing only the callable 51 | def self.wrap(value) 52 | Pipeline.wrap(value) 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /lib/piperator/builder.rb: -------------------------------------------------------------------------------- 1 | module Piperator 2 | # Builder is used to provide DSL-based Pipeline building. Using Builder, 3 | # Pipelines can be built without pipe chaining, which might be easier if 4 | # some steps need to be included only on specific conditions. 5 | # 6 | # @see Piperator.build 7 | class Builder 8 | # Expose a chained method in Pipeline in DSL 9 | # 10 | # @param method_name Name of method in Pipeline 11 | # @see Pipeline 12 | # 13 | # @!macro [attach] dsl_method 14 | # @method $1 15 | # Call Pipeline#$1 given arguments and use the return value as builder state. 16 | # 17 | # @see Pipeline.$1 18 | def self.dsl_method(method_name) 19 | define_method(method_name) do |*arguments, &block| 20 | @pipeline = @pipeline.send(method_name, *arguments, &block) 21 | end 22 | end 23 | 24 | dsl_method :lazy 25 | dsl_method :pipe 26 | dsl_method :wrap 27 | 28 | def initialize(saved_binding, pipeline = Pipeline.new) 29 | @pipeline = pipeline 30 | @saved_binding = saved_binding 31 | end 32 | 33 | # Return build pipeline 34 | # 35 | # @return [Pipeline] 36 | def to_pipeline 37 | @pipeline 38 | end 39 | 40 | private 41 | 42 | def method_missing(method_name, *arguments, &block) 43 | if @saved_binding.receiver.respond_to?(method_name, true) 44 | @saved_binding.receiver.send(method_name, *arguments, &block) 45 | else 46 | super 47 | end 48 | end 49 | 50 | def respond_to_missing?(method_name, include_private = false) 51 | @saved_binding.receiver.respond_to?(method_name, include_private) || super 52 | end 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /lib/piperator/io.rb: -------------------------------------------------------------------------------- 1 | require 'English' 2 | 3 | module Piperator 4 | # Pseudo I/O on Enumerators 5 | class IO 6 | FLUSH_THRESHOLD = 128 * 1028 # 128KiB 7 | 8 | attr_reader :eof 9 | attr_reader :pos 10 | 11 | def initialize(enumerator, flush_threshold: FLUSH_THRESHOLD) 12 | @enumerator = enumerator 13 | @flush_threshold = flush_threshold 14 | @buffer = StringIO.new 15 | @pos = 0 16 | @buffer_read_pos = 0 17 | @eof = false 18 | end 19 | 20 | alias eof? eof 21 | alias tell pos 22 | 23 | # Return the first bytes of the buffer without marking the buffer as read. 24 | def peek(bytes) 25 | while @eof == false && readable_bytes < (bytes || 1) 26 | @buffer.write(@enumerator.next) 27 | end 28 | peek_buffer(bytes) 29 | rescue StopIteration 30 | @eof = true 31 | peek_buffer(bytes) 32 | end 33 | 34 | # Reads the next "line" from the I/O stream; lines are separated by 35 | # separator. 36 | # 37 | # @param separator [String] separator to split input 38 | # @param _limit Unused parameter for compatiblity 39 | # @return [String] 40 | def gets(separator = $INPUT_RECORD_SEPARATOR, _limit = nil) 41 | while !@eof && !contains_line?(separator) 42 | begin 43 | @buffer.write(@enumerator.next) 44 | rescue StopIteration 45 | @eof = true 46 | nil 47 | end 48 | end 49 | read_with { @buffer.gets(separator) } 50 | end 51 | 52 | # Flush internal buffer until the last unread byte 53 | def flush 54 | if @buffer.pos == @buffer_read_pos 55 | initialize_buffer 56 | else 57 | @buffer.pos = @buffer_read_pos 58 | initialize_buffer(@buffer.read) 59 | end 60 | end 61 | 62 | # Reads length bytes from the I/O stream. 63 | # 64 | # @param length [Integer] number of bytes to read 65 | # @param buffer [String] optional read buffer 66 | # @return String 67 | def read(length = nil, buffer = nil) 68 | return @enumerator.next.tap { |e| @pos += e.bytesize } if length.nil? && readable_bytes.zero? 69 | @buffer.write(@enumerator.next) while !@eof && readable_bytes < (length || 1) 70 | read_with { @buffer.read(length, buffer) } 71 | rescue StopIteration 72 | @eof = true 73 | read_with { @buffer.read(length, buffer) } if readable_bytes > 0 74 | end 75 | 76 | # Current buffer size - including non-freed read content 77 | # 78 | # @return [Integer] number of bytes stored in buffer 79 | def used 80 | @buffer.size 81 | end 82 | 83 | private 84 | 85 | def readable_bytes 86 | @buffer.pos - @buffer_read_pos 87 | end 88 | 89 | def read_with 90 | pos = @buffer.pos 91 | @buffer.pos = @buffer_read_pos 92 | 93 | yield.tap do |data| 94 | @buffer_read_pos += data.bytesize if data 95 | @buffer.pos = pos 96 | flush if flush? 97 | end 98 | end 99 | 100 | def peek_buffer(bytes) 101 | @buffer.string.byteslice(@buffer_read_pos...@buffer_read_pos + bytes) 102 | end 103 | 104 | def flush? 105 | @buffer.pos == @buffer_read_pos || @buffer.pos > @flush_threshold 106 | end 107 | 108 | def initialize_buffer(data = nil) 109 | @pos += @buffer_read_pos 110 | @buffer_read_pos = 0 111 | @buffer = StringIO.new 112 | @buffer.write(data) if data 113 | end 114 | 115 | def contains_line?(separator = $INPUT_RECORD_SEPARATOR) 116 | return true if @eof 117 | @buffer.string.byteslice(@buffer_read_pos..-1).include?(separator) 118 | rescue ArgumentError # Invalid UTF-8 119 | false 120 | end 121 | end 122 | end 123 | -------------------------------------------------------------------------------- /lib/piperator/pipeline.rb: -------------------------------------------------------------------------------- 1 | module Piperator 2 | # Pipeline is responsible of composition of a lazy enumerable from callables. 3 | # It contains a collection of pipes that respond to #call and return a 4 | # enumerable. 5 | # 6 | # For streaming purposes, it usually is desirable to have pipes that takes 7 | # a lazy Enumerator as an argument a return a (modified) lazy Enumerator. 8 | class Pipeline 9 | # Build a new pipeline from a lazily evaluated callable or an enumerable 10 | # object. 11 | # 12 | # @param block A block returning a callable(enumerable) 13 | # @return [Pipeline] A pipeline containing only the lazily evaluated 14 | # callable. 15 | def self.lazy(&block) 16 | Pipeline.new([]).lazy(&block) 17 | end 18 | 19 | # Build a new pipeline from a callable or an enumerable object 20 | # 21 | # @param callable An object responding to call(enumerable) 22 | # @return [Pipeline] A pipeline containing only the callable 23 | def self.pipe(callable) 24 | Pipeline.new([callable]) 25 | end 26 | 27 | # Build a new pipeline from a from a non-callable, i.e. string, array, etc. 28 | # This method will wrap the value in a proc, thus making it callable. 29 | # 30 | # Piperator::Pipeline.wrap([1, 2, 3]).pipe(add_one) 31 | # # => [2, 3, 4] 32 | # 33 | # # Wrap is syntactic sugar for wrapping a value in a proc 34 | # Piperator::Pipeline.pipe(->(_) { [1, 2, 3] }).pipe(add_one) 35 | # # => [2, 3, 4] 36 | # 37 | # @param value A raw value which will be passed through the pipeline 38 | # @return [Pipeline] A pipeline containing only the callable 39 | def self.wrap(value) 40 | Pipeline.new([->(_) { value }]) 41 | end 42 | 43 | # Returns enumerable given as an argument without modifications. Usable when 44 | # Pipeline is used as an identity transformation. 45 | # 46 | # @param enumerable [Enumerable] 47 | # @return [Enumerable] 48 | def self.call(enumerable = []) 49 | enumerable 50 | end 51 | 52 | def initialize(pipes = []) 53 | @pipes = pipes 54 | freeze 55 | end 56 | 57 | # Compute the pipeline and return a lazy enumerable with all the pipes. 58 | # 59 | # @param enumerable Argument passed to the first pipe in the pipeline. 60 | # @return [Enumerable] A lazy enumerable containing all the pipes 61 | def call(enumerable = []) 62 | @pipes.reduce(enumerable) { |pipe, memo| memo.call(pipe) } 63 | end 64 | 65 | # Compute the pipeline and strictly evaluate the result 66 | # 67 | # @return [Array] 68 | def to_a(enumerable = []) 69 | call(enumerable).to_a 70 | end 71 | 72 | # Add a new lazily evaluated part to the pipeline. 73 | # 74 | # @param block A block returning a callable(enumerable) to append in 75 | # pipeline. 76 | # @return [Pipeline] A new pipeline instance 77 | def lazy(&block) 78 | callable = nil 79 | Pipeline.new(@pipes + [lambda do |e| 80 | callable ||= block.call 81 | callable.call(e) 82 | end]) 83 | end 84 | 85 | # Add a new part to the pipeline 86 | # 87 | # @param other A pipe to append in pipeline. Responds to #call. 88 | # @return [Pipeline] A new pipeline instance 89 | def pipe(other) 90 | Pipeline.new(@pipes + [other]) 91 | end 92 | 93 | # Add a new value to the pipeline 94 | # 95 | # @param other A value which is wrapped into a pipe, then appended to the 96 | # pipeline. 97 | # @return [Pipeline] A new pipeline instance 98 | def wrap(other) 99 | Pipeline.new(@pipes + [->(_) { other }]) 100 | end 101 | end 102 | end 103 | -------------------------------------------------------------------------------- /lib/piperator/version.rb: -------------------------------------------------------------------------------- 1 | module Piperator 2 | # Piperator version 3 | VERSION = '1.3.0'.freeze 4 | end 5 | -------------------------------------------------------------------------------- /piperator.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | lib = File.expand_path('../lib', __FILE__) 4 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 5 | require 'piperator/version' 6 | 7 | Gem::Specification.new do |spec| 8 | spec.name = 'piperator' 9 | spec.version = Piperator::VERSION 10 | spec.authors = ['Ville Lautanala'] 11 | spec.email = ['lautis@gmail.com'] 12 | 13 | spec.summary = 'Composable pipelines for streaming large collections' 14 | spec.description = 'Pipelines for streaming large collections with ' \ 15 | 'composition inspired by Elixir pipes.' 16 | spec.homepage = 'https://github.com/lautis/piperator' 17 | spec.license = 'MIT' 18 | 19 | spec.files = `git ls-files -z`.split("\x0").reject do |f| 20 | f.match(%r{^(test|spec|features)/}) 21 | end 22 | spec.bindir = 'exe' 23 | spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 24 | spec.require_paths = ['lib'] 25 | 26 | spec.add_development_dependency 'bundler', '>= 1.14' 27 | spec.add_development_dependency 'rake', '~> 12.0' 28 | spec.add_development_dependency 'rspec', '~> 3.0' 29 | end 30 | -------------------------------------------------------------------------------- /spec/piperator/io_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'piperator/io' 3 | 4 | RSpec.describe Piperator::IO do 5 | KILOBYTE = 1024 6 | 7 | describe '#read' do 8 | subject { Piperator::IO.new(["foo", "bar"].each) } 9 | 10 | it 'reads specific number of bytes' do 11 | expect(subject.read(4)).to eq('foob') 12 | end 13 | 14 | it 'buffers rest and returns on next read' do 15 | expect(subject.read(2)).to eq('fo') 16 | expect(subject.read(2)).to eq('ob') 17 | expect(subject.read(2)).to eq('ar') 18 | end 19 | 20 | it 'returns nil when at end of enumerable' do 21 | expect(subject.read(6)).to eq('foobar') 22 | expect(subject.read).to be_nil 23 | end 24 | 25 | it 'defaults to returning items one by one' do 26 | expect(subject.read).to eq('foo') 27 | end 28 | 29 | it 'reads into buffer' do 30 | buffer = '' 31 | subject.read(2, buffer) 32 | 33 | expect(buffer).to eq('fo') 34 | end 35 | end 36 | 37 | describe '#pos' do 38 | subject { Piperator::IO.new(%w[123 456].each, flush_threshold: 4) } 39 | 40 | it 'is correctly set when using the default #read' do 41 | expect { subject.read }.to change(subject, :pos).to(3) 42 | end 43 | 44 | it 'is correctly set when performing a partial #read' do 45 | expect { subject.read(4) }.to change(subject, :pos).to(4) 46 | end 47 | 48 | it 'is aliased as #tell' do 49 | expect { subject.read }.to change(subject, :tell).to(3) 50 | end 51 | end 52 | 53 | describe '#gets' do 54 | subject { Piperator::IO.new(["foo\n", "bar\n"].each) } 55 | 56 | it 'returns characters until the separator' do 57 | expect(subject.gets).to eq("foo\n") 58 | end 59 | 60 | it 'handles split UTF-8 characters' do 61 | broken = 'ä'.force_encoding(Encoding::ASCII_8BIT) 62 | subject = Piperator::IO.new( 63 | [ 64 | 'foo', 65 | "\nb".force_encoding(Encoding::ASCII_8BIT) + broken[0], 66 | broken[1] + "r\n" 67 | ].each 68 | ) 69 | expect(subject.gets).to eql("foo\n") 70 | expect(subject.gets).to eql("bär\n") 71 | end 72 | 73 | it 'returns the last incomplete line when stream closes' do 74 | subject = Piperator::IO.new(["foo\n", "bar"].each) 75 | subject.read 76 | expect(subject.gets).to eq('bar') 77 | end 78 | 79 | it 'responds to gets with nil when enumerable is exhausted' do 80 | 2.times { subject.gets } 81 | expect(subject.gets).to be_nil 82 | end 83 | 84 | it 'uses bytes for indices' do 85 | subject = Piperator::IO.new(["foo®\nbar\n"].each) 86 | expect(subject.gets.force_encoding('UTF-8')).to eq("foo®\n") 87 | expect(subject.gets).to eq("bar\n") 88 | end 89 | end 90 | 91 | describe '#flush' do 92 | subject { Piperator::IO.new(['a' * 16 * KILOBYTE].each) } 93 | let(:flush_threshold) { Piperator::IO::FLUSH_THRESHOLD } 94 | 95 | it 'flushes read data' do 96 | subject.read(1 * KILOBYTE) 97 | expect { subject.flush } 98 | .to change(subject, :used).by(-1 * KILOBYTE) 99 | end 100 | 101 | it 'does not flush on small reads' do 102 | subject.read(1 * KILOBYTE) 103 | expect { subject.read(1 * KILOBYTE) } 104 | .not_to change(subject, :used) 105 | end 106 | 107 | it 'flushes automatically when the whole buffer is read' do 108 | subject.read(1 * KILOBYTE) 109 | expect { subject.read(15 * KILOBYTE) } 110 | .to change(subject, :used).to(0) 111 | end 112 | 113 | it 'flushes automatically when more than flush threshold is flushable' do 114 | subject = Piperator::IO.new(['a' * (flush_threshold + 1)].each) 115 | expect { subject.read(flush_threshold) } 116 | .to change(subject, :used).to(1) 117 | end 118 | end 119 | 120 | describe '#peek' do 121 | subject { Piperator::IO.new(%w[foo bar].each) } 122 | 123 | it 'reads specific number of bytes' do 124 | expect(subject.peek(4)).to eq('foob') 125 | end 126 | 127 | it 'does not change pointer for read' do 128 | subject.peek(2) 129 | expect(subject.read(4)).to eq('foob') 130 | end 131 | 132 | it 'peeking to EOF works' do 133 | expect(subject.peek(20)).to eq('foobar') 134 | end 135 | 136 | it 'peeking to EOF does not change pointer for read' do 137 | subject.peek(10) 138 | expect(subject.read(4)).to eq('foob') 139 | end 140 | end 141 | 142 | describe 'eof?' do 143 | subject { Piperator::IO.new(%w[foo bar].each) } 144 | 145 | it 'is not at eof when starting' do 146 | expect(subject).not_to be_eof 147 | end 148 | 149 | it 'is eof when at end' do 150 | subject.read(10) 151 | expect(subject).to be_eof 152 | end 153 | end 154 | end 155 | -------------------------------------------------------------------------------- /spec/piperator/pipeline_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe Piperator::Pipeline do 4 | let(:add1) { ->(input) { input.lazy.map { |i| i + 1 } } } 5 | let(:square) { ->(input) { input.lazy.map { |i| i * i } } } 6 | let(:sum) { ->(input) { input.reduce(0, &:+) } } 7 | 8 | describe 'calling' do 9 | it 'calls through all chain pipes in order' do 10 | chain = Piperator::Pipeline.new([add1, square]) 11 | expect(chain.call([1, 2, 3]).to_a).to eq([4, 9, 16]) 12 | end 13 | 14 | it 'returns original enumerable when chain is empty' do 15 | input = [1, 2, 3] 16 | chain = Piperator::Pipeline.new([]) 17 | expect(chain.call(input).to_a).to be(input) 18 | end 19 | 20 | it 'defaults to empty array when calling' do 21 | expect(Piperator::Pipeline.new([sum]).call([])).to eq(0) 22 | end 23 | end 24 | 25 | describe 'composition' do 26 | it 'runs runs through all input pipes' do 27 | first = Piperator::Pipeline.new([square]) 28 | second = Piperator::Pipeline.new([add1]) 29 | expect(first.pipe(second).call([1, 2, 3]).to_a).to eq([2, 5, 10]) 30 | end 31 | 32 | it 'can compose callables' do 33 | pipeline = Piperator::Pipeline.new 34 | expect(pipeline.pipe(add1).pipe(square).call([1, 2, 3]).to_a) 35 | .to eq([4, 9, 16]) 36 | end 37 | 38 | it 'can compose values with using Pipeline#wrap' do 39 | pipeline = Piperator::Pipeline.wrap([1, 2, 3]).pipe(square) 40 | 41 | expect(pipeline.call.to_a).to eq([1, 4, 9]) 42 | end 43 | 44 | it 'can start composition from empty Pipeline class' do 45 | expect(Piperator::Pipeline.pipe(add1).call([3]).to_a).to eq([4]) 46 | end 47 | 48 | it 'treats pipeline pipe as an identity transformation' do 49 | pipeline = Piperator::Pipeline.pipe(add1).pipe(Piperator::Pipeline) 50 | expect(pipeline.call([1, 2]).to_a).to eq([2, 3]) 51 | end 52 | 53 | it 'can start pipeline from an enumerable' do 54 | pipeline = Piperator::Pipeline.wrap([1, 2, 3]).pipe(add1) 55 | expect(pipeline.to_a).to eq([2, 3, 4]) 56 | end 57 | 58 | it 'can do strict evaluation at the end' do 59 | expect(Piperator::Pipeline.pipe(add1).pipe(sum).call([1, 2, 3])).to eq(9) 60 | end 61 | end 62 | 63 | describe '.lazy' do 64 | it 'gets invoked' do 65 | counter = 0 66 | chain = Piperator::Pipeline.lazy do 67 | counter += 1 68 | ->(input) { input } 69 | end 70 | 71 | expect(chain.call([1, 2, 3]).to_a).to eq([1, 2, 3]) 72 | expect(counter).to eq(1) 73 | end 74 | 75 | it 'memoizes its pipe' do 76 | counter = 0 77 | chain = Piperator::Pipeline.lazy do 78 | counter += 1 79 | ->(input) { input } 80 | end 81 | 82 | 2.times { chain.call([1, 2, 3]) } 83 | expect(counter).to eq(1) 84 | end 85 | end 86 | 87 | describe '#lazy' do 88 | it 'gets invoked' do 89 | counter = 0 90 | chain = Piperator::Pipeline.pipe(add1).lazy do 91 | counter += 1 92 | ->(input) { input.lazy } 93 | end 94 | 95 | expect(chain.call([1, 2, 3]).to_a).to eq([2, 3, 4]) 96 | expect(counter).to eq(1) 97 | end 98 | 99 | it 'memoizes its pipe' do 100 | counter = 0 101 | chain = Piperator::Pipeline.pipe(add1).lazy do 102 | counter += 1 103 | ->(input) { input.lazy } 104 | end 105 | 106 | expect(chain.call([1, 2, 3]).to_a).to eq([2, 3, 4]) 107 | expect(chain.call([1, 2, 3]).to_a).to eq([2, 3, 4]) 108 | expect(counter).to eq(1) 109 | end 110 | end 111 | end 112 | -------------------------------------------------------------------------------- /spec/piperator_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | # rubocop:disable Metrics/BlockLength 4 | RSpec.describe Piperator do 5 | it 'has a version number' do 6 | expect(Piperator::VERSION).not_to be nil 7 | end 8 | 9 | it 'can start piping from Piperator' do 10 | expect(Piperator.pipe(->(i) { i }).call([1, 2, 3]).to_a).to eq([1, 2, 3]) 11 | end 12 | 13 | it 'can start wrap directly from Piperator' do 14 | expect(Piperator.wrap([1]).call.to_a).to eq([1]) 15 | end 16 | 17 | describe '.build' do 18 | it 'returns a new Pipeline' do 19 | expect(Piperator.build).to be_a(Piperator::Pipeline) 20 | end 21 | 22 | it 'can build a pipeline with block' do 23 | counter = 0 24 | def ok? 25 | true 26 | end 27 | pipeline = Piperator.build do 28 | wrap [4, 5] if ok? 29 | pipe(->(input) { input.lazy.map { |i| i + 1 } }) 30 | lazy do 31 | counter += 1 32 | ->(input) { input.lazy } 33 | end 34 | pipe(->(input) { input.lazy.map { |i| i * 2 } }) 35 | end 36 | expect(pipeline.call.to_a).to eq([10, 12]) 37 | expect(counter).to eq(1) 38 | end 39 | 40 | it 'can call private methods' do 41 | klass = Class.new do 42 | def pipeline 43 | Piperator.build do 44 | wrap [4, 5] 45 | pipe plus1 46 | end 47 | end 48 | 49 | private 50 | 51 | def plus1 52 | ->(input) { input.lazy.map { |i| i + 1 } } 53 | end 54 | end 55 | 56 | expect(klass.new.pipeline.to_a).to eq([5, 6]) 57 | end 58 | 59 | it 'gives builder as argument' do 60 | @ok = -> { true } 61 | 62 | expect(Piperator.build do |pipeline| 63 | pipeline.wrap [4, 5] if @ok 64 | pipeline.pipe(->(input) { input.lazy.map { |i| i + 1 } }) 65 | end.to_a).to eq([5, 6]) 66 | end 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | 3 | if RUBY_VERSION >= '2.5' 4 | require 'simplecov' 5 | require 'simplecov-cobertura' 6 | SimpleCov.start do 7 | add_filter '/spec/' 8 | end 9 | 10 | if ENV['CI'] 11 | SimpleCov.formatter = SimpleCov::Formatter::CoberturaFormatter 12 | end 13 | end 14 | 15 | require 'piperator' 16 | 17 | RSpec.configure do |config| 18 | # Enable flags like --only-failures and --next-failure 19 | config.example_status_persistence_file_path = '.rspec_status' 20 | 21 | config.expect_with :rspec do |c| 22 | c.syntax = :expect 23 | end 24 | end 25 | --------------------------------------------------------------------------------