├── Rakefile ├── lib ├── ruby_snowflake │ ├── version.rb │ ├── client │ │ ├── streaming_result_strategy.rb │ │ ├── single_thread_in_memory_strategy.rb │ │ ├── http_connection_wrapper.rb │ │ ├── threaded_in_memory_strategy.rb │ │ └── key_pair_jwt_auth_manager.rb │ ├── result.rb │ ├── streaming_result.rb │ ├── row.rb │ └── client.rb └── rb_snowflake_client.rb ├── .gitignore ├── .github ├── dependabot.yml └── workflows │ ├── release-gh-packages.yml │ ├── release-rubygems.yml │ └── ci.yml ├── spec ├── spec_helper.rb ├── streaming_test.rb ├── test.rb └── ruby_snowflake │ ├── client │ ├── http_connection_wrapper_spec.rb │ └── key_pair_jwt_auth_manager_spec.rb │ └── client_spec.rb ├── Gemfile ├── rb_snowflake_client.gemspec ├── LICENSE.txt ├── Gemfile.lock ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md └── README.md /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'bundler/gem_tasks' 4 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/version.rb: -------------------------------------------------------------------------------- 1 | module RubySnowflake 2 | VERSION = "1.5.0" 3 | end 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.project 2 | /.rakeTasks 3 | .idea/* 4 | 5 | # ruby gems 6 | *.gem 7 | /.DS_Store 8 | .env 9 | .ruby-version 10 | -------------------------------------------------------------------------------- /lib/rb_snowflake_client.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "ruby_snowflake/client" 4 | 5 | module RubySnowflake 6 | end 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # 4 | # BUNDLER 5 | # 6 | - package-ecosystem: bundler 7 | directory: '/' 8 | schedule: 9 | interval: weekly 10 | allow: 11 | - dependency-type: direct 12 | - dependency-type: indirect 13 | open-pull-requests-limit: 10 14 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require "rb_snowflake_client" 2 | require "rspec" 3 | require "pry" 4 | require "dotenv/load" 5 | 6 | RSpec.configure do |config| 7 | config.run_all_when_everything_filtered = true 8 | config.order = "random" 9 | config.mock_with( :rspec ) do |mock| 10 | mock.syntax = :expect 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source "https://rubygems.org" 4 | 5 | # Specify your gem's dependencies in ruby_snowflake_client.gemspec 6 | gemspec 7 | 8 | gem "bundler" 9 | gem "rake" 10 | 11 | group :development, :test do 12 | gem "activesupport" 13 | end 14 | 15 | group :development do 16 | gem "parallel" 17 | gem "pry" 18 | end 19 | 20 | group :test do 21 | gem "rspec" 22 | end 23 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/client/streaming_result_strategy.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module RubySnowflake 4 | class Client 5 | class StreamingResultStrategy 6 | def self.result(statement_json_body, retreive_proc) 7 | partitions = statement_json_body["resultSetMetaData"]["partitionInfo"] 8 | 9 | result = StreamingResult.new( 10 | partitions.size, 11 | statement_json_body["resultSetMetaData"]["rowType"], 12 | retreive_proc 
13 | ) 14 | result[0] = statement_json_body["data"] 15 | 16 | result 17 | end 18 | end 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/client/single_thread_in_memory_strategy.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module RubySnowflake 4 | class Client 5 | class SingleThreadInMemoryStrategy 6 | def self.result(statement_json_body, retreive_proc) 7 | partitions = statement_json_body["resultSetMetaData"]["partitionInfo"] 8 | result = Result.new(partitions.size, statement_json_body["resultSetMetaData"]["rowType"]) 9 | result[0] = statement_json_body["data"] 10 | 11 | partitions.each_with_index do |partition, index| 12 | next if index == 0 # already have the first partition 13 | result[index] = retreive_proc.call(index) 14 | end 15 | 16 | result 17 | end 18 | end 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /.github/workflows/release-gh-packages.yml: -------------------------------------------------------------------------------- 1 | name: Release to Github Packages 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - "master" 7 | paths: 8 | - "lib/ruby_snowflake/version.rb" 9 | jobs: 10 | release: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | packages: write 14 | contents: read 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up Ruby 18 | uses: ruby/setup-ruby@v1 19 | with: 20 | ruby-version: '3.3' 21 | - name: Install dependencies 22 | run: bundle install 23 | - name: Build gem 24 | run: rake build 25 | - name: Build and publish to GitHub Package 26 | uses: actionshub/publish-gem-to-github@v1.0.6 27 | with: 28 | token: ${{ secrets.GITHUB_TOKEN }} 29 | owner: rinsed-org 30 | -------------------------------------------------------------------------------- /.github/workflows/release-rubygems.yml: 
-------------------------------------------------------------------------------- 1 | name: Release to Rubygems 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - "master" 7 | paths: 8 | - "lib/ruby_snowflake/version.rb" 9 | jobs: 10 | release: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Ruby 15 | uses: ruby/setup-ruby@v1 16 | with: 17 | ruby-version: '3.3' 18 | - name: Install dependencies 19 | run: bundle install 20 | - name: Publish to RubyGems 21 | run: | 22 | mkdir -p $HOME/.gem 23 | touch $HOME/.gem/credentials 24 | chmod 0600 $HOME/.gem/credentials 25 | printf -- "---\n:rubygems_api_key: ${GEM_HOST_API_KEY}\n" > $HOME/.gem/credentials 26 | gem build *.gemspec 27 | gem push *.gem 28 | env: 29 | GEM_HOST_API_KEY: "${{secrets.RUBYGEMS_AUTH_TOKEN}}" 30 | -------------------------------------------------------------------------------- /rb_snowflake_client.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative "lib/ruby_snowflake/version" 4 | 5 | Gem::Specification.new do |s| 6 | s.name = "rb_snowflake_client" 7 | s.version = RubySnowflake::VERSION 8 | s.summary = "Snowflake connector for Ruby" 9 | s.author = "Rinsed" 10 | s.email = ["reid@rinsed.co", "alex@rinsed.co"] 11 | s.description = <<~DESC 12 | Using the HTTP V2 Api for Snowflake runs queries & creates native Ruby objects. 
13 | DESC 14 | s.homepage = "https://github.com/rinsed-org/rb-snowflake-client" 15 | s.license = "MIT" 16 | 17 | s.files = Dir.chdir(File.expand_path(__dir__)) do 18 | `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features|vendor)/}) } 19 | end 20 | 21 | s.require_paths = ["lib"] 22 | s.add_dependency "bigdecimal", ">= 3.0" 23 | s.add_dependency "concurrent-ruby", ">= 1.2" 24 | s.add_dependency "connection_pool", ">= 2.4" 25 | s.add_dependency "dotenv", ">= 2.8" 26 | s.add_dependency "json", ">= 2.1.0" 27 | s.add_dependency "jwt", ">= 2.7" 28 | s.add_dependency "retryable", ">= 3.0" 29 | end 30 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Dotan Nahum 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/client/http_connection_wrapper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "net/http" 4 | 5 | module RubySnowflake 6 | class Client 7 | class HttpConnectionWrapper 8 | def initialize(hostname, port) 9 | @hostname = hostname 10 | @port = port 11 | end 12 | 13 | def start 14 | @connection = Net::HTTP.start(@hostname, @port, use_ssl: true) 15 | self 16 | rescue OpenSSL::SSL::SSLError => e 17 | raise e # let open ssl errors propagate up to get retried 18 | rescue StandardError 19 | raise ConnectionError.new "Error connecting to server." 20 | end 21 | 22 | def request(request) 23 | # connections can timeout and close, re-open them 24 | # which is what the connection pool expects 25 | start unless connection.active? 26 | 27 | begin 28 | connection.request(request) 29 | rescue OpenSSL::SSL::SSLError => e 30 | raise e # let open ssl errors propagate up to get retried 31 | rescue StandardError 32 | raise RequestError, "HTTP error requesting data" 33 | end 34 | end 35 | 36 | private 37 | attr_accessor :connection 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /spec/streaming_test.rb: -------------------------------------------------------------------------------- 1 | require "benchmark" 2 | require "rb_snowflake_client" 3 | require 'dotenv/load' 4 | 5 | def new_client 6 | client = RubySnowflake::Client.new( 7 | "https://oza47907.us-east-1.snowflakecomputing.com", 8 | ENV["SNOWFLAKE_PRIVATE_KEY"], # set this in your .env file 9 | "GBLARLO", 10 | "OZA47907", 11 | "SNOWFLAKE_CLIENT_TEST", 12 | "WEB_TEST_WH", 13 | "" 14 | ) 15 | client.logger.level = Logger::DEBUG 16 | client 17 | end 18 | 19 | size = 1_000 20 | 11.times do 21 | count = 0 22 | data = nil 23 | bm = 24 | Benchmark.measure do 25 | data = new_client.query( 
26 | "SELECT * FROM FIVETRAN_DATABASE.RINSED_WEB_PRODUCTION_MAMMOTH.EVENTS limit #{size};", 27 | streaming: true 28 | ) 29 | 30 | data.each {|row| row; count += 1 } # access each row, causing type conversion to happen 31 | end 32 | 33 | # you can now data.first or data.each and get rows that act like hashes 34 | # Row does the parsing at access time right now 35 | # data.first.tap do |row| 36 | # puts row 37 | # puts "#{row[:id]}, #{row[:code]}, #{row[:payload]}, #{row[:updated_at]}" 38 | # end 39 | 40 | puts "Querying with #{size}; took #{bm.real} actual size #{count}" 41 | puts 42 | puts 43 | size = size * 2 44 | end 45 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: "*" 6 | pull_request: 7 | branches: "*" 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Ruby 15 | uses: ruby/setup-ruby@v1 16 | with: 17 | ruby-version: '3.3' 18 | - name: Install dependencies 19 | run: bundle install 20 | - name: Build gem 21 | run: rake build 22 | # Enable this section to allow debugging via SSH 23 | #- name: Setup upterm session 24 | #uses: lhotari/action-upterm@v1 25 | #with: 26 | ### limits ssh access and adds the ssh public key for the user which triggered the workflow 27 | #limit-access-to-actor: true 28 | - name: Install gem 29 | run: cd pkg && gem install --local *.gem 30 | - name: Run tests 31 | run: bundle exec rspec 32 | env: # Or as an environment variable 33 | SNOWFLAKE_URI: ${{ secrets.SNOWFLAKE_URI }} 34 | SNOWFLAKE_ORGANIZATION: ${{ secrets.SNOWFLAKE_ORGANIZATION }} 35 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} 36 | SNOWFLAKE_DEFAULT_WAREHOUSE: ${{ secrets.SNOWFLAKE_DEFAULT_WAREHOUSE }} 37 | SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }} 38 | SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }} 39 | 
SNOWFLAKE_PRIVATE_KEY: ${{ secrets.SNOWFLAKE_CLIENT_TEST_PRIVATE_KEY }} 40 | -------------------------------------------------------------------------------- /spec/test.rb: -------------------------------------------------------------------------------- 1 | require "benchmark" 2 | require "logger" 3 | require "rb_snowflake_client" 4 | require "dotenv/load" 5 | 6 | def new_client 7 | client = RubySnowflake::Client.new( 8 | "https://oza47907.us-east-1.snowflakecomputing.com", 9 | ENV["SNOWFLAKE_PRIVATE_KEY"], # set this in your .env file 10 | "GBLARLO", 11 | "OZA47907", 12 | "SNOWFLAKE_CLIENT_TEST", 13 | "WEB_TEST_WH", 14 | "" 15 | ) 16 | client.logger.level = Logger::DEBUG 17 | client 18 | end 19 | 20 | size = 1_000 21 | 11.times do 22 | data = nil 23 | type_conversion_time = 0 24 | bm = 25 | Benchmark.measure do 26 | data = new_client.query <<-SQL 27 | SELECT * FROM FIVETRAN_DATABASE.RINSED_WEB_PRODUCTION_MAMMOTH.EVENTS limit #{size}; 28 | SQL 29 | 30 | # access each column on each row, causing type conversion to happen 31 | keys = data.columns 32 | data.each do |row| 33 | type_conversion_time += Benchmark.measure do 34 | keys.each { |key| row[key] } 35 | end.utime 36 | end 37 | end 38 | 39 | # you can now data.first or data.each and get rows that act like hashes 40 | # Row does the parsing at access time right now 41 | # data.first.tap do |row| 42 | # puts row 43 | # puts "#{row[:id]}, #{row[:code]}, #{row[:payload]}, #{row[:updated_at]}" 44 | # end 45 | 46 | puts "Querying with #{size}; took #{bm.utime} actual size #{data.size} type conversion: #{type_conversion_time}" 47 | puts 48 | puts 49 | size = size * 2 50 | end 51 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/result.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "concurrent" 4 | 5 | require_relative "row" 6 | 7 | module RubySnowflake 8 | class Result 9 | include 
Enumerable 10 | 11 | attr_reader :data 12 | 13 | def initialize(partition_count, row_type_data) 14 | @data = Concurrent::Array.new(partition_count) 15 | extract_row_metadata(row_type_data) 16 | end 17 | 18 | def []=(index, value) 19 | data[index] = value 20 | end 21 | 22 | def get_all_rows 23 | map(&:to_h) 24 | end 25 | 26 | def each 27 | return to_enum(:each) unless block_given? 28 | 29 | data.each do |partition| 30 | partition.each do |row| 31 | yield wrap_row(row) 32 | end 33 | end 34 | end 35 | 36 | def size 37 | data.map(&:size).sum 38 | end 39 | 40 | alias length size 41 | 42 | def first 43 | wrap_row(data.first.first) 44 | end 45 | 46 | def last 47 | wrap_row(data.last.last) 48 | end 49 | 50 | def columns 51 | @row_types.map {|type| type[:name].downcase } 52 | end 53 | 54 | private 55 | def wrap_row(row) 56 | Row.new(@row_types, @column_to_index, row) 57 | end 58 | 59 | def extract_row_metadata(row_type_data) 60 | @row_types = [] 61 | @column_to_index = {} 62 | 63 | row_type_data.each_with_index do |type_data, index| 64 | @row_types[index] = { 65 | type: type_data["type"].downcase.to_sym, 66 | scale: type_data["scale"].to_i, 67 | precision: type_data["precision"].to_i, 68 | name: type_data["name"].to_sym, 69 | } 70 | @column_to_index[type_data["name"].downcase] = index 71 | end 72 | end 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/streaming_result.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "concurrent" 4 | 5 | require_relative "result" 6 | 7 | module RubySnowflake 8 | class StreamingResult < Result 9 | def initialize(partition_count, row_type_data, retreive_proc) 10 | super(partition_count, row_type_data) 11 | @retreive_proc = retreive_proc 12 | end 13 | 14 | def each 15 | return to_enum(:each) unless block_given? 
16 | 17 | thread_pool = Concurrent::FixedThreadPool.new 1 18 | 19 | data.each_with_index do |_partition, index| 20 | next_index = [index+1, data.size-1].min 21 | if data[next_index].nil? # prefetch 22 | data[next_index] = Concurrent::Future.execute(executor: thread_pool) do 23 | @retreive_proc.call(next_index) 24 | end 25 | end 26 | 27 | if data[index].is_a? Concurrent::Future 28 | data[index] = data[index].value # wait for it to finish 29 | end 30 | 31 | data[index].each do |row| 32 | yield wrap_row(row) 33 | end 34 | 35 | # After iterating over the current partition, clear the data to release memory 36 | data[index].clear 37 | 38 | # Reassign to a symbol so: 39 | # - When looking at the list of partitions in `data` it is easier to detect 40 | # - Will raise an exception if `data.each` is attempted to be called again 41 | # - It won't trigger prefetch detection as `next_index` 42 | data[index] = :finished 43 | end 44 | end 45 | 46 | 47 | def size 48 | not_implemented 49 | end 50 | 51 | def last 52 | not_implemented 53 | end 54 | 55 | private 56 | def not_implemented 57 | raise "not implemented on streaming result set" 58 | end 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/client/threaded_in_memory_strategy.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module RubySnowflake 4 | class Client 5 | class ThreadedInMemoryStrategy 6 | def self.result(statement_json_body, retreive_proc, num_threads) 7 | partitions = statement_json_body["resultSetMetaData"]["partitionInfo"] 8 | result = Result.new(partitions.size, statement_json_body["resultSetMetaData"]["rowType"]) 9 | result[0] = statement_json_body["data"] 10 | 11 | thread_pool = Concurrent::FixedThreadPool.new(num_threads) 12 | partitions 13 | .each_with_index.map do |partition, index| 14 | next if index == 0 # already have the first partition 15 | [index, 
Concurrent::Future.execute(executor: thread_pool) { retreive_proc.call(index) }] 16 | end 17 | .each do |entry| 18 | next if entry.nil? # 0th index 19 | 20 | index, future = entry 21 | if future.rejected? 22 | if future.reason.is_a? RubySnowflake::Error 23 | raise future.reason 24 | else 25 | raise ConnectionStarvedError.new( 26 | "A partition request timed out. This is usually do to using the client in" \ 27 | "multiple threads. The client uses a connection thread pool and if too many" \ 28 | "requests are all done in threads at the same time, threads can get starved" \ 29 | "of access to connections. The solution for this is to either increase the " \ 30 | "max_connections parameter on the client or create a new client instance" \ 31 | "with it's own connection pool to snowflake per thread. Rejection reason: #{future.reason.message}" 32 | ) 33 | end 34 | end 35 | result[index] = future.value 36 | end 37 | result 38 | end 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: . 
3 | specs: 4 | rb_snowflake_client (1.4.0) 5 | bigdecimal (>= 3.0) 6 | concurrent-ruby (>= 1.2) 7 | connection_pool (>= 2.4) 8 | dotenv (>= 2.8) 9 | json (>= 2.1.0) 10 | jwt (>= 2.7) 11 | retryable (>= 3.0) 12 | 13 | GEM 14 | remote: https://rubygems.org/ 15 | specs: 16 | activesupport (8.0.3) 17 | base64 18 | benchmark (>= 0.3) 19 | bigdecimal 20 | concurrent-ruby (~> 1.0, >= 1.3.1) 21 | connection_pool (>= 2.2.5) 22 | drb 23 | i18n (>= 1.6, < 2) 24 | logger (>= 1.4.2) 25 | minitest (>= 5.1) 26 | securerandom (>= 0.3) 27 | tzinfo (~> 2.0, >= 2.0.5) 28 | uri (>= 0.13.1) 29 | base64 (0.3.0) 30 | benchmark (0.4.1) 31 | bigdecimal (3.3.1) 32 | coderay (1.1.3) 33 | concurrent-ruby (1.3.5) 34 | connection_pool (2.5.4) 35 | diff-lcs (1.6.2) 36 | dotenv (3.1.8) 37 | drb (2.2.3) 38 | i18n (1.14.7) 39 | concurrent-ruby (~> 1.0) 40 | json (2.15.1) 41 | jwt (3.1.2) 42 | base64 43 | logger (1.7.0) 44 | method_source (1.1.0) 45 | minitest (5.26.0) 46 | parallel (1.27.0) 47 | pry (0.15.2) 48 | coderay (~> 1.1) 49 | method_source (~> 1.0) 50 | rake (13.3.0) 51 | retryable (3.0.5) 52 | rspec (3.13.1) 53 | rspec-core (~> 3.13.0) 54 | rspec-expectations (~> 3.13.0) 55 | rspec-mocks (~> 3.13.0) 56 | rspec-core (3.13.5) 57 | rspec-support (~> 3.13.0) 58 | rspec-expectations (3.13.5) 59 | diff-lcs (>= 1.2.0, < 2.0) 60 | rspec-support (~> 3.13.0) 61 | rspec-mocks (3.13.5) 62 | diff-lcs (>= 1.2.0, < 2.0) 63 | rspec-support (~> 3.13.0) 64 | rspec-support (3.13.6) 65 | securerandom (0.4.1) 66 | tzinfo (2.0.6) 67 | concurrent-ruby (~> 1.0) 68 | uri (1.0.4) 69 | 70 | PLATFORMS 71 | arm64-darwin-22 72 | ruby 73 | 74 | DEPENDENCIES 75 | activesupport 76 | bundler 77 | parallel 78 | pry 79 | rake 80 | rb_snowflake_client! 
81 | rspec 82 | 83 | BUNDLED WITH 84 | 2.5.10 85 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/client/key_pair_jwt_auth_manager.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "jwt" 4 | require "openssl" 5 | require "concurrent" 6 | 7 | module RubySnowflake 8 | class Client 9 | class KeyPairJwtAuthManager 10 | # requires text of a PEM formatted RSA private key 11 | def initialize(organization, account, user, private_key, jwt_token_ttl) 12 | @organization = organization 13 | @account = account 14 | @user = user 15 | @private_key_pem = private_key 16 | @jwt_token_ttl = jwt_token_ttl 17 | 18 | # start with an expired value to force creation 19 | @token_expires_at = Time.now.to_i - 1 20 | @token_semaphore = Concurrent::Semaphore.new(1) 21 | end 22 | 23 | def jwt_token 24 | return @token unless jwt_token_expired? 25 | 26 | @token_semaphore.acquire do 27 | now = Time.now.to_i 28 | @token_expires_at = now + @jwt_token_ttl 29 | 30 | private_key = OpenSSL::PKey.read(@private_key_pem) 31 | 32 | payload = { 33 | :iss => "#{account_name}.#{@user.upcase}.#{public_key_fingerprint}", 34 | :sub => "#{account_name}.#{@user.upcase}", 35 | :iat => now, 36 | :exp => @token_expires_at 37 | } 38 | 39 | @token = JWT.encode payload, private_key, "RS256" 40 | end 41 | end 42 | 43 | private 44 | def jwt_token_expired? 45 | Time.now.to_i > @token_expires_at 46 | end 47 | 48 | def account_name 49 | if @organization == nil || @organization == "" 50 | @account.upcase 51 | else 52 | "#{@organization.upcase}-#{@account.upcase}" 53 | end 54 | end 55 | 56 | def public_key_fingerprint 57 | return @public_key_fingerprint unless @public_key_fingerprint.nil? 
58 | 59 | public_key_der = OpenSSL::PKey::RSA.new(@private_key_pem).public_key.to_der 60 | digest = OpenSSL::Digest::SHA256.new.digest(public_key_der) 61 | fingerprint = Base64.strict_encode64(digest) 62 | 63 | @public_key_fingerprint = "SHA256:#{fingerprint}" 64 | end 65 | end 66 | end 67 | end 68 | -------------------------------------------------------------------------------- /spec/ruby_snowflake/client/http_connection_wrapper_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | RSpec.describe RubySnowflake::Client::HttpConnectionWrapper do 4 | let(:hostname) { "example.com" } 5 | let(:port) { 443 } 6 | let(:wrapper) { described_class.new(hostname, port) } 7 | 8 | describe "#start" do 9 | context "when the connection is successful" do 10 | it "returns itself" do 11 | allow(Net::HTTP).to receive(:start).with(hostname, port, use_ssl: true).and_return(double("HTTP Connection")) 12 | expect(wrapper.start).to eq(wrapper) 13 | end 14 | end 15 | 16 | context "when the connection fails with OpenSSL::SSL::SSLError" do 17 | it "propagates OpenSSL::SSL::SSLError" do 18 | allow(Net::HTTP).to receive(:start).with(hostname, port, use_ssl: true).and_raise(OpenSSL::SSL::SSLError) 19 | expect { wrapper.start }.to raise_error(OpenSSL::SSL::SSLError) 20 | end 21 | end 22 | 23 | context "when the connection fails with another StandardError" do 24 | it "raises a ConnectionError" do 25 | allow(Net::HTTP).to receive(:start).with(hostname, port, use_ssl: true).and_raise(StandardError) 26 | expect { wrapper.start }.to raise_error(RubySnowflake::ConnectionError) 27 | end 28 | end 29 | end 30 | 31 | describe "#request" do 32 | let(:request) { double("HTTP Request") } 33 | let(:response) { double("HTTP Response") } 34 | let(:connection_double) { double("HTTP Connection", active?: true, request: response) } 35 | 36 | before do 37 | allow(wrapper).to receive(:connection).and_return(connection_double) 38 | end 39 | 40 | 
context "when the request is successful" do 41 | it "returns the response" do 42 | allow(connection_double).to receive(:request).with(request).and_return(response) 43 | expect(wrapper.request(request)).to eq(response) 44 | end 45 | end 46 | 47 | context "when there is an OpenSSL::SSL::SSLError" do 48 | it "propagates the OpenSSL::SSL::SSLError" do 49 | allow(connection_double).to receive(:request).with(request).and_raise(OpenSSL::SSL::SSLError) 50 | expect { wrapper.request(request) }.to raise_error(OpenSSL::SSL::SSLError) 51 | end 52 | end 53 | 54 | context "when there is another StandardError" do 55 | it "raises a RequestError" do 56 | allow(connection_double).to receive(:request).with(request).and_raise(StandardError) 57 | expect { wrapper.request(request) }.to raise_error(RubySnowflake::RequestError) 58 | end 59 | end 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/row.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "date" 4 | require "time" 5 | 6 | module RubySnowflake 7 | class Row 8 | include Enumerable 9 | 10 | EPOCH_JULIAN_DAY_NUMBER = Date.new(1970,1,1).jd 11 | TIME_FORMAT = "%s.%N".freeze 12 | 13 | def initialize(row_types, column_to_index, data) 14 | @row_types = row_types 15 | @data = data 16 | @column_to_index = column_to_index 17 | end 18 | 19 | # see: https://docs.snowflake.com/en/developer-guide/sql-api/handling-responses#getting-the-data-from-the-results 20 | def [](column) 21 | index = if column.is_a?(Numeric) 22 | Integer(column) 23 | else 24 | # Handle column names case-insensitively regardless of string or symbol 25 | @column_to_index[column.to_s.downcase] 26 | end 27 | 28 | return nil if index.nil? 29 | return nil if @data[index].nil? 
30 | 31 | case @row_types[index][:type] 32 | when :boolean 33 | @data[index] == "true" 34 | when :date 35 | Date.jd(Integer(@data[index]) + EPOCH_JULIAN_DAY_NUMBER) 36 | when :fixed 37 | if @row_types[index][:scale] == 0 38 | Integer(@data[index]) 39 | else 40 | BigDecimal(@data[index]).round(@row_types[index][:scale]) 41 | end 42 | 43 | # snowflake treats these all as 64 bit IEEE 754 floating point numbers, and will we too 44 | when :float, :double, :"double precision", :real 45 | Float(@data[index]) 46 | 47 | # Despite snowflake indicating that it sends the offset in minutes, the actual time in UTC 48 | # is always sent in the first half of the data. If an offset is sent it looks like: 49 | # "1641008096.123000000 1980" 50 | # If there isn't one, it's just like this: 51 | # "1641065696.123000000" 52 | # in all cases, the actual time, in UTC is the float value, and the offset is ignorable 53 | when :time, :datetime, :timestamp, :timestamp_ntz, :timestamp_ltz, :timestamp_tz 54 | Time.strptime(@data[index], TIME_FORMAT).utc 55 | else 56 | @data[index] 57 | end 58 | end 59 | 60 | def each 61 | return to_enum __method__ unless block_given? 
62 | 63 | @column_to_index.each_pair do |name, index| 64 | yield(name, self[index]) 65 | end 66 | 67 | self 68 | end 69 | 70 | def keys 71 | map { |k, _| k } 72 | end 73 | 74 | alias columns keys 75 | 76 | def values 77 | map { |_, v| v } 78 | end 79 | 80 | def to_s 81 | to_h.to_s 82 | end 83 | end 84 | end 85 | -------------------------------------------------------------------------------- /spec/ruby_snowflake/client/key_pair_jwt_auth_manager_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | RSpec.describe RubySnowflake::Client::KeyPairJwtAuthManager do 4 | let(:organization) { nil } 5 | let(:account) { "account" } 6 | let(:user) { "user" } 7 | let(:private_key) { OpenSSL::PKey::RSA.new(2048).to_pem } 8 | let(:jwt_token_ttl) { 3600 } 9 | 10 | subject { described_class.new(organization, account, user, private_key, jwt_token_ttl) } 11 | 12 | describe "#jwt_token" do 13 | context "when creating a JWT token" do 14 | it "generates a valid token" do 15 | expect(subject.jwt_token).to be_a(String) 16 | end 17 | 18 | it "generates a token with the correct claims" do 19 | # Use the JWT gem to decode the token 20 | token = subject.jwt_token 21 | decoded_token = JWT.decode(token, OpenSSL::PKey::RSA.new(private_key).public_key, true, { algorithm: 'RS256' })[0] 22 | 23 | expect(decoded_token["iss"]).to include(account.upcase) 24 | expect(decoded_token["iss"]).to include(user.upcase) 25 | expect(decoded_token["sub"]).to eq("#{account.upcase}.#{user.upcase}") 26 | expect(decoded_token["iat"]).to be_a(Integer) 27 | expect(decoded_token["exp"]).to be_a(Integer) 28 | end 29 | 30 | it "creates token with proper expiration time" do 31 | now = Time.now.to_i 32 | token = subject.jwt_token 33 | decoded_token = JWT.decode(token, OpenSSL::PKey::RSA.new(private_key).public_key, true, { algorithm: 'RS256' })[0] 34 | 35 | # Expect the token to expire in approximately jwt_token_ttl seconds 36 | 
expect(decoded_token["exp"] - now).to be_within(5).of(jwt_token_ttl) 37 | end 38 | end 39 | end 40 | 41 | describe "account_name handling" do 42 | context "when organization is nil" do 43 | let(:organization) { nil } 44 | 45 | it "uses only the account in the token" do 46 | token = subject.jwt_token 47 | decoded_token = JWT.decode(token, OpenSSL::PKey::RSA.new(private_key).public_key, true, { algorithm: 'RS256' })[0] 48 | 49 | expect(decoded_token["iss"]).to start_with("#{account.upcase}.") 50 | expect(decoded_token["sub"]).to eq("#{account.upcase}.#{user.upcase}") 51 | end 52 | end 53 | 54 | context "when organization is empty string" do 55 | let(:organization) { "" } 56 | 57 | it "uses only the account in the token" do 58 | token = subject.jwt_token 59 | decoded_token = JWT.decode(token, OpenSSL::PKey::RSA.new(private_key).public_key, true, { algorithm: 'RS256' })[0] 60 | 61 | expect(decoded_token["iss"]).to start_with("#{account.upcase}.") 62 | expect(decoded_token["sub"]).to eq("#{account.upcase}.#{user.upcase}") 63 | end 64 | end 65 | 66 | context "when organization is provided" do 67 | let(:organization) { "org" } 68 | 69 | it "uses org-account format in the token" do 70 | token = subject.jwt_token 71 | decoded_token = JWT.decode(token, OpenSSL::PKey::RSA.new(private_key).public_key, true, { algorithm: 'RS256' })[0] 72 | 73 | expect(decoded_token["iss"]).to start_with("#{organization.upcase}-#{account.upcase}.") 74 | expect(decoded_token["sub"]).to eq("#{organization.upcase}-#{account.upcase}.#{user.upcase}") 75 | end 76 | end 77 | end 78 | end -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 
4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## Unreleased 9 | 10 | ## [1.5.0] - 2025-10-14 11 | ### Added 12 | - Instrumentation feature added for Active Support users 13 | - Added `query_timeout` as a per-query parameter, allowing timeout override on individual queries 14 | ### Fixed 15 | - `query_timeout` now properly sends timeout parameter to Snowflake API for server-side enforcement 16 | - Streaming mode now releases consumed records, fixing memory leak. Note: if you were iterating over streaming results more than once, this is a breaking change (though that was not its intended usage). 17 | 18 | ## [1.4.0] - 2025-05-01 19 | ### Added 20 | - Enhanced Row API to implement Enumerable interface 21 | - Added case-insensitive access to Row columns via both symbol and string keys 22 | - Added numeric column access to Row (e.g., `row[0]`) 23 | - Support setting organization or ENV["SNOWFLAKE_ORGANIZATION"] to nil or "" in JWT authentication 24 | - Added default_role parameter and SNOWFLAKE_DEFAULT_ROLE env variable 25 | 26 | ## [1.3.0] - 2025-01-03 27 | ### Changed 28 | - Bumped gem dependencies to newer versions 29 | - Added support for role parameter in Client and query method 30 | 31 | ## [1.2.0] - 2025-01-03 32 | ### Changed 33 | - Switched from Oj to JSON gem for parsing 34 | - Improved performance by utilizing the optimized JSON gem 35 | 36 | ## [1.1.5] - 2024-12-19 37 | ### Fixed 38 | - Parse exception detail OR message for better error handling 39 | 40 | ## [1.1.4] - 2024-11-05 41 | ### Fixed 42 | - Fixed ENV variable issue 43 | 44 | ## [1.1.3] - 2024-08-09 45 | ### Added 46 | - Retry HTTP codes in the 3xx range 47 | 48 | ## [1.1.2] - 2024-08-06 49 | ### Fixed 50 | - CI error fixes 51 | 52 | ## [1.1.1] - 2024-07-12 53 | ### Fixed 54 | - Added 502 to specific list of retryable HTTP error codes 55 | - Fixed issue with 
checking string code presence in an array of integer values 56 | 57 | ## [1.1.0] - 2024-06-05 58 | ### Added 59 | - Support for specifying a schema in query method 60 | - Merged multiple community contributions 61 | 62 | ## [1.0.6] - 2024-06-05 63 | ### Added 64 | - Allow specifying schema in query method 65 | 66 | ## [1.0.5] - 2024-03-20 67 | ### Added 68 | - Added exponential backoff to retryable calls 69 | - Improved handling of rate limiting (429 responses) 70 | 71 | ## [1.0.4] - 2024-01-30 72 | ### Fixed 73 | - Fixed raise arguments 74 | - Now properly raising OpenSSL errors to retry them 75 | 76 | ## [1.0.3] - 2024-01-17 77 | ### Fixed 78 | - Now upcasing database and warehouse fields in requests 79 | - Fixed error where lowercase field names would result in "Unable to run command without specifying database/warehouse" 80 | 81 | ## [1.0.2] - 2024-01-16 82 | ### Fixed 83 | - Fixed typo in key pair memoization 84 | 85 | ## [1.0.1] - 2024-01-09 86 | ### Added 87 | - Added `create_jwt_token` helper method for testing 88 | - Support for time travel in tests 89 | 90 | ## [1.0.0] - 2023-12-11 91 | ### Changed 92 | - First stable release 93 | - Fixed markdown links in documentation 94 | 95 | ## [0.3.0] - 2023-12-08 96 | ### Added 97 | - Support for Snowflake polling responses 98 | - Handle async query execution 99 | 100 | ## [0.2.0] - 2023-12-07 101 | ### Added 102 | - Extracted authentication logic into its own class 103 | - Improved time handling for various Snowflake date/time types 104 | - Support for TIME, DATETIME, TIMESTAMP, TIMESTAMP_LTZ, TIMESTAMP_NTZ, TIMESTAMP_TZ 105 | 106 | ## [0.1.2] - 2023-12-04 107 | ### Added 108 | - Support for database parameter in requests 109 | - Added missing dependencies to gemspec 110 | 111 | ## [0.1.1] - 2023-12-01 112 | ### Added 113 | - Added `fetch` as an alias for `query` for compatibility with other clients 114 | 115 | ## [0.1.0] - 2023-11-28 116 | ### Added 117 | - First minor version release with basic functionality 118 
| - Support for querying Snowflake with the HTTP API 119 | - Support for streaming results 120 | 121 | ## [0.0.6] - 2023-11-27 122 | ### Changed 123 | - Cleaned up key pair authentication 124 | - Improved documentation with better setup instructions 125 | 126 | ## [0.0.5] - 2023-11-27 127 | ### Fixed 128 | - Various bug fixes and improvements 129 | 130 | ## [0.0.4] - 2023-11-22 131 | ### Changed 132 | - Fixed type handling for query results 133 | - All specs now pass 134 | 135 | ## [0.0.3] - 2023-11-21 136 | ### Changed 137 | - Renamed to RubySnowflake namespace 138 | - Initial gem structure 139 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, religion, or sexual 11 | identity and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 
15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the overall 27 | community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or advances of 32 | any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email address, 36 | without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 
56 | Examples of representing our community include using an official email address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | [INSERT CONTACT METHOD]. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series of 87 | actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or permanent 94 | ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 
100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within the 114 | community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.1, available at 120 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 127 | [https://www.contributor-covenant.org/translations][translations]. 
128 | 129 | [homepage]: https://www.contributor-covenant.org 130 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 131 | [Mozilla CoC]: https://github.com/mozilla/diversity 132 | [FAQ]: https://www.contributor-covenant.org/faq 133 | [translations]: https://www.contributor-covenant.org/translations 134 | 135 | -------------------------------------------------------------------------------- /lib/ruby_snowflake/client.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "base64" 4 | require "benchmark" 5 | require "bigdecimal" 6 | require "concurrent" 7 | require "connection_pool" 8 | require "json" 9 | require "logger" 10 | require "net/http" 11 | require "retryable" 12 | require "securerandom" 13 | require "uri" 14 | 15 | begin 16 | require "active_support" 17 | require "active_support/notifications" 18 | rescue LoadError 19 | # This isn't required 20 | end 21 | 22 | require_relative "client/http_connection_wrapper" 23 | require_relative "client/key_pair_jwt_auth_manager" 24 | require_relative "client/single_thread_in_memory_strategy" 25 | require_relative "client/streaming_result_strategy" 26 | require_relative "client/threaded_in_memory_strategy" 27 | require_relative "result" 28 | require_relative "streaming_result" 29 | 30 | module RubySnowflake 31 | class Error < StandardError 32 | def initialize(details) 33 | @details = details 34 | end 35 | 36 | def message 37 | @details.to_s 38 | end 39 | end 40 | 41 | class BadResponseError < Error ; end 42 | class ConnectionError < Error ; end 43 | class ConnectionStarvedError < Error ; end 44 | class MissingConfig < Error ; end 45 | class RetryableBadResponseError < Error ; end 46 | class RequestError < Error ; end 47 | class QueryTimeoutError < Error ; end 48 | 49 | class Client 50 | DEFAULT_LOGGER = Logger.new(STDOUT) 51 | DEFAULT_LOG_LEVEL = Logger::INFO 52 | # seconds (59 min), this is the max supported by 
snowflake - 1 minute 53 | DEFAULT_JWT_TOKEN_TTL = 3540 54 | # seconds, how long for a thread to wait for a connection before erroring 55 | DEFAULT_CONNECTION_TIMEOUT = 60 56 | # default maximum size of the http connection pool 57 | DEFAULT_MAX_CONNECTIONS = 16 58 | # default maximum size of the thread pool on a single query 59 | DEFAULT_MAX_THREADS_PER_QUERY = 8 60 | # partition count factor for number of threads 61 | # (i.e. 2 == once we have 4 partitions, spin up a second thread) 62 | DEFAULT_THREAD_SCALE_FACTOR = 4 63 | # how many times to retry common retryable HTTP responses (i.e. 429, 504) 64 | DEFAULT_HTTP_RETRIES = 2 65 | # how long to wait to allow a query to complete, in seconds 66 | DEFAULT_QUERY_TIMEOUT = 600 # 10 minutes 67 | # default role to use 68 | DEFAULT_ROLE = nil 69 | 70 | JSON_PARSE_OPTIONS = { decimal_class: BigDecimal }.freeze 71 | VALID_RESPONSE_CODES = %w(200 202).freeze 72 | POLLING_RESPONSE_CODE = "202" 73 | POLLING_INTERVAL = 2 # seconds 74 | 75 | # can't be set after initialization 76 | attr_reader :connection_timeout, :max_connections, :logger, :max_threads_per_query, :thread_scale_factor, :http_retries, :query_timeout, :default_role 77 | 78 | def self.from_env(logger: DEFAULT_LOGGER, 79 | log_level: DEFAULT_LOG_LEVEL, 80 | jwt_token_ttl: env_option("SNOWFLAKE_JWT_TOKEN_TTL", DEFAULT_JWT_TOKEN_TTL), 81 | connection_timeout: env_option("SNOWFLAKE_CONNECTION_TIMEOUT", DEFAULT_CONNECTION_TIMEOUT ), 82 | max_connections: env_option("SNOWFLAKE_MAX_CONNECTIONS", DEFAULT_MAX_CONNECTIONS ), 83 | max_threads_per_query: env_option("SNOWFLAKE_MAX_THREADS_PER_QUERY", DEFAULT_MAX_THREADS_PER_QUERY), 84 | thread_scale_factor: env_option("SNOWFLAKE_THREAD_SCALE_FACTOR", DEFAULT_THREAD_SCALE_FACTOR), 85 | http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES), 86 | query_timeout: env_option("SNOWFLAKE_QUERY_TIMEOUT", DEFAULT_QUERY_TIMEOUT), 87 | default_role: env_option("SNOWFLAKE_DEFAULT_ROLE", DEFAULT_ROLE)) 88 | private_key = 89 
| if key = ENV["SNOWFLAKE_PRIVATE_KEY"] 90 | key 91 | elsif path = ENV["SNOWFLAKE_PRIVATE_KEY_PATH"] 92 | File.read(path) 93 | else 94 | raise MissingConfig, "Either ENV['SNOWFLAKE_PRIVATE_KEY'] or ENV['SNOWFLAKE_PRIVATE_KEY_PATH'] must be set" 95 | end 96 | 97 | new( 98 | ENV.fetch("SNOWFLAKE_URI"), 99 | private_key, 100 | ENV.fetch("SNOWFLAKE_ORGANIZATION"), 101 | ENV.fetch("SNOWFLAKE_ACCOUNT"), 102 | ENV.fetch("SNOWFLAKE_USER"), 103 | ENV["SNOWFLAKE_DEFAULT_WAREHOUSE"], 104 | ENV["SNOWFLAKE_DEFAULT_DATABASE"], 105 | default_role: ENV.fetch("SNOWFLAKE_DEFAULT_ROLE", nil), 106 | logger: logger, 107 | log_level: log_level, 108 | jwt_token_ttl: jwt_token_ttl, 109 | connection_timeout: connection_timeout, 110 | max_connections: max_connections, 111 | max_threads_per_query: max_threads_per_query, 112 | thread_scale_factor: thread_scale_factor, 113 | http_retries: http_retries, 114 | query_timeout: query_timeout, 115 | ) 116 | end 117 | 118 | def initialize( 119 | uri, private_key, organization, account, user, default_warehouse, default_database, 120 | default_role: nil, 121 | logger: DEFAULT_LOGGER, 122 | log_level: DEFAULT_LOG_LEVEL, 123 | jwt_token_ttl: DEFAULT_JWT_TOKEN_TTL, 124 | connection_timeout: DEFAULT_CONNECTION_TIMEOUT, 125 | max_connections: DEFAULT_MAX_CONNECTIONS, 126 | max_threads_per_query: DEFAULT_MAX_THREADS_PER_QUERY, 127 | thread_scale_factor: DEFAULT_THREAD_SCALE_FACTOR, 128 | http_retries: DEFAULT_HTTP_RETRIES, 129 | query_timeout: DEFAULT_QUERY_TIMEOUT 130 | ) 131 | @base_uri = uri 132 | @key_pair_jwt_auth_manager = 133 | KeyPairJwtAuthManager.new(organization, account, user, private_key, jwt_token_ttl) 134 | @default_warehouse = default_warehouse 135 | @default_database = default_database 136 | @default_role = default_role 137 | 138 | # set defaults for config settings 139 | @logger = logger 140 | @logger.level = log_level 141 | @connection_timeout = connection_timeout 142 | @max_connections = max_connections 143 | @max_threads_per_query = 
max_threads_per_query 144 | @thread_scale_factor = thread_scale_factor 145 | @http_retries = http_retries 146 | @query_timeout = query_timeout 147 | 148 | # Do NOT use normally, this exists for tests so we can reliably trigger the polling 149 | # response workflow from snowflake in tests 150 | @_enable_polling_queries = false 151 | end 152 | 153 | def query(query, warehouse: nil, streaming: false, database: nil, schema: nil, bindings: nil, role: nil, query_name: nil, query_timeout: nil) 154 | warehouse ||= @default_warehouse 155 | database ||= @default_database 156 | role ||= @default_role 157 | query_timeout ||= @query_timeout 158 | 159 | with_instrumentation({ database:, schema:, warehouse:, query_name: }) do 160 | query_start_time = Time.now.to_i 161 | response = nil 162 | connection_pool.with do |connection| 163 | request_body = { 164 | "warehouse" => warehouse&.upcase, 165 | "schema" => schema&.upcase, 166 | "database" => database&.upcase, 167 | "statement" => query, 168 | "bindings" => bindings, 169 | "role" => role, 170 | "timeout" => query_timeout 171 | } 172 | 173 | response = request_with_auth_and_headers( 174 | connection, 175 | Net::HTTP::Post, 176 | "/api/v2/statements?requestId=#{SecureRandom.uuid}&async=#{@_enable_polling_queries}", 177 | request_body.to_json 178 | ) 179 | end 180 | retrieve_result_set(query_start_time, query, response, streaming, query_timeout) 181 | end 182 | end 183 | 184 | alias fetch query 185 | 186 | def self.env_option(env_var_name, default_value) 187 | value = ENV[env_var_name] 188 | value.nil? || value.empty? ? default_value : ENV[env_var_name].to_i 189 | end 190 | 191 | # This method can be used to populate the JWT token used for authentication 192 | # in tests that require time travel. 
193 | def create_jwt_token 194 | @key_pair_jwt_auth_manager.jwt_token 195 | end 196 | 197 | private_class_method :env_option 198 | 199 | private 200 | def connection_pool 201 | @connection_pool ||= ConnectionPool.new(size: @max_connections, timeout: @connection_timeout) do 202 | HttpConnectionWrapper.new(hostname, port).start 203 | end 204 | end 205 | 206 | def hostname 207 | @hostname ||= URI.parse(@base_uri).hostname 208 | end 209 | 210 | def port 211 | @port ||= URI.parse(@base_uri).port 212 | end 213 | 214 | def request_with_auth_and_headers(connection, request_class, path, body=nil) 215 | uri = URI.parse("#{@base_uri}#{path}") 216 | request = request_class.new(uri) 217 | request["Content-Type"] = "application/json" 218 | request["Accept"] = "application/json" 219 | request["Authorization"] = "Bearer #{@key_pair_jwt_auth_manager.jwt_token}" 220 | request["X-Snowflake-Authorization-Token-Type"] = "KEYPAIR_JWT" 221 | request.body = body unless body.nil? 222 | 223 | Retryable.retryable(tries: @http_retries + 1, 224 | sleep: lambda {|n| 2**n }, # 1, 2, 4, 8, etc 225 | on: [RetryableBadResponseError, OpenSSL::SSL::SSLError], 226 | log_method: retryable_log_method) do 227 | response = nil 228 | bm = Benchmark.measure { response = connection.request(request) } 229 | logger.debug { "HTTP Request time: #{bm.real}" } 230 | raise_on_bad_response(response) 231 | response 232 | end 233 | end 234 | 235 | def raise_on_bad_response(response) 236 | return if VALID_RESPONSE_CODES.include? response.code 237 | 238 | # there are a class of errors we want to retry rather than just giving up 239 | if retryable_http_response_code?(response.code) 240 | raise RetryableBadResponseError, 241 | "Retryable bad response! Got code: #{response.code}, w/ message #{response.body}" 242 | 243 | else # not one we should retry 244 | raise BadResponseError, 245 | "Bad response! 
Got code: #{response.code}, w/ message #{response.body}" 246 | end 247 | end 248 | 249 | # shamelessly stolen from the battle tested python client 250 | # https://github.com/snowflakedb/snowflake-connector-python/blob/eceed981f93e29d2f4663241253b48340389f4ef/src/snowflake/connector/network.py#L191 251 | def retryable_http_response_code?(code) 252 | # retry (in order): bad request, forbidden (token expired in flight), method not allowed, 253 | # request timeout, too many requests, anything in the 500 range (504 is fairly common), 254 | # anything in the 3xx range as those are mostly "redirect" responses 255 | [400, 403, 405, 408, 429].include?(code.to_i) || (500..599).include?(code.to_i) || 256 | (300..399).include?(code.to_i) 257 | end 258 | 259 | def retryable_log_method 260 | @retryable_log_method ||= proc do |retries, error| 261 | logger.info("Retry attempt #{retries} because #{error.message}") 262 | end 263 | end 264 | 265 | def poll_for_completion_or_timeout(query_start_time, query, statement_handle, query_timeout) 266 | first_data_json_body = nil 267 | 268 | connection_pool.with do |connection| 269 | loop do 270 | sleep POLLING_INTERVAL 271 | 272 | elapsed_time = Time.now.to_i - query_start_time 273 | if elapsed_time > query_timeout 274 | cancelled = attempt_to_cancel_and_silence_errors(connection, statement_handle) 275 | raise QueryTimeoutError.new("Query timed out. Query cancelled? 
#{cancelled}; Duration: #{elapsed_time}; Query: '#{query}'") 276 | end 277 | 278 | poll_response = request_with_auth_and_headers(connection, Net::HTTP::Get, 279 | "/api/v2/statements/#{statement_handle}") 280 | if poll_response.code == POLLING_RESPONSE_CODE 281 | next 282 | else 283 | return poll_response 284 | end 285 | end 286 | end 287 | end 288 | 289 | def attempt_to_cancel_and_silence_errors(connection, statement_handle) 290 | cancel_response = request_with_auth_and_headers(connection, Net::HTTP::Post, 291 | "/api/v2/#{statement_handle}/cancel") 292 | true 293 | rescue Error => error 294 | if error.is_a?(BadResponseError) && error.message.include?("404") 295 | return true # snowflake cancelled it before we did 296 | end 297 | @logger.error("Error on attempting to cancel query #{statement_handle}, will raise a QueryTimeoutError") 298 | false 299 | end 300 | 301 | def retrieve_result_set(query_start_time, query, response, streaming, query_timeout) 302 | json_body = JSON.parse(response.body, JSON_PARSE_OPTIONS) 303 | statement_handle = json_body["statementHandle"] 304 | 305 | if response.code == POLLING_RESPONSE_CODE 306 | result_response = poll_for_completion_or_timeout(query_start_time, query, statement_handle, query_timeout) 307 | json_body = JSON.parse(result_response.body, JSON_PARSE_OPTIONS) 308 | end 309 | 310 | num_threads = number_of_threads_to_use(json_body["resultSetMetaData"]["partitionInfo"].size) 311 | retrieve_proc = ->(index) { retrieve_partition_data(statement_handle, index) } 312 | 313 | if streaming 314 | StreamingResultStrategy.result(json_body, retrieve_proc) 315 | elsif num_threads == 1 316 | SingleThreadInMemoryStrategy.result(json_body, retrieve_proc) 317 | else 318 | ThreadedInMemoryStrategy.result(json_body, retrieve_proc, num_threads) 319 | end 320 | end 321 | 322 | def retrieve_partition_data(statement_handle, partition_index) 323 | partition_response = nil 324 | connection_pool.with do |connection| 325 | partition_response = 
request_with_auth_and_headers( 326 | connection, 327 | Net::HTTP::Get, 328 | "/api/v2/statements/#{statement_handle}?partition=#{partition_index}&requestId=#{SecureRandom.uuid}", 329 | ) 330 | end 331 | 332 | partition_json = {} 333 | bm = Benchmark.measure { partition_json = JSON.parse(partition_response.body, JSON_PARSE_OPTIONS) } 334 | logger.debug { "JSON parsing took: #{bm.real}" } 335 | partition_data = partition_json["data"] 336 | 337 | partition_data 338 | end 339 | 340 | def number_of_threads_to_use(partition_count) 341 | [[1, (partition_count / @thread_scale_factor.to_f).ceil].max, @max_threads_per_query].min 342 | end 343 | 344 | def with_instrumentation(tags, &block) 345 | return block.call unless defined?(::ActiveSupport) && ::ActiveSupport 346 | 347 | ::ActiveSupport::Notifications.instrument( 348 | "rb_snowflake_client.snowflake_query.finish", 349 | tags.merge(query_id: SecureRandom.uuid)) do 350 | block.call 351 | end 352 | end 353 | end 354 | end 355 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ruby snowflake client using the v2 HTTP API 2 | 3 | # Why this library? 4 | 5 | The available options for connecting from Ruby to Snowflake include: 6 | * ODBC - which works, but can be very slow, especially for a lot of data, which is probably why you're using Snowflake 7 | * The [ruby snowflake client](https://github.com/rinsed-org/ruby-snowflake-client) that wraps the go client. This is probably the fastest single threaded option, which we also created. However, that library takes the ruby GVL and so stops all other processing in your ruby process (threads). 8 | 9 | This library is implemented in ruby and while it leverages some libraries that have native extensions, doesn't currently include any native extensions itself. 
Depending on network latency and the shape of the data this library can be faster or slower than the go wrapper. The big advantages are: 10 | * It uses about half the memory when you pull a full result set into memory 11 | * It does not hold onto the [ruby GVL](https://www.speedshop.co/2020/05/11/the-ruby-gvl-and-scaling.html) and so does not block other threads while waiting on IO like the go wrapper client. 12 | * It will consume more resources for the same data, because it's using the HTTP v2 API and getting JSON back, there is just more work to do as compared to the go or python clients that use Apache Arrow under the covers. 13 | 14 | # Usage 15 | 16 | ## Create a client 17 | 18 | Add to your Gemfile or use `gem install rb_snowflake_client` 19 | ```ruby 20 | gem "rb_snowflake_client" 21 | ``` 22 | 23 | Then require, create a client 24 | ```ruby 25 | require "rb_snowflake_client" 26 | 27 | 28 | # uses env variables, you can also new one up 29 | # see: https://github.com/rinsed-org/pure-ruby-snowflake-client/blob/master/lib/ruby_snowflake/client.rb#L43 30 | client = RubySnowflake::Client.new( 31 | "https://yourinstance.region.snowflakecomputing.com", # insert your URL here 32 | File.read("secrets/my_key.pem"), # your private key in PEM format (scroll down for instructions) 33 | "snowflake-organization", # your account name (doesn't match your URL), using nil may be required depending on your snowflake account 34 | "snowflake-account", # typically your subdomain 35 | "snowflake-user", # Your snowflake user 36 | "some_warehouse", # The name of your warehouse to use by default 37 | "some_database", # The name of the database in the context of which the queries will run 38 | default_role: "some_role", # The name of the role with which the queries will run. A `nil` value uses the primary role of the user.
39 | max_connections: 12, # Config options can be passed in 40 | connection_timeout: 45, # See below for the full set of options 41 | query_timeout: 1200, # how long to wait for queries, in seconds 42 | ) 43 | 44 | # alternatively you can use the `from_env` method, which will pull these values from the following environment variables. You can either provide the path to the PEM file, or it's contents in an ENV variable. 45 | RubySnowflake::Client.from_env 46 | ``` 47 | Available ENV variables (see below in the config section for details) 48 | - `SNOWFLAKE_URI` 49 | - `SNOWFLAKE_PRIVATE_KEY_PATH` or `SNOWFLAKE_PRIVATE_KEY` 50 | - Use either the key or the path. Key takes precedence if both are provided. 51 | - `SNOWFLAKE_ORGANIZATION` 52 | - Optional, if you leave it off, the library will authenticate with an account name of only SNOWFLAKE_ACCOUNT 53 | - `SNOWFLAKE_ACCOUNT` 54 | - `SNOWFLAKE_USER` 55 | - `SNOWFLAKE_DEFAULT_WAREHOUSE` 56 | - `SNOWFLAKE_DEFAULT_DATABASE` 57 | - `SNOWFLAKE_DEFAULT_ROLE` 58 | - `SNOWFLAKE_JWT_TOKEN_TTL` 59 | - `SNOWFLAKE_CONNECTION_TIMEOUT` 60 | - `SNOWFLAKE_MAX_CONNECTIONS` 61 | - `SNOWFLAKE_MAX_THREADS_PER_QUERY` 62 | - `SNOWFLAKE_THREAD_SCALE_FACTOR` 63 | - `SNOWFLAKE_HTTP_RETRIES` 64 | - `SNOWFLAKE_QUERY_TIMEOUT` 65 | 66 | ## Make queries 67 | 68 | Once you have a client, make queries 69 | ```ruby 70 | # will get all data in memory 71 | result = client.query("SELECT ID, NAME FROM SOMETABLE") 72 | 73 | # result is Enumerable 74 | result.each do |row| 75 | # Row implements Enumerable and provides flexible column access: 76 | puts row[:id] # access with symbols (case-insensitive) 77 | puts row["name"] # access with strings (case-insensitive) 78 | puts row[0] # access with numeric indices 79 | 80 | # Row has Enumerable methods 81 | puts row.keys # get all column names 82 | puts row.values # get all values 83 | puts row.to_h # convert to Hash with column names as keys 84 | 85 | # Use all Enumerable methods 86 | row.each { |column_name, 
value| puts "#{column_name}: #{value}" } 87 | filtered = row.select { |column, value| column.start_with?("i") } 88 | end 89 | ``` 90 | 91 | ## Stream results 92 | 93 | You can also stream results and not hold them all in memory. The client will prefetch the next data partition only. If you have some IO in your processing there should usually be data available for you. 94 | 95 | ```ruby 96 | result = client.query("SELECT * FROM HUGETABLE", streaming: true) 97 | result.each do |row| 98 | puts row 99 | end 100 | ``` 101 | 102 | ## Switching databases 103 | 104 | You can also overwrite the database specified in the initializer, and run your query with a different context. 105 | 106 | ```ruby 107 | result = client.query("SELECT * FROM SECRET_TABLE", database: "OTHER_DB") 108 | result.each do |row| 109 | puts row 110 | end 111 | ``` 112 | 113 | ## Switching warehouses 114 | 115 | Clients are not warehouse specific, you can override the default warehouse per query 116 | 117 | ```ruby 118 | client.query("SELECT * FROM BIGTABLE", warehouse: "FAST_WH") 119 | ``` 120 | 121 | ## Specifying a schema 122 | 123 | ```ruby 124 | client.query("SELECT * FROM BIGTABLE", schema: "MY_SCHEMA") 125 | ``` 126 | 127 | ## Specifying role 128 | 129 | Queries by default use the primary role assigned to the account. If there are multiple roles you can switch between them on a per query basis. 130 | 131 | ```ruby 132 | client.query("SELECT * FROM BIGTABLE", role: "MY_ROLE") 133 | ``` 134 | 135 | ## Query timeout 136 | 137 | You can override the query timeout on a per-query basis. The timeout is specified in seconds and will be enforced by both Snowflake server-side and the client-side polling mechanism. 138 | 139 | ```ruby 140 | client.query("SELECT * FROM BIGTABLE", query_timeout: 30) 141 | ``` 142 | 143 | ## Binding parameters 144 | 145 | Say we have `BIGTABLE` with a `data` column of a type `VARIANT`. 
146 | 147 | ```ruby 148 | json_string = '{"valid": "json"}' 149 | query = "insert into BIGTABLE(data) select parse_json(?)" 150 | bindings = { 151 | "1" => { 152 | "type" => "TEXT", 153 | "value" => "Other Event" 154 | } 155 | } 156 | client.query(query, bindings: bindings) 157 | ``` 158 | 159 | For additional information about binding parameters refer to snowflake documentation: https://docs.snowflake.com/en/developer-guide/sql-api/submitting-requests#using-bind-variables-in-a-statement 160 | 161 | ## Instrumentation 162 | 163 | If ActiveSupport is available, this library additionally emits [notification events](https://api.rubyonrails.org/classes/ActiveSupport/Notifications.html) around queries. You can subscribe to those to track timing, query counts, etc. 164 | 165 | * `rb_snowflake_client.snowflake_query.finish`: published at query end 166 | 167 | Events receive a payload with the following properties: 168 | * `database`: snowflake database 169 | * `schema`: snowflake schema 170 | * `warehouse`: snowflake warehouse 171 | * `query_id`: random UUID for the query 172 | * `query_name`: argument passed to query/fetch 173 | * `exception`: present if the query raised an error, see [Notifications documentation](https://api.rubyonrails.org/classes/ActiveSupport/Notifications.html#module-ActiveSupport::Notifications-label-Subscribers) for details 174 | * `exception_object`: present if the query raised an error, see [Notifications documentation](https://api.rubyonrails.org/classes/ActiveSupport/Notifications.html#module-ActiveSupport::Notifications-label-Subscribers) for details 175 | 176 | An example integration with [Datadog](https://www.rubydoc.info/gems/datadog) might look like this: 177 | 178 | ```ruby 179 | ActiveSupport::Notifications.subscribe("rb_snowflake_client.snowflake_query.finish") do |name, start, finish, id, payload| 180 | span = Datadog::Tracing.trace(payload[:query_name] || "snowflake_query", 181 | resource: "snowflake", 182 | start_time: start, 183 | 
tags: payload, 184 | type: Datadog::Tracing::Metadata::Ext::AppTypes::TYPE_DB) 185 | 186 | span.finish(finish) 187 | end 188 | ``` 189 | 190 | # Configuration Options 191 | 192 | The client supports the following configuration options, each with their own getter/setter except connection pool options which must be set at construction. Additionally, all except logger can be configured with environment variables (see above, but the pattern is like: "SNOWFLAKE_HTTP_RETRIES"). Configuration options can only be set on initialization through `new` or `from_env`. 193 | 194 | - `logger` - takes any ruby logger (by default it's a std lib Logger.new(STDOUT), set at DEBUG level). Not available as an ENV variable config option 195 | - `log_level` - takes a log level, type is dependent on logger, for the default ruby Logger, use a level like `Logger::WARN`. Not available as an ENV variable config option. 196 | - `jwt_token_ttl` - The time to live set on JWT token in seconds, defaults to 3540 (59 minutes, the longest Snowflake supports is 60). 197 | - `connection_timeout` - The amount of time in seconds that the client's connection pool will wait before erroring in handing out a valid connection, defaults to 60 seconds 198 | - `max_connections` - The maximum number of http connections to hold open in the connection pool. If you use the client in a threaded context, you may need to increase this to be threads * client.max_threads_per_query, defaults to 16. 199 | - `max_threads_per_query` - The maximum number of threads the client should use to retrieve data, per query, defaults to 8. If you want the client to act in a single threaded way, set this to 1 200 | - `thread_scale_factor` - When downloading a result set into memory, thread count is calculated by dividing a query's partition count by this number. For details on implementation see the code in `client.rb`. 
201 | - `http_retries` - By default the client will retry common typically transient errors (http responses) twice, you can change the number of retries with this. 202 | - `query_timeout` - By default the client will wait 10 minutes (600s) for a query to finish, you can change this default, will also set this limit in the query for snowflake to obey. Set in seconds. 203 | 204 | Example configuration: 205 | ```ruby 206 | client = RubySnowflake::Client.from_env( 207 | logger: Rails.logger, 208 | max_connections: 24, 209 | http_retries: 1 210 | ) 211 | 212 | ``` 213 | 214 | # Gotchas 215 | 216 | 1. Does not yet support multiple statements (work around is to wrap in `BEGIN ... END`) 217 | 2. Only supports key pair authentication 218 | 3. It's faster to work directly with the row value and not call to_h if you don't need to 219 | 4. Rows are Enumerable, providing access to methods like `each`, `map`, `select`, `keys`, and `values` 220 | 5. Row column access is case-insensitive and supports string keys, symbol keys, and numeric indices 221 | 222 | # Setting up a user for key pair authentication 223 | 224 | This library uses JWT to authenticate with the API which relies on key-pair authentication to connect to Snowflake. 225 | 226 | 1. Generate a private/public key pair for your user. Your private key will now be in a file `private_key.pem`. Keep this safe! Don't check it in to source control. 227 | ```bash 228 | openssl genpkey -algorithm RSA -out private_key.pem -pkeyopt rsa_keygen_bits:2048 229 | ``` 230 | 2. Generate a public key in the format that Snowflake likes (will produce `public_key.pem`) 231 | ```bash 232 | openssl rsa -pubout -in private_key.pem -out public_key.pem 233 | ``` 234 | 3. 
Your public_key.pem file should look something like this 235 | ```text 236 | -----BEGIN PUBLIC KEY----- 237 | MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAx8FaPusz9X9MCvv0h3N3 238 | v1QaruyU1ivHs8jLjo6idzLSHJPGk7n3LSXerIw5/LkhfA27ibJj225/fKFnPy+X 239 | gidbhE4BlvSdoVgdMH7WB1ZC3PpAwwqHeMisIzarwOwUu6mLyG9VY55ciKJY8CwA 240 | 5xt19pgVsXg/lcOa72jDjK+ExdSAN6K2TqSKqq77yzeI5creslny5VuAGTbZy3Bt 241 | Wk0zg1xz8+C4regIOlSoFrzn1e4wHqbFv2zFFvORC2LV3HXFRaHYClB7jWRN1bFj 242 | om6gRpiTO8bsCSPKi0anxMN8qt1Lw2d/+cwezxCwI6xPLC7JhZYdx6u+hC0g3PVK 243 | PQIDAQAB 244 | -----END PUBLIC KEY----- 245 | ``` 246 | Snowflake doesn't like it in that format, but openssl can remove the newlines and begining and ending for you: 247 | ```bash 248 | openssl rsa -pubin -in public_key.pem -outform DER | openssl base64 -A 249 | ``` 250 | (if it spits out a % at the end, remove that). 251 | ```text 252 | MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArOL5WQYaXSnVhQXQZQHVIzrNt08A+bnGjBb6DWFVRao3dlPG+HOf9Nv0nGlk8m5AMvvETUnN3tihuRHOJ9MOUzDp58IYIr5xvOENSunbRVyJL7DuCGwZz8z1pEnlBjZPONzEX8dCKxCU0neJrksFgwdhfhIUs7GnbTuJjYP9EqXPlbsYNYTVVnFNZ9DHFur9PggPJpPHTfFDz8MEB3Xb3AWV3pE752ed/PtRcTODvgoQSpP80cTgsKjsG009NY2ulEtV3r7yNJgawxmcMTNLhFlSS7Wm2NSEIS0aNo+DgSZI72MnAOw2klUzvdBl0i43gI+aX0Y6y/y18VL1o9KMQwIDAQAB 253 | ``` 254 | 4. Now, in the snowflake web console or through your favorite client, log in as a user with permissions to edit users. For your particular user (`EXAMPLE_USER` below) update the user with the modified public key from above: 255 | ```sql 256 | ALTER USER EXAMPLE_USER SET RSA_PUBLIC_KEY = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArOL5WQYaXSnVhQXQZQHVIzrNt08A+bnGjBb6DWFVRao3dlPG+HOf9Nv0nGlk8m5AMvvETUnN3tihuRHOJ9MOUzDp58IYIr5xvOENSunbRVyJL7DuCGwZz8z1pEnlBjZPONzEX8dCKxCU0neJrksFgwdhfhIUs7GnbTuJjYP9EqXPlbsYNYTVVnFNZ9DHFur9PggPJpPHTfFDz8MEB3Xb3AWV3pE752ed/PtRcTODvgoQSpP80cTgsKjsG009NY2ulEtV3r7yNJgawxmcMTNLhFlSS7Wm2NSEIS0aNo+DgSZI72MnAOw2klUzvdBl0i43gI+aX0Y6y/y18VL1o9KMQwIDAQAB' 257 | ``` 258 | 5. 
Verify your auth setup. If you have `snowsql` installed, that has an easy method (CTRL-d to exit) 259 | ```bash 260 | # example: snowsql -a AAAAAAA.BBBBBBBB.us-east-1 -u john --private-key-path private_key.pem 261 | snowsql -a <account_identifier> -u <username> --private-key-path private_key.pem 262 | ``` 263 | or alternatively, use the client to verify: 264 | ```ruby 265 | client = RubySnowflake::Client.new( 266 | "https://yourinstance.region.snowflakecomputing.com", # insert your URL here 267 | File.read("secrets/my_key.pem"), # path to your private key 268 | "snowflake-organization", # your account name (doesn't match your URL), using nil may be required depending on your snowflake account 269 | "snowflake-account", # typically your subdomain 270 | "snowflake-user", # Your snowflake user 271 | "some_warehouse", # The name of your warehouse to use by default 272 | "some_database", # The name of the database in the context of which the queries will run 273 | ) 274 | ``` 275 | 276 | # Change Log 277 | 278 | See [Change Log](CHANGELOG.md) 279 | 280 | # Code of conduct 281 | 282 | See [Code of Conduct](CODE_OF_CONDUCT.md) 283 | 284 | # Contributing 285 | 286 | Please fork and create a pull request. Getting tests to run will be the most labor intensive part. You'll want to have an active snowflake account, and then configure your `.env` for the tests to be able to connect to your instance. Inside of `client_spec.rb` there are SQL statements to create the required tables. A pull request to make this less manual is also welcome :-). We'll do our best to help you along. Also, feel free to use Issues to report issues. [We](https://rinsed.co) use this client in production today, so we're happy to look at issues, especially where performance or correctness is involved. 
287 | 288 | # Links: 289 | - snowflake API reference https://docs.snowflake.com/en/developer-guide/sql-api/reference 290 | - snowflake authentication docs: https://docs.snowflake.com/en/developer-guide/sql-api/authenticating 291 | -------------------------------------------------------------------------------- /spec/ruby_snowflake/client_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | RSpec.describe RubySnowflake::Client do 4 | let(:client) { described_class.from_env } 5 | 6 | describe "initialization" do 7 | context "when the environment variables are not set" do 8 | around do |example| 9 | old_env = ENV.to_h 10 | 11 | begin 12 | ENV.clear 13 | example.run 14 | ensure 15 | ENV.replace(old_env) 16 | end 17 | end 18 | 19 | it "should raise an error" do 20 | expect { client }.to raise_error(RubySnowflake::MissingConfig) 21 | end 22 | end 23 | end 24 | 25 | describe "querying" do 26 | subject(:result) { client.query(query, query_name: "test_query") } 27 | let(:query) { "SELECT 1;" } 28 | 29 | it "emits instrumentation events" do 30 | finish_event_received = false 31 | finish_callback = lambda do |event| 32 | expect(event.payload[:query_name]).to eq("test_query") 33 | finish_event_received = true 34 | end 35 | 36 | ActiveSupport::Notifications.subscribed(finish_callback, "rb_snowflake_client.snowflake_query.finish") do 37 | expect(result).to be_a(RubySnowflake::Result) 38 | end 39 | 40 | expect(finish_event_received).to be(true) 41 | end 42 | 43 | context "with the 'fetch' alias" do 44 | subject(:result) { client.fetch(query) } 45 | 46 | it "works with 'fetch' alias" do 47 | expect(result).to be_a(RubySnowflake::Result) 48 | expect(result.length).to eq(1) 49 | rows = result.get_all_rows 50 | expect(rows).to eq( 51 | [{"1" => 1}] 52 | ) 53 | end 54 | end 55 | 56 | context "without ActiveSupport" do 57 | before do 58 | stub_const("ActiveSupport", nil) if defined?(ActiveSupport) 59 | end 60 | 61 | 
it "should work" do 62 | expect(result).to be_a(RubySnowflake::Result) 63 | expect(result.length).to eq(1) 64 | rows = result.get_all_rows 65 | expect(rows).to eq( 66 | [{"1" => 1}] 67 | ) 68 | end 69 | end 70 | 71 | context "with lower case database name" do 72 | subject(:result) { client.fetch(query, database: "ruby_snowflake_client_testing") } 73 | let(:query) { "SELECT * from public.test_datatypes;" } 74 | 75 | 76 | it "should work" do 77 | expect(result).to be_a(RubySnowflake::Result) 78 | expect(result.length).to eq(2) 79 | end 80 | end 81 | 82 | context "with lower case schema name" do 83 | subject(:result) { client.fetch(query, database: "ruby_snowflake_client_testing", schema: "public") } 84 | let(:query) { "SELECT * from test_datatypes;" } 85 | 86 | it "should work" do 87 | expect(result).to be_a(RubySnowflake::Result) 88 | expect(result.length).to eq(2) 89 | end 90 | end 91 | 92 | context "with lower case warehouse name" do 93 | subject(:result) { client.fetch(query, warehouse: "web_data_load_wh") } 94 | let(:query) { "SELECT * from ruby_snowflake_client_testing.public.test_datatypes;" } 95 | 96 | it "should work" do 97 | expect(result).to be_a(RubySnowflake::Result) 98 | expect(result.length).to eq(2) 99 | end 100 | end 101 | 102 | context "when we can't connect" do 103 | before do 104 | allow(Net::HTTP).to receive(:start).and_raise("Some connection error") 105 | end 106 | 107 | it "raises a ConnectionError" do 108 | expect { result }.to raise_error do |error| 109 | expect(error).to be_a RubySnowflake::ConnectionError 110 | expect(error.cause.message).to eq "Some connection error" 111 | end 112 | end 113 | end 114 | 115 | context "when the query times out" do 116 | before do 117 | ENV["SNOWFLAKE_QUERY_TIMEOUT"] = "1" 118 | client.instance_variable_set(:@_enable_polling_queries, true) 119 | end 120 | after do 121 | ENV["SNOWFLAKE_QUERY_TIMEOUT"] = nil 122 | end 123 | let(:query) { "SELECT SYSTEM$WAIT(10)" } 124 | 125 | it "attempts to cancel the query" 
do 126 | allow(client.logger).to receive(:error) 127 | 128 | start_time = Time.now.to_i 129 | expect { result }.to raise_error do |error| 130 | expect(error).to be_a RubySnowflake::QueryTimeoutError 131 | end 132 | # We are not receiving this error because we cancel it before Snowflake can 133 | expect(client.logger).not_to have_received(:error).with(a_string_including("cancel query")) 134 | expect(Time.now.to_i - start_time).to be >= 1 #query timeout 135 | end 136 | end 137 | 138 | context "with per-query timeout override" do 139 | let(:query) { "SELECT 1" } 140 | 141 | it "sends the timeout parameter in the request body" do 142 | allow_any_instance_of(Net::HTTP).to receive(:request) do |instance, request| 143 | request_body = JSON.parse(request.body) 144 | expect(request_body["timeout"]).to eq(30) 145 | 146 | response = Net::HTTPSuccess.new("1.1", "200", "OK") 147 | allow(response).to receive(:body).and_return({ 148 | statementHandle: "test-handle", 149 | resultSetMetaData: { 150 | partitionInfo: [{}], 151 | rowType: [{ name: "1", type: "FIXED" }] 152 | }, 153 | data: [[1]] 154 | }.to_json) 155 | response 156 | end 157 | 158 | result = client.query(query, query_timeout: 30) 159 | expect(result).to be_a(RubySnowflake::Result) 160 | end 161 | end 162 | 163 | context "when the query errors" do 164 | let(:query) { "INVALID QUERY;" } 165 | it "should raise an exception" do 166 | expect { result }.to raise_error do |error| 167 | expect(error).to be_a RubySnowflake::Error 168 | end 169 | end 170 | 171 | context "for unauthorized database" do 172 | let(:query) { "SELECT * FROM TEST_DATABASE.RINSED_WEB_APP.EMAILS LIMIT 1;" } 173 | it "should raise an exception" do 174 | expect { result }.to raise_error do |error| 175 | expect(error).to be_a RubySnowflake::Error 176 | expect(error.message).to include "'TEST_DATABASE' does not exist or not authorized" 177 | end 178 | end 179 | 180 | it "should raise the correct exception for threaded work" do 181 | require "parallel" 182 | 
183 | Parallel.map((1..3).collect { _1 }, in_threads: 2) do |idx| 184 | c = described_class.from_env 185 | query = "SELECT * FROM TEST_DATABASE#{idx}.RINSED_WEB_APP.EMAILS LIMIT 1;" 186 | 187 | expect { c.query(query) }.to raise_error do |error| 188 | expect(error).to be_a RubySnowflake::Error 189 | expect(error.message).to include "TEST_DATABASE#{idx}" 190 | end 191 | end 192 | end 193 | end 194 | end 195 | 196 | context "with a simple query returning string" do 197 | let(:query) { "SELECT 1;" } 198 | 199 | it "should return a RubySnowflake::Result" do 200 | expect(result).to be_a(RubySnowflake::Result) 201 | end 202 | 203 | it "should respond to get_all_rows" do 204 | expect(result.length).to eq(1) 205 | rows = result.get_all_rows 206 | expect(rows).to eq( 207 | [{"1" => 1}] 208 | ) 209 | end 210 | 211 | it "should respond to each with a block" do 212 | expect { |b| result.each(&b) }.to yield_with_args(an_instance_of(RubySnowflake::Row)) 213 | end 214 | end 215 | 216 | context "with row access methods" do 217 | let(:query) { "SELECT id as ID, name as NAME from ruby_snowflake_client_testing.public.test_datatypes;" } 218 | let(:row) { result.first } 219 | 220 | it "allows access with string keys" do 221 | expect(row["id"]).to eq(1) 222 | expect(row["name"]).to eq("John Smith") 223 | end 224 | 225 | it "allows access with symbol keys" do 226 | expect(row[:id]).to eq(1) 227 | expect(row[:name]).to eq("John Smith") 228 | end 229 | 230 | it "is case insensitive" do 231 | expect(row["ID"]).to eq(1) 232 | expect(row["Name"]).to eq("John Smith") 233 | expect(row[:ID]).to eq(1) 234 | expect(row[:Name]).to eq("John Smith") 235 | end 236 | 237 | it "allows numeric index access" do 238 | expect(row[0]).to eq(1) # ID column 239 | expect(row[1]).to eq("John Smith") # NAME column 240 | end 241 | 242 | it "returns nil for non-existent columns" do 243 | expect(row["nonexistent"]).to be_nil 244 | expect(row[:nonexistent]).to be_nil 245 | expect(row[999]).to be_nil 246 | end 247 | 
248 | it "implements Enumerable methods" do 249 | expect(row.keys).to contain_exactly("id", "name") 250 | expect(row.values).to contain_exactly(1, "John Smith") 251 | expect(row.to_h).to eq({"id" => 1, "name" => "John Smith"}) 252 | 253 | mapped_data = row.map { |k, v| [k.upcase, v] }.to_h 254 | expect(mapped_data).to eq({"ID" => 1, "NAME" => "John Smith"}) 255 | 256 | filtered_data = row.select { |k, v| k == "id" } 257 | expect(filtered_data.to_h).to eq({"id" => 1}) 258 | end 259 | end 260 | 261 | context "with a more complex query" do 262 | # We have setup a simple table in our Snowflake account with the below structure: 263 | # CREATE TABLE ruby_snowflake_client_testing.public.test_datatypes 264 | # (ID int, NAME string, DOB date, CREATED_AT timestamp, COFFES_PER_WEEK float); 265 | # And inserted some test data: 266 | # INSERT INTO test_datatypes 267 | # VALUES (1, 'John Smith', '1990-10-17', current_timestamp(), 3.41), 268 | # (2, 'Jane Smith', '1990-01-09', current_timestamp(), 3.525); 269 | let(:query) { "SELECT * from ruby_snowflake_client_testing.public.test_datatypes;" } 270 | let(:expected_john) do 271 | { 272 | "coffes_per_week" => 3.41, 273 | "id" => 1, 274 | "dob" => Date.new(1990, 10, 17), 275 | "created_at" => be_within(0.01).of(Time.new(2023,5,12,4,22,8.63,0)), 276 | "name" => "John Smith", 277 | } 278 | end 279 | let(:expected_jane) do 280 | { 281 | "coffes_per_week" => 3.525, 282 | "id" => 2, 283 | "dob" => Date.new(1990, 1, 9), 284 | "created_at" => be_within(0.01).of(Time.new(2023,5,12,4,22,8.63,0)), 285 | "name" => "Jane Smith", 286 | } 287 | end 288 | 289 | it "should return 2 rows with the right data types" do 290 | rows = result.get_all_rows 291 | expect(rows.length).to eq(2) 292 | john = rows[0] 293 | jane = rows[1] 294 | expect(john).to match(expected_john) 295 | expect(jane).to match(expected_jane) 296 | end 297 | end 298 | 299 | context "with NUMBER and HighPrecision" do 300 | # We have setup a simple table in our Snowflake account with 
the below structure: 301 | # CREATE TABLE ruby_snowflake_client_testing.public.test_big_datatypes 302 | # (ID NUMBER(38,0), BIGFLOAT NUMBER(8,2)); 303 | # And inserted some test data: 304 | # INSERT INTO test_big_datatypes VALUES (1, 8.2549); 305 | let(:query) { "SELECT * from ruby_snowflake_client_testing.public.test_big_datatypes;" } 306 | it "should return 1 row with correct data types" do 307 | rows = result.get_all_rows 308 | expect(rows.length).to eq(1) 309 | expect(rows[0]).to eq({ 310 | "id" => 1, 311 | "bigfloat" => BigDecimal("8.25"), #precision of only 2 decimals 312 | }) 313 | end 314 | end 315 | 316 | context "with all the time types" do 317 | # We have setup a simple table for testing these, created with: 318 | # CREATE TABLE ruby_snowflake_client_testing.public.time_test 319 | # (ID int PRIMARY KEY, time_value TIME, datetime_value DATETIME, timestamp_value TIMESTAMP, 320 | # timestamp_ltz_value TIMESTAMP_LTZ, timestamp_ntz_value TIMESTAMP_NTZ, 321 | # timestamp_tz_value TIMESTAMP_TZ); 322 | # And then ran an insert: 323 | # INSERT INTO ruby_snowflake_client_testing.public.time_test 324 | # (ID, time_value, datetime_value, timestamp_value, timestamp_ltz_value, 325 | # timestamp_ntz_value, timestamp_tz_value) 326 | # values 327 | # (1, 328 | # '12:34:56', -- time_value 329 | # '2022-01-01 12:34:56', -- datetime_value 330 | # '2022-01-01 12:34:56.123', -- timestamp_value 331 | # '2022-01-01 12:34:56.123 -7:00', -- timestamp_ltz_value 332 | # '2022-01-01 12:34:56.123', -- timestamp_ntz_value 333 | # '2022-01-01 12:34:56.123 +9:00') -- timestamp_tz_value 334 | it "converts them into the correct ruby value" do 335 | row = client.query("SELECT * FROM ruby_snowflake_client_testing.public.time_test").first 336 | expect(row["time_value"].utc.iso8601).to eq "1970-01-01T12:34:56Z" 337 | expect(row["datetime_value"].utc.iso8601).to eq "2022-01-01T12:34:56Z" 338 | expect(row["timestamp_value"].utc.iso8601).to eq "2022-01-01T12:34:56Z" 339 | 
expect(row["timestamp_ntz_value"].utc.iso8601).to eq "2022-01-01T12:34:56Z" 340 | expect(row["timestamp_ltz_value"].utc.iso8601).to eq "2022-01-01T19:34:56Z" 341 | expect(row["timestamp_tz_value"].utc.iso8601).to eq "2022-01-01T03:34:56Z" 342 | end 343 | end 344 | 345 | context "with a large amount of data" do 346 | # We have setup a very simple table with the below statement: 347 | # CREATE TABLE ruby_snowflake_client_testing.public.large_table (ID int PRIMARY KEY, random_text string); 348 | # We than ran a couple of inserts with large number of rows: 349 | # INSERT INTO ruby_snowflake_client_testing.public.large_table 350 | # SELECT random()%50000, randstr(64, random()) FROM table(generator(rowCount => 50000)); 351 | 352 | let(:limit) { 0 } 353 | let(:query) { "SELECT * FROM ruby_snowflake_client_testing.public.large_table LIMIT #{limit}" } 354 | 355 | context "fetching 50k rows" do 356 | let(:limit) { 50_000 } 357 | it "should work" do 358 | rows = result.get_all_rows 359 | expect(rows.length).to eq 50000 360 | expect((-50000...50000)).to include(rows[0]["id"].to_i) 361 | end 362 | end 363 | 364 | context "with async (polling) responses" do 365 | before { client.instance_variable_set(:@_enable_polling_queries, true) } 366 | 367 | let(:limit) { 1_000 } 368 | it "should work" do 369 | rows = result.get_all_rows 370 | expect(rows.length).to eq 1000 371 | expect((-50000...50000)).to include(rows[0]["id"].to_i) 372 | end 373 | end 374 | 375 | context "fetching 150k rows x 20 times" do 376 | let(:limit) { 150_000 } 377 | it "should work" do 378 | 20.times do |idx| 379 | client = described_class.from_env 380 | result = client.query(query) 381 | rows = result.get_all_rows 382 | expect(rows.length).to eq 150000 383 | expect((-50000...50000)).to include(rows[0]["id"].to_i) 384 | end 385 | end 386 | end 387 | 388 | context "fetching 50k rows x 5 times - with threads" do 389 | let(:limit) { 50_000 } 390 | 391 | before do 392 | ENV["SNOWFLAKE_MAX_CONNECTIONS"] = "12" 393 | 
ENV["SNOWFLAKE_MAX_THREADS_PER_QUERY"] = "12" 394 | end 395 | 396 | after do 397 | ENV["SNOWFLAKE_MAX_CONNECTIONS"] = nil 398 | ENV["SNOWFLAKE_MAX_THREADS_PER_QUERY"] = nil 399 | end 400 | it "should work" do 401 | t = [] 402 | 5.times do |idx| 403 | t << Thread.new do 404 | client = described_class.from_env 405 | result = client.query(query) 406 | rows = result.get_all_rows 407 | expect(rows.length).to eq 50_000 408 | expect((-50000...50000)).to include(rows[0]["id"].to_i) 409 | end 410 | end 411 | 412 | t.map(&:join) 413 | end 414 | end 415 | 416 | context "fetching 150k rows x 5 times - with threads & shared client" do 417 | let(:limit) { 150_000 } 418 | 419 | before { ENV["SNOWFLAKE_MAX_CONNECTIONS"] = "40" } 420 | after { ENV["SNOWFLAKE_MAX_CONNECTIONS"] = nil } 421 | 422 | it "should work" do 423 | t = [] 424 | client = described_class.from_env 425 | 5.times do |idx| 426 | t << Thread.new do 427 | result = client.query(query) 428 | rows = result.get_all_rows 429 | expect(rows.length).to eq 150000 430 | expect((-50000...50000)).to include(rows[0]["id"].to_i) 431 | end 432 | end 433 | 434 | t.map(&:join) 435 | end 436 | end 437 | 438 | context "with async (polling) responses" do 439 | before { client.instance_variable_set(:@_enable_polling_queries, true) } 440 | 441 | context "fetching 50k rows x 5 times - with threads & shared client" do 442 | let(:limit) { 50_000 } 443 | 444 | before { ENV["SNOWFLAKE_MAX_CONNECTIONS"] = "40" } 445 | after { ENV["SNOWFLAKE_MAX_CONNECTIONS"] = nil } 446 | 447 | it "should work" do 448 | t = [] 449 | client = described_class.from_env 450 | 5.times do |idx| 451 | t << Thread.new do 452 | result = client.query(query) 453 | rows = result.get_all_rows 454 | expect(rows.length).to eq 50_000 455 | expect((-50000...50000)).to include(rows[0]["id"].to_i) 456 | end 457 | end 458 | 459 | t.map(&:join) 460 | end 461 | end 462 | end 463 | 464 | context "fetching 150k rows x 10 times - with streaming" do 465 | let(:limit) { 150_000 } 466 | 
it "should work" do 467 | t = [] 468 | 10.times do |idx| 469 | t << Thread.new do 470 | client = described_class.from_env 471 | result = client.query(query) 472 | count = 0 473 | first_row = nil 474 | result.each do |row| 475 | first_row = row if first_row.nil? 476 | count += 1 477 | end 478 | expect(count).to eq 150000 479 | expect((-50000...50000)).to include(first_row["id"].to_i) 480 | end 481 | end 482 | 483 | t.map(&:join) 484 | end 485 | end 486 | end 487 | end 488 | 489 | shared_examples "a configuration setting" do |attribute, value, attr_reader_available| 490 | let!(:args) do 491 | { attribute => value} 492 | end 493 | 494 | it "supports configuring #{attribute} via from_env" do 495 | expect do 496 | new_client = described_class.from_env(**args) 497 | expect(new_client.send(attribute)).to eq(value) if attr_reader_available 498 | end.not_to raise_error 499 | end 500 | 501 | it "supports configuring #{attribute} via new" do 502 | expect do 503 | new_client = described_class.new("https://blah.snowflake", 504 | "MYPEMKEY", 505 | "MYORG", 506 | "ACCOUNT", 507 | "USER", 508 | "MYWAREHOUSE", 509 | "MYDB", 510 | **args) 511 | expect(new_client.send(attribute)).to eq(value) if attr_reader_available 512 | end.not_to raise_error 513 | end 514 | end 515 | 516 | describe "configuration" do 517 | it_behaves_like "a configuration setting", :logger, Logger.new(STDOUT) 518 | it_behaves_like "a configuration setting", :log_level, Logger::WARN, false 519 | it_behaves_like "a configuration setting", :jwt_token_ttl, 44 520 | it_behaves_like "a configuration setting", :connection_timeout, 42 521 | it_behaves_like "a configuration setting", :max_threads_per_query, 6 522 | it_behaves_like "a configuration setting", :thread_scale_factor, 5 523 | it_behaves_like "a configuration setting", :http_retries, 2 524 | it_behaves_like "a configuration setting", :query_timeout, 2000 525 | it_behaves_like "a configuration setting", :default_role, "OTHER_ROLE" 526 | 527 | 528 | context "with 
optional settings set through env variables" do 529 | before do 530 | ENV["SNOWFLAKE_JWT_TOKEN_TTL"] = "3333" 531 | ENV["SNOWFLAKE_CONNECTION_TIMEOUT"] = "33" 532 | ENV["SNOWFLAKE_MAX_CONNECTIONS"] = "33" 533 | ENV["SNOWFLAKE_MAX_THREADS_PER_QUERY"] = "33" 534 | ENV["SNOWFLAKE_THREAD_SCALE_FACTOR"] = "3" 535 | ENV["SNOWFLAKE_HTTP_RETRIES"] = "33" 536 | ENV["SNOWFLAKE_QUERY_TIMEOUT"] = "3333" 537 | ENV["SNOWFLAKE_DEFAULT_ROLE"] = "OTHER_ROLE" 538 | end 539 | 540 | after do 541 | ENV["SNOWFLAKE_JWT_TOKEN_TTL"] = nil 542 | ENV["SNOWFLAKE_CONNECTION_TIMEOUT"] = nil 543 | ENV["SNOWFLAKE_MAX_CONNECTIONS"] = nil 544 | ENV["SNOWFLAKE_MAX_THREADS_PER_QUERY"] = nil 545 | ENV["SNOWFLAKE_THREAD_SCALE_FACTOR"] = nil 546 | ENV["SNOWFLAKE_HTTP_RETRIES"] = nil 547 | ENV["SNOWFLAKE_QUERY_TIMEOUT"] = nil 548 | ENV["SNOWFLAKE_DEFAULT_ROLE"] = nil 549 | end 550 | 551 | it "sets the settings" do 552 | expect(client.instance_variable_get(:@key_pair_jwt_auth_manager). 553 | instance_variable_get(:@jwt_token_ttl)).to eq 3333 554 | expect(client.connection_timeout).to eq 33 555 | expect(client.max_connections).to eq 33 556 | expect(client.max_threads_per_query).to eq 33 557 | expect(client.thread_scale_factor).to eq 3 558 | expect(client.http_retries).to eq 33 559 | expect(client.query_timeout).to eq 3333 560 | expect(client.default_role).to eq "OTHER_ROLE" 561 | end 562 | end 563 | 564 | context "no extra env settings are set" do 565 | it "sets the settings to defaults" do 566 | expect(client.instance_variable_get(:@key_pair_jwt_auth_manager). 
567 | instance_variable_get(:@jwt_token_ttl) 568 | ).to eq RubySnowflake::Client::DEFAULT_JWT_TOKEN_TTL 569 | expect(client.connection_timeout).to eq RubySnowflake::Client::DEFAULT_CONNECTION_TIMEOUT 570 | expect(client.max_connections).to eq RubySnowflake::Client::DEFAULT_MAX_CONNECTIONS 571 | expect(client.max_threads_per_query).to eq RubySnowflake::Client::DEFAULT_MAX_THREADS_PER_QUERY 572 | expect(client.thread_scale_factor).to eq RubySnowflake::Client::DEFAULT_THREAD_SCALE_FACTOR 573 | expect(client.http_retries).to eq RubySnowflake::Client::DEFAULT_HTTP_RETRIES 574 | expect(client.default_role).to be_nil 575 | end 576 | end 577 | end 578 | 579 | describe RubySnowflake::Error do 580 | it "initializes with error details" do 581 | error = described_class.new("Test error message") 582 | expect(error.message).to eq "Test error message" 583 | end 584 | 585 | it "handles hash details" do 586 | error_details = { code: 123, message: "Error occurred" } 587 | error = described_class.new(error_details) 588 | expect(error.message).to eq error_details.to_s 589 | end 590 | end 591 | end 592 | --------------------------------------------------------------------------------