├── .gitignore ├── CHANGELOG.md ├── Gemfile ├── Gemfile.lock ├── README.md ├── Rakefile ├── bigquery.gemspec ├── lib ├── big_query.rb └── big_query │ ├── client.rb │ ├── client │ ├── datasets.rb │ ├── errors.rb │ ├── hashable.rb │ ├── job_types.rb │ ├── jobs.rb │ ├── load.rb │ ├── options.rb │ ├── query.rb │ ├── response.rb │ └── tables.rb │ ├── errors.rb │ └── version.rb └── test └── bigquery.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | .*.yml 3 | *.p12 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.9.1 2 | * Support optional query parameters to Datasets.list #34 3 | * Fetch all tables #35 4 | * Support optional parameters for Tabledata.list #36 5 | * Add table_raw_data method #37 6 | * Update google-api-client #38 7 | * Support JSON Key file authentication #42 8 | * Fix bug `insert_all_table_data` #43 9 | * Follow the version bump of google-api-client #44 10 | * Use pessimistic version constraint of google-api-client #45 11 | 12 | # 0.9.0 13 | * Add wrapper method of `bigquery.tables.patch` and `bigquery.tables.update` #32 14 | * Add wrapper method of `bigquery.datasets.list`, `bigquery.datasets.insert` and `bigquery.datasets.delete` #33 15 | 16 | # 0.8.3 17 | * Minor version bump of google-api-client 18 | 19 | # 0.8.2 20 | * Revert google-api-client version bump 21 | 22 | # 0.8.1 23 | * Tweak condition to reduce the query count #23 24 | * Update google-api-client to `0.9.pre1` #24 25 | 26 | # 0.8.0 27 | * Allow media and parameters when inserting jobs #20 28 | * Add query options such as useQueryCache, dryRun, maxResults #22 29 | 30 | # 0.7.0 31 | * Adds support for passing a string as key #18 32 | * Switch to Signet::OAuth2::Client for authorization #17 33 | 34 | # 0.6.1 35 | * Include insert module #16 36 | 37 | # 0.4.0 38 | * Added BigQuery::Client#insert_job. https://cloud.google.com/bigquery/docs/reference/v2/jobs/insert 39 | 40 | # 0.3.0 41 | * Added support to insert to allow for an array of rows 42 | * Locked the google-api-client gem to ~> 0.7.X 43 | * Added a possible workaround for #13 44 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: .
3 | specs: 4 | bigquery (0.9.0) 5 | google-api-client (>= 0.9.3) 6 | googleauth (>= 0.5.0) 7 | 8 | GEM 9 | remote: https://rubygems.org/ 10 | specs: 11 | addressable (2.4.0) 12 | byebug (8.2.4) 13 | coderay (1.1.1) 14 | faraday (0.9.2) 15 | multipart-post (>= 1.2, < 3) 16 | google-api-client (0.9.5) 17 | addressable (~> 2.3) 18 | googleauth (~> 0.5) 19 | httpclient (~> 2.7) 20 | hurley (~> 0.1) 21 | memoist (~> 0.11) 22 | mime-types (>= 1.6) 23 | representable (~> 2.3.0) 24 | retriable (~> 2.0) 25 | thor (~> 0.19) 26 | googleauth (0.5.1) 27 | faraday (~> 0.9) 28 | jwt (~> 1.4) 29 | logging (~> 2.0) 30 | memoist (~> 0.12) 31 | multi_json (~> 1.11) 32 | os (~> 0.9) 33 | signet (~> 0.7) 34 | httpclient (2.7.1) 35 | hurley (0.2) 36 | jwt (1.5.4) 37 | little-plugger (1.1.4) 38 | logging (2.1.0) 39 | little-plugger (~> 1.1) 40 | multi_json (~> 1.10) 41 | memoist (0.14.0) 42 | method_source (0.8.2) 43 | mime-types (3.0) 44 | mime-types-data (~> 3.2015) 45 | mime-types-data (3.2016.0221) 46 | minitest (5.8.4) 47 | multi_json (1.11.2) 48 | multipart-post (2.0.0) 49 | os (0.9.6) 50 | pry (0.10.3) 51 | coderay (~> 1.1.0) 52 | method_source (~> 0.8.1) 53 | slop (~> 3.4) 54 | pry-byebug (3.3.0) 55 | byebug (~> 8.0) 56 | pry (~> 0.10) 57 | rake (11.1.2) 58 | representable (2.3.0) 59 | uber (~> 0.0.7) 60 | retriable (2.1.0) 61 | signet (0.7.2) 62 | addressable (~> 2.3) 63 | faraday (~> 0.9) 64 | jwt (~> 1.5) 65 | multi_json (~> 1.10) 66 | slop (3.6.0) 67 | thor (0.19.1) 68 | uber (0.0.15) 69 | 70 | PLATFORMS 71 | ruby 72 | 73 | DEPENDENCIES 74 | bigquery! 75 | bundler 76 | minitest 77 | pry-byebug 78 | rake 79 | 80 | BUNDLED WITH 81 | 1.11.2 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BigQuery 2 | 3 | BigQuery is a wrapper around the google-api-client Ruby gem, designed to make interacting with BigQuery easier. 4 | 5 | ## Install 6 | 7 | gem install bigquery 8 | 9 | ## Authorization 10 | 11 | Only service accounts are supported right now. https://developers.google.com/accounts/docs/OAuth2#serviceaccount 12 |
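The client can also authenticate with a service account JSON key instead of a P12 key. `json_key` accepts either the path to the key file or the JSON contents themselves, and no `client_id` or `service_email` is needed in that case. A minimal sketch (the key path, project id and dataset below are placeholders):

    require 'big_query'

    opts = {}
    opts['json_key'] = '/path/to/keyfile.json' # or the JSON string itself
    opts['project_id'] = '54321'
    opts['dataset'] = 'yourdataset'

    bq = BigQuery::Client.new(opts)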
13 | ## Available methods 14 | 15 | * query 16 | * tables 17 | * datasets 18 | * load 19 | * tables_formatted 20 | * job 21 | * jobs 22 | * insert_job 23 | * refresh_auth 24 | 25 | ## Example 26 | 27 | require 'big_query' 28 | 29 | opts = {} 30 | opts['client_id'] = '1234.apps.googleusercontent.com' 31 | opts['service_email'] = '1234@developer.gserviceaccount.com' 32 | opts['key'] = '/path/to/somekeyfile-privatekey.p12' 33 | opts['project_id'] = '54321' 34 | opts['dataset'] = 'yourdataset' 35 | 36 | bq = BigQuery::Client.new(opts) 37 | 38 | puts bq.tables 39 | 40 | ## Tables 41 | 42 | List tables in the dataset 43 | 44 | bq.tables 45 | 46 | List table names 47 | 48 | bq.tables_formatted 49 | 50 | Fetch table data 51 | 52 | bq.table_data('table_name') 53 | 54 | Delete an existing table 55 | 56 | bq.delete_table('test123') 57 | 58 | Create a table. The first param is the table name; the second is the table schema, defined in the following format 59 | 60 | { 61 | field_name: { 62 | type: 'TYPE_VALUE BETWEEN (STRING, INTEGER, FLOAT, BOOLEAN, RECORD, TIMESTAMP)', 63 | mode: 'MODE_VALUE BETWEEN (NULLABLE, REQUIRED, REPEATED)' 64 | }, 65 | other_field_name: { ... } 66 | } 67 | 68 | 69 | For example 70 | 71 | table_name = 'test123' 72 | table_schema = { id: { type: 'INTEGER' }, 73 | name: { type: 'STRING' } } 74 | bq.create_table(table_name, table_schema) 75 | 76 | Describe table schema 77 | 78 | bq.describe_table('table_name') 79 | 80 | ## Datasets 81 | 82 | List datasets in the project 83 | 84 | bq.datasets 85 | 86 | List dataset names 87 | 88 | bq.datasets_formatted 89 | 90 | Delete an existing dataset 91 | 92 | bq.delete_dataset('test123') 93 | 94 | Create a dataset. The first param is the dataset name 95 | 96 | bq.create_dataset('test123') 97 | 98 | ## Querying 99 | 100 | bq.query("SELECT * FROM [#{opts['dataset']}.table_name] LIMIT 1") 101 |
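To fetch every row of a result set, including results that span several pages, the client also exposes `each_row`, which runs the query and then pages through `get_query_results` for you, yielding each raw row to the block. A minimal sketch (the table name is a placeholder):

    bq.each_row("SELECT * FROM [#{opts['dataset']}.table_name]") do |row|
      puts row
    end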
102 | ## Inserting 103 | 104 | Insert a single row 105 | 106 | bq.insert('table_name', 'id' => 123, 'type' => 'Task') 107 | 108 | Batch insert an array of rows. [See bigquery docs for limitations.](https://cloud.google.com/bigquery/streaming-data-into-bigquery#quota) 109 | 110 | data = [{'id' => 123, 'type' => 'Foo'}, {'id' => 321, 'type' => 'Bar'}] 111 | bq.insert('table_name', data) 112 | 113 | ## Patching 114 | 115 | Patch the schema of an existing table. The entire schema must be provided, including unchanged fields; otherwise a 'Provided Schema does not match Table' error occurs 116 | 117 | bq.patch_table('test', id: { type: 'INTEGER', mode: 'REQUIRED' }, type: { type: 'STRING' }, name: { type: 'STRING' }) 118 | 119 | Tables: patch [See bigquery docs for details.](https://cloud.google.com/bigquery/docs/reference/v2/tables/patch) 120 | 121 | ## Updating 122 | 123 | Update the schema of an existing table. As with patching, the entire schema must be provided 124 | 125 | bq.update_table('test', id: { type: 'INTEGER', mode: 'REQUIRED' }, type: { type: 'STRING' }, name: { type: 'STRING' }) 126 | 127 | Tables: update [See bigquery docs for details.](https://cloud.google.com/bigquery/docs/reference/v2/tables/update) 128 | 129 | ## Keys 130 | 131 | To get the keys you need: 132 | 133 | * a Google API project ([link](https://console.developers.google.com/project)) 134 | * BigQuery activated ([link](https://bigquery.cloud.google.com)) 135 | * a BigQuery dataset created in the project ([link](https://bigquery.cloud.google.com)) 136 | 137 | 1- Go to your project's Google API access page 138 | 139 | https://code.google.com/apis/console/b/0/?noredirect&pli=1#project:YOUR_PROJECT_ID:access 140 | 141 | 2- Create a new client ID for a service account 142 | 3- Download the key file 143 | 144 | Now you have everything: 145 | 146 | * client_id: API access client ID 147 | * service_email: API access email address 148 | * key: API access key file path 149 | * project_id: your Google API project id 150 | * dataset: your BigQuery dataset name 151 | 152 | ## Troubleshooting 153 | 154 | If you're getting an "invalid_grant" error it usually means your system clock is off. 155 | 156 | If you're getting unauthorized errors but you've been able to successfully connect before, you need to refresh your auth by running the "refresh_auth" method. 157 | 158 | ## How to run tests 159 | 160 | Before running the tests, you must create a file named `.bigquery_settings.yml` at the root of this repository. `.bigquery_settings.yml` must include the following information. 161 | 162 | ```yaml 163 | client_id: '1234.apps.googleusercontent.com' 164 | service_email: '1234@developer.gserviceaccount.com' 165 | key: '/path/to/somekeyfile-privatekey.p12' 166 | project_id: '54321' 167 | dataset: 'yourdataset' 168 | faraday_option: 169 | timeout: 999 170 | ``` 171 | 172 | Then run the tests via rake. 173 | 174 | ``` 175 | $ bundle install && bundle exec rake test 176 | ``` 177 | 178 | ## Contributing 179 | 180 | Fork and submit a pull request, and make sure you add a test for any feature you add. 181 | 182 | ## License 183 | 184 | LICENSE: 185 | 186 | (The MIT License) 187 | 188 | Copyright © 2012 Adam Bronte 189 | 190 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the ‘Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 191 | 192 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 193 | 194 | THE SOFTWARE IS PROVIDED ‘AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 195 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | require 'bundler/gem_tasks' 3 | require 'rake/testtask' 4 | 5 | Rake::TestTask.new(:test) do |test| 6 | test.libs << 'lib' << 'test' 7 | test.pattern = 'test/*.rb' 8 | test.warning = false 9 | test.verbose = true 10 | end 11 | -------------------------------------------------------------------------------- /bigquery.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | $:.push File.expand_path("../lib", __FILE__) 3 | require "big_query/version" 4 | 5 | Gem::Specification.new do |s| 6 | s.name = "bigquery" 7 | s.version = BigQuery::VERSION 8 | s.authors = ["Adam Bronte", "Andres Bravo"] 9 | s.email = ["adam@brontesaurus.com", "andresbravog@gmail.com"] 10 | s.description = "This library is a wrapper around the google-api-client ruby gem. 11 | It's meant to make calls to BigQuery easier and streamlined."
12 | s.require_paths = ["lib"] 13 | s.summary = "A nice wrapper for Google Big Query" 14 | s.homepage = "https://github.com/abronte/BigQuery" 15 | s.files = `git ls-files`.split("\n") 16 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 17 | 18 | s.add_dependency "google-api-client", "~> 0.9.3" 19 | s.add_dependency "googleauth", "~> 0.5.0" 20 | 21 | s.add_development_dependency "bundler" 22 | s.add_development_dependency "rake" 23 | s.add_development_dependency "minitest" 24 | s.add_development_dependency "pry-byebug" 25 | end 26 | -------------------------------------------------------------------------------- /lib/big_query.rb: -------------------------------------------------------------------------------- 1 | require 'json' 2 | require 'google/apis/bigquery_v2' 3 | require 'google/api_client/auth/key_utils' 4 | require 'big_query/version' 5 | require 'big_query/errors' 6 | require 'big_query/client' 7 | 8 | module BigQuery 9 | include BigQuery::Errors 10 | end 11 | -------------------------------------------------------------------------------- /lib/big_query/client.rb: -------------------------------------------------------------------------------- 1 | require 'big_query/client/errors' 2 | require 'big_query/client/query' 3 | require 'big_query/client/jobs' 4 | require 'big_query/client/tables' 5 | require 'big_query/client/datasets' 6 | require 'big_query/client/load' 7 | require 'big_query/client/hashable' 8 | require 'big_query/client/options' 9 | require 'big_query/client/response' 10 | require 'big_query/client/job_types' 11 | 12 | module BigQuery 13 | class Client 14 | include BigQuery::Client::Errors 15 | include BigQuery::Client::Query 16 | include BigQuery::Client::Jobs 17 | include BigQuery::Client::Tables 18 | include BigQuery::Client::Datasets 19 | include BigQuery::Client::Insert 20 | include BigQuery::Client::Hashable 21 | include BigQuery::Client::Options 22 | include BigQuery::Client::Response 23 | include BigQuery::Client::JobTypes 24 | 25 | attr_accessor :dataset, :project_id 26 | 27 | def initialize(opts = {}) 28 | # for debug 29 | # Google::Apis.logger.level = Logger::DEBUG 30 | 31 | @client = Google::Apis::BigqueryV2::BigqueryService.new 32 | 33 | @client.client_options.application_name = 'BigQuery ruby app' 34 | @client.client_options.application_version = BigQuery::VERSION 35 | 36 | # Note: 37 | # google-api-client 0.9 no longer uses Faraday as its HTTP client. 38 | # We accept these options for backward compatibility. 39 | # (The underlying HTTP client may be replaced by HTTP::Client in the near future.) 40 | # https://github.com/google/google-api-ruby-client/issues/336#issuecomment-179400592 41 | if opts['faraday_option'].is_a?(Hash) 42 | @client.request_options.timeout_sec = opts['faraday_option']['timeout'] 43 | @client.request_options.open_timeout_sec = opts['faraday_option']['open_timeout'] 44 | # We also accept request_option in place of faraday_option 45 | elsif opts['request_option'].is_a?(Hash) 46 | @client.request_options.timeout_sec = opts['request_option']['timeout_sec'] 47 | @client.request_options.open_timeout_sec = opts['request_option']['open_timeout_sec'] 48 | end 49 | 50 | scope = 'https://www.googleapis.com/auth/bigquery' 51 | if opts['json_key'].is_a?(String) && !opts['json_key'].empty?
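      # 'json_key' may be either a path to a service account JSON key file or the
      # JSON contents themselves: an existing file path is read from disk, anything
      # else is treated as the key material itself.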
52 | if File.exist?(opts['json_key']) 53 | auth = File.open(opts['json_key']) do |f| 54 | Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: scope) 55 | end 56 | else 57 | key = StringIO.new(opts['json_key']) 58 | auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope) 59 | end 60 | else 61 | begin 62 | key = Google::APIClient::KeyUtils.load_from_pkcs12(opts['key'], 'notasecret') 63 | rescue ArgumentError 64 | key = Google::APIClient::KeyUtils.load_from_pem(opts['key'], 'notasecret') 65 | end 66 | auth = Signet::OAuth2::Client.new( 67 | token_credential_uri: 'https://accounts.google.com/o/oauth2/token', 68 | audience: 'https://accounts.google.com/o/oauth2/token', 69 | scope: scope, 70 | issuer: opts['service_email'], 71 | signing_key: key) 72 | end 73 | 74 | @client.authorization = auth 75 | 76 | refresh_auth 77 | 78 | @project_id = opts['project_id'] 79 | @dataset = opts['dataset'] 80 | end 81 | 82 | def refresh_auth 83 | @client.authorization.fetch_access_token! 84 | end 85 | 86 | private 87 | 88 | def api(resp) 89 | data = deep_stringify_keys(resp.to_h) 90 | handle_error(data) if data && is_error?(data) 91 | data 92 | end 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /lib/big_query/client/datasets.rb: -------------------------------------------------------------------------------- 1 | module BigQuery 2 | class Client 3 | module Datasets 4 | 5 | # Lists the datasets 6 | # 7 | # @return [Hash] json api response 8 | def datasets(parameters = {}) 9 | response = api( 10 | @client.list_datasets( 11 | @project_id, 12 | parameters 13 | ) 14 | ) 15 | response['datasets'] || [] 16 | end 17 | 18 | # Lists the datasets returnning only the tableId 19 | # 20 | # @return [Hash] json api response 21 | def datasets_formatted(parameters = {}) 22 | datasets(parameters).map { |t| t['datasetReference']['datasetId'] } 23 | end 24 | 25 | # Creating a new dataset 26 | # 27 | # @param datasetId [String] dataset id to insert into 28 | # @return [Hash] json api response 29 | # 30 | # examples: 31 | # 32 | # @bq.create_dataset('new_dataset') 33 | def create_dataset(dataset_id) 34 | dataset = Google::Apis::BigqueryV2::Dataset.new( 35 | dataset_reference: { project_id: @project_id, dataset_id: dataset_id } 36 | ) 37 | api( 38 | @client.insert_dataset( 39 | @project_id, 40 | dataset 41 | ) 42 | ) 43 | end 44 | 45 | # Deletes the given datasetId 46 | # 47 | # @param datasetId [String] dataset id to insert into 48 | def delete_dataset(dataset_id) 49 | api( 50 | @client.delete_dataset( 51 | @project_id, 52 | dataset_id 53 | ) 54 | ) 55 | end 56 | end 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /lib/big_query/client/errors.rb: -------------------------------------------------------------------------------- 1 | module BigQuery 2 | class Client 3 | module Errors 4 | # Defines whenever the response is an error or not 5 | # 6 | # @param response [Hash] parsed json response 7 | # @return [Boolean] 8 | def is_error?(response) 9 | !response["error"].nil? 
10 | end 11 | 12 | # handles the error and raises an understandable error 13 | # 14 | # @param response [Hash] parsed json response 15 | # @raise [BigQueryError] 16 | def handle_error(response) 17 | error = response['error'] 18 | case error['code'] 19 | when 404 20 | fail BigQuery::Errors::NotFound, error['message'] 21 | else 22 | fail BigQuery::Errors::BigQueryError, error['message'] 23 | end 24 | end 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /lib/big_query/client/hashable.rb: -------------------------------------------------------------------------------- 1 | module BigQuery 2 | class Client 3 | module Hashable 4 | 5 | def process_value(val, convert_key_proc) 6 | case val 7 | when Hash 8 | Hash[val.map {|k, v| [convert_key_proc.call(k), process_value(v, convert_key_proc)] }] 9 | when Array 10 | val.map{ |v| process_value(v, convert_key_proc) } 11 | else 12 | val 13 | end 14 | end 15 | end 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/big_query/client/job_types.rb: -------------------------------------------------------------------------------- 1 | module BigQuery 2 | class Client 3 | module JobTypes 4 | 5 | def _copy(opts) 6 | _opts = opts.dup 7 | if (_opts[:source_tables]) 8 | _opts[:source_tables] = _opts[:source_tables].dup.map { |source_table| Google::Apis::BigqueryV2::TableReference.new(source_table) } 9 | else 10 | _opts[:source_table] = Google::Apis::BigqueryV2::TableReference.new(_opts[:source_table]) 11 | end 12 | _opts[:destination_table] = Google::Apis::BigqueryV2::TableReference.new(_opts[:destination_table]) 13 | 14 | Google::Apis::BigqueryV2::JobConfigurationTableCopy.new( 15 | _opts 16 | ) 17 | end 18 | 19 | def _extract(opts) 20 | _opts = opts.dup 21 | _opts[:source_table] = Google::Apis::BigqueryV2::TableReference.new(_opts[:source_table]) 22 | Google::Apis::BigqueryV2::JobConfigurationExtract.new( 23 | _opts 24 | ) 25 | end 26 | 27 | def _load(opts) 28 | _opts = opts.dup 29 | _opts[:destination_table] = Google::Apis::BigqueryV2::TableReference.new(_opts[:destination_table]) 30 | _opts[:schema] = Google::Apis::BigqueryV2::TableSchema.new(_opts[:schema]) if _opts[:schema] 31 | Google::Apis::BigqueryV2::JobConfigurationLoad.new( 32 | _opts 33 | ) 34 | end 35 | 36 | def _query(opts) 37 | _opts = opts.dup 38 | Google::Apis::BigqueryV2::JobConfigurationQuery.new( 39 | _opts 40 | ) 41 | end 42 | end 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/big_query/client/jobs.rb: -------------------------------------------------------------------------------- 1 | # https://cloud.google.com/bigquery/docs/reference/v2/jobs 2 | 3 | module BigQuery 4 | class Client 5 | module Jobs 6 | # Fetches a bigquery job by id 7 | # 8 | # @param id [Integer] job id to fetch 9 | # @param options [Hash] bigquery opts accepted 10 | # @return [Hash] json api response 11 | def job(id, opts = {}) 12 | api( 13 | @client.get_job( 14 | @project_id, 15 | id, 16 | deep_symbolize_keys(opts) 17 | ) 18 | ) 19 | end 20 | 21 | # lists all the jobs 22 | # 23 | # @param options [Hash] bigquery opts accepted 24 | # @return [Hash] json api response 25 | def jobs(opts = {}) 26 | api( 27 | @client.list_jobs( 28 | @project_id, 29 | deep_symbolize_keys(opts) 30 | ) 31 | ) 32 | end 33 | 34 | # Gets the results of a given job 35 | # 36 | # @param id [Integer] job id to fetch 37 | # @param options [Hash] bigquery opts accepted 38 | # 
@return [Hash] json api response 39 | def get_query_results(id, opts = {}) 40 | 41 | api( 42 | @client.get_job_query_results( 43 | @project_id, id, deep_symbolize_keys(opts) 44 | ) 45 | ) 46 | end 47 | 48 | # Insert a job 49 | # 50 | # @param options [Hash] hash of job options 51 | # @param parameters [Hash] hash of parameters (uploadType, etc.) 52 | # @param media [Google::APIClient::UploadIO] media upload 53 | # @return [Hash] json api response 54 | def insert_job(opts, parameters = {}, media = nil) 55 | _opts = deep_symbolize_keys(opts) 56 | job_type = _opts.keys.find { |k| [:copy, :extract, :load, :query].include?(k.to_sym) } 57 | job_type_configuration = __send__("_#{job_type.to_s}", _opts[job_type]) 58 | job_configuration = Google::Apis::BigqueryV2::JobConfiguration.new( 59 | job_type.to_sym => job_type_configuration 60 | ) 61 | job_configuration.dry_run = _opts[:dry_run] if _opts[:dry_run] 62 | job = Google::Apis::BigqueryV2::Job.new( 63 | configuration: job_configuration 64 | ) 65 | api( 66 | @client.insert_job( 67 | @project_id, 68 | job, 69 | upload_source: media 70 | ) 71 | ) 72 | end 73 | end 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /lib/big_query/client/load.rb: -------------------------------------------------------------------------------- 1 | module BigQuery 2 | class Client 3 | module Insert 4 | # Loading Data From Cloud Datastore 5 | # 6 | # see https://cloud.google.com/bigquery/loading-data-cloud-datastore for possible opts 7 | # @param opts [Hash] field value hash to be inserted 8 | # @return [Hash] 9 | def load(opts) 10 | _opts = deep_symbolize_keys(opts) 11 | job_configuration = Google::Apis::BigqueryV2::JobConfiguration.new( 12 | load: _load(_opts[:load]) 13 | ) 14 | job_configuration.dry_run = _opts[:dry_run] if _opts[:dry_run] 15 | job = Google::Apis::BigqueryV2::Job.new( 16 | configuration: job_configuration 17 | ) 18 | api( 19 | @client.insert_job( 20 | @project_id, 21 | job 22 | ) 23 | ) 24 | end 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /lib/big_query/client/options.rb: -------------------------------------------------------------------------------- 1 | module BigQuery 2 | class Client 3 | module Options 4 | 5 | def deep_symbolize_keys(opts) 6 | convert_key_proc = Proc.new { |k| underscore(k.to_s).to_sym } 7 | Hash[opts.map { |k, v| [convert_key_proc.call(k), process_value(v, convert_key_proc)] }] 8 | end 9 | 10 | def underscore(str) 11 | str.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2'). 12 | gsub(/([a-z\d])([A-Z])/,'\1_\2'). 13 | tr("-", "_"). 14 | downcase 15 | end 16 | end 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /lib/big_query/client/query.rb: -------------------------------------------------------------------------------- 1 | module BigQuery 2 | class Client 3 | module Query 4 | # Performs the given query in the bigquery api 5 | # 6 | # @param given_query [String] query to perform 7 | # @param options [Hash] query options 8 | # @option options [Integer] timeout or timeoutMs (90 * 1000) timeout in miliseconds 9 | # @option options [Boolean] dryRun Don't actually run this job 10 | # @option options [Integer] maxResults The maximum number of rows of data to return per page of results. 11 | # @option options [Boolean] useQueryCache Whether to look for the result in the query cache. 
12 | # @return [Hash] json api response 13 | # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs/query 14 | def query(given_query, options={}) 15 | query_request = Google::Apis::BigqueryV2::QueryRequest.new( 16 | query: given_query, 17 | ) 18 | query_request.timeout_ms = options[:timeout] || options[:timeoutMs] || 90 * 1000 19 | query_request.max_results = options[:maxResults] if options[:maxResults] 20 | query_request.dry_run = options[:dryRun] if options.has_key?(:dryRun) 21 | query_request.use_query_cache = options[:useQueryCache] if options.has_key?(:useQueryCache) 22 | 23 | api( 24 | @client.query_job( 25 | @project_id, 26 | query_request 27 | ) 28 | ) 29 | end 30 | 31 | # perform a query synchronously 32 | # fetch all result rows, even when that takes >1 query 33 | # invoke /block/ once for each row, passing the row 34 | # 35 | # @param q [String] query to be executed 36 | # @param options [Hash] query options 37 | # @option options [Integer] timeout (90 * 1000) timeout in miliseconds 38 | def each_row(q, options = {}, &block) 39 | current_row = 0 40 | # repeatedly fetch results, starting from current_row 41 | # invoke the block on each one, then grab next page if there is one 42 | # it'll terminate when res has no 'rows' key or when we've done enough rows 43 | # perform query... 44 | res = query(q, options) 45 | job_id = res['jobReference']['jobId'] 46 | # call the block on the first page of results 47 | if( res && res['rows'] ) 48 | res['rows'].each(&block) 49 | current_row += res['rows'].size 50 | end 51 | # keep grabbing pages from the API and calling the block on each row 52 | while( current_row < res['totalRows'].to_i && ( res = get_query_results(job_id, :startIndex => current_row) ) && res['rows'] ) do 53 | res['rows'].each(&block) 54 | current_row += res['rows'].size 55 | end 56 | end 57 | end 58 | end 59 | end 60 | -------------------------------------------------------------------------------- /lib/big_query/client/response.rb: -------------------------------------------------------------------------------- 1 | module BigQuery 2 | class Client 3 | module Response 4 | 5 | def deep_stringify_keys(response) 6 | convert_key_proc = Proc.new { |k| camel_case_lower(k.to_s) } 7 | Hash[response.map { |k, v| [convert_key_proc.call(k), process_value(v, convert_key_proc)] }] 8 | end 9 | 10 | def camel_case_lower(str) 11 | str.split('_').inject([]){ |buffer,e| buffer.push(buffer.empty? ? 
e : e.capitalize) }.join 12 | end 13 | end 14 | end 15 | end 16 | -------------------------------------------------------------------------------- /lib/big_query/client/tables.rb: -------------------------------------------------------------------------------- 1 | 2 | # Module to handle table actions 3 | # https://developers.google.com/bigquery/docs/tables 4 | module BigQuery 5 | class Client 6 | module Tables 7 | ALLOWED_FIELD_TYPES = ['STRING', 'INTEGER', 'FLOAT', 'BOOLEAN', 'RECORD', 'TIMESTAMP', 'DATE'] 8 | ALLOWED_FIELD_MODES = ['NULLABLE', 'REQUIRED', 'REPEATED'] 9 | 10 | # Lists the tables 11 | # 12 | # @param dataset [String] dataset to look for 13 | # @return [Hash] json api response 14 | def tables(dataset = @dataset) 15 | response = api( 16 | @client.list_tables( 17 | @project_id, 18 | dataset, 19 | max_results: 9999999 # default is 50 20 | ) 21 | ) 22 | response['tables'] || [] 23 | end 24 | 25 | # Lists the tables returnning only the tableId 26 | # 27 | # @param dataset [String] dataset to look for 28 | # @return [Hash] json api response 29 | def tables_formatted(dataset = @dataset) 30 | tables(dataset).map { |t| t['tableReference']['tableId'] } 31 | end 32 | 33 | # Returns entire response of table data 34 | # 35 | # @param tableId [String] id of the table to look for 36 | # @param dataset [String] dataset to look for 37 | # @param options [Hash] hash of optional query parameters (maxResults, startIndex) 38 | # @return [Hash] json api response 39 | def table_raw_data(table_id, dataset_id = @dataset, options = {}) 40 | option_parameters = {} 41 | # I would like to change the option to snake case if there are no users, because I have added this feature 42 | option_parameters[:max_results] = options[:maxResults] if options[:maxResults] 43 | option_parameters[:start_index] = options[:startIndex] if options[:startIndex] 44 | 45 | api( 46 | @client.list_table_data( 47 | @project_id, 48 | dataset_id, 49 | table_id, 50 | option_parameters 51 | ) 52 | ) 53 | end 54 | 55 | # Returns all rows of table data 56 | # 57 | # @param tableId [String] id of the table to look for 58 | # @param dataset [String] dataset to look for 59 | # @param options [Hash] hash of optional query parameters (maxResults, startIndex) 60 | # @return [Hash] json api response 61 | def table_data(table_id, dataset_id = @dataset, options = {}) 62 | response = table_raw_data(table_id, dataset_id, options) 63 | response['rows'] || [] 64 | end 65 | 66 | # insert row into table 67 | # 68 | # @param tableId [String] table id to insert into 69 | # @param opts [Hash] field value hash to be inserted 70 | # @return [Hash] 71 | def insert(table_id, opts) 72 | if opts.class == Array 73 | rows = opts.map do |x| 74 | Google::Apis::BigqueryV2::InsertAllTableDataRequest::Row.new(json: x) 75 | end 76 | else 77 | rows = [Google::Apis::BigqueryV2::InsertAllTableDataRequest::Row.new(json: opts)] 78 | end 79 | request = Google::Apis::BigqueryV2::InsertAllTableDataRequest.new(rows: rows) 80 | 81 | api( 82 | @client.insert_all_table_data( 83 | @project_id, 84 | @dataset, 85 | table_id, 86 | request 87 | ) 88 | ) 89 | end 90 | 91 | # Creating a new table 92 | # 93 | # @param tableId [String] table id to insert into 94 | # @param schema [Hash] name => opts hash for the schema 95 | # 96 | # examples: 97 | # 98 | # @bq.create_table('new_table', id: { type: 'INTEGER', mode: 'required' }) 99 | # @bq.create_table('new_table', price: { type: 'FLOAT' }) 100 | def create_table(table_id, schema={}) 101 | table = Google::Apis::BigqueryV2::Table.new( 
102 | table_reference: { project_id: @project_id, dataset_id: @dataset, table_id: table_id }, 103 | schema: { fields: validate_schema(schema) } 104 | ) 105 | api( 106 | @client.insert_table( 107 | @project_id, 108 | @dataset, 109 | table 110 | ) 111 | ) 112 | end 113 | 114 | # Deletes the given table_id 115 | # 116 | # @param table_id [String] table id to delete 117 | def delete_table(table_id) 118 | api( 119 | @client.delete_table( 120 | @project_id, 121 | @dataset, 122 | table_id 123 | ) 124 | ) 125 | end 126 | 127 | # Patching an existing table 128 | # 129 | # @param tableId [String] table id to patch 130 | # @param schema [Hash] name => opts hash for the schema 131 | # 132 | # examples: 133 | # 134 | # @bq.patch_table('existing_table', id: { type: 'INTEGER', mode: 'REQUIRED' }, price: { type: 'FLOAT' }) 135 | # The entire schema must be provided, including fields that are not changing; 136 | # otherwise 'BigQuery::Errors::BigQueryError: Provided Schema does not match Table' occurs 137 | def patch_table(table_id, schema={}) 138 | table = Google::Apis::BigqueryV2::Table.new( 139 | table_reference: { project_id: @project_id, dataset_id: @dataset, table_id: table_id }, 140 | schema: { fields: validate_schema(schema) } 141 | ) 142 | api( 143 | @client.patch_table( 144 | @project_id, 145 | @dataset, 146 | table_id, 147 | table 148 | ) 149 | ) 150 | end 151 | 152 | # Updating an existing table 153 | # 154 | # @param tableId [String] table id to update 155 | # @param schema [Hash] name => opts hash for the schema 156 | # 157 | # examples: 158 | # 159 | # @bq.update_table('existing_table', id: { type: 'INTEGER', mode: 'REQUIRED' }, price: { type: 'FLOAT' }) 160 | # The entire schema must be provided, including fields that are not changing; 161 | # otherwise 'BigQuery::Errors::BigQueryError: Provided Schema does not match Table' occurs 162 | def update_table(table_id, schema={}) 163 | table = Google::Apis::BigqueryV2::Table.new( 164 | table_reference: { project_id: @project_id, dataset_id: @dataset, table_id: table_id }, 165 | schema: { fields: validate_schema(schema) } 166 | ) 167 | api( 168 | @client.update_table( 169 | @project_id, 170 | @dataset, 171 | table_id, 172 | table 173 | ) 174 | ) 175 | end 176 | 177 | # Describe the schema of the given tableId 178 | # 179 | # @param tableId [String] table id to describe 180 | # @param dataset [String] dataset to look for 181 | # @return [Hash] json api response 182 | def describe_table(table_id, dataset = @dataset) 183 | api( 184 | @client.get_table( 185 | @project_id, 186 | dataset, 187 | table_id 188 | ) 189 | ) 190 | end 191 | 192 | protected 193 | 194 | # Translate the given schema to one understandable by bigquery 195 | # 196 | # @param [Hash] schema like { field_name => { type: 'TYPE', mode: 'MODE' }, ...
} 197 | # @return [Array] 198 | def validate_schema(schema) 199 | fields = [] 200 | schema.map do |name, options| 201 | type = (ALLOWED_FIELD_TYPES & [options[:type].to_s]).first 202 | mode = (ALLOWED_FIELD_MODES & [options[:mode].to_s]).first 203 | field = { "name" => name.to_s, "type" => type } 204 | field["mode"] = mode if mode 205 | if field["type"] == 'RECORD' 206 | field["fields"] = validate_schema(options[:fields]) 207 | end 208 | fields << deep_symbolize_keys(field) 209 | end 210 | fields 211 | end 212 | end 213 | end 214 | end 215 | -------------------------------------------------------------------------------- /lib/big_query/errors.rb: -------------------------------------------------------------------------------- 1 | # BigQuery API errors 2 | module BigQuery 3 | module Errors 4 | class BigQueryError < StandardError; end 5 | class NotFound < BigQueryError; end 6 | class BadDataset < BigQueryError; end 7 | end 8 | end 9 | -------------------------------------------------------------------------------- /lib/big_query/version.rb: -------------------------------------------------------------------------------- 1 | module BigQuery 2 | VERSION = '0.9.1' 3 | end 4 | -------------------------------------------------------------------------------- /test/bigquery.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | require 'minitest/autorun' 3 | require 'yaml' 4 | require 'big_query' 5 | require 'pry-byebug' 6 | 7 | module BigQuery 8 | class Client 9 | attr_accessor :client 10 | end 11 | end 12 | 13 | class BigQueryTest < MiniTest::Test 14 | def setup 15 | @bq = BigQuery::Client.new(config) 16 | if @bq.tables_formatted.include? 'test' 17 | @bq.delete_table('test') 18 | end 19 | @bq.create_table('test', id: { type: 'INTEGER', mode: 'REQUIRED' }, type: { type: 'STRING', mode: 'NULLABLE' }) 20 | end 21 | 22 | def config 23 | if defined? @config 24 | return @config 25 | else 26 | config_data ||= File.expand_path(File.dirname(__FILE__) + "/../.bigquery_settings.yml") 27 | @config = YAML.load_file(config_data) 28 | end 29 | end 30 | 31 | def test_faraday_option_config 32 | assert_equal @bq.client.client.request_options.timeout, 999 33 | end 34 | 35 | def test_for_tables 36 | table = @bq.tables.select{|t| t['id'] == "#{config['project_id']}:#{config['dataset']}.test"}.first 37 | 38 | assert_equal table['kind'], "bigquery#table" 39 | assert_equal table['tableReference']['tableId'], 'test' 40 | end 41 | 42 | def test_for_tables_formatted 43 | result = @bq.tables_formatted 44 | 45 | assert_includes result, 'test' 46 | end 47 | 48 | def test_for_table_raw_data 49 | result = @bq.table_raw_data('test') 50 | 51 | assert_kind_of Hash, result 52 | assert_equal result['kind'], "bigquery#tableDataList" 53 | end 54 | 55 | def test_for_table_data_maxResults 56 | result = @bq.table_data('test', @bq.dataset, maxResults: 100) 57 | 58 | assert_kind_of Array, result 59 | end 60 | 61 | def test_for_table_data_startIndex 62 | # startIndex is Zero-based 63 | result = @bq.table_data('test', @bq.dataset, maxResults: 100, startIndex: 100) 64 | 65 | assert_kind_of Array, result 66 | end 67 | 68 | def test_for_create_table 69 | if @bq.tables_formatted.include? 
'test123' 70 | @bq.delete_table('test123') 71 | end 72 | 73 | schema = { 74 | id: { type: 'INTEGER'}, 75 | city: { 76 | name:"city", 77 | type:"RECORD", 78 | mode: "nullable", 79 | fields: { 80 | id: {name:"id", type:"INTEGER" }, 81 | name: {name:"name", type:"STRING" }, 82 | country: { name:"country", type:"STRING" }, 83 | time: { name:"time", type:"TIMESTAMP" } 84 | } 85 | } 86 | } 87 | 88 | result = @bq.create_table('test123', schema) 89 | 90 | assert_equal result['kind'], "bigquery#table" 91 | assert_equal result['tableReference']['tableId'], "test123" 92 | assert_equal result['schema']['fields'], [ 93 | {"name"=>"id", "type"=>"INTEGER"}, 94 | { 95 | "name"=>"city", 96 | "type"=>"RECORD", 97 | "fields"=>[ 98 | {"name"=>"id", "type"=>"INTEGER"}, 99 | {"name"=>"name", "type"=>"STRING"}, 100 | {"name"=>"country", "type"=>"STRING"}, 101 | {"name"=>"time", "type"=>"TIMESTAMP"} 102 | ] 103 | } 104 | ] 105 | end 106 | 107 | def test_for_delete_table 108 | if !@bq.tables_formatted.include? 'test123' 109 | @bq.create_table('test123', id: { type: 'INTEGER' }) 110 | end 111 | @bq.delete_table('test123') 112 | 113 | tables = @bq.tables_formatted 114 | 115 | refute_includes tables, 'test123' 116 | end 117 | 118 | def test_for_patch_table 119 | schema = { 120 | id: { type: 'INTEGER', mode: 'REQUIRED' }, 121 | type: { type: 'STRING', mode: 'NULLABLE' }, 122 | date: { type: 'TIMESTAMP' }, 123 | city: { 124 | name: 'city', 125 | type: 'RECORD', 126 | mode: 'nullable', 127 | fields: { 128 | id: { name: 'id', type: 'INTEGER' } 129 | } 130 | } 131 | } 132 | 133 | result = @bq.patch_table('test', schema) 134 | 135 | assert_equal result['kind'], "bigquery#table" 136 | assert_equal result['tableReference']['tableId'], "test" 137 | assert_equal result['schema']['fields'], [ 138 | { 'name' => 'id', 'type' => 'INTEGER', 'mode' => 'REQUIRED' }, 139 | { 'name' => 'type', 'type' => 'STRING', 'mode' => 'NULLABLE' }, 140 | { 'name' => 'date', 'type' => 'TIMESTAMP' }, 141 | { 142 | 'name' => 'city', 143 | 'type' => 'RECORD', 144 | 'fields' => [ 145 | { 'name' => 'id', 'type' => 'INTEGER' }, 146 | ] 147 | } 148 | ] 149 | end 150 | 151 | def test_for_update_table 152 | schema = { 153 | id: { type: 'INTEGER', mode: 'REQUIRED' }, 154 | type: { type: 'STRING', mode: 'NULLABLE' }, 155 | name: { type: 'STRING' } 156 | } 157 | 158 | result = @bq.update_table('test', schema) 159 | 160 | assert_equal result['kind'], "bigquery#table" 161 | assert_equal result['tableReference']['tableId'], "test" 162 | assert_equal result['schema']['fields'], [ 163 | { 'name' => 'id', 'type' => 'INTEGER', 'mode' => 'REQUIRED' }, 164 | { 'name' => 'type', 'type' => 'STRING', 'mode' => 'NULLABLE' }, 165 | { 'name' => 'name', 'type' => 'STRING' } 166 | ] 167 | end 168 | 169 | def test_for_describe_table 170 | result = @bq.describe_table('test') 171 | 172 | assert_equal result['kind'], "bigquery#table" 173 | assert_equal result['type'], "TABLE" 174 | assert_equal result['id'], "#{config['project_id']}:#{config['dataset']}.test" 175 | assert_equal result['tableReference']['tableId'], 'test' 176 | assert_equal result['schema']['fields'][0]['name'], 'id' 177 | assert_equal result['schema']['fields'][0]['type'], 'INTEGER' 178 | assert_equal result['schema']['fields'][0]['mode'], 'REQUIRED' 179 | assert_equal result['schema']['fields'][1]['name'], 'type' 180 | assert_equal result['schema']['fields'][1]['type'], 'STRING' 181 | assert_equal result['schema']['fields'][1]['mode'], 'NULLABLE' 182 | end 183 | 184 | def test_for_query 185 | result = 
@bq.query("SELECT * FROM [#{config['dataset']}.test] LIMIT 1") 186 | 187 | assert_equal result['kind'], "bigquery#queryResponse" 188 | assert_equal result['jobComplete'], true 189 | end 190 | 191 | def test_for_query_useQueryCache 192 | result = @bq.query("SELECT * FROM [#{config['dataset']}.test] LIMIT 1", useQueryCache: true) 193 | result = @bq.query("SELECT * FROM [#{config['dataset']}.test] LIMIT 1", useQueryCache: true) 194 | 195 | assert_equal result['cacheHit'], true 196 | end 197 | 198 | def test_for_query_dryRun 199 | result = @bq.query("SELECT * FROM [#{config['dataset']}.test] LIMIT 1", dryRun: true) 200 | 201 | assert_equal result['jobReference']['jobId'], nil 202 | end 203 | 204 | def test_for_insert 205 | result = @bq.insert('test' ,"id" => 123, "type" => "Task") 206 | 207 | assert_equal result['kind'], "bigquery#tableDataInsertAllResponse" 208 | end 209 | 210 | def test_for_insert_array 211 | data = [ 212 | {"id" => 123, "type" => "Task"}, 213 | {"id" => 321, "type" => "Other task"} 214 | ] 215 | 216 | result = @bq.insert('test' , data) 217 | 218 | assert_equal result['kind'], "bigquery#tableDataInsertAllResponse" 219 | 220 | # You can check the results. However, the test is slightly slower 221 | # sleep 5 222 | # result = @bq.query("SELECT * FROM [#{config['dataset']}.test]") 223 | # assert_equal result['totalRows'], "2" 224 | end 225 | 226 | def test_for_insert_job 227 | result = @bq.insert_job(query: {query: "SELECT * FROM [#{config['dataset']}.test] LIMIT 1"}) 228 | 229 | assert_equal result['kind'], "bigquery#job" 230 | end 231 | 232 | def test_for_datasets 233 | dataset = @bq.datasets.select{|t| t['id'] == "#{config['project_id']}:#{config['dataset']}"}.first 234 | 235 | assert_equal dataset['kind'], "bigquery#dataset" 236 | assert_equal dataset['datasetReference']['datasetId'], config['dataset'] 237 | end 238 | 239 | def test_for_datasets_formatted 240 | result = @bq.datasets_formatted 241 | 242 | assert_includes result, config['dataset'] 243 | end 244 | 245 | def test_for_create_datasets 246 | if @bq.datasets_formatted.include? 'test123' 247 | @bq.delete_dataset('test123') 248 | end 249 | 250 | result = @bq.create_dataset('test123') 251 | 252 | assert_equal result['kind'], "bigquery#dataset" 253 | assert_equal result['datasetReference']['datasetId'], 'test123' 254 | end 255 | 256 | def test_for_delete_datasets 257 | if !@bq.datasets_formatted.include? 'test123' 258 | @bq.create_dataset('test123') 259 | end 260 | 261 | @bq.delete_dataset('test123') 262 | 263 | datasets = @bq.datasets_formatted 264 | 265 | refute_includes datasets, 'test123' 266 | end 267 | end 268 | --------------------------------------------------------------------------------