├── exe
│   ├── bq-migrate
│   └── bq_migrate
├── example
│   ├── example.yml
│   ├── schema.json
│   ├── copy_table.yml
│   ├── table_info.yml
│   ├── insert_select.yml
│   ├── migrate_table.yml
│   ├── application_default.yml
│   ├── migrate_partitioned_table.yml
│   └── migrate_clustered_table.yml
├── .rspec
├── Gemfile
├── lib
│   ├── bigquery_migration
│   │   ├── version.rb
│   │   ├── error.rb
│   │   ├── hash_util.rb
│   │   ├── logger.rb
│   │   ├── time_with_zone.rb
│   │   ├── config_loader.rb
│   │   ├── action_runner.rb
│   │   ├── cli.rb
│   │   ├── action.rb
│   │   ├── table_data.rb
│   │   ├── schema.rb
│   │   └── bigquery_wrapper.rb
│   └── bigquery_migration.rb
├── .travis.yml
├── bin
│   ├── setup
│   └── console
├── .gitignore
├── Rakefile
├── test
│   ├── helper.rb
│   ├── test_schema.rb
│   ├── test_table_data.rb
│   └── test_bigquery_wrapper.rb
├── LICENSE.txt
├── bigquery_migration.gemspec
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
└── README.md

/exe/bq-migrate:
--------------------------------------------------------------------------------
 1 | bq_migrate
--------------------------------------------------------------------------------
/example/example.yml:
--------------------------------------------------------------------------------
 1 | migrate_table.yml
--------------------------------------------------------------------------------
/.rspec:
--------------------------------------------------------------------------------
 1 | --format documentation
 2 | --color
 3 | 
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | source 'https://rubygems.org'
 2 | 
 3 | gemspec
 4 | 
--------------------------------------------------------------------------------
/lib/bigquery_migration/version.rb:
--------------------------------------------------------------------------------
 1 | class BigqueryMigration
 2 |   VERSION = "0.3.2"
 3 | end
 4 | 
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: ruby
 2 | rvm:
 3 |   - 2.3.0
 4 | before_install: gem install bundler -v 1.11.2
--------------------------------------------------------------------------------
/exe/bq_migrate:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | require_relative '../lib/bigquery_migration/cli'
 4 | BigqueryMigration::CLI.start(ARGV)
 5 | 
--------------------------------------------------------------------------------
/bin/setup:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -euo pipefail
 3 | IFS=$'\n\t'
 4 | set -vx
 5 | 
 6 | bundle install
 7 | 
 8 | # Do any other automated setup that you need to do here
 9 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /.bundle/
 2 | /.yardoc
 3 | /Gemfile.lock
 4 | /_yardoc/
 5 | /coverage/
 6 | /doc/
 7 | /pkg/
 8 | /spec/reports/
 9 | /tmp/
10 | your-project-000.json
11 | .tags
12 | .ruby-version
--------------------------------------------------------------------------------
/lib/bigquery_migration/error.rb:
--------------------------------------------------------------------------------
 1 | class BigqueryMigration
 2 |   class Error < StandardError; end
 3 |   class ConfigError < Error; end
 4 |   class JobTimeoutError < Error; end
 5 |   class NotFoundError < Error; end
 6 | end
 7 | 
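The error classes above are what a caller can rescue when a run fails. A minimal sketch (not part of the repository; it assumes a `config.yml` like the one shown in the README further down):

```ruby
# Hypothetical caller sketch: run a config through ActionRunner and handle
# the gem's own error types defined in error.rb above.
require 'bigquery_migration'
require 'bigquery_migration/action_runner'

begin
  # dry_run: true mirrors the CLI default (the CLI's --exec option turns it off)
  result = BigqueryMigration::ActionRunner.new('config.yml', dry_run: true).run
  puts result[:success] ? 'ok' : 'failed'
rescue BigqueryMigration::ConfigError => e
  # Raised, for example, when the YAML has no `actions` key (see action_runner.rb)
  warn "invalid config: #{e.message}"
end
```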
-------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | 3 | require 'rake/testtask' 4 | desc 'Run test_unit based test' 5 | Rake::TestTask.new(:test) do |t| 6 | t.libs << "test" 7 | t.test_files = Dir["test/**/test_*.rb"].sort 8 | t.verbose = true 9 | end 10 | task :default => :test 11 | -------------------------------------------------------------------------------- /test/helper.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'test/unit' 4 | require 'test/unit/rr' 5 | require 'pry' 6 | require 'bigquery_migration' 7 | 8 | APP_ROOT = File.dirname(__dir__) 9 | TEST_ROOT = File.join(APP_ROOT, 'test') 10 | JSON_KEYFILE = File.join(APP_ROOT, "example/your-project-000.json") 11 | 12 | BigqueryMigration.logger = Logger.new(nil) 13 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "bundler/setup" 4 | require "bigquery_migration" 5 | 6 | # You can add fixtures and/or initialization code here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 8 | 9 | # (If you use this, don't forget to add pry to your Gemfile!) 10 | # require "pry" 11 | # Pry.start 12 | 13 | require "irb" 14 | IRB.start 15 | -------------------------------------------------------------------------------- /example/schema.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name":"timestamp", 4 | "type":"TIMESTAMP" 5 | }, 6 | { 7 | "name":"long", 8 | "type":"INTEGER" 9 | }, 10 | { 11 | "name":"string", 12 | "type":"STRING" 13 | }, 14 | { 15 | "name":"double", 16 | "type":"FLOAT" 17 | }, 18 | { 19 | "name":"boolean", 20 | "type":"BOOLEAN" 21 | }, 22 | { 23 | "name":"date", 24 | "type":"DATE" 25 | } 26 | ] 27 | -------------------------------------------------------------------------------- /example/copy_table.yml: -------------------------------------------------------------------------------- 1 | bigquery: &bigquery 2 | credentials_file: example/your-project-000.json 3 | dataset: your_dataset_name 4 | table: your_table_name 5 | 6 | actions: 7 | - action: create_dataset 8 | <<: *bigquery 9 | - action: migrate_table 10 | <<: *bigquery 11 | columns: 12 | - { name: 'timestamp', type: 'TIMESTAMP' } 13 | - name: 'record' 14 | type: 'RECORD' 15 | fields: 16 | - { name: 'string', type: 'STRING' } 17 | - { name: 'integer', type: 'INTEGER' } 18 | - action: copy_table 19 | <<: *bigquery 20 | destination_table: your_table_name_copy 21 | -------------------------------------------------------------------------------- /example/table_info.yml: -------------------------------------------------------------------------------- 1 | bigquery: &bigquery 2 | credentials_file: example/your-project-000.json 3 | dataset: your_dataset_name 4 | table: your_table_name 5 | 6 | actions: 7 | - action: create_dataset 8 | <<: *bigquery 9 | - action: migrate_table 10 | <<: *bigquery 11 | columns: 12 | - { name: 'timestamp', type: 'TIMESTAMP' } 13 | - name: 'record' 14 | type: 'RECORD' 15 | fields: 16 | - { name: 'string', type: 'STRING' } 17 | - { name: 'integer', type: 'INTEGER' } 18 | - action: table_info 19 | table: your_table_name 20 | <<: *bigquery 21 | - action: table_info 
22 | prefix: your_table_name 23 | <<: *bigquery 24 | -------------------------------------------------------------------------------- /example/insert_select.yml: -------------------------------------------------------------------------------- 1 | bigquery: &bigquery 2 | credentials_file: example/your-project-000.json 3 | dataset: your_dataset_name 4 | table: your_table_name 5 | 6 | actions: 7 | - action: create_dataset 8 | <<: *bigquery 9 | - action: migrate_table 10 | <<: *bigquery 11 | columns: 12 | - { name: 'timestamp', type: 'TIMESTAMP' } 13 | - name: 'record' 14 | type: 'RECORD' 15 | fields: 16 | - { name: 'string', type: 'STRING' } 17 | - { name: 'integer', type: 'INTEGER' } 18 | - action: insert_select 19 | <<: *bigquery 20 | destination_table: your_table_name_insert_select 21 | query: select * from [your_dataset_name.your_table_name] 22 | -------------------------------------------------------------------------------- /example/migrate_table.yml: -------------------------------------------------------------------------------- 1 | bigquery: &bigquery 2 | credentials_file: example/your-project-000.json 3 | dataset: your_dataset_name 4 | table: your_table_name 5 | 6 | actions: 7 | - action: create_dataset 8 | <<: *bigquery 9 | - action: migrate_table 10 | <<: *bigquery 11 | columns: 12 | - { name: 'timestamp', type: 'TIMESTAMP' } 13 | - name: 'record' 14 | type: 'RECORD' 15 | fields: 16 | - { name: 'string', type: 'STRING' } 17 | - { name: 'integer', type: 'INTEGER' } 18 | - { name: 'bytes', type: 'BYTES' } 19 | - action: migrate_table 20 | <<: *bigquery 21 | schema_file: example/schema.json 22 | - action: delete_table 23 | <<: *bigquery 24 | -------------------------------------------------------------------------------- /example/application_default.yml: -------------------------------------------------------------------------------- 1 | bigquery: &bigquery 2 | # project: read from ~/.config/gcloud/configurations/config_default 3 | dataset: your_dataset_name 4 | table: your_table_name 5 | 6 | actions: 7 | - action: create_dataset 8 | <<: *bigquery 9 | - action: migrate_table 10 | <<: *bigquery 11 | columns: 12 | - { name: 'timestamp', type: 'TIMESTAMP' } 13 | - name: 'record' 14 | type: 'RECORD' 15 | fields: 16 | - { name: 'string', type: 'STRING' } 17 | - { name: 'integer', type: 'INTEGER' } 18 | - { name: 'bytes', type: 'BYTES' } 19 | - action: migrate_table 20 | <<: *bigquery 21 | schema_file: example/schema.json 22 | - action: delete_table 23 | <<: *bigquery 24 | -------------------------------------------------------------------------------- /example/migrate_partitioned_table.yml: -------------------------------------------------------------------------------- 1 | bigquery: &bigquery 2 | credentials_file: example/your-project-000.json 3 | dataset: your_dataset_name 4 | table: your_table_name 5 | 6 | actions: 7 | - action: create_dataset 8 | <<: *bigquery 9 | - action: migrate_partitioned_table 10 | <<: *bigquery 11 | columns: 12 | - { name: 'timestamp', type: 'TIMESTAMP' } 13 | - name: 'record' 14 | type: 'RECORD' 15 | fields: 16 | - { name: 'string', type: 'STRING' } 17 | - { name: 'integer', type: 'INTEGER' } 18 | - { name: 'bytes', type: 'BYTES' } 19 | - action: migrate_partitioned_table 20 | <<: *bigquery 21 | schema_file: example/schema.json 22 | - action: delete_table 23 | <<: *bigquery 24 | -------------------------------------------------------------------------------- /lib/bigquery_migration.rb: 
-------------------------------------------------------------------------------- 1 | require "bigquery_migration/version" 2 | require "bigquery_migration/error" 3 | require "bigquery_migration/schema" 4 | require "bigquery_migration/logger" 5 | require "bigquery_migration/bigquery_wrapper" 6 | 7 | class BigqueryMigration 8 | def self.logger 9 | @logger ||= Logger.new(STDOUT) 10 | end 11 | 12 | def self.logger=(logger) 13 | @logger = logger 14 | end 15 | 16 | def initialize(*args) 17 | @wrapper = BigqueryWrapper.new(*args) 18 | end 19 | 20 | # Delegate to BigqueryWrapper instance 21 | BigqueryWrapper.instance_methods(false).each do |name| 22 | next if method_defined?(name) 23 | class_eval <<-"EOS", __FILE__, __LINE__ + 1 24 | def #{name}(*args, &block) 25 | @wrapper.#{name}(*args, &block) 26 | end 27 | EOS 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /example/migrate_clustered_table.yml: -------------------------------------------------------------------------------- 1 | bigquery: &bigquery 2 | credentials_file: example/your-project-000.json 3 | dataset: your_dataset_name 4 | table: your_clustered_table_name 5 | clustering: 6 | fields: 7 | - timestamp 8 | - integer 9 | 10 | actions: 11 | - action: create_dataset 12 | <<: *bigquery 13 | - action: migrate_partitioned_table 14 | <<: *bigquery 15 | columns: 16 | - { name: 'timestamp', type: 'TIMESTAMP' } 17 | - { name: 'integer', type: 'INTEGER' } 18 | - name: 'record' 19 | type: 'RECORD' 20 | fields: 21 | - { name: 'string', type: 'STRING' } 22 | - { name: 'integer', type: 'INTEGER' } 23 | - { name: 'bytes', type: 'BYTES' } 24 | - action: migrate_partitioned_table 25 | <<: *bigquery 26 | schema_file: example/schema.json 27 | - action: delete_table 28 | <<: *bigquery 29 | -------------------------------------------------------------------------------- /lib/bigquery_migration/hash_util.rb: -------------------------------------------------------------------------------- 1 | class BigqueryMigration 2 | class HashUtil 3 | def self.deep_symbolize_keys(hash) 4 | if hash.is_a?(Hash) 5 | hash.map do |key, val| 6 | new_key = key.to_sym 7 | new_val = deep_symbolize_keys(val) 8 | [new_key, new_val] 9 | end.to_h 10 | elsif hash.is_a?(Array) 11 | hash.map do |val| 12 | deep_symbolize_keys(val) 13 | end 14 | else 15 | hash 16 | end 17 | end 18 | 19 | def self.deep_stringify_keys(hash) 20 | if hash.is_a?(Hash) 21 | hash.map do |key, val| 22 | new_key = key.to_s 23 | new_val = deep_stringify_keys(val) 24 | [new_key, new_val] 25 | end.to_h 26 | elsif hash.is_a?(Array) 27 | hash.map do |val| 28 | deep_stringify_keys(val) 29 | end 30 | else 31 | hash 32 | end 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/bigquery_migration/logger.rb: -------------------------------------------------------------------------------- 1 | require 'logger' 2 | 3 | class BigqueryMigration 4 | class LogFormatter 5 | FORMAT = "%s [%s] %s\n" 6 | 7 | def initialize(opts={}) 8 | end 9 | 10 | def call(severity, time, progname, msg) 11 | FORMAT % [format_datetime(time), severity, format_message(msg)] 12 | end 13 | 14 | private 15 | def format_datetime(time) 16 | time.iso8601 17 | end 18 | 19 | def format_severity(severity) 20 | severity 21 | end 22 | 23 | def format_message(message) 24 | case message 25 | when ::Exception 26 | e = message 27 | "#{e.class} (#{e.message})\n #{e.backtrace.join("\n ")}" 28 | else 29 | message.to_s 30 | end 31 | end 32 | end 33 | 34 | 
class Logger < ::Logger 35 | def initialize(logdev, shift_age = 0, shift_size = 1048576) 36 | logdev = STDOUT if logdev == 'STDOUT' 37 | super(logdev, shift_age, shift_size) 38 | @formatter = LogFormatter.new 39 | end 40 | 41 | def write(msg) 42 | @logdev.write msg 43 | end 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /lib/bigquery_migration/time_with_zone.rb: -------------------------------------------------------------------------------- 1 | require 'tzinfo' 2 | 3 | class BigqueryMigration 4 | class TimeWithZone 5 | # [+-]HH:MM, [+-]HHMM, [+-]HH 6 | NUMERIC_PATTERN = %r{\A[+-]\d\d(:?\d\d)?\z} 7 | 8 | # Region/Zone, Region/Zone/Zone 9 | NAME_PATTERN = %r{\A[^/]+/[^/]+(/[^/]+)?\z} 10 | 11 | class << self 12 | def time_with_zone(time, timezone) 13 | time.localtime(zone_offset(timezone)) 14 | end 15 | 16 | def strptime_with_zone(date, format, timezone) 17 | time = Time.strptime(date, format) 18 | _utc_offset = time.utc_offset 19 | _zone_offset = zone_offset(timezone) 20 | time.localtime(_zone_offset) + _utc_offset - _zone_offset 21 | end 22 | 23 | private 24 | def zone_offset(timezone) 25 | if NUMERIC_PATTERN === timezone 26 | Time.zone_offset(timezone) 27 | elsif NAME_PATTERN === timezone 28 | tz = TZInfo::Timezone.get(timezone) 29 | tz.current_period.utc_total_offset 30 | elsif "UTC" == timezone # special treatment 31 | 0 32 | else 33 | raise ArgumentError, "timezone format is invalid: #{timezone}" 34 | end 35 | end 36 | end 37 | end 38 | end 39 | -------------------------------------------------------------------------------- /bigquery_migration.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'bigquery_migration/version' 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "bigquery_migration" 8 | spec.version = BigqueryMigration::VERSION 9 | spec.authors = ["Naotoshi Seo", "kysnm", "potato2003"] 10 | spec.email = ["sonots@gmail.com", "tokyoincidents.g@gmail.com", "potato2003@gmail.com"] 11 | 12 | spec.summary = %q{Migrate BigQuery table schema} 13 | 
spec.description = %q{Migrate BigQuery table schema.} 14 | spec.homepage = "https://github.com/sonots/bigquery_migration" 15 | spec.license = "MIT" 16 | 17 | spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } 18 | spec.bindir = "exe" 19 | spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 20 | spec.require_paths = ["lib"] 21 | 22 | spec.add_dependency "google-api-client" 23 | spec.add_dependency "tzinfo" 24 | spec.add_dependency "thor" 25 | spec.add_dependency "inifile" 26 | 27 | spec.add_development_dependency "bundler", "~> 1.11" 28 | spec.add_development_dependency "rake", "~> 10.0" 29 | spec.add_development_dependency "pry-byebug" 30 | spec.add_development_dependency "test-unit" 31 | spec.add_development_dependency "test-unit-rr" 32 | end 33 | -------------------------------------------------------------------------------- /lib/bigquery_migration/config_loader.rb: -------------------------------------------------------------------------------- 1 | require 'set' 2 | require 'yaml' 3 | require 'erb' 4 | require 'ostruct' 5 | 6 | class BigqueryMigration 7 | class ConfigLoader 8 | attr_reader :config_path, :namespace 9 | 10 | class AlreayIncluded < ::StandardError; end 11 | 12 | def initialize(config_path, vars = {}) 13 | @config_path = File.expand_path(config_path) 14 | @included_files = Set.new 15 | @namespace = OpenStruct.new(vars) 16 | 17 | unless @namespace.respond_to?(:include_file) 18 | itself = self 19 | # ToDo: better way? 20 | @namespace.define_singleton_method(:include_file) do |path| 21 | caller_path = caller[0][/^([^:]+):\d+:in `[^']*'$/, 1] 22 | abs_path = File.expand_path(path, File.dirname(caller_path)) 23 | if File.extname(path) == '.erb' 24 | itself.load_erb(abs_path) 25 | else 26 | File.read(abs_path) 27 | end 28 | end 29 | end 30 | end 31 | 32 | def load 33 | if File.extname(config_path) == '.erb' 34 | YAML.load(load_erb(config_path)) 35 | else 36 | YAML.load(File.read(config_path)) 37 | end 38 | end 39 | 40 | def load_erb(path = config_path) 41 | unless @included_files.add?(path) 42 | raise AlreayIncluded, "#{path} was included twice" 43 | end 44 | 45 | raw = File.read(path) 46 | erb = ERB.new(raw, nil, "-") 47 | erb.filename = path 48 | erb.result(namespace.instance_eval { binding }) 49 | end 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.3.2 (2019/04/29) 2 | 3 | Enhancements: 4 | 5 | * Support clustered table 6 | 7 | # 0.3.1 (2018/05/23) 8 | 9 | Enhancements: 10 | 11 | * Support newly added location option of google-api-ruby-client. 
12 | 13 | # 0.3.0 (2017/04/26) 14 | 15 | Enhancements: 16 | 17 | * Support more authentication methods such as oauth, compute_engine, application_default 18 | 19 | # 0.2.2 (2017/04/04) 20 | 21 | Enhancements: 22 | 23 | * Support google-api-ruby-client >= v0.11.0 24 | 25 | # 0.2.1 (2017/03/31) 26 | 27 | Enhancements: 28 | 29 | * Accept DATE, DATETIME, TIME as column types 30 | 31 | # 0.2.0 (2016/10/03) 32 | 33 | Enhancements: 34 | 35 | * Support migrate_partitioned_table 36 | 37 | Fixes: 38 | 39 | * Fix list_table_data for when a value is an empty hash 40 | 41 | # 0.1.7 (2016/09/17) 42 | 43 | Fixes: 44 | 45 | * Prohibit to create a table with empty columns 46 | * Create a table only if a table does not exist 47 | 48 | # 0.1.6 (2016/07/26) 49 | 50 | Fixes: 51 | 52 | * Fix empty hash to nil for list table data 53 | 54 | # 0.1.5 (2016/07/25) 55 | 56 | Enhancements: 57 | 58 | * Support record type and repeated mode for list table data 59 | 60 | # 0.1.4 (2016/07/12) 61 | 62 | Fixes: 63 | 64 | * Fix to allow downcase type and mode 65 | 66 | # 0.1.3 (2016/04/22) 67 | 68 | Enhancements: 69 | 70 | * Support new BYTES types 71 | * Add exe/bq-migrate as an alias to exe/bq_migrate 72 | 73 | # 0.1.2 (2016/04/14) 74 | 75 | Changes: 76 | 77 | * Genearate job_id on client side as [google recommends](https://cloud.google.com/bigquery/docs/managing_jobs_datasets_projects#managingjobs) 78 | 79 | # 0.1.1 (2016/04/12) 80 | 81 | Changes: 82 | 83 | * Expose wait_load method 84 | 85 | # 0.1.0 (2016/04/08) 86 | 87 | Initial release 88 | -------------------------------------------------------------------------------- /lib/bigquery_migration/action_runner.rb: -------------------------------------------------------------------------------- 1 | require_relative 'config_loader' 2 | require_relative 'error' 3 | require_relative 'action' 4 | require_relative 'hash_util' 5 | 6 | class BigqueryMigration 7 | class ActionRunner 8 | attr_reader :config, :config_path, :opts 9 | 10 | def initialize(config_path = nil, opts = {}) 11 | @config_path = config_path 12 | @opts = opts 13 | config = ConfigLoader.new(@config_path, opts[:vars]).load 14 | @config = HashUtil.deep_symbolize_keys(config) 15 | validate_config! 16 | end 17 | 18 | def run 19 | success, responses = run_actions 20 | { success: success, dry_run: @opts[:dry_run], actions: responses } 21 | end 22 | 23 | def run_actions 24 | success = true 25 | responses = [] 26 | 27 | @config[:actions].each do |action_config| 28 | _success, result = Action.new(action_config, @opts).run 29 | response = action_config.merge({'result' => result}) 30 | responses << response 31 | unless _success 32 | success = false 33 | break 34 | end 35 | end 36 | 37 | [success, responses] 38 | end 39 | 40 | def validate_config! 
41 | unless config.is_a?(Hash) 42 | raise ConfigError, "config file format has to be YAML Hash" 43 | end 44 | 45 | unless config[:actions] 46 | raise ConfigError, "config must have `actions` key" 47 | end 48 | 49 | unless config[:actions].is_a?(Array) 50 | raise ConfigError, "config[:actions] must be an Array" 51 | end 52 | 53 | config[:actions].each do |action_config| 54 | unless action_config[:action] 55 | raise ConfigError, "Elements of `config[:actions]` must have `action` key" 56 | end 57 | end 58 | end 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, and in the interest of 4 | fostering an open and welcoming community, we pledge to respect all people who 5 | contribute through reporting issues, posting feature requests, updating 6 | documentation, submitting pull requests or patches, and other activities. 7 | 8 | We are committed to making participation in this project a harassment-free 9 | experience for everyone, regardless of level of experience, gender, gender 10 | identity and expression, sexual orientation, disability, personal appearance, 11 | body size, race, ethnicity, age, religion, or nationality. 12 | 13 | Examples of unacceptable behavior by participants include: 14 | 15 | * The use of sexualized language or imagery 16 | * Personal attacks 17 | * Trolling or insulting/derogatory comments 18 | * Public or private harassment 19 | * Publishing other's private information, such as physical or electronic 20 | addresses, without explicit permission 21 | * Other unethical or unprofessional conduct 22 | 23 | Project maintainers have the right and responsibility to remove, edit, or 24 | reject comments, commits, code, wiki edits, issues, and other contributions 25 | that are not aligned to this Code of Conduct, or to ban temporarily or 26 | permanently any contributor for other behaviors that they deem inappropriate, 27 | threatening, offensive, or harmful. 28 | 29 | By adopting this Code of Conduct, project maintainers commit themselves to 30 | fairly and consistently applying these principles to every aspect of managing 31 | this project. Project maintainers who do not follow or enforce the Code of 32 | Conduct may be permanently removed from the project team. 33 | 34 | This code of conduct applies both within project spaces and in public spaces 35 | when an individual is representing the project or its community. 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 38 | reported by contacting a project maintainer at sonots@gmail.com. All 39 | complaints will be reviewed and investigated and will result in a response that 40 | is deemed necessary and appropriate to the circumstances. Maintainers are 41 | obligated to maintain confidentiality with regard to the reporter of an 42 | incident. 
43 | 44 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 45 | version 1.3.0, available at 46 | [http://contributor-covenant.org/version/1/3/0/][version] 47 | 48 | [homepage]: http://contributor-covenant.org 49 | [version]: http://contributor-covenant.org/version/1/3/0/ -------------------------------------------------------------------------------- /lib/bigquery_migration/cli.rb: -------------------------------------------------------------------------------- 1 | require 'thor' 2 | require 'json' 3 | require 'bigquery_migration' 4 | require_relative 'action_runner' 5 | require_relative 'hash_util' 6 | 7 | class BigqueryMigration 8 | class CLI < Thor 9 | # cf. http://qiita.com/KitaitiMakoto/items/c6b9d6311c20a3cc21f9 10 | def self.exit_on_failure? 11 | true 12 | end 13 | 14 | # `run` is reserved by thor, we have to use def _run 15 | map "run" => "_run" 16 | 17 | option :config_path, :aliases => ['-c'], :type => :string, 18 | :default => 'config.yml' 19 | option :log_level, :aliases => ["-l"], :type => :string, 20 | :desc => 'Log level such as fatal, error, warn, info, or debug', 21 | :default => 'info' 22 | option :log, :type => :string, 23 | :desc => 'Output log to a file', 24 | :default => 'STDOUT' 25 | option :stdout, :type => :string, 26 | :desc => 'Redirect STDOUT to a file', 27 | :default => 'STDOUT' 28 | option :stderr, :type => :string, 29 | :desc => 'Redirect STDERR to a file', 30 | :default => 'STDERR' 31 | option :exec, :type => :boolean, 32 | :desc => 'Execute or dry-run (Default: dry-run)', 33 | :default => false 34 | option :vars, :type => :hash, 35 | :desc => 'Variables used in ERB, thor hash format' 36 | option :output, :aliases => ["-o"], :type => :string, 37 | :desc => 'Output result yaml to a file', 38 | :default => 'STDOUT' 39 | 40 | desc 'run ', 'run bigquery_migration' 41 | def _run(config_path) 42 | opts = options.merge( 43 | dry_run: !options[:exec] 44 | ) 45 | 46 | init_logger 47 | reopen_stdout 48 | reopen_stderr 49 | 50 | result = ActionRunner.new(config_path, opts).run 51 | open_output do |io| 52 | io.puts mask_secret(HashUtil.deep_stringify_keys(result).to_yaml) 53 | logger.info { "DRY-RUN has finished. Use --exec option to run." 
} if opts[:dry_run] 54 | end 55 | exit(1) unless result[:success] 56 | end 57 | 58 | private 59 | 60 | def logger 61 | BigqueryMigration.logger 62 | end 63 | 64 | def init_logger 65 | logger = BigqueryMigration::Logger.new(options[:log]) 66 | logger.level = options[:log_level] 67 | BigqueryMigration.logger = logger 68 | end 69 | 70 | def reopen_stdout 71 | unless options[:stdout] == 'STDOUT' 72 | $stdout.reopen(options[:stdout]) 73 | end 74 | $stdout.sync = true 75 | end 76 | 77 | def reopen_stderr 78 | unless options[:stderr] == 'STDERR' 79 | $stderr.reopen(options[:stderr]) 80 | end 81 | $stderr.sync = true 82 | end 83 | 84 | def open_output 85 | output = options[:output] 86 | if output == 'STDOUT' 87 | yield($stdout) 88 | elsif output == 'STDERR' 89 | yield($stderr) 90 | else 91 | File.open(output, 'w') do |io| 92 | yield(io) 93 | end 94 | end 95 | end 96 | 97 | def mask_secret(yaml_string) 98 | %w(password key).each do |secret| 99 | yaml_string.gsub!(/([^ ]*#{secret}): .*$/, '\1: xxxxx') 100 | end 101 | yaml_string.gsub!(/(-----BEGIN\s+PRIVATE\s+KEY-----)[0-9A-Za-z+\/=\s\\]+(-----END\s+PRIVATE\s+KEY-----)/m, '\1 xxxxx \2') 102 | yaml_string 103 | end 104 | end 105 | end 106 | -------------------------------------------------------------------------------- /lib/bigquery_migration/action.rb: -------------------------------------------------------------------------------- 1 | require_relative 'schema' 2 | require_relative 'error' 3 | require_relative 'hash_util' 4 | require_relative 'bigquery_wrapper' 5 | 6 | class BigqueryMigration 7 | class Action 8 | attr_reader :config, :opts 9 | 10 | def initialize(config, opts = {}) 11 | @config = HashUtil.deep_symbolize_keys(config) 12 | @opts = HashUtil.deep_symbolize_keys(opts) 13 | 14 | @action = @config[:action] 15 | unless self.class.supported_actions.include?(@action) 16 | raise ConfigError, "Action #{@action} is not supported" 17 | end 18 | end 19 | 20 | def run 21 | begin 22 | success = true 23 | result = send(@action) 24 | rescue => e 25 | result = { error: e.message, error_class: e.class.to_s, error_backtrace: e.backtrace } 26 | success = false 27 | ensure 28 | success = false if result[:success] == false 29 | end 30 | [success, result] 31 | end 32 | 33 | def self.supported_actions 34 | Set.new(%w[ 35 | create_dataset 36 | create_table 37 | delete_table 38 | patch_table 39 | migrate_table 40 | insert 41 | preview 42 | insert_select 43 | copy_table 44 | table_info 45 | migrate_partitioned_table 46 | ]) 47 | end 48 | 49 | def client 50 | @client ||= BigqueryMigration.new(@config, @opts) 51 | end 52 | 53 | def create_dataset 54 | client.create_dataset 55 | end 56 | 57 | def create_table 58 | client.create_table(columns: config[:columns]) 59 | end 60 | 61 | def delete_table 62 | client.delete_table 63 | end 64 | 65 | def patch_table 66 | client.patch_table( 67 | columns: config[:columns], 68 | add_columns: config[:add_columns] 69 | ) 70 | end 71 | 72 | def migrate_table 73 | client.migrate_table( 74 | schema_file: config[:schema_file], 75 | columns: config[:columns], 76 | backup_dataset: config[:backup_dataset], 77 | backup_table: config[:backup_table] 78 | ) 79 | end 80 | 81 | def migrate_partitioned_table 82 | client.migrate_partitioned_table( 83 | schema_file: config[:schema_file], 84 | columns: config[:columns], 85 | ) 86 | end 87 | 88 | def insert 89 | client.insert_all_table_data(rows: config[:rows]) 90 | end 91 | 92 | def preview 93 | client.list_table_data(max_results: config[:max_results]) 94 | end 95 | 96 | def copy_table 97 | 
client.copy_table( 98 | destination_table: config[:destination_table], 99 | destination_dataset: config[:destination_dataset], 100 | source_table: config[:source_table], 101 | source_dataset: config[:source_dataset], 102 | write_disposition: config[:write_disposition], 103 | ) 104 | end 105 | 106 | def insert_select 107 | client.insert_select( 108 | query: config[:query], 109 | destination_table: config[:destination_table], 110 | destination_dataset: config[:destination_dataset], 111 | write_disposition: config[:write_disposition], 112 | ) 113 | end 114 | 115 | def table_info 116 | if config[:prefix] 117 | tables = client.list_tables[:tables].select {|table| table.start_with?(config[:prefix]) } 118 | table_infos = tables.map do |table| 119 | result = client.get_table(table: table) 120 | result.delete(:responses) 121 | result 122 | end 123 | result = { 124 | sum_num_bytes: table_infos.map {|info| info[:num_bytes].to_i }.inject(:+), 125 | sum_num_rows: table_infos.map {|info| info[:num_rows].to_i }.inject(:+), 126 | table_infos: table_infos, 127 | } 128 | else 129 | client.get_table 130 | end 131 | end 132 | end 133 | end 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BigqueryMigration 2 | 3 | BigqueryMigraiton is a tool or a ruby library to migrate (or alter) BigQuery table schema. 4 | 5 | ## Requirements 6 | 7 | * Ruby >= 2.3.0 8 | 9 | ## Installation 10 | 11 | Add this line to your application's Gemfile: 12 | 13 | ```ruby 14 | gem 'bigquery_migration' 15 | ``` 16 | 17 | And then execute: 18 | 19 | $ bundle 20 | 21 | Or install it yourself as: 22 | 23 | $ gem install bigquery_migration 24 | 25 | ## Usage 26 | 27 | Define your desired schema, this tool automatically detects differences with the target table, and takes care of adding columns, or dropping columns (actually, select & copy is issued), or changing types. 
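For instance (a hypothetical illustration using the library API shown under "Library" below; `config` is the connection hash from that section, and the existing table is assumed to already have an `id` column plus an `obsolete` column):

```ruby
# Hypothetical example: reconcile the table with the desired column list.
desired_columns = [
  { name: 'id',    type: 'INTEGER' },
  { name: 'added', type: 'STRING' },  # missing in the table -> added via patch_table (not charged)
  # 'obsolete' is not listed          -> dropped via select & copy (a query is issued, so charged)
]
BigqueryMigration.new(config).migrate_table(columns: desired_columns)
```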
28 | 29 | ### CLI 30 | 31 | config.yml 32 | 33 | ```yaml 34 | bigquery: &bigquery 35 | json_keyfile: your-project-000.json 36 | dataset: your_dataset_name 37 | table: your_table_name 38 | # If your data is in a location other than the US or EU multi-region, you must specify the location 39 | # location: asia-northeast1 40 | 41 | actions: 42 | - action: create_dataset 43 | <<: *bigquery 44 | - action: migrate_table 45 | <<: *bigquery 46 | columns: 47 | - { name: 'timestamp', type: 'TIMESTAMP' } 48 | - name: 'record' 49 | type: 'RECORD' 50 | fields: 51 | - { name: 'string', type: 'STRING' } 52 | - { name: 'integer', type: 'INTEGER' } 53 | ``` 54 | 55 | Run 56 | 57 | ``` 58 | $ bundle exec bq_migrate run config.yml # dry-run 59 | $ bundle exec bq_migrate run config.yml --exec 60 | ``` 61 | 62 | ### Library 63 | 64 | ```ruby 65 | require 'bigquery_migration' 66 | 67 | config = { 68 | json_keyfile: '/path/to/your-project-000.json', 69 | dataset: 'your_dataset_name', 70 | table: 'your_table_name', 71 | 72 | # If your data is in a location other than the US or EU multi-region, you must specify the location 73 | # location: asia-northeast1, 74 | } 75 | columns = [ 76 | { name: 'string', type: 'STRING' }, 77 | { name: 'record', type: 'RECORD', fields: [ 78 | { name: 'integer', type: 'INTEGER' }, 79 | { name: 'timestamp', type: 'TIMESTAMP' }, 80 | ] } 81 | ] 82 | 83 | migrator = BigqueryMigration.new(config) 84 | migrator.migrate_table(columns: columns) 85 | # migrator.migrate_table(schema_file: '/path/to/schema.json') 86 | ``` 87 | 88 | ## LIMITATIONS 89 | 90 | There are serveral limitations because of BigQuery API limitations: 91 | 92 | * Can not handle `mode: REPEATED` columns 93 | * Can add only `mode: NULLABLE` columns 94 | * Columns become `mode: NULLABLE` after type changing 95 | * Will be charged because a query is issued (If only adding columns, it is not charged because it uses patch_table API) 96 | 97 | This tool has an advantage that it is **faster** than reloading data entirely. 98 | 99 | ## Further Details 100 | 101 | * See [BigQueryテーブルのスキーマを変更する - sonots:blog](http://blog.livedoor.jp/sonots/archives/47294596.html) (Japanese) 102 | 103 | ## Development 104 | 105 | ### Run example: 106 | 107 | **Service Account** 108 | 109 | Prepare your service account json at `example/your-project-000.json`, then 110 | 111 | ``` 112 | $ bundle exec bq_migrate run example/example.yml # dry-run 113 | $ bundle exec bq_migrate run example/example.yml --exec 114 | ``` 115 | 116 | **OAuth** 117 | 118 | Install gcloud into your development environment: 119 | 120 | ``` 121 | curl https://sdk.cloud.google.com | bash 122 | gcloud init 123 | gcloud auth login 124 | gcloud auth application-default login 125 | gcloud config set project 126 | ``` 127 | 128 | Make sure `gcloud` works 129 | 130 | ``` 131 | gcloud compute instances list 132 | ``` 133 | 134 | Run as: 135 | 136 | ``` 137 | $ bundle exec bq_migrate run example/application_default.yml # dry-run 138 | $ bundle exec bq_migrate run example/application_default.yml --exec 139 | ``` 140 | 141 | ### Run test: 142 | 143 | ``` 144 | $ bundle exec rake test 145 | ``` 146 | 147 | To run tests which directly connects to BigQuery, prepare `example/your-project-000.json`, then 148 | 149 | ``` 150 | $ bundle exec rake test 151 | ``` 152 | 153 | ## Contributing 154 | 155 | Bug reports and pull requests are welcome on GitHub at https://github.com/sonots/bigquery_migration. 
This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct. 156 | 157 | 158 | ## License 159 | 160 | The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT). 161 | -------------------------------------------------------------------------------- /lib/bigquery_migration/table_data.rb: -------------------------------------------------------------------------------- 1 | # This codes are translated from BigQuery Web console's JavaScript 2 | require_relative 'error' 3 | 4 | class BigqueryMigration 5 | class TableData 6 | attr_reader :rows, :columns 7 | 8 | def logger 9 | BigqueryMigration.logger 10 | end 11 | 12 | def initialize(columns, rows) 13 | @columns = columns || raise(Error, '`columns` is required.') 14 | @rows = rows || raise(Error, '`rows` is required.') 15 | end 16 | 17 | # format list_table_data response rows which is like 18 | # 19 | # [ 20 | # { f: [ 21 | # { v: "foo" }, 22 | # { v: "1" }, 23 | # { v: [] }, 24 | # { v: "1.1" }, 25 | # { v: "true" }, 26 | # { v: "1.444435200E9" } 27 | # ] }, 28 | # { f: [ 29 | # { v: "foo" }, 30 | # { v: "2" }, 31 | # { v: [ 32 | # { v: "foo" }, 33 | # { v: "bar" } 34 | # ] }, 35 | # { v: "2.2" }, 36 | # { v: "false" }, 37 | # { v: "1.444435200E9" } 38 | # ] } 39 | # ] 40 | # 41 | # into 42 | # 43 | # [ 44 | # # first row 45 | # [ 46 | # [ "foo", "1", nil, "1.1", "true", "1.444435200E9" ] 47 | # ], 48 | # # second row 49 | # [ 50 | # [ "foo", "2", "foo", "2.2", "false", "1.444435200E9" ], 51 | # [ nil, nil, "bar", nil, nil, nil ], 52 | # ], 53 | # ] 54 | def values 55 | values = @rows.map do |row| 56 | repeated_count = repeated_count(columns: @columns, rows: row) 57 | formatted_row = [] 58 | repeated_count.times do |count| 59 | formatted_row << format_row(columns: @columns, rows: row, count: count) 60 | end 61 | formatted_row 62 | end 63 | # flattern if there is no repeated column for backward compatibility 64 | values.map(&:length).max > 1 ? values : values.flatten(1) 65 | end 66 | 67 | private 68 | 69 | # Count maximum number of rows on repeated columns 70 | # 71 | # This method called recursively, rows must be a hash and hash has key f: 72 | def repeated_count(columns: nil, rows: nil) 73 | return 1 if (rows.nil? || rows.empty?) 74 | validate_rows!(rows) 75 | rows[:f].zip(columns).map do |row, column| 76 | if column[:type] == 'RECORD' 77 | if column[:mode] == 'REPEATED' 78 | if row[:v].length == 0 79 | 1 80 | else 81 | row[:v].map do |v| 82 | v[:repeated_count] = repeated_count(columns: column[:fields], rows: v[:v]) 83 | end.inject(:+) 84 | end 85 | else 86 | repeated_count(columns: column[:fields], rows: row[:v]) 87 | end 88 | elsif column[:mode] == 'REPEATED' 89 | [(row[:v] || []).length, 1].max 90 | else 91 | 1 92 | end 93 | end.max 94 | end 95 | 96 | # This method called recursively. 97 | # So, rows must be a hash and hash has key f:. 98 | def format_row(columns: nil, rows: nil, count: nil) 99 | formatted_row = [] 100 | return [nil] if (rows.nil? || rows.empty?) 
101 | validate_rows!(rows) 102 | rows[:f].zip(columns).each do |row, column| 103 | if column[:type] == 'RECORD' 104 | if column[:mode] == 'REPEATED' 105 | recursive = false 106 | current = 0 107 | row[:v].each do |v| 108 | repeated_count = v[:repeated_count] 109 | if current <= count && count < (current + repeated_count) 110 | formatted_row.concat format_row(columns: column[:fields], rows: v[:v], count: count - current) 111 | recursive = true 112 | end 113 | current = current + repeated_count 114 | end 115 | unless recursive 116 | nil_count = get_nil_count(column[:fields]) 117 | formatted_row.concat(Array.new(nil_count)) 118 | end 119 | elsif row[:v].nil? 120 | nil_count = get_nil_count(column[:fields]) 121 | formatted_row.concat(Array.new(nil_count)) 122 | else 123 | formatted_row.concat format_row(columns: column[:fields], rows: row[:v], count: count) 124 | end 125 | elsif column[:mode] == 'REPEATED' 126 | v = row[:v] 127 | count < v.length ? formatted_row.push(normalize_value(v[count][:v])) : formatted_row.push(nil) 128 | elsif count == 0 129 | formatted_row.push((normalize_value(row[:v]))) 130 | else 131 | formatted_row.push(nil) 132 | end 133 | end 134 | formatted_row 135 | end 136 | 137 | # special treatment empty hash. 138 | # nil is converted into {} by to_h 139 | def normalize_value(v) 140 | v.is_a?(Hash) && v.empty? ? nil : v 141 | end 142 | 143 | def get_nil_count(fields) 144 | fields.inject(0) do |acc, f| 145 | f[:type] == 'RECORD' ? acc + get_nil_count(f[:fields]) : acc + 1 146 | end 147 | end 148 | 149 | def validate_rows!(rows) 150 | raise Error, '`rows` must be a hash and hash has key `:f`.' if !rows.is_a?(Hash) || !rows.has_key?(:f) 151 | end 152 | end 153 | end 154 | -------------------------------------------------------------------------------- /lib/bigquery_migration/schema.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | require 'json' 3 | require_relative 'error' 4 | 5 | class BigqueryMigration 6 | class Schema < ::Array 7 | ALLOWED_FIELD_TYPES = Set.new(['STRING', 'INTEGER', 'FLOAT', 'BOOLEAN', 'RECORD', 'TIMESTAMP', 'BYTES', 'DATE', 'TIME', 'DATETIME']) 8 | ALLOWED_FIELD_MODES = Set.new(['NULLABLE', 'REQUIRED', 'REPEATED']) 9 | 10 | def initialize(columns = []) 11 | normalized = self.class.normalize_columns(columns) 12 | super(normalized) 13 | validate_columns! 14 | end 15 | 16 | def find_column_by_name(name) 17 | self.class.find_column_by_name(self, name) 18 | end 19 | 20 | def validate_columns! 21 | self.class.validate_columns!(self) 22 | end 23 | 24 | def validate_permitted_operations!(source_columns) 25 | target_columns = self 26 | self.class.validate_permitted_operations!(source_columns, target_columns) 27 | end 28 | 29 | def normalize_columns 30 | self.class.normalize_columns(self) 31 | end 32 | 33 | def shallow_normalize_columns 34 | self.class.shallow_normalize_columns(self) 35 | end 36 | def shallow_normalize_columns! 
37 | self.class.shallow_normalize_column!(self) 38 | end 39 | 40 | def flattened_columns 41 | self.class.flattened_columns(self) 42 | end 43 | 44 | def equals?(source_columns) 45 | self.class.equals?(source_columns, self) 46 | end 47 | 48 | # self - source_columns 49 | def diff_columns(source_columns) 50 | self.class.diff_columns(source_columns, self) 51 | end 52 | 53 | # diff with only column names 54 | # self - source_columns 55 | def diff_columns_by_name(source_columns) 56 | self.class.diff_columns_by_name(source_columns, self) 57 | end 58 | 59 | # A.merge!(B) => B overwrites A 60 | # A.reverse_merge!(B) => A overwrites B, but A is modified 61 | def reverse_merge!(source_columns) 62 | self.class.reverse_merge!(source_columns, self) 63 | end 64 | 65 | def reject_columns!(drop_columns) 66 | self.class.reject_columns!(drop_columns, self) 67 | end 68 | 69 | def build_query_fields(source_columns) 70 | self.class.build_query_fields(source_columns, self) 71 | end 72 | 73 | class << self 74 | # The name must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_), 75 | # and must start with a letter or underscore. The maximum length is 128 characters. 76 | def validate_name!(name) 77 | unless name =~ /\A[a-zA-Z_]+\w*\Z/ 78 | raise ConfigError, "Column name `#{name}` is invalid format" 79 | end 80 | unless name.length < 128 81 | raise ConfigError, "Column name `#{name}` must be less than 128" 82 | end 83 | end 84 | 85 | def validate_type!(type) 86 | unless ALLOWED_FIELD_TYPES.include?(type.upcase) 87 | raise ConfigError, "Column type `#{type}` is not allowed type" 88 | end 89 | end 90 | 91 | def validate_mode!(mode) 92 | unless ALLOWED_FIELD_MODES.include?(mode.upcase) 93 | raise ConfigError, "Column mode `#{mode}` is not allowed mode" 94 | end 95 | end 96 | 97 | def validate_columns!(columns) 98 | columns.each do |column| 99 | validate_name!(column[:name]) 100 | validate_type!(column[:type]) 101 | validate_mode!(column[:mode]) if column[:mode] 102 | 103 | if column[:type] == 'RECORD' 104 | validate_columns!(column[:fields]) 105 | end 106 | end 107 | end 108 | 109 | def find_column_by_name(columns, name) 110 | (columns || []).find { |c| c[:name] == name } 111 | end 112 | 113 | # validates permitted changes from old schema to new schema 114 | def validate_permitted_operations!(source_columns, target_columns) 115 | flattened_source_columns = flattened_columns(normalize_columns(source_columns)) 116 | flattened_target_columns = flattened_columns(normalize_columns(target_columns)) 117 | 118 | flattened_target_columns.keys.each do |flattened_name| 119 | next unless flattened_source_columns.key?(flattened_name) 120 | validate_permitted_operations_for_type!( 121 | flattened_source_columns[flattened_name], 122 | flattened_target_columns[flattened_name] 123 | ) 124 | validate_permitted_operations_for_mode!( 125 | flattened_source_columns[flattened_name], 126 | flattened_target_columns[flattened_name] 127 | ) 128 | end 129 | end 130 | 131 | # @param [Hash] source_column 132 | # @param [Hash] target_column 133 | # 134 | # Disallowed conversion rule is as follows: 135 | # 136 | # type: RECORD => type: others 137 | # mode: REPEATED => change type 138 | # 139 | def validate_permitted_operations_for_type!(source_column, target_column) 140 | source_column = shallow_normalize_column(source_column) 141 | target_column = shallow_normalize_column(target_column) 142 | 143 | msg = "(#{source_column.to_h} => #{target_column.to_h})" 144 | if source_column[:type] == 'RECORD' 145 | if target_column[:type] != 
'RECORD' 146 | raise ConfigError, "`RECORD` can not be changed #{msg}" 147 | end 148 | end 149 | if source_column[:mode] and source_column[:mode] == 'REPEATED' 150 | if source_column[:type] != target_column[:type] 151 | raise ConfigError, "`REPEATED` mode column's type can not be changed #{msg}" 152 | end 153 | end 154 | end 155 | 156 | # @param [Hash] source_column 157 | # @param [Hash] target_column 158 | # 159 | # Allowed conversion rule is as follows: 160 | # 161 | # (new) => NULLABLE, REPEATED 162 | # NULLABLE => NULLABLE 163 | # REQUIRED => REQUIRED, NULLABLE 164 | # REPEATED => REPEATED 165 | def validate_permitted_operations_for_mode!(source_column, target_column) 166 | source_column = shallow_normalize_column(source_column) 167 | target_column = shallow_normalize_column(target_column) 168 | source_mode = source_column[:mode] 169 | target_mode = target_column[:mode] 170 | 171 | return if source_mode == target_mode 172 | msg = "(#{source_column.to_h} => #{target_column.to_h})" 173 | 174 | case source_mode 175 | when nil 176 | if target_mode == 'REQUIRED' 177 | raise ConfigError, "Newly adding a `REQUIRED` column is not allowed #{msg}" 178 | end 179 | when 'NULLABLE' 180 | raise ConfigError, "`NULLABLE` column can not be changed #{msg}" 181 | when 'REQUIRED' 182 | if target_mode == 'REPEATED' 183 | raise ConfigError, "`REQUIRED` column can not be changed to `REPEATED` #{msg}" 184 | end 185 | when 'REPEATED' 186 | raise ConfigError, "`REPEATED` column can not be changed #{msg}" 187 | end 188 | end 189 | 190 | def normalize_columns(columns) 191 | columns = shallow_normalize_columns(columns) 192 | columns.map do |column| 193 | if column[:type] == 'RECORD' and column[:fields] 194 | column[:fields] = normalize_columns(column[:fields]) 195 | end 196 | column 197 | end 198 | end 199 | 200 | def shallow_normalize_columns(columns) 201 | columns.map {|column| shallow_normalize_column(column) } 202 | end 203 | 204 | def shallow_normalize_columns!(columns) 205 | columns.each {|column| shallow_normalize_column!(column) } 206 | columns 207 | end 208 | 209 | def shallow_normalize_column(column) 210 | shallow_normalize_column!(column.dup) 211 | end 212 | 213 | def shallow_normalize_column!(column) 214 | symbolize_keys!(column) 215 | column[:type] = column[:type].upcase if column[:type] 216 | column[:mode] ||= 'NULLABLE' 217 | column[:mode] = column[:mode].upcase 218 | column 219 | end 220 | 221 | def symbolize_keys!(column) 222 | new_column = column.map do |key, val| 223 | [key.to_sym, val] 224 | end.to_h 225 | column.replace(new_column) 226 | end 227 | 228 | # @param [Array] columns 229 | # [{ 230 | # name: 'citiesLived', 231 | # type: 'RECORD', 232 | # fields: [ 233 | # { 234 | # name: 'place', type: 'RECORD', 235 | # fields: [ 236 | # { name: 'city', type: 'STRING' }, { name: 'postcode', type: 'STRING' } 237 | # ] 238 | # }, 239 | # { name: 'yearsLived', type: 'INTEGER' } 240 | # ] 241 | # }] 242 | # @return Hash 243 | # { 244 | # 'citiesLived.place.city' => { 245 | # type: 'STRING' 246 | # }, 247 | # 'citiesLived.place.postcode' => { 248 | # type: 'STRING' 249 | # }, 250 | # 'citiesLived.yearsLived' => { 251 | # type: 'INTEGER' 252 | # } 253 | # } 254 | def flattened_columns(columns, parent_name: nil) 255 | result = {} 256 | columns.each do |column| 257 | column_name = parent_name.nil? ? 
column[:name] : "#{parent_name}.#{column[:name]}" 258 | if column[:type].upcase != 'RECORD' 259 | result[column_name] = {}.tap do |value| 260 | value[:type] = column[:type] 261 | value[:mode] = column[:mode] if column[:mode] 262 | end 263 | else 264 | result.merge!(flattened_columns(column[:fields], parent_name: column_name)) 265 | end 266 | end 267 | result 268 | end 269 | 270 | def equals?(source_columns, target_columns) 271 | diff_columns(source_columns, target_columns).empty? and \ 272 | diff_columns(target_columns, source_columns).empty? 273 | end 274 | 275 | # target_columns - source_columns 276 | def diff_columns(source_columns, target_columns) 277 | _target_columns = shallow_normalize_columns(target_columns) 278 | _source_columns = shallow_normalize_columns(source_columns) 279 | diff_columns = _target_columns - _source_columns # shallow diff 280 | 281 | diff_columns.map do |target_column| 282 | t = target_column 283 | source_column = find_column_by_name(_source_columns, target_column[:name]) 284 | next t unless source_column 285 | next t unless target_column[:type] == 'RECORD' and source_column[:type] == 'RECORD' 286 | next t unless target_column[:fields] and source_column[:fields] 287 | # recusive diff for RECORD columns 288 | diff_fields = diff_columns(source_column[:fields], target_column[:fields]) 289 | next nil if diff_fields.empty? # remove 290 | target_column[:fields] = diff_fields 291 | target_column 292 | end.compact 293 | end 294 | 295 | # diff with only column_names 296 | # target_columns - source_columns 297 | def diff_columns_by_name(source_columns, target_columns) 298 | _target_columns = shallow_normalize_columns(target_columns) 299 | _source_columns = shallow_normalize_columns(source_columns) 300 | diff_columns = _target_columns - _source_columns # shallow diff 301 | 302 | diff_columns.map do |target_column| 303 | t = target_column 304 | source_column = find_column_by_name(_source_columns, target_column[:name]) 305 | next t unless source_column 306 | next nil unless target_column[:type] == 'RECORD' and source_column[:type] == 'RECORD' 307 | next nil unless target_column[:fields] and source_column[:fields] 308 | # recusive diff for RECORD columns 309 | diff_fields = diff_columns_by_name(source_column[:fields], target_column[:fields]) 310 | next nil if diff_fields.empty? # remove 311 | target_column[:fields] = diff_fields 312 | target_column 313 | end.compact 314 | end 315 | 316 | # 1. target_column[:mode] ||= source_column[:mode] || 'NULLABLE' (not overwrite, but set if does not exist) 317 | # 2. 
Add into target_columns if a source column does not exist in target_columns 318 | # 319 | # @param [Array] source_columns 320 | # @param [Array] target_columns 321 | def reverse_merge!(source_columns, target_columns) 322 | shallow_normalize_columns!(source_columns) 323 | shallow_normalize_columns!(target_columns) 324 | 325 | source_columns.map do |source_column| 326 | if target_column = find_column_by_name(target_columns, source_column[:name]) 327 | target_column[:mode] ||= source_column[:mode] || 'NULLABLE' 328 | target_column[:type] ||= source_column[:type] # should never be happened 329 | # Recursive merge fields of `RECORD` type 330 | if target_column[:type] == 'RECORD' and target_column[:fields] and source_column[:fields] 331 | reverse_merge!(source_column[:fields], target_column[:fields]) 332 | end 333 | else 334 | target_column = source_column.dup 335 | target_column[:mode] ||= 'NULLABLE' 336 | target_columns << target_column 337 | end 338 | end 339 | target_columns 340 | end 341 | 342 | def reject_columns!(drop_columns, target_columns) 343 | flattened_drop_columns = flattened_columns(drop_columns) 344 | 345 | flattened_drop_columns.keys.each do |flattened_name| 346 | # paths like a %w(citiesLived place city child1) 347 | paths = flattened_name.split('.') 348 | # object_id of fields and target_columns are different. 349 | # But the internal elements refer to the same ones 350 | fields = target_columns 351 | paths.each do |path| 352 | # The last element of the path does not have the fields 353 | next if path == paths.last 354 | # find recursively 355 | column = fields.find { |f| f[:name] == path } 356 | next if column.nil? 357 | fields = column[:fields] 358 | end 359 | 360 | unless fields.empty? 361 | fields.delete_if { |f| f[:name] == paths.last } 362 | end 363 | end 364 | target_columns 365 | end 366 | 367 | def build_query_fields(source_columns, target_columns) 368 | flattened_source_columns = flattened_columns(source_columns) 369 | flattened_target_columns = flattened_columns(target_columns) 370 | 371 | query_fields = flattened_target_columns.map do |flattened_name, flattened_target_column| 372 | flattened_source_column = flattened_source_columns[flattened_name] 373 | target_type = flattened_target_column[:type].upcase 374 | 375 | if flattened_source_column 376 | "#{target_type}(#{flattened_name}) AS #{flattened_name}" 377 | else 378 | flattened_name 379 | # MEMO: NULL cast like "#{target_type}(NULL) AS #{flattened_name}" breaks RECORD columns as 380 | # INTEGER(NULL) AS add_record.add_record.add_column1 => add_record_add_record_add_column1 381 | # We have to add columns with patch_table beforehand 382 | end 383 | end 384 | end 385 | 386 | def make_nullable!(columns) 387 | columns.each do |column| 388 | if column[:fields] 389 | make_nullable!(column[:fields]) 390 | else 391 | column[:mode] = 'NULLABLE' 392 | end 393 | end 394 | columns 395 | end 396 | end 397 | end 398 | end 399 | -------------------------------------------------------------------------------- /test/test_schema.rb: -------------------------------------------------------------------------------- 1 | require_relative 'helper.rb' 2 | require 'bigquery_migration/schema' 3 | 4 | class BigqueryMigration 5 | class TestSchema < Test::Unit::TestCase 6 | def columns 7 | [ 8 | {name: 'boolean', type: 'BOOLEAN', mode: 'NULLABLE'}, 9 | {name: 'integer', type: 'INTEGER'}, 10 | {name: 'float', type: 'FLOAT'}, 11 | {name: 'string', type: 'STRING'}, 12 | {name: 'timstamp', type: 'TIMESTAMP'}, 13 | {name: 'record', type: 'RECORD', 
fields: [ 14 | {name: 'record', type: 'RECORD', fields: [ 15 | {name: 'string', type: 'STRING', mode: 'NULLABLE'}, 16 | ]}, 17 | ]} 18 | ] 19 | end 20 | 21 | sub_test_case "find_column_by_name" do 22 | def test_find_column_by_name 23 | expected = {name: 'boolean', type: 'BOOLEAN', mode: 'NULLABLE'} 24 | assert { Schema.find_column_by_name(columns, 'boolean') == expected } 25 | assert { Schema.new(columns).find_column_by_name('boolean') == expected } 26 | end 27 | end 28 | 29 | sub_test_case "validate_columns!" do 30 | def test_validate_columns_with_valid 31 | assert_nothing_raised { Schema.new(columns).validate_columns! } 32 | assert_nothing_raised { Schema.validate_columns!(columns) } 33 | 34 | no_mode = [{name: 'name', type: 'STRING'}] 35 | assert_nothing_raised { Schema.validate_columns!(no_mode) } 36 | 37 | downcase_type = [{name: 'name', type: 'string'}] 38 | assert_nothing_raised { Schema.validate_columns!(downcase_type) } 39 | 40 | upcase_type = [{name: 'name', type: 'STRING'}] 41 | assert_nothing_raised { Schema.validate_columns!(upcase_type) } 42 | 43 | downcase_mode = [{name: 'name', type: 'STRING', mode: 'nullable'}] 44 | assert_nothing_raised { Schema.validate_columns!(downcase_mode) } 45 | 46 | upcase_mode = [{name: 'name', type: 'STRING', mode: 'NULLABLE'}] 47 | assert_nothing_raised { Schema.validate_columns!(upcase_mode) } 48 | end 49 | 50 | def test_validate_columns_with_invalid 51 | no_name = [{}] 52 | assert_raise { Schema.validate_columns!(no_name) } 53 | 54 | invalid_name = [{name: '%&%&^**'}] 55 | assert_raise { Schema.validate_columns!(invalid_name) } 56 | 57 | long_name = [{name: 'a'*129}] 58 | assert_raise { Schema.validate_columns!(long_name) } 59 | 60 | no_type = [{name: 'name'}] 61 | assert_raise { Schema.validate_columns!(no_type) } 62 | 63 | invalid_type = [{name: 'name', type: 'foobar'}] 64 | assert_raise { Schema.validate_columns!(invalid_type) } 65 | 66 | invalid_mode = [{name: 'name', type: 'STRING', mode: 'foobar'}] 67 | assert_raise { Schema.validate_columns!(no_mode) } 68 | end 69 | end 70 | 71 | sub_test_case "normalize_columns" do 72 | def test_normalize_columns 73 | downcase_columns = [ 74 | {name: 'boolean', type: 'boolean', mode: 'nullable'}, 75 | {name: 'integer', type: 'integer'}, 76 | {name: 'float', type: 'float'}, 77 | {name: 'string', type: 'string'}, 78 | {name: 'timstamp', type: 'timestamp'}, 79 | {name: 'record', type: 'record', fields: [ 80 | {name: 'record', type: 'record', fields: [ 81 | {name: 'string', type: 'string', mode: 'nullable'}, 82 | ]}, 83 | ]} 84 | ] 85 | expected = [ 86 | {name: 'boolean', type: 'BOOLEAN', mode: 'NULLABLE'}, 87 | {name: 'integer', type: 'INTEGER', mode: 'NULLABLE'}, 88 | {name: 'float', type: 'FLOAT', mode: 'NULLABLE'}, 89 | {name: 'string', type: 'STRING', mode: 'NULLABLE'}, 90 | {name: 'timstamp', type: 'TIMESTAMP', mode: 'NULLABLE'}, 91 | {name: 'record', type: 'RECORD', mode: 'NULLABLE', fields: [ 92 | {name: 'record', type: 'RECORD', mode: 'NULLABLE', fields: [ 93 | {name: 'string', type: 'STRING', mode: 'NULLABLE'}, 94 | ]}, 95 | ]} 96 | ] 97 | result = Schema.normalize_columns(downcase_columns) 98 | assert { result == expected } 99 | result = Schema.new(downcase_columns).normalize_columns 100 | assert { result == expected } 101 | end 102 | end 103 | 104 | sub_test_case "flattened_columns" do 105 | def test_flattened_columns 106 | columns = [ 107 | { name: 'id', type: 'INTEGER' }, 108 | { name: 'citiesLived', type: 'RECORD', fields: [ 109 | { name: 'place', type: 'RECORD', fields: [ 110 | { name: 
'city', type: 'STRING' }, 111 | { name: 'postcode', type: 'STRING' } 112 | ] }, 113 | { name: 'yearsLived', type: 'INTEGER' } 114 | ] } 115 | ] 116 | 117 | expected = { 118 | 'id' => { type: 'INTEGER' }, 119 | 'citiesLived.place.city' => { type: 'STRING' }, 120 | 'citiesLived.place.postcode' => { type: 'STRING' }, 121 | 'citiesLived.yearsLived' => { type: 'INTEGER' } 122 | } 123 | result = Schema.flattened_columns(columns) 124 | assert { result == expected } 125 | end 126 | end 127 | 128 | sub_test_case "diff_columns" do 129 | sub_test_case "without intersect" do 130 | def subset 131 | [ 132 | {:name=>"remained_column", :type=>"INTEGER"}, 133 | {:name=>"record", 134 | :type=>"RECORD", 135 | :fields=>[ 136 | {:name=>"record", :type=>"RECORD", :fields=>[ 137 | {:name=>"remained_column", :type=>"STRING"} 138 | ]} 139 | ]} 140 | ] 141 | end 142 | 143 | def superset 144 | [ 145 | {:name=>"remained_column", :type=>"INTEGER"}, 146 | {:name=>"record", :type=>"RECORD", :fields=>[ 147 | {:name=>"record", :type=>"RECORD", :fields=>[ 148 | {:name=>"remained_column", :type=>"STRING"}, 149 | {:name=>"new_column", :type=>"INTEGER"} 150 | ]}, 151 | {:name=>"new_record", :type=>"RECORD", :fields=>[ 152 | {:name=>"new_column", :type=>"INTEGER"} 153 | ]} 154 | ]}, 155 | {:name=>"new_required_column", :type=>"INTEGER", :mode=>"REQUIRED"} 156 | ] 157 | end 158 | 159 | def test_diff_columns_subset 160 | result = Schema.new(subset).diff_columns(superset) 161 | assert { result == [] } 162 | end 163 | 164 | def test_diff_columns_superset 165 | expected = [ 166 | {:name=>"record", :type=>"RECORD", :fields=>[ 167 | {:name=>"record", :type=>"RECORD", :fields=>[ 168 | {:name=>"new_column", :type=>"INTEGER", :mode=>"NULLABLE" } 169 | ], :mode=>"NULLABLE"}, 170 | {:name=>"new_record", :type=>"RECORD", :fields=>[ 171 | {"name"=>"new_column", "type"=>"INTEGER", :mode=>"NULLABLE" } 172 | ], :mode=>"NULLABLE"} 173 | ], :mode=>"NULLABLE"}, 174 | {:name=>"new_required_column", :type=>"INTEGER", :mode=>"REQUIRED"} 175 | ] 176 | result = Schema.new(superset).diff_columns(subset) 177 | assert { Schema.equals?(result, expected) } 178 | end 179 | end 180 | 181 | sub_test_case "with intersect" do 182 | def before_columns 183 | [ 184 | {"name"=>"drop_column", "type"=>"INTEGER"}, 185 | {"name"=>"remained_column", "type"=>"INTEGER"}, 186 | {"name"=>"record", "type"=>"RECORD", "fields"=>[ 187 | {"name"=>"record", "type"=>"RECORD", "fields"=>[ 188 | {"name"=>"drop_column", "type"=>"INTEGER"}, 189 | {"name"=>"remained_column", "type"=>"STRING"} 190 | ]} 191 | ]} 192 | ] 193 | end 194 | 195 | def after_columns 196 | [ 197 | {"name"=>"remained_column", "type"=>"INTEGER"}, 198 | {"name"=>"record", "type"=>"RECORD", "fields"=>[ 199 | {"name"=>"record", "type"=>"RECORD", "fields"=>[ 200 | {"name"=>"remained_column", "type"=>"STRING"}, 201 | {"name"=>"new_column", "type"=>"INTEGER"} 202 | ]}, 203 | {"name"=>"new_record", "type"=>"RECORD", "fields"=>[ 204 | {:name=>"new_column", :type=>"INTEGER"} 205 | ]} 206 | ]}, 207 | {"name"=>"new_required_column", "type"=>"INTEGER", "mode"=>"REQUIRED"} 208 | ] 209 | end 210 | 211 | def test_diff_columns_drop_columns 212 | drop_columns = Schema.new(before_columns).diff_columns(after_columns) 213 | expected = [ 214 | {:name=>"drop_column", :type=>"INTEGER", :mode=>"NULLABLE"}, 215 | {:name=>"record", :type=>"RECORD", :mode=>"NULLABLE", :fields=>[ 216 | {:name=>"record", :type=>"RECORD", :mode=>"NULLABLE", :fields=>[ 217 | {:name=>"drop_column", :type=>"INTEGER", :mode=>"NULLABLE" } 218 | ]} 219 | ]} 220 | ] 
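# diff_columns returns the columns (including nested RECORD fields) present in the
# receiver but missing from the argument, filling in absent modes as NULLABLE,
# so diffing before_columns against after_columns yields exactly the columns to drop.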
221 | assert { Schema.equals?(drop_columns, expected) } 222 | end 223 | 224 | def test_diff_columns_add_columns 225 | add_columns = Schema.new(after_columns).diff_columns(before_columns) 226 | expected = [ 227 | {:name=>"record", :type=>"RECORD", :mode=>"NULLABLE", :fields=>[ 228 | {:name=>"record", :type=>"RECORD", :mode=>"NULLABLE", :fields=>[ 229 | {:name=>"new_column", :type=>"INTEGER", :mode=>"NULLABLE"} 230 | ]}, 231 | {:name=>"new_record", :type=>"RECORD", :mode=>"NULLABLE", :fields=>[ 232 | {"name"=>"new_column", "type"=>"INTEGER", :mode=>"NULLABLE"} 233 | ]} 234 | ]}, 235 | {:name=>"new_required_column", :type=>"INTEGER", :mode=>"REQUIRED"} 236 | ] 237 | assert { Schema.equals?(add_columns, expected) } 238 | end 239 | end 240 | end 241 | 242 | sub_test_case "diff_columns_by_name" do 243 | def before_columns 244 | [ 245 | {:name=>"drop_column", :type=>"INTEGER"}, 246 | {:name=>"record", :type=>"RECORD", :fields=>[ 247 | {:name=>"record", :type=>"RECORD", :fields=>[ 248 | {:name=>"drop_column", :type=>"INTEGER"}, 249 | ]} 250 | ]} 251 | ] 252 | end 253 | 254 | def after_columns 255 | [ 256 | {:name=>"drop_column", :type=>"STRING"}, 257 | {:name=>"record", :type=>"RECORD", :fields=>[ 258 | {:name=>"record", :type=>"RECORD", :fields=>[ 259 | {:name=>"drop_column", :type=>"STRING"}, 260 | {:name=>"new_column", :type=>"INTEGER"} 261 | ]}, 262 | {:name=>"new_record", :type=>"RECORD", :fields=>[ 263 | {:name=>"new_column", :type=>"INTEGER"} 264 | ]} 265 | ]}, 266 | {:name=>"new_required_column", :type=>"INTEGER", :mode=>"REQUIRED"} 267 | ] 268 | end 269 | 270 | def test_diff_columns_by_name 271 | diff_columns = Schema.new(after_columns).diff_columns_by_name(before_columns) 272 | expected = [ 273 | {:name=>"record", :type=>"RECORD", :fields=>[ 274 | {:name=>"record", :type=>"RECORD", :fields=>[ 275 | {:name=>"new_column", :type=>"INTEGER"} 276 | ]}, 277 | {:name=>"new_record", :type=>"RECORD", :fields=>[ 278 | {:name=>"new_column", :type=>"INTEGER"} 279 | ]} 280 | ]}, 281 | {:name=>"new_required_column", :type=>"INTEGER", :mode=>"REQUIRED"} 282 | ] 283 | 284 | assert { Schema.equals?(expected, diff_columns) } 285 | end 286 | end 287 | 288 | sub_test_case "reverse_merge!" do 289 | def test_reverse_merge! 290 | source_columns = [ 291 | { name: 'id', type: 'INTEGER', mode: 'NULLABLE' }, 292 | { name: 'name', type: 'RECORD', mode: 'REQUIRED', fields: [ 293 | { name: 'first_name', type: 'STRING', mode: 'NULLABLE' }, 294 | { name: 'last_name', type: 'STRING' }, 295 | { name: 'new_column', type: 'STRING' }, 296 | ] } 297 | ] 298 | 299 | target_columns = [ 300 | { name: 'id', type: 'INTEGER' }, 301 | { name: 'name', type: 'RECORD', mode: 'NULLABLE', fields: [ 302 | { name: 'first_name', type: 'STRING' }, 303 | { name: 'last_name', type: 'STRING' }, 304 | ] }, 305 | ] 306 | 307 | expected = [ 308 | { name: 'id', type: 'INTEGER', mode: 'NULLABLE' }, 309 | { name: 'name', type: 'RECORD', mode: 'NULLABLE', fields: [ 310 | { name: 'first_name', type: 'STRING', mode: 'NULLABLE' }, 311 | { name: 'last_name', type: 'STRING', mode: 'NULLABLE' }, 312 | { name: 'new_column', type: 'STRING', mode: 'NULLABLE' }, 313 | ] } 314 | ] 315 | 316 | result = Schema.new(target_columns).reverse_merge!(source_columns) 317 | assert { result == expected } 318 | end 319 | end 320 | 321 | sub_test_case "reject_columns!" do 322 | def test_reject_columns! 
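# Schema.reject_columns! removes the drop_columns paths (flattened, dot-separated names)
# from target_columns in place, descending into nested RECORD fields.
# A minimal illustrative sketch (not part of the original test; hypothetical fixture data):
#   Schema.reject_columns!([{ name: 'a', type: 'STRING' }],
#                          [{ name: 'a', type: 'STRING' }, { name: 'b', type: 'INTEGER' }])
#   #=> [{ name: 'b', type: 'INTEGER' }]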
323 | target_columns = [ 324 | { name: 'id', type: 'INTEGER' }, 325 | { name: 'citiesLived', type: 'RECORD', fields: [ 326 | { name: 'place', type: 'RECORD', fields: [ 327 | { name: 'city', type: 'RECORD', fields: [ 328 | { name: 'child1', type: 'STRING' }, 329 | { name: 'child2', type: 'STRING' } 330 | ] }, 331 | { name: 'postcode', type: 'STRING' } 332 | ] }, 333 | { name: 'yearsLived', type: 'INTEGER' } 334 | ] } 335 | ] 336 | 337 | drop_columns = [ 338 | { name: 'citiesLived', type: 'RECORD', fields: [ 339 | { name: 'place', type: 'RECORD', fields: [ 340 | { name: 'city', type: 'RECORD', fields: [ 341 | { name: 'child2', type: 'STRING' }, 342 | ] } 343 | ] } 344 | ] } 345 | ] 346 | 347 | expected = [ 348 | { name: 'id', type: 'INTEGER' }, 349 | { name: 'citiesLived', type: 'RECORD', fields: [ 350 | { name: 'place', type: 'RECORD', fields: [ 351 | { name: 'city', type: 'RECORD', fields: [ 352 | name: 'child1', type: 'STRING' 353 | ] 354 | }, 355 | { name: 'postcode', type: 'STRING' } 356 | ] }, 357 | { name: 'yearsLived', type: 'INTEGER' } 358 | ] } 359 | ] 360 | 361 | result = Schema.reject_columns!(drop_columns, target_columns) 362 | assert { result == expected } 363 | end 364 | end 365 | 366 | sub_test_case "build_query_fields" do 367 | def subset 368 | subset = [ 369 | {name: "remained_column", type: "INTEGER"}, 370 | {name: "record", type: "RECORD", fields: [ 371 | {name: "record", type: "RECORD", fields: [ 372 | {name: "remained_column", type: "STRING" } 373 | ]} 374 | ]} 375 | ] 376 | end 377 | 378 | def superset 379 | [ 380 | {name: "remained_column", type: "INTEGER"}, 381 | {name: "record", type: "RECORD", fields: [ 382 | {name: "record", type: "RECORD", fields: [ 383 | {name: "remained_column", type: "STRING" }, 384 | {name: "new_column", type: "INTEGER" } 385 | ]}, 386 | {name: "new_record", type: "RECORD", fields: [ 387 | {name: "new_column", type: "INTEGER"} 388 | ]} 389 | ]}, 390 | {name: "new_required_column", type: "INTEGER", mode: "REQUIRED" } 391 | ] 392 | end 393 | 394 | def test_build_query_fields_for_subset 395 | target_columns = subset 396 | source_columns = superset 397 | 398 | schema = Schema.new(target_columns) 399 | result = schema.build_query_fields(source_columns) 400 | expected = [ 401 | "INTEGER(remained_column) AS remained_column", 402 | "STRING(record.record.remained_column) AS record.record.remained_column" 403 | ] 404 | assert { expected == result } 405 | end 406 | 407 | def test_build_query_fields_for_superset 408 | target_columns = superset 409 | source_columns = subset 410 | 411 | schema = Schema.new(target_columns) 412 | result = schema.build_query_fields(source_columns) 413 | expected = [ 414 | "INTEGER(remained_column) AS remained_column", 415 | "STRING(record.record.remained_column) AS record.record.remained_column", 416 | "record.record.new_column", 417 | "record.new_record.new_column", 418 | "new_required_column" 419 | ] 420 | assert { expected == result } 421 | end 422 | end 423 | end 424 | end 425 | -------------------------------------------------------------------------------- /test/test_table_data.rb: -------------------------------------------------------------------------------- 1 | require_relative 'helper.rb' 2 | require 'bigquery_migration/table_data' 3 | 4 | class BigqueryMigration 5 | class TestTableData < Test::Unit::TestCase 6 | sub_test_case "values" do 7 | def test_values_simple 8 | columns = [ 9 | { name: 'string', type: 'STRING', mode: 'NULLABLE'}, 10 | { name: 'integer', type: 'INTEGER', mode: 'NULLABLE'}, 11 | { name: 
'float', type: 'FLOAT', mode: 'NULLABLE'}, 12 | { name: 'boolean', type: 'BOOLEAN', mode: 'NULLABLE'}, 13 | { name: 'timestamp', type: 'TIMESTAMP', mode: 'NULLABLE'}, 14 | ] 15 | 16 | rows = [ 17 | { f: [ 18 | {v: "foo"}, 19 | {v: "1"}, 20 | {v: "1.1"}, 21 | {v: "true"}, 22 | {v: "1.444435200E9"} 23 | ] }, 24 | { f: [ 25 | {v: "bar"}, 26 | {v: "2"}, 27 | {v: "2.2"}, 28 | {v: "false"}, 29 | {v: "1.444435200E9"} 30 | ] } 31 | ] 32 | 33 | expected = [ 34 | [ "foo", "1", "1.1", "true", "1.444435200E9" ], 35 | [ "bar", "2", "2.2", "false", "1.444435200E9" ] 36 | ] 37 | 38 | assert { TableData.new(columns, rows).values == expected } 39 | end 40 | 41 | def test_values_with_empty_hash 42 | columns = [ 43 | {name: "category", type: "STRING"}, 44 | {name: "number", type: "INTEGER"}, 45 | {name: "null_string", type: "STRING"}, 46 | {name: "d", type: "STRING"}, 47 | {name: "t", type: "TIMESTAMP"} 48 | ] 49 | 50 | rows = [ 51 | { f: [ 52 | {v: "dummyEventCategory03"}, 53 | {v: "5678"}, 54 | {v: {}}, 55 | {v: "2016-07-25"}, 56 | {v: "1.4693724E9"} 57 | ] } 58 | ] 59 | 60 | expected = [ 61 | [ "dummyEventCategory03", "5678", nil, "2016-07-25", "1.4693724E9" ], 62 | ] 63 | 64 | assert { TableData.new(columns, rows).values == expected } 65 | end 66 | 67 | def test_values_repeated_and_record_simple 68 | columns = [ 69 | { name: 'repeated_record', type: 'RECORD', mode: 'REPEATED', fields: [ 70 | { name: 'record', type: 'RECORD', mode: 'NULLABLE', fields: [ 71 | { name: 'repeated_time', type: 'TIMESTAMP', mode: 'REPEATED' } 72 | ] }, 73 | ] } 74 | ] 75 | 76 | rows = [ 77 | { f: [ 78 | { v: [ 79 | { v: 80 | { f: [ 81 | { v: 82 | { f: [ 83 | { v: [ 84 | {v: "1.444435200E9"}, 85 | {v: "1.444435200E9"} 86 | ] } 87 | ] } 88 | } 89 | ] } 90 | }, 91 | v: { 92 | f: [ 93 | { v: 94 | { f: [ 95 | { v: [ 96 | {v: "1.444435200E9"}, 97 | {v: "1.444435200E9"}, 98 | {v: "1.444435200E9"} 99 | ] } 100 | ] } 101 | } 102 | ] 103 | } 104 | ] } 105 | ] } 106 | ] 107 | 108 | expected = [ 109 | # only single row 110 | [ 111 | ["1.444435200E9"], 112 | ["1.444435200E9"], 113 | ["1.444435200E9"], 114 | ["1.444435200E9"], 115 | ["1.444435200E9"] 116 | ] 117 | ] 118 | 119 | assert { TableData.new(columns, rows).values == expected } 120 | end 121 | 122 | def test_values_repeated_and_record_multiple 123 | columns = [ 124 | { name: 'repeated_record', type: 'RECORD', mode: 'REPEATED', fields: [ 125 | { name: 'record', type: 'RECORD', mode: 'NULLABLE', fields: [ 126 | { name: 'repeated_time', type: 'TIMESTAMP', mode: 'REPEATED' } 127 | ] }, 128 | ] } 129 | ] 130 | 131 | rows = [ 132 | { f: [ 133 | { v: [ 134 | { v: 135 | { f: [ 136 | { v: 137 | { f: [ 138 | { v: [ 139 | {v: "1.444435200E9"}, 140 | {v: "1.444435200E9"} 141 | ] } 142 | ] } 143 | } 144 | ] } 145 | } 146 | ] } 147 | ] }, 148 | { f: [ 149 | { v: [ 150 | { v: 151 | { f: [ 152 | { v: 153 | { f: [ 154 | { v: [ 155 | {v: "1.444435200E9"}, 156 | {v: "1.444435200E9"} 157 | ] } 158 | ] } 159 | } 160 | ] } 161 | } 162 | ] } 163 | ] } 164 | ] 165 | 166 | expected = [ 167 | # first row 168 | [ 169 | ["1.444435200E9"], 170 | ["1.444435200E9"], 171 | ], 172 | # second row 173 | [ 174 | ["1.444435200E9"], 175 | ["1.444435200E9"], 176 | ] 177 | ] 178 | 179 | assert { TableData.new(columns, rows).values == expected } 180 | end 181 | 182 | def test_values_repeated_in_middle_row 183 | columns = [ 184 | { "name": "string", "type": "STRING", "mode": "NULLABLE" }, 185 | { "name": "integer", "type": "INTEGER", "mode": "NULLABLE" }, 186 | { "name": "repeated", "type": "STRING", "mode": "REPEATED" }, 
187 | { "name": "float", "type": "FLOAT", "mode": "NULLABLE" }, 188 | { "name": "boolean", "type": "BOOLEAN", "mode": "NULLABLE" }, 189 | { "name": "timestamp", "type": "TIMESTAMP", "mode": "NULLABLE" } 190 | ] 191 | 192 | rows = [ 193 | { f: [ 194 | { v: "foo" }, 195 | { v: "1" }, 196 | { v: [] }, 197 | { v: "1.1" }, 198 | { v: "true" }, 199 | { v: "1.444435200E9" } 200 | ] }, 201 | { f: [ 202 | { v: "foo" }, 203 | { v: "3" }, 204 | { v: [] }, 205 | { v: "3.3" }, 206 | { v: "true" }, 207 | { v: "1.444435200E9" } 208 | ] }, 209 | { f: [ 210 | { v: "foo" }, 211 | { v: "4" }, 212 | { v: [] }, 213 | { v: "4.4" }, 214 | { v: "false" }, 215 | { v: "1.444435200E9" } 216 | ] }, 217 | { f: [ 218 | { v: "foo" }, 219 | { v: "2" }, 220 | { v: [ 221 | { v: "foo" }, 222 | { v: "bar" } 223 | ] }, 224 | { v: "2.2" }, 225 | { v: "false" }, 226 | { v: "1.444435200E9" } 227 | ] } 228 | ] 229 | 230 | expected = [ 231 | # first row 232 | [ 233 | [ "foo", "1", nil, "1.1", "true", "1.444435200E9" ] 234 | ], 235 | # second row 236 | [ 237 | [ "foo", "3", nil, "3.3", "true", "1.444435200E9" ], 238 | ], 239 | # third row 240 | [ 241 | [ "foo", "4", nil, "4.4", "false", "1.444435200E9" ] 242 | ], 243 | # fourth row 244 | [ 245 | [ "foo", "2", "foo", "2.2", "false", "1.444435200E9" ], 246 | [ nil, nil, "bar", nil, nil, nil ], 247 | ], 248 | ] 249 | 250 | 251 | assert { TableData.new(columns, rows).values == expected } 252 | end 253 | 254 | def test_values_repeated_and_record_in_middle_row 255 | columns = [ 256 | { "name": "string", "type": "STRING", "mode": "NULLABLE" }, 257 | { "name": "integer", "type": "INTEGER", "mode": "NULLABLE" }, 258 | { "name": "repeated", "type": "RECORD", "mode": "REPEATED", "fields": [ 259 | { "name": "record", "type": "STRING", "mode": "REPEATED" } 260 | ] }, 261 | { "name": "float", "type": "FLOAT", "mode": "NULLABLE" }, 262 | { "name": "boolean", "type": "BOOLEAN", "mode": "NULLABLE" }, 263 | { "name": "timestamp", "type": "TIMESTAMP", "mode": "NULLABLE" } 264 | ] 265 | 266 | rows = [ 267 | { f: [ 268 | { v: "foo" }, 269 | { v: "1" }, 270 | { v: [] }, 271 | { v: "1.1" }, 272 | { v: "true" }, 273 | { v: "1.444435200E9" } 274 | ] }, 275 | { f: [ 276 | { v: "foo" }, 277 | { v: "4" }, 278 | { v: [] }, 279 | { v: "4.4" }, 280 | { v: "true" }, 281 | { v: "1.444435200E9" } 282 | ] }, 283 | { f: [ 284 | { v: "foo" }, 285 | { v: "5" }, 286 | { v: [] }, 287 | { v: "5.5" }, 288 | { v: "false" }, 289 | { v: "1.444435200E9" } 290 | ] }, 291 | { f: [ 292 | { v: "foo" }, 293 | { v: "2" }, 294 | { v: [ 295 | { v: 296 | { f: [ 297 | { v: [ 298 | { v: "foo" }, 299 | { v: "bar" } 300 | ] } 301 | ] } 302 | }, 303 | { v: 304 | { f: [ 305 | { v: [ 306 | { v: "foo" }, 307 | { v: "bar" } 308 | ] } 309 | ] } 310 | } 311 | ] }, 312 | { v: "2.2" }, 313 | { v: "false" }, 314 | { v: "1.444435200E9" } 315 | ] }, 316 | { f: [ 317 | { v: "foo" }, 318 | { v: "3" }, 319 | { v: [ 320 | { v: 321 | { f: [ 322 | { v: [ 323 | { v: "foo" }, 324 | { v: "bar" } 325 | ] } 326 | ] } 327 | }, 328 | { v: 329 | { f: [ 330 | { v: [ 331 | { v: "foo" }, 332 | { v: "bar" } 333 | ] } 334 | ] } 335 | } 336 | ] }, 337 | { v: "3.3" }, 338 | { v: "false" }, 339 | { v: "1.444435200E9" } 340 | ] } 341 | ] 342 | 343 | expected = [ 344 | # first row 345 | [ 346 | [ "foo", "1", nil, "1.1", "true", "1.444435200E9" ] 347 | ], 348 | # second row 349 | [ 350 | [ "foo", "4", nil, "4.4", "true", "1.444435200E9" ] 351 | ], 352 | # third row 353 | [ 354 | [ "foo", "5", nil, "5.5", "false", "1.444435200E9" ] 355 | ], 356 | # fourth row 357 | [ 358 | [ 
"foo", "2", "foo", "2.2", "false", "1.444435200E9" ], 359 | [ nil, nil, "bar", nil, nil, nil ], 360 | [ nil, nil, "foo", nil, nil, nil ], 361 | [ nil, nil, "bar", nil, nil, nil ] 362 | ], 363 | # fifth row 364 | [ 365 | [ "foo", "3", "foo", "3.3", "false", "1.444435200E9" ], 366 | [ nil, nil, "bar", nil, nil, nil ], 367 | [ nil, nil, "foo", nil, nil, nil ], 368 | [ nil, nil, "bar", nil, nil, nil ] 369 | ], 370 | ] 371 | 372 | assert { TableData.new(columns, rows).values == expected } 373 | end 374 | 375 | def test_values_repeated_and_record_complex 376 | columns = [ 377 | { name: 'repeated_record', type: 'RECORD', mode: 'REPEATED', fields: [ 378 | { name: 'record', type: 'RECORD', mode: 'NULLABLE', fields: [ 379 | { name: 'child', type: 'STRING', mode: 'NULLABLE' }, 380 | { name: 'repeated_time', type: 'TIMESTAMP', mode: 'REPEATED' } 381 | ] }, 382 | { name: 'repeated_time', type: 'TIMESTAMP', mode: 'REPEATED' } 383 | ] }, 384 | { name: 'repeated_string', type: 'STRING', mode: 'REPEATED' }, 385 | { name: 'repeated_int', type: 'INTEGER', mode: 'REPEATED' }, 386 | { name: 'repeated_record2', type: 'RECORD', mode: 'REPEATED', fields: [ 387 | { name: 'record2', type: 'RECORD', mode: 'NULLABLE', fields: [ 388 | { name: 'repeated_float', type: 'FLOAT', mode: 'REPEATED' }, 389 | { name: 'child2', type: 'STRING', mode: 'REQUIRED' } 390 | ] } 391 | ] } 392 | ] 393 | 394 | rows = [ 395 | { f: [ 396 | { v: [ 397 | { v: 398 | { f: [ 399 | { v: 400 | { f: [ 401 | { v: "foo"}, 402 | { v: [ 403 | { v: "1.44423E9"}, 404 | { v: "1.4443164E9"} 405 | ] } 406 | ] } 407 | }, 408 | { v: [ 409 | { v: "1.4444028E9"}, 410 | { v: "1.4444028E9"} 411 | ] } 412 | ] } 413 | }, 414 | { v: 415 | { f: [ 416 | { v: 417 | { f: [ 418 | { v: "fuga"}, 419 | { v: [] } 420 | ] } 421 | }, 422 | { v: [ 423 | { v: "1.4445756E9"}, 424 | { v: "1.444662E9"} 425 | ] } 426 | ] } 427 | } 428 | ] }, 429 | { v: [ 430 | { v: "one"}, 431 | { v: "two"}, 432 | { v: "three"} 433 | ] }, 434 | { v: [ 435 | { v: "1"}, 436 | { v: "2"} 437 | ] }, 438 | { v: [ 439 | { v: 440 | { f: [ 441 | { v: 442 | { f: [ 443 | { v: [ 444 | { v: "1.1"}, 445 | { v: "2.2"}, 446 | { v: "3.3"} 447 | ] }, 448 | { v: "foo2"} 449 | ] } 450 | } 451 | ] } 452 | }, 453 | { v: 454 | { f: [ 455 | { v: 456 | { f: [ 457 | { v: [ 458 | { v: "4.4"}, 459 | { v: "5.5"}, 460 | { v: "6.6"}, 461 | { v: "7.7"} 462 | ] }, 463 | { v: "bar"} 464 | ] } 465 | } 466 | ] } 467 | } 468 | ] } 469 | ] } 470 | ] 471 | 472 | expected = [ 473 | # only single row 474 | [ 475 | ["foo", "1.44423E9", "1.4444028E9", "one", "1", "1.1", "foo2"], 476 | [nil, "1.4443164E9", "1.4444028E9", "two", "2", "2.2", nil], 477 | ["fuga", nil, "1.4445756E9", "three", nil, "3.3", nil], 478 | [nil, nil, "1.444662E9", nil, nil, "4.4", "bar"], 479 | [nil, nil, nil, nil, nil, "5.5", nil], 480 | [nil, nil, nil, nil, nil, "6.6", nil], 481 | [nil, nil, nil, nil, nil, "7.7", nil] 482 | ] 483 | ] 484 | 485 | assert { TableData.new(columns, rows).values == expected } 486 | end 487 | 488 | def test_values_record_with_empty_hash 489 | columns = [ 490 | { name: "test", type: "STRING" }, 491 | { name: "record1", type: "RECORD", fields: [ 492 | { name: "child", type: "STRING" }, 493 | ] }, 494 | { name: "record2", type: "RECORD", fields: [ 495 | { name: "child", type: "STRING" } 496 | ] }, 497 | { name: "record3", type: "RECORD", mode: "REPEATED", fields: [ 498 | { name: "array", type: "INTEGER", mode: "REPEATED" } 499 | ] }, 500 | { name: "date", type: "STRING" }, 501 | { name: "timestamp", type: "TIMESTAMP" } 502 | ] 503 | 504 | 
rows = [ 505 | { f: [ 506 | { v: 'fuga' }, 507 | { v: 508 | { f: [ 509 | { v: 'hoge' }, 510 | ] } 511 | }, 512 | { v: {} }, 513 | 514 | { v: [ 515 | { v: 516 | { f: [ 517 | { v: [ 518 | { v: '1' } 519 | ] } 520 | ] } 521 | }, 522 | { v: 523 | { f: [ 524 | { v: [ 525 | { v: '4' } 526 | ] } 527 | ] } 528 | } 529 | ] }, 530 | { v: '2016-10-17' }, 531 | { v: '1.47663E9' } 532 | ] } 533 | ] 534 | 535 | expected = [ 536 | [ 537 | ["fuga", "hoge", nil, "1", "2016-10-17", "1.47663E9"], 538 | [nil, nil, nil, "4", nil, nil] 539 | ] 540 | ] 541 | 542 | assert { TableData.new(columns, rows).values == expected } 543 | end 544 | end 545 | end 546 | end 547 | -------------------------------------------------------------------------------- /test/test_bigquery_wrapper.rb: -------------------------------------------------------------------------------- 1 | require_relative 'helper.rb' 2 | require 'bigquery_migration/bigquery_wrapper' 3 | 4 | unless File.exist?(JSON_KEYFILE) 5 | puts "#{JSON_KEYFILE} is not found. Skip test/test_bigquery_wrapper.rb" 6 | else 7 | class BigqueryMigration 8 | class TestBigqueryWrapper < Test::Unit::TestCase 9 | def instance 10 | @instance ||= BigqueryWrapper.new(config) 11 | end 12 | 13 | def config 14 | { 15 | 'json_keyfile' => JSON_KEYFILE, 16 | 'dataset' => 'bigquery_migration_unittest', 17 | 'table' => 'test', 18 | } 19 | end 20 | 21 | def config_for_location 22 | self.config.merge({ 23 | 'dataset' => 'bigquery_migration_unittest_asia_northeast1', 24 | 'location' => 'asia-northeast1', 25 | }) 26 | end 27 | 28 | sub_test_case "configure" do 29 | def test_configure_json_keyfile 30 | config = { 31 | 'json_keyfile' => JSON_KEYFILE, 32 | 'dataset' => 'bigquery_migration_unittest', 33 | 'table' => 'test', 34 | } 35 | instance = BigqueryWrapper.new(config) 36 | assert_nothing_raised { instance.project } 37 | assert_nothing_raised { instance.dataset } 38 | assert_nothing_raised { instance.table } 39 | assert_nothing_raised { instance.client } 40 | end 41 | 42 | def test_configure_json_keyfile_content_json 43 | config = { 44 | 'json_keyfile' => { 45 | 'content' => File.read(JSON_KEYFILE), 46 | }, 47 | 'dataset' => 'bigquery_migration_unittest', 48 | 'table' => 'test', 49 | } 50 | instance = BigqueryWrapper.new(config) 51 | assert_nothing_raised { instance.project } 52 | assert_nothing_raised { instance.dataset } 53 | assert_nothing_raised { instance.table } 54 | assert_nothing_raised { instance.client } 55 | end 56 | 57 | def test_configure_json_keyfile_content_hash 58 | config = { 59 | 'json_keyfile' => { 60 | 'content' => JSON.parse(File.read(JSON_KEYFILE)), 61 | }, 62 | 'dataset' => 'bigquery_migration_unittest', 63 | 'table' => 'test', 64 | } 65 | instance = BigqueryWrapper.new(config) 66 | assert_nothing_raised { instance.project } 67 | assert_nothing_raised { instance.dataset } 68 | assert_nothing_raised { instance.table } 69 | assert_nothing_raised { instance.client } 70 | end 71 | end 72 | 73 | sub_test_case "create_dataset" do 74 | def test_create_dataset 75 | assert_nothing_raised { instance.create_dataset } 76 | assert_nothing_raised { instance.get_dataset } 77 | end 78 | 79 | sub_test_case "with location option" do 80 | def test_create_dataset 81 | instance = BigqueryWrapper.new(config_for_location) 82 | assert_nothing_raised { instance.create_dataset } 83 | result = instance.get_dataset 84 | assert { result[:responses][:get_dataset].location == 'asia-northeast1' } 85 | end 86 | end 87 | end 88 | 89 | sub_test_case "create_table" do 90 | def test_create_table 91 | 
instance.drop_table rescue nil 92 | columns = [ 93 | { name: 'column1', type: 'INTEGER' }, 94 | { name: 'column2', type: 'STRING' }, 95 | { name: 'column3', type: 'FLOAT' }, 96 | { name: 't', type: 'TIMESTAMP' }, 97 | { name: 'record', type: 'RECORD', fields:[ 98 | { name: 'column4', type: 'STRING' }, 99 | { name: 'column5', type: 'INTEGER' }, 100 | ]}, 101 | ] 102 | assert_nothing_raised { instance.create_table(columns: columns) } 103 | assert_nothing_raised { instance.get_table } 104 | end 105 | 106 | sub_test_case "with location option" do 107 | def test_create_table 108 | instance = BigqueryWrapper.new(config_for_location) 109 | instance.drop_table rescue nil 110 | columns = [ 111 | { name: 'column1', type: 'INTEGER' }, 112 | ] 113 | assert_nothing_raised { instance.create_table(columns: columns) } 114 | result = instance.get_table 115 | assert { result[:location] == 'asia-northeast1' } 116 | end 117 | end 118 | end 119 | 120 | def test_drop_table 121 | instance.create_table(columns: [{ name: 'column1', type: 'INTEGER' }]) 122 | assert_nothing_raised { instance.drop_table } 123 | assert_raise(NotFoundError) { instance.get_table } 124 | end 125 | 126 | def test_list_tables 127 | instance.create_table(table: 'table1', columns: [{ name: 'column1', type: 'INTEGER' }]) 128 | instance.create_table(table: 'table2', columns: [{ name: 'column1', type: 'INTEGER' }]) 129 | result = instance.list_tables 130 | assert { result[:tables] == ['table1', 'table2'] } 131 | instance.drop_table(table: 'table1') 132 | instance.drop_table(table: 'table2') 133 | end 134 | 135 | sub_test_case "purge_tables" do 136 | def before_tables 137 | %w[ 138 | test_20160301 139 | test_20160301_00 140 | test_20160229 141 | test_20160229_23 142 | test_20160229_22 143 | test_20160228 144 | test_23_20160229 145 | test_22_20160229 146 | test_00_20160301 147 | ] 148 | end 149 | 150 | def test_purge_tables_daily 151 | stub(instance).list_tables { { tables: before_tables } } 152 | result = instance.purge_tables( 153 | table_prefix: 'test_', suffix_format: '%Y%m%d', purge_before: '20160229' 154 | ) 155 | expected = %w[test_20160229 test_20160228] 156 | assert { result[:delete_tables] == expected } 157 | end 158 | 159 | def test_purge_tables_hourly_1 160 | stub(instance).list_tables { { tables: before_tables } } 161 | result = instance.purge_tables( 162 | table_prefix: 'test_', suffix_format: '%Y%m%d_%H', purge_before: '20160229_23' 163 | ) 164 | expected = %w[test_20160229_23 test_20160229_22] 165 | assert { result[:delete_tables] == expected } 166 | end 167 | 168 | def test_purge_tables_hourly_2 169 | stub(instance).list_tables { { tables: before_tables } } 170 | result = instance.purge_tables( 171 | table_prefix: 'test_', suffix_format: '%H_%Y%m%d', purge_before: '23_20160229' 172 | ) 173 | expected = %w[test_23_20160229 test_22_20160229] 174 | assert { result[:delete_tables] == expected } 175 | end 176 | end 177 | 178 | sub_test_case "table_data" do 179 | def setup 180 | instance.drop_table 181 | end 182 | 183 | def teardown 184 | instance.drop_table 185 | end 186 | 187 | # Streaming insert takes time to be reflected. Let me coment out.... 
188 | =begin 189 | def test_insert_all_and_list_table_data 190 | instance.create_table(columns: [ 191 | { 'name' => 'repeated_record', 'type' => 'RECORD', 'mode' => 'REPEATED', 'fields' => [ 192 | { 'name' => 'record', 'type' => 'RECORD', 'mode' => 'NULLABLE', 'fields' => [ 193 | { 'name' => 'child', 'type' => 'STRING', 'mode' => 'NULLABLE' }, 194 | { 'name' => 'repeated_time', 'type' => 'TIMESTAMP', 'mode' => 'REPEATED' } 195 | ] }, 196 | { 'name' => 'repeated_time', 'type' => 'TIMESTAMP', 'mode' => 'REPEATED' } 197 | ] }, 198 | { 'name' => 'repeated_string', 'type' => 'STRING', 'mode' => 'REPEATED' }, 199 | { 'name' => 'repeated_int', 'type' => 'INTEGER', 'mode' => 'REPEATED' }, 200 | { 'name' => 'repeated_record2', 'type' => 'RECORD', 'mode' => 'REPEATED', 'fields' => [ 201 | { 'name' => 'record2', 'type' => 'RECORD', 'mode' => 'NULLABLE', 'fields' => [ 202 | { 'name' => 'repeated_float', 'type' => 'FLOAT', 'mode' => 'REPEATED' }, 203 | { 'name' => 'child2', 'type' => 'STRING', 'mode' => 'REQUIRED' } 204 | ] } 205 | ] } 206 | ]) 207 | 208 | assert_nothing_raised do 209 | instance.insert_all_table_data(rows: [ 210 | { 'repeated_record' => [ 211 | { 'record' => 212 | { 'child' => 'hoge', 213 | 'repeated_time' => [ 214 | '2015-10-08 00:00:00 +09:00', 215 | '2015-10-09 00:00:00 +09:00' 216 | ] 217 | }, 218 | 'repeated_time' => [ 219 | '2015-10-10 00:00:00 +09:00', 220 | '2015-10-10 00:00:00 +09:00' 221 | ] }, 222 | { 'record' => 223 | { 'child' => 'fuga'}, 224 | 'repeated_time' => [ 225 | '2015-10-12 00:00:00 +09:00', 226 | '2015-10-13 00:00:00 +09:00' 227 | ] 228 | } 229 | ], 230 | 'repeated_string' => [ 231 | 'one', 232 | 'two', 233 | 'three' 234 | ], 235 | 'repeated_int' => [ 236 | 1, 237 | 2, 238 | ], 239 | 'repeated_record2' => [ 240 | { 'record2' => 241 | { 'child2' => 'hoge2', 242 | 'repeated_float' => [ 243 | 1.1, 244 | 2.2, 245 | 3.3 246 | ] 247 | } 248 | }, 249 | { 'record2' => 250 | { 'child2' => 'fuga2', 251 | 'repeated_float' => [ 252 | 4.4, 253 | 5.5, 254 | 6.6, 255 | 7.7 256 | ] 257 | } 258 | } 259 | ] }, 260 | ]) 261 | end 262 | 263 | result = {} 264 | assert_nothing_raised { result = instance.list_table_data } 265 | 60.times do 266 | break if result[:values] 267 | sleep 1 268 | result = instance.list_table_data 269 | end 270 | 271 | expected = { 272 | total_rows: 4, 273 | columns: [ 274 | { name: 'repeated_record.record.child', type: 'STRING', mode: 'NULLABLE' }, 275 | { name: 'repeated_record.record.repeated_time', type: 'TIMESTAMP', mode: 'REPEATED' }, 276 | { name: 'repeated_record.repeated_time', type: 'TIMESTAMP', mode: 'REPEATED' }, 277 | { name: 'repeated_string', type: 'STRING', mode: 'REPEATED' }, 278 | { name: 'repeated_int', type: 'INTEGER', mode: 'REPEATED' }, 279 | { name: 'repeated_record2.record2.repeated_float', type: 'FLOAT', mode: 'REPEATED' }, 280 | { name: 'repeated_record2.record2.child2', type: 'STRING', mode: 'REQUIRED' }, 281 | ], 282 | values: [ 283 | [ 284 | ["hoge", "1.44423E9", "1.4444028E9", "one", "1", "1.1", "hoge2"], 285 | [nil, "1.4443164E9", "1.4444028E9", "two", "2", "2.2", nil], 286 | ["fuga", nil, "1.4445756E9", "three", nil, "3.3", nil], 287 | [nil, nil, "1.444662E9", nil, nil, "4.4", "fuga2"], 288 | [nil, nil, nil, nil, nil, "5.5", nil], 289 | [nil, nil, nil, nil, nil, "6.6", nil], 290 | [nil, nil, nil, nil, nil, "7.7", nil] 291 | ] 292 | ] 293 | } 294 | assert { result[:columns] == expected[:columns] } 295 | assert { result[:values] == expected[:values] } 296 | # total_rows is not reflected by streming insert .... 
297 | # assert { result[:total_rows] == expected[:total_rows] } 298 | end 299 | =end 300 | end 301 | 302 | sub_test_case "patch_table" do 303 | def setup 304 | instance.drop_table 305 | end 306 | 307 | def teardown 308 | instance.drop_table 309 | end 310 | 311 | def test_add_columns 312 | before_columns = [ 313 | { 'name' => 'id', 'type' => 'INTEGER' }, 314 | { 'name' => 'string', 'type' => 'STRING', 'mode' => 'REQUIRED' }, 315 | { 'name' => 'record', 'type' => 'RECORD', 'fields' => [ 316 | { 'name' => 'child1', 'type' => 'STRING' }, 317 | ] }, 318 | ] 319 | instance.create_table(columns: before_columns) 320 | 321 | add_columns = [ 322 | {"name"=>"new_nullable_column", "type"=>"STRING", "mode"=>"NULLABLE"}, 323 | {"name"=>"new_repeated_column", "type"=>"STRING", "mode"=>"REPEATED"}, 324 | {"name"=>"new_record", "type"=>"RECORD", "fields"=>[ 325 | {"name"=>"new_record_child2", "type"=>"RECORD", "fields"=>[ 326 | {"name"=>"new_record_child3", "type"=>"STRING"} 327 | ]} 328 | ]} 329 | ] 330 | expected = before_columns + add_columns 331 | 332 | result = instance.patch_table(add_columns: add_columns) 333 | after_columns = result[:after_columns] 334 | 335 | assert { Schema.diff_columns(expected, after_columns) == [] } 336 | end 337 | 338 | def test_mode_change 339 | before_columns = [ 340 | {"name"=>"id", "type"=>"INTEGER"}, 341 | {"name"=>"record", "type"=>"RECORD", "fields"=> [ 342 | {"name"=>"record", "type"=>"RECORD", "fields"=> [ 343 | {"name"=>"mode_change", "type"=>"STRING", "mode"=>"REQUIRED"} 344 | ]} 345 | ]} 346 | ] 347 | instance.create_table(columns: before_columns) 348 | 349 | add_columns = [ 350 | {"name"=>"record", "type"=>"RECORD", "fields"=> [ 351 | {"name"=>"record", "type"=>"RECORD", "fields"=> [ 352 | {"name"=>"mode_change", "type"=>"STRING", "mode"=>"NULLABLE"} 353 | ]} 354 | ]} 355 | ] 356 | 357 | expected = [ 358 | {"name"=>"id", "type"=>"INTEGER"}, 359 | {"name"=>"record", "type"=>"RECORD", "fields"=> [ 360 | {"name"=>"record", "type"=>"RECORD", "fields"=> [ 361 | {"name"=>"mode_change", "type"=>"STRING", "mode"=>"NULLABLE"} 362 | ]} 363 | ]} 364 | ] 365 | 366 | result = instance.patch_table(add_columns: add_columns) 367 | after_columns = result[:after_columns] 368 | 369 | assert { Schema.diff_columns(expected, after_columns) == [] } 370 | end 371 | end 372 | 373 | sub_test_case "insert_select" do 374 | def setup 375 | instance.drop_table 376 | end 377 | 378 | def teardown 379 | instance.drop_table 380 | end 381 | 382 | def test_insert_select 383 | columns = [{ 'name' => 'id', 'type' => 'INTEGER' }] 384 | instance.create_table(columns: columns) 385 | 386 | query = "SELECT id FROM [#{config['dataset']}.#{config['table']}]" 387 | assert_nothing_raised do 388 | instance.insert_select(destination_table: 'insert_table', query: query) 389 | end 390 | assert_nothing_raised { instance.get_table(table: 'insert_table') } 391 | ensure 392 | instance.drop_table(table: 'insert_table') 393 | end 394 | 395 | sub_test_case "with location option" do 396 | def test_insert_select 397 | columns = [{ 'name' => 'id', 'type' => 'INTEGER' }] 398 | 399 | instance = BigqueryWrapper.new(config_for_location) 400 | instance.drop_table rescue nil 401 | instance.create_table(columns: columns) 402 | 403 | query = "SELECT id FROM [#{config_for_location['dataset']}.#{config_for_location['table']}]" 404 | assert_nothing_raised do 405 | instance.insert_select(destination_table: 'insert_table', query: query) 406 | end 407 | result = instance.get_table(table: 'insert_table') 408 | assert { result[:location] 
== 'asia-northeast1' } 409 | ensure 410 | instance.drop_table(table: 'insert_table') 411 | end 412 | end 413 | end 414 | 415 | sub_test_case "drop_column" do 416 | def setup 417 | instance.drop_table 418 | end 419 | 420 | def teardown 421 | instance.drop_table 422 | end 423 | 424 | def test_drop_column_with_drop_columns 425 | before_columns = [ 426 | { name: 'drop_column', type: 'INTEGER' }, 427 | { name: 'remained_column', type: 'STRING' }, 428 | { name: 'record', type: 'RECORD', fields:[ 429 | { name: 'drop_column', type: 'STRING' }, 430 | { name: 'remained_column', type: 'STRING' }, 431 | ] } 432 | ] 433 | instance.create_table(columns: before_columns) 434 | 435 | drop_columns = [ 436 | { name: 'drop_column', type: 'STRING' }, 437 | { name: 'record', type: 'RECORD', fields:[ 438 | { name: 'drop_column', type: 'STRING' }, 439 | ] }, 440 | ] 441 | expected = [ 442 | { name: 'remained_column', type: 'STRING' }, 443 | { name: 'record', type: 'RECORD', fields:[ 444 | { name: 'remained_column', type: 'STRING' }, 445 | ] } 446 | ] 447 | 448 | result = instance.drop_column(drop_columns: drop_columns) 449 | after_columns = result[:after_columns] 450 | 451 | assert { Schema.diff_columns(expected, after_columns) == [] } 452 | end 453 | 454 | def test_drop_column_with_columns 455 | before_columns = [ 456 | { name: 'drop_column', type: 'INTEGER' }, 457 | { name: 'remained_column', type: 'STRING' }, 458 | { name: 'record', type: 'RECORD', fields:[ 459 | { name: 'drop_column', type: 'STRING' }, 460 | { name: 'remained_column', type: 'STRING' }, 461 | ] } 462 | ] 463 | instance.create_table(columns: before_columns) 464 | 465 | columns = [ 466 | { name: 'remained_column', type: 'STRING' }, 467 | { name: 'record', type: 'RECORD', fields:[ 468 | { name: 'remained_column', type: 'STRING' }, 469 | { name: 'add_column', type: 'STRING' }, 470 | ] }, 471 | { name: 'add_column', type: 'STRING' }, 472 | ] 473 | expected = columns.dup 474 | 475 | result = instance.drop_column(columns: columns) 476 | after_columns = result[:after_columns] 477 | 478 | assert { Schema.diff_columns(expected, after_columns) == [] } 479 | end 480 | 481 | end 482 | 483 | sub_test_case "migrate_table" do 484 | def setup 485 | instance.drop_table 486 | end 487 | 488 | def teardown 489 | instance.drop_table 490 | end 491 | 492 | def test_add_columns 493 | before_columns = [ 494 | { name: 'remained_column', type: 'INTEGER' }, 495 | { name: 'record', type: 'RECORD', fields: [ 496 | { name: 'record', type: 'RECORD', fields: [ 497 | { name: 'remained_column', type: 'STRING' }, 498 | ] } 499 | ] } 500 | ] 501 | instance.create_table(columns: before_columns) 502 | 503 | columns = [ 504 | { name: 'remained_column', type: 'INTEGER' }, 505 | { name: 'record', type: 'RECORD', fields: [ 506 | { name: 'record', type: 'RECORD', fields: [ 507 | { name: 'remained_column', type: 'STRING' }, 508 | { name: 'new_column', type: 'INTEGER' }, 509 | { name: 'new_record', type: 'RECORD', fields: [ 510 | { name: 'new_column', type: 'INTEGER' }, 511 | ] } 512 | ] } 513 | ] }, 514 | { name: 'new_column', type: 'INTEGER' }, 515 | ] 516 | expected = columns.dup 517 | 518 | result = instance.migrate_table(columns: columns) 519 | after_columns = result[:after_columns] 520 | 521 | assert { Schema.diff_columns(expected, after_columns) == [] } 522 | end 523 | 524 | def test_drop_columns 525 | before_columns = [ 526 | { name: 'drop_column', type: 'INTEGER' }, 527 | { name: 'remained_column', type: 'INTEGER' }, 528 | { name: 'record', type: 'RECORD', fields: [ 529 | { name: 
'record', type: 'RECORD', fields: [ 530 | { name: 'drop_column', type: 'STRING' }, 531 | { name: 'remained_column', type: 'STRING' }, 532 | ] } 533 | ] } 534 | ] 535 | instance.create_table(columns: before_columns) 536 | 537 | columns = [ 538 | { name: 'remained_column', type: 'INTEGER' }, 539 | { name: 'record', type: 'RECORD', fields: [ 540 | { name: 'record', type: 'RECORD', fields: [ 541 | { name: 'remained_column', type: 'STRING' }, 542 | ] } 543 | ] } 544 | ] 545 | expected = columns.dup 546 | 547 | result = instance.migrate_table(columns: columns) 548 | after_columns = result[:after_columns] 549 | 550 | assert { Schema.diff_columns(expected, after_columns) == [] } 551 | end 552 | 553 | def test_add_drop 554 | before_columns = [ 555 | { name: 'remained_column', type: 'INTEGER' }, 556 | { name: 'record', type: 'RECORD', fields: [ 557 | { name: 'record', type: 'RECORD', fields: [ 558 | { name: 'remained_column', type: 'STRING' }, 559 | { name: 'drop_column', type: 'STRING' }, 560 | ] } 561 | ] }, 562 | { name: 'drop_column', type: 'INTEGER' }, 563 | ] 564 | instance.create_table(columns: before_columns) 565 | 566 | columns = [ 567 | { name: 'remained_column', type: 'INTEGER' }, 568 | { name: 'record', type: 'RECORD', fields: [ 569 | { name: 'record', type: 'RECORD', fields: [ 570 | { name: 'remained_column', type: 'STRING' }, 571 | { name: 'add_column', type: 'INTEGER' }, 572 | ] }, 573 | ] }, 574 | { name: 'add_column', type: 'STRING', mode: 'REPEATED' }, 575 | { name: 'add_record', type: 'RECORD', fields: [ 576 | { name: 'add_record', type: 'RECORD', fields: [ 577 | { name: 'add_column1', type: 'STRING' }, 578 | { name: 'add_column2', type: 'INTEGER' }, 579 | ] } 580 | ]} 581 | ] 582 | expected = columns.dup 583 | 584 | result = instance.migrate_table(columns: columns) 585 | after_columns = result[:after_columns] 586 | 587 | assert { Schema.diff_columns(expected, after_columns) == [] } 588 | end 589 | 590 | def test_type_change 591 | before_columns = [ 592 | { name: 'type_change', type: 'STRING' }, 593 | { name: 'remained_column', type: 'INTEGER' }, 594 | { name: 'record', type: 'RECORD', fields: [ 595 | { name: 'record', type: 'RECORD', fields: [ 596 | { name: 'type_change', type: 'STRING' }, 597 | { name: 'remained_column', type: 'STRING' }, 598 | ] } 599 | ] } 600 | ] 601 | instance.create_table(columns: before_columns) 602 | 603 | columns = [ 604 | { name: 'type_change', type: 'INTEGER' }, 605 | { name: 'remained_column', type: 'INTEGER' }, 606 | { name: 'record', type: 'RECORD', fields: [ 607 | { name: 'record', type: 'RECORD', fields: [ 608 | { name: 'type_change', type: 'INTEGER' }, 609 | { name: 'remained_column', type: 'STRING' }, 610 | ] } 611 | ]} 612 | ] 613 | expected = columns.dup 614 | 615 | result = instance.migrate_table(columns: columns) 616 | after_columns = result[:after_columns] 617 | 618 | assert { Schema.diff_columns(expected, after_columns) == [] } 619 | end 620 | 621 | def test_mode_change 622 | before_columns = [ 623 | { name: 'mode_change', type: 'STRING', mode: 'REQUIRED' }, 624 | { name: 'remained_column', type: 'INTEGER' }, 625 | { name: 'record', type: 'RECORD', fields: [ 626 | { name: 'record', type: 'RECORD', fields: [ 627 | { name: 'mode_change', type: 'STRING', mode: 'REQUIRED' }, 628 | { name: 'remained_column', type: 'STRING' }, 629 | ] } 630 | ] } 631 | ] 632 | instance.create_table(columns: before_columns) 633 | 634 | columns = [ 635 | { name: 'mode_change', type: 'STRING', mode: 'NULLABLE' }, 636 | { name: 'remained_column', type: 'INTEGER' }, 
637 | { name: 'record', type: 'RECORD', fields: [ 638 | { name: 'record', type: 'RECORD', fields: [ 639 | { name: 'mode_change', type: 'STRING', mode: 'NULLABLE' }, 640 | { name: 'remained_column', type: 'STRING' }, 641 | ] } 642 | ] } 643 | ] 644 | expected = columns.dup 645 | 646 | result = instance.migrate_table(columns: columns) 647 | after_columns = result[:after_columns] 648 | 649 | assert { Schema.diff_columns(expected, after_columns) == [] } 650 | end 651 | end 652 | 653 | sub_test_case "migrate_partitioned_table" do 654 | def setup 655 | instance.drop_table 656 | end 657 | 658 | def teardown 659 | instance.drop_table 660 | end 661 | 662 | def test_create_partitioned_table 663 | columns = [ 664 | { name: 'remained_column', type: 'INTEGER' }, 665 | { name: 'record', type: 'RECORD', fields: [ 666 | { name: 'record', type: 'RECORD', fields: [ 667 | { name: 'remained_column', type: 'STRING' }, 668 | ] } 669 | ] } 670 | ] 671 | expected = columns.dup 672 | 673 | result = instance.migrate_partitioned_table(columns: columns) 674 | after_columns = result[:after_columns] 675 | 676 | assert { result[:responses][:insert_table].time_partitioning.type == 'DAY' } 677 | assert { Schema.diff_columns(expected, after_columns) == [] } 678 | assert { Schema.diff_columns(after_columns, expected) == [] } 679 | end 680 | 681 | def test_add_columns 682 | before_columns = [ 683 | { name: 'remained_column', type: 'INTEGER' }, 684 | { name: 'record', type: 'RECORD', fields: [ 685 | { name: 'record', type: 'RECORD', fields: [ 686 | { name: 'remained_column', type: 'STRING' }, 687 | ] } 688 | ] } 689 | ] 690 | instance.create_partitioned_table(columns: before_columns) 691 | 692 | columns = [ 693 | { name: 'remained_column', type: 'INTEGER' }, 694 | { name: 'record', type: 'RECORD', fields: [ 695 | { name: 'record', type: 'RECORD', fields: [ 696 | { name: 'remained_column', type: 'STRING' }, 697 | { name: 'new_column', type: 'INTEGER' }, 698 | { name: 'new_record', type: 'RECORD', fields: [ 699 | { name: 'new_column', type: 'INTEGER' }, 700 | ] } 701 | ] } 702 | ] }, 703 | { name: 'new_column', type: 'INTEGER' }, 704 | ] 705 | expected = columns.dup 706 | 707 | result = instance.migrate_partitioned_table(columns: columns) 708 | after_columns = result[:after_columns] 709 | 710 | assert { Schema.diff_columns(expected, after_columns) == [] } 711 | assert { Schema.diff_columns(after_columns, expected) == [] } 712 | end 713 | 714 | def test_add_drop 715 | before_columns = [ 716 | { name: 'remained_column', type: 'INTEGER' }, 717 | { name: 'record', type: 'RECORD', fields: [ 718 | { name: 'record', type: 'RECORD', fields: [ 719 | { name: 'remained_column', type: 'STRING' }, 720 | { name: 'drop_column', type: 'STRING' }, 721 | ] } 722 | ] }, 723 | { name: 'drop_column', type: 'INTEGER' }, 724 | ] 725 | instance.create_partitioned_table(columns: before_columns) 726 | 727 | columns = [ 728 | { name: 'remained_column', type: 'INTEGER' }, 729 | { name: 'record', type: 'RECORD', fields: [ 730 | { name: 'record', type: 'RECORD', fields: [ 731 | { name: 'remained_column', type: 'STRING' }, 732 | { name: 'add_column', type: 'INTEGER' }, 733 | ] }, 734 | ] }, 735 | { name: 'add_column', type: 'STRING', mode: 'REPEATED' }, 736 | { name: 'add_record', type: 'RECORD', fields: [ 737 | { name: 'add_column1', type: 'STRING' }, 738 | ]} 739 | ] 740 | 741 | expected = [ 742 | { name: 'remained_column', type: 'INTEGER' }, 743 | { name: 'record', type: 'RECORD', fields: [ 744 | { name: 'record', type: 'RECORD', fields: [ 745 | { name: 
'remained_column', type: 'STRING' }, 746 | { name: 'drop_column', type: 'STRING', mode: 'NULLABLE'}, 747 | { name: 'add_column', type: 'INTEGER' }, 748 | ] }, 749 | ] }, 750 | { name: 'drop_column', type: 'INTEGER', mode: 'NULLABLE' }, 751 | { name: 'add_column', type: 'STRING', mode: 'REPEATED' }, 752 | { name: 'add_record', type: 'RECORD', fields: [ 753 | { name: 'add_column1', type: 'STRING' }, 754 | ]} 755 | ] 756 | 757 | result = instance.migrate_partitioned_table(columns: columns) 758 | after_columns = result[:after_columns] 759 | 760 | assert { Schema.diff_columns(expected, after_columns) == [] } 761 | assert { Schema.diff_columns(after_columns, expected) == [] } 762 | end 763 | 764 | def test_type_change_raised 765 | before_columns = [ 766 | { name: 'type_change', type: 'STRING' }, 767 | ] 768 | instance.create_partitioned_table(columns: before_columns) 769 | 770 | columns = [ 771 | { name: 'type_change', type: 'INTEGER' }, 772 | ] 773 | 774 | assert_raise { instance.migrate_partitioned_table(columns: columns) } 775 | end 776 | 777 | sub_test_case "with clustering option" do 778 | def test_create_partitioned_table 779 | instance = BigqueryWrapper.new(config.merge({ 780 | clustering: { 781 | fields: ['remained_column_a', 'remained_column_b'], 782 | }, 783 | })) 784 | 785 | columns = [ 786 | { name: 'remained_column_a', type: 'STRING' }, 787 | { name: 'remained_column_b', type: 'INTEGER' }, 788 | { name: 'remained_column_c', type: 'INTEGER' }, 789 | { name: 'record', type: 'RECORD', fields: [ 790 | { name: 'record', type: 'RECORD', fields: [ 791 | { name: 'remained_column', type: 'STRING' }, 792 | ] } 793 | ] } 794 | ] 795 | expected = columns.dup 796 | 797 | result = instance.migrate_partitioned_table(columns: columns) 798 | after_columns = result[:after_columns] 799 | 800 | assert { result[:responses][:insert_table].time_partitioning.type == 'DAY' } 801 | assert { result[:responses][:insert_table].clustering.fields == ['remained_column_a', 'remained_column_b'] } 802 | assert { Schema.diff_columns(expected, after_columns) == [] } 803 | assert { Schema.diff_columns(after_columns, expected) == [] } 804 | ensure 805 | instance.drop_table 806 | end 807 | end 808 | end 809 | end 810 | end 811 | end 812 | -------------------------------------------------------------------------------- /lib/bigquery_migration/bigquery_wrapper.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | require 'json' 3 | require_relative 'schema' 4 | require_relative 'table_data' 5 | require_relative 'error' 6 | require_relative 'time_with_zone' 7 | require_relative 'hash_util' 8 | require 'google/apis/bigquery_v2' 9 | require 'google/api_client/auth/key_utils' 10 | require 'securerandom' 11 | require 'inifile' 12 | 13 | class BigqueryMigration 14 | class BigqueryWrapper 15 | attr_reader :config 16 | 17 | def logger 18 | BigqueryMigration.logger 19 | end 20 | 21 | def initialize(config, opts = {}) 22 | @config = HashUtil.deep_symbolize_keys(config) 23 | @opts = HashUtil.deep_symbolize_keys(opts) 24 | end 25 | 26 | def client 27 | return @cached_client if @cached_client && @cached_client_expiration > Time.now 28 | 29 | client = Google::Apis::BigqueryV2::BigqueryService.new 30 | client.request_options.retries = retries 31 | client.client_options.open_timeout_sec = open_timeout_sec 32 | if client.request_options.respond_to?(:timeout_sec) 33 | client.request_options.timeout_sec = timeout_sec 34 | else # google-api-ruby-client >= v0.11.0 35 | if timeout_sec 36 | 
logger.warn { "timeout_sec is deprecated in google-api-ruby-client >= v0.11.0. Use read_timeout_sec instead" } 37 | end 38 | client.client_options.send_timeout_sec = send_timeout_sec 39 | client.client_options.read_timeout_sec = read_timeout_sec 40 | end 41 | logger.debug { "client_options: #{client.client_options.to_h}" } 42 | logger.debug { "request_options: #{client.request_options.to_h}" } 43 | 44 | scope = "https://www.googleapis.com/auth/bigquery" 45 | 46 | case auth_method 47 | when 'authorized_user' 48 | auth = Signet::OAuth2::Client.new( 49 | token_credential_uri: "https://accounts.google.com/o/oauth2/token", 50 | audience: "https://accounts.google.com/o/oauth2/token", 51 | scope: scope, 52 | client_id: credentials[:client_id], 53 | client_secret: credentials[:client_secret], 54 | refresh_token: credentials[:refresh_token] 55 | ) 56 | auth.refresh! 57 | when 'compute_engine' 58 | auth = Google::Auth::GCECredentials.new 59 | when 'service_account' 60 | key = StringIO.new(credentials.to_json) 61 | auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope) 62 | when 'application_default' 63 | auth = Google::Auth.get_application_default([scope]) 64 | else 65 | raise ConfigError, "Unknown auth method: #{auth_method}" 66 | end 67 | 68 | client.authorization = auth 69 | 70 | @cached_client_expiration = Time.now + 1800 71 | @cached_client = client 72 | end 73 | 74 | def existing_columns 75 | begin 76 | result = get_table 77 | response = result[:responses][:get_table] 78 | return [] unless response 79 | return [] unless response.schema 80 | return [] unless response.schema.fields 81 | response.schema.fields.map {|column| column.to_h } 82 | rescue NotFoundError 83 | return [] 84 | end 85 | end 86 | 87 | def get_dataset(dataset: nil) 88 | dataset ||= self.dataset 89 | begin 90 | logger.info { "Get dataset... #{project}:#{dataset}" } 91 | response = client.get_dataset(project, dataset) 92 | rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e 93 | if e.status_code == 404 94 | raise NotFoundError, "Dataset #{project}:#{dataset} is not found" 95 | end 96 | 97 | response = {status_code: e.status_code, message: e.message, error_class: e.class} 98 | raise Error, "Failed to get_dataset(#{project}, #{dataset}), response:#{response}" 99 | end 100 | 101 | { responses: { get_dataset: response } } 102 | end 103 | 104 | def insert_dataset(dataset: nil, reference: nil) 105 | dataset ||= self.dataset 106 | begin 107 | logger.info { "#{head}Insert (create) dataset... #{project}:#{dataset}" } 108 | hint = {} 109 | if reference 110 | response = get_dataset(reference) 111 | hint = { access: response.access } 112 | end 113 | body = { 114 | dataset_reference: { 115 | project_id: project, 116 | dataset_id: dataset, 117 | }, 118 | }.merge(hint) 119 | body[:location] = location if location 120 | opts = {} 121 | 122 | logger.debug { "#{head}insert_dataset(#{project}, #{body}, #{opts})" } 123 | unless dry_run? 
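# Skipped on dry-run (response stays nil); a 409 "Already Exists" from the call is
# rescued below and turned into an empty result, so create_dataset is safe to re-run.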
124 | response = client.insert_dataset(project, body, opts) 125 | end 126 | rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e 127 | if e.status_code == 409 && /Already Exists:/ =~ e.message 128 | # ignore 'Already Exists' error 129 | return {} 130 | end 131 | 132 | response = {status_code: e.status_code, message: e.message, error_class: e.class} 133 | raise Error, "Failed to insert_dataset(#{project}, #{body}, #{opts}), response:#{response}" 134 | end 135 | 136 | { responses: { insert_dataset: response } } 137 | end 138 | alias :create_dataset :insert_dataset 139 | 140 | def get_table(dataset: nil, table: nil) 141 | dataset ||= self.dataset 142 | table ||= self.table 143 | begin 144 | logger.debug { "Get table... #{project}:#{dataset}.#{table}" } 145 | response = client.get_table(project, dataset, table) 146 | rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e 147 | if e.status_code == 404 # not found 148 | raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found" 149 | end 150 | 151 | response = {status_code: e.status_code, message: e.message, error_class: e.class} 152 | raise Error, "Failed to get_table(#{project}, #{dataset}, #{table}), response:#{response}" 153 | end 154 | 155 | result = {} 156 | if response 157 | result = { 158 | table_id: response.id, 159 | creation_time: response.creation_time.to_i, # millisec 160 | last_modified_time: response.last_modified_time.to_i, # millisec 161 | location: response.location, 162 | num_bytes: response.num_bytes.to_i, 163 | num_rows: response.num_rows.to_i, 164 | } 165 | end 166 | 167 | result.merge!({ responses: { get_table: response } }) 168 | end 169 | 170 | def insert_table(dataset: nil, table: nil, columns:, options: {}) 171 | dataset ||= self.dataset 172 | table ||= self.table 173 | raise Error, "columns is empty" if columns.empty? 174 | schema = Schema.new(columns) 175 | 176 | begin 177 | logger.info { "#{head}Insert (create) table... #{project}:#{dataset}.#{table}" } 178 | body = { 179 | table_reference: { 180 | table_id: table, 181 | }, 182 | schema: { 183 | fields: schema, 184 | } 185 | } 186 | 187 | if options['time_partitioning'] 188 | body[:time_partitioning] = { 189 | type: options['time_partitioning']['type'], 190 | expiration_ms: options['time_partitioning']['expiration_ms'], 191 | } 192 | end 193 | 194 | if clustering && clustering[:fields] 195 | body[:clustering] = { 196 | fields: clustering[:fields] 197 | } 198 | end 199 | 200 | opts = {} 201 | logger.debug { "#{head}insert_table(#{project}, #{dataset}, #{body}, #{opts})" } 202 | unless dry_run? 
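# body carries the schema plus the optional time_partitioning / clustering settings
# assembled above; as with insert_dataset, an existing table only triggers the
# ignored 409 "Already Exists" branch below.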
203 | response = client.insert_table(project, dataset, body, opts) 204 | end 205 | rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e 206 | if e.status_code == 409 && /Already Exists:/ =~ e.message 207 | # ignore 'Already Exists' error 208 | return {} 209 | end 210 | 211 | response = {status_code: e.status_code, message: e.message, error_class: e.class} 212 | raise Error, "Failed to insert_table(#{project}, #{dataset}, #{body}, #{opts}), response:#{response}" 213 | end 214 | 215 | { responses: { insert_table: response } } 216 | end 217 | alias :create_table :insert_table 218 | 219 | def insert_partitioned_table(dataset: nil, table: nil, columns:, options: {}) 220 | options['time_partitioning'] = {'type'=>'DAY'} 221 | insert_table(dataset: dataset, table: table, columns: columns, options: options) 222 | end 223 | alias :create_partitioned_table :insert_partitioned_table 224 | 225 | def delete_table(dataset: nil, table: nil) 226 | dataset ||= self.dataset 227 | table ||= self.table 228 | 229 | begin 230 | logger.info { "#{head}Delete (drop) table... #{project}:#{dataset}.#{table}" } 231 | unless dry_run? 232 | client.delete_table(project, dataset, table) # no response 233 | success = true 234 | end 235 | rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e 236 | if e.status_code == 404 && /Not found:/ =~ e.message 237 | # ignore 'Not Found' error 238 | return {} 239 | end 240 | 241 | response = {status_code: e.status_code, message: e.message, error_class: e.class} 242 | raise Error, "Failed to delete_table(#{project}, #{dataset}, #{table}), response:#{response}" 243 | end 244 | 245 | { success: success } 246 | end 247 | alias :drop_table :delete_table 248 | 249 | def list_tables(dataset: nil, max_results: 999999) 250 | dataset ||= self.dataset 251 | 252 | tables = [] 253 | begin 254 | logger.info { "List tables... 
#{project}:#{dataset}" } 255 | response = client.list_tables(project, dataset, max_results: max_results) 256 | while true 257 | _tables = (response.tables || []).map { |t| t.table_reference.table_id.to_s } 258 | tables.concat(_tables) 259 | if next_page_token = response.next_page_token 260 | response = client.list_tables(project, dataset, page_token: next_page_token, max_results: max_results) 261 | else 262 | break 263 | end 264 | end 265 | rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e 266 | if e.status_code == 404 && /Not found:/ =~ e.message 267 | raise NotFoundError, "Dataset #{project}:#{dataset} is not found" 268 | end 269 | 270 | response = {status_code: e.status_code, message: e.message, error_class: e.class} 271 | logger.error { "list_tables(#{project}, #{dataset}), response:#{response}" } 272 | raise Error, "failed to list tables #{project}:#{dataset}, response:#{response}" 273 | end 274 | 275 | { tables: tables } 276 | end 277 | 278 | def purge_tables(dataset: nil, table_prefix: , suffix_format: , purge_before: , timezone: nil) 279 | dataset ||= self.dataset 280 | timezone ||= Time.now.strftime('%z') 281 | 282 | before_tables = list_tables[:tables] 283 | 284 | purge_before_t = TimeWithZone.strptime_with_zone(purge_before, suffix_format, timezone) 285 | tables = before_tables.select do |tbl| 286 | suffix = tbl.gsub(table_prefix, '') 287 | begin 288 | suffix_t = TimeWithZone.strptime_with_zone(suffix, suffix_format, timezone) 289 | rescue 290 | next 291 | end 292 | # skip if different from the suffix_format 293 | next if suffix_t.strftime(suffix_format) != suffix 294 | suffix_t <= purge_before_t 295 | end 296 | 297 | tables.each do |_table| 298 | delete_table(table: _table) 299 | # If you make more than 100 requests per second, throttling might occur. 300 | # See https://cloud.google.com/bigquery/quota-policy#apirequests 301 | sleep 1 302 | end 303 | 304 | { delete_tables: tables } 305 | end 306 | 307 | # rows: 308 | # - id: 1 309 | # type: one 310 | # record: 311 | # child1: 'child1' 312 | # child2: 'child2' 313 | # - id: 2 314 | # type: two 315 | # record: 316 | # child1: 'child3' 317 | # child2: 'child4' 318 | def insert_all_table_data(dataset: nil, table: nil, rows: ) 319 | dataset ||= self.dataset 320 | table ||= self.table 321 | 322 | begin 323 | logger.info { "#{head}insertAll tableData... #{project}:#{dataset}.#{table}" } 324 | body = { 325 | rows: rows.map {|row| { json: row } }, 326 | } 327 | opts = {} 328 | unless dry_run? 
329 |           response = client.insert_all_table_data(project, dataset, table, body, opts)
330 |         end
331 |       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
332 |         if e.status_code == 404 # not found
333 |           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
334 |         end
335 |
336 |         response = {status_code: e.status_code, message: e.message, error_class: e.class}
337 |         logger.error {
338 |           "insert_all_table_data(#{project}, #{dataset}, #{table}, #{opts}), response:#{response}"
339 |         }
340 |         raise Error, "failed to insert_all table_data #{project}:#{dataset}.#{table}, response:#{response}"
341 |       end
342 |
343 |       { responses: { insert_all_table_data: response } }
344 |     end
345 |
346 |     # @return Hash result of list table_data
347 |     #
348 |     # Example:
349 |     # {
350 |     #   columns:
351 |     #     [
352 |     #       {
353 |     #         name: id,
354 |     #         type: INTEGER
355 |     #       },
356 |     #       {
357 |     #         name: type,
358 |     #         type: STRING
359 |     #       },
360 |     #       {
361 |     #         name: record.child1,
362 |     #         type: STRING
363 |     #       },
364 |     #       {
365 |     #         name: record.child2,
366 |     #         type: STRING
367 |     #       }],
368 |     #   values:
369 |     #     [
370 |     #       [2,"two","child3","child4"],
371 |     #       [1,"one","child1","child2"]
372 |     #     ],
373 |     #   total_rows: 2
374 |     # }
375 |     def list_table_data(dataset: nil, table: nil, max_results: 100)
376 |       dataset ||= self.dataset
377 |       table ||= self.table
378 |
379 |       begin
380 |         logger.info { "list_table_data(#{project}, #{dataset}, #{table}, max_results: #{max_results})" }
381 |         response = client.list_table_data(project, dataset, table, max_results: max_results)
382 |       rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
383 |         if e.status_code == 404 # not found
384 |           raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
385 |         end
386 |
387 |         response = {status_code: e.status_code, message: e.message, error_class: e.class}
388 |         logger.error { "list_table_data(#{project}, #{dataset}, #{table}, max_results: #{max_results}), response:#{response}" }
389 |         raise Error, "Failed to list table_data #{project}:#{dataset}.#{table}, response:#{response}"
390 |       end
391 |
392 |       columns = existing_columns
393 |       flattened_columns = Schema.new(columns).flattened_columns.map do |name, column|
394 |         {name: name}.merge!(column)
395 |       end
396 |       if rows = response.to_h[:rows]
397 |         values = TableData.new(columns, rows).values
398 |       end
399 |
400 |       {
401 |         total_rows: response.total_rows,
402 |         columns: flattened_columns,
403 |         values: values,
404 |         responses: {
405 |           list_table_data: response,
406 |         }
407 |       }
408 |     end
409 |
410 |     def patch_table(dataset: nil, table: nil, columns: nil, add_columns: nil)
411 |       dataset ||= self.dataset
412 |       table ||= self.table
413 |
414 |       if columns.nil? and add_columns.nil?
415 |         raise ArgumentError, 'patch_table: `columns` or `add_columns` is required'
416 |       end
417 |
418 |       before_columns = existing_columns
419 |       if columns # if already given
420 |         schema = Schema.new(columns)
421 |       else
422 |         schema = Schema.new(add_columns)
423 |         schema.reverse_merge!(before_columns)
424 |       end
425 |       schema.validate_permitted_operations!(before_columns)
426 |
427 |       begin
428 |         logger.info { "#{head}Patch table... 
#{project}:#{dataset}.#{table}" } 429 | fields = schema.map {|column| HashUtil.deep_symbolize_keys(column) } 430 | body = { 431 | schema: { 432 | fields: fields, 433 | } 434 | } 435 | opts = {} 436 | logger.debug { "#{head}patch_table(#{project}, #{dataset}, #{table}, #{body}, options: #{opts})" } 437 | unless dry_run? 438 | response = client.patch_table(project, dataset, table, body, options: opts) 439 | end 440 | rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e 441 | if e.status_code == 404 # not found 442 | raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found" 443 | end 444 | 445 | response = {status_code: e.status_code, message: e.message, error_class: e.class} 446 | logger.error { 447 | "patch_table(#{project}, #{dataset}, #{table}, #{body}, options: #{opts}), response:#{response}" 448 | } 449 | raise Error, "Failed to patch table #{project}:#{dataset}.#{table}, response:#{response}" 450 | end 451 | 452 | after_columns = existing_columns 453 | 454 | { 455 | before_columns: before_columns, 456 | after_columns: after_columns, 457 | responses: { patch_table: response }, 458 | } 459 | end 460 | alias :add_column :patch_table 461 | 462 | def copy_table(destination_table:, destination_dataset: nil, source_table: nil, source_dataset: nil, write_disposition: nil) 463 | source_table ||= self.table 464 | source_dataset ||= self.dataset 465 | destination_dataset ||= source_dataset 466 | write_disposition ||= 'WRITE_TRUNCATE' 467 | 468 | body = { 469 | job_reference: { 470 | project_id: self.project, 471 | job_id: "job_#{SecureRandom.uuid}", 472 | }, 473 | configuration: { 474 | copy: { 475 | create_deposition: 'CREATE_IF_NEEDED', 476 | write_disposition: write_disposition, 477 | source_table: { 478 | project_id: project, 479 | dataset_id: source_dataset, 480 | table_id: source_table, 481 | }, 482 | destination_table: { 483 | project_id: project, 484 | dataset_id: destination_dataset, 485 | table_id: destination_table, 486 | }, 487 | } 488 | } 489 | } 490 | body[:job_reference][:location] = location if location 491 | opts = {} 492 | 493 | logger.info { "#{head}insert_job(#{project}, #{body}, #{opts})" } 494 | unless dry_run? 495 | response = client.insert_job(project, body, opts) 496 | get_response = wait_load('copy', response) 497 | end 498 | 499 | { 500 | responses: { 501 | insert_job: response, 502 | last_get_job: get_response, 503 | } 504 | } 505 | end 506 | 507 | def insert_select(query:, destination_table: nil, destination_dataset: nil, write_disposition: nil) 508 | destination_table ||= self.table 509 | destination_dataset ||= self.dataset 510 | write_disposition ||= 'WRITE_TRUNCATE' 511 | 512 | body = { 513 | job_reference: { 514 | project_id: self.project, 515 | job_id: "job_#{SecureRandom.uuid}", 516 | }, 517 | configuration: { 518 | query: { 519 | allow_large_results: true, 520 | flatten_results: false, 521 | write_disposition: write_disposition, 522 | query: query, 523 | destination_table: { 524 | project_id: self.project, 525 | dataset_id: destination_dataset, 526 | table_id: destination_table, 527 | }, 528 | } 529 | } 530 | } 531 | body[:job_reference][:location] = location if location 532 | opts = {} 533 | 534 | logger.info { "#{head}insert_job(#{project}, #{body}, #{opts})" } 535 | unless dry_run? 
536 |         response = client.insert_job(project, body, opts)
537 |         get_response = wait_load('query', response)
538 |       end
539 |
540 |       {
541 |         responses: {
542 |           insert_job: response,
543 |           last_get_job: get_response,
544 |         }
545 |       }
546 |     end
547 |
548 |     def wait_load(kind, response)
549 |       started = Time.now
550 |
551 |       wait_interval = self.job_status_polling_interval
552 |       max_polling_time = self.job_status_max_polling_time
553 |       _response = response
554 |
555 |       while true
556 |         job_id = _response.job_reference.job_id
557 |         elapsed = Time.now - started
558 |         status = _response.status.state
559 |         if status == "DONE"
560 |           logger.info {
561 |             "#{kind} job completed... " \
562 |             "job_id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
563 |           }
564 |           break
565 |         elsif elapsed.to_i > max_polling_time
566 |           message = "#{kind} job checking... " \
567 |             "job_id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[TIMEOUT]"
568 |           logger.info { message }
569 |           raise JobTimeoutError.new(message)
570 |         else
571 |           logger.info {
572 |             "#{kind} job checking... " \
573 |             "job_id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
574 |           }
575 |           sleep wait_interval
576 |           if support_location_keyword?
577 |             _response = client.get_job(project, job_id, location: location)
578 |           else
579 |             _response = client.get_job(project, job_id)
580 |           end
581 |         end
582 |       end
583 |
584 |       # cf. http://www.rubydoc.info/github/google/google-api-ruby-client/Google/Apis/BigqueryV2/JobStatus#errors-instance_method
585 |       # `errors` returns an Array if any error exists.
586 |       # Otherwise, it returns nil.
587 |       if _errors = _response.status.errors
588 |         raise Error, "Failed while waiting for a job, get_job(#{project}, #{job_id}), errors:#{_errors.map(&:to_h)}"
589 |       end
590 |
591 |       _response
592 |     end
593 |
594 |     def drop_column(table: nil, columns: nil, drop_columns: nil, backup_dataset: nil, backup_table: nil)
595 |       table ||= self.table
596 |       backup_dataset ||= self.dataset
597 |       if columns.nil? and drop_columns.nil?
598 |         raise ArgumentError, '`drop_columns` or `columns` is required'
599 |       end
600 |
601 |       result = { responses: {} }
602 |
603 |       before_columns = existing_columns
604 |
605 |       if columns # if already given
606 |         schema = Schema.new(columns)
607 |       else
608 |         schema = Schema.new(existing_columns)
609 |         schema.reject_columns!(drop_columns)
610 |       end
611 |       if schema.empty? && !dry_run?
612 |         raise Error, 'No columns remain'
613 |       end
614 |
615 |       schema.validate_permitted_operations!(before_columns)
616 |
617 |       unless backup_dataset == self.dataset
618 |         create_dataset(dataset: backup_dataset)
619 |       end
620 |
621 |       if backup_table
622 |         _result = copy_table(source_table: table, destination_table: backup_table, destination_dataset: backup_dataset)
623 |         result[:responses].merge!(_result[:responses])
624 |       end
625 |
626 |       unless (add_columns = schema.diff_columns_by_name(before_columns)).empty?
627 |         _result = patch_table(add_columns: add_columns)
628 |         result[:responses].merge!(_result[:responses])
629 |       end
630 |
631 |       query_fields = schema.build_query_fields(before_columns)
632 |       query = "SELECT #{query_fields.join(',')} FROM [#{dataset}.#{table}]"
633 |       _result = insert_select(query: query, destination_table: table)
634 |       result[:responses].merge!(_result[:responses])
635 |
636 |       after_columns = existing_columns
637 |
638 |       result.merge!({before_columns: before_columns, after_columns: after_columns})
639 |     end
640 |
641 |     def migrate_table(table: nil, schema_file: nil, columns: nil, backup_dataset: nil, backup_table: nil)
642 |       table ||= self.table
643 |       backup_dataset ||= self.dataset
644 |
645 |       if schema_file.nil? and columns.nil?
646 |         raise ArgumentError, '`schema_file` or `columns` is required'
647 |       end
648 |       if schema_file
649 |         columns = HashUtil.deep_symbolize_keys(JSON.parse(File.read(schema_file)))
650 |       end
651 |       Schema.validate_columns!(columns)
652 |
653 |       result = {}
654 |       begin
655 |         get_table
656 |       rescue NotFoundError
657 |         before_columns = []
658 |         result = create_table(table: table, columns: columns)
659 |       else
660 |         before_columns = existing_columns
661 |         add_columns = Schema.diff_columns(before_columns, columns)
662 |         drop_columns = Schema.diff_columns(columns, before_columns)
663 |
664 |         if !drop_columns.empty?
665 |           drop_column(table: table, columns: columns,
666 |                       backup_dataset: backup_dataset, backup_table: backup_table)
667 |         elsif !add_columns.empty?
668 |           add_column(table: table, columns: columns)
669 |         end
670 |       end
671 |
672 |       after_columns = existing_columns
673 |
674 |       if after_columns.empty? and !dry_run?
675 |         raise Error, "after_columns is empty. " \
676 |           "before_columns: #{before_columns}, after_columns: #{after_columns}, columns: #{columns}"
677 |       end
678 |
679 |       result.merge!( before_columns: before_columns, after_columns: after_columns )
680 |     end
681 |
682 |     # migrates (or creates) a table with the time_partitioning option
683 |     # this version only uses the patch table API (no query job) because running a query job against a partitioned table can be costly
684 |     def migrate_partitioned_table(table: nil, schema_file: nil, columns: nil, options: {})
685 |       table ||= self.table
686 |
687 |       if schema_file.nil? and columns.nil?
688 |         raise ArgumentError, '`schema_file` or `columns` is required'
689 |       end
690 |       if schema_file
691 |         columns = HashUtil.deep_symbolize_keys(JSON.parse(File.read(schema_file)))
692 |       end
693 |       Schema.validate_columns!(columns)
694 |
695 |       result = {}
696 |       begin
697 |         get_table
698 |       rescue NotFoundError
699 |         before_columns = []
700 |         result = create_partitioned_table(table: table, columns: columns, options: options)
701 |       else
702 |         before_columns = existing_columns
703 |         add_columns = Schema.diff_columns(before_columns, columns)
704 |         drop_columns = Schema.diff_columns(columns, before_columns)
705 |
706 |         if !drop_columns.empty? || !add_columns.empty?
707 |           Schema.make_nullable!(drop_columns) # dropped columns are kept as NULLABLE columns
708 |           Schema.reverse_merge!(columns, patch_columns = drop_columns)
709 |           Schema.reverse_merge!(patch_columns, patch_columns = add_columns)
710 |           patch_table(table: table, columns: patch_columns)
711 |         end
712 |       end
713 |
714 |       after_columns = existing_columns
715 |
716 |       if after_columns.empty? and !dry_run?
717 |         raise Error, "after_columns is empty. 
" \ 718 | "before_columns: #{before_columns}, after_columns: #{after_columns}, columns: #{columns}" 719 | end 720 | 721 | result.merge!( before_columns: before_columns, after_columns: after_columns ) 722 | end 723 | 724 | # the location keyword arguments are available in google-api-client v0.19.6 or later 725 | def support_location_keyword? 726 | @support_location_keyword ||= client.method(:get_job).parameters.include?([:key, :location]) 727 | end 728 | 729 | # For old version compatibility 730 | # Use credentials_file or credentials instead 731 | def json_key 732 | if json_keyfile = config[:json_keyfile] 733 | begin 734 | case json_keyfile 735 | when String 736 | return HashUtil.deep_symbolize_keys(JSON.parse(File.read(json_keyfile))) 737 | when Hash 738 | case json_keyfile[:content] 739 | when String 740 | return HashUtil.deep_symbolize_keys(JSON.parse(json_keyfile[:content])) 741 | when Hash 742 | return json_keyfile[:content] 743 | else 744 | raise ConfigError.new "Unsupported json_keyfile type" 745 | end 746 | else 747 | raise ConfigError.new "Unsupported json_keyfile type" 748 | end 749 | rescue => e 750 | raise ConfigError.new "json_keyfile is not a JSON file" 751 | end 752 | end 753 | nil 754 | end 755 | 756 | # compute_engine, authorized_user, service_account 757 | def auth_method 758 | @auth_method ||= ENV['AUTH_METHOD'] || config.fetch(:auth_method, nil) || credentials[:type] || 'compute_engine' 759 | end 760 | 761 | def credentials 762 | json_key || HashUtil.deep_symbolize_keys(JSON.parse(config.fetch(:credentials, nil) || File.read(credentials_file))) 763 | end 764 | 765 | def credentials_file 766 | @credentials_file ||= File.expand_path( 767 | # ref. https://developers.google.com/identity/protocols/application-default-credentials 768 | ENV['GOOGLE_APPLICATION_CREDENTIALS'] || 769 | config.fetch(:credentials_file, nil) || 770 | (File.exist?(global_application_default_credentials_file) ? global_application_default_credentials_file : application_default_credentials_file) 771 | ) 772 | end 773 | 774 | def application_default_credentials_file 775 | @application_default_credentials_file ||= File.expand_path("~/.config/gcloud/application_default_credentials.json") 776 | end 777 | 778 | def global_application_default_credentials_file 779 | @global_application_default_credentials_file ||= '/etc/google/auth/application_default_credentials.json' 780 | end 781 | 782 | def config_default_file 783 | File.expand_path('~/.config/gcloud/configurations/config_default') 784 | end 785 | 786 | def config_default 787 | # {core:{account:'xxx',project:'xxx'},compute:{zone:'xxx}} 788 | @config_default ||= File.readable?(config_default_file) ? HashUtil.deep_symbolize_keys(IniFile.load(config_default_file).to_h) : {} 789 | end 790 | 791 | def service_account_default 792 | (config_default[:core] || {})[:account] 793 | end 794 | 795 | def project_default 796 | (config_default[:core] || {})[:project] 797 | end 798 | 799 | def zone_default 800 | (config_default[:compute] || {})[:zone] 801 | end 802 | 803 | def service_account 804 | @service_account ||= ENV['GOOGLE_SERVICE_ACCOUNT'] || config.fetch(:service_account, nil) || credentials[:client_email] || service_account_default 805 | end 806 | 807 | def retries 808 | @retries ||= ENV['RETRIES'] || config.fetch(:retries, nil) || 5 809 | end 810 | 811 | # For google-api-client < 0.11.0. 
Deprecated 812 | def timeout_sec 813 | @timeout_sec ||= ENV['TIMEOUT_SEC'] || config.fetch(:timeout_sec, nil) 814 | end 815 | 816 | def send_timeout_sec 817 | @send_timeout_sec ||= ENV['SEND_TIMEOUT_SEC'] || config.fetch(:send_timeout_sec, nil) || 60 818 | end 819 | 820 | def read_timeout_sec 821 | @read_timeout_sec ||= ENV['READ_TIMEOUT_SEC'] || config.fetch(:read_timeout_sec, nil) || timeout_sec || 300 822 | end 823 | 824 | def open_timeout_sec 825 | @open_timeout_sec ||= ENV['OPEN_TIMEOUT_SEC'] || config.fetch(:open_timeout_sec, nil) || 300 826 | end 827 | 828 | def project 829 | @project ||= ENV['GOOGLE_PROJECT'] || config.fetch(:project, nil) || credentials[:project_id] 830 | @project ||= credentials[:client_email].chomp('.iam.gserviceaccount.com').split('@').last if credentials[:client_email] 831 | @project ||= project_default || raise(ConfigError, '`project` is required.') 832 | end 833 | 834 | def dataset 835 | @dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.') 836 | end 837 | 838 | def table 839 | @table ||= config[:table] || raise(ConfigError, '`table` is required.') 840 | end 841 | 842 | def location 843 | config[:location] 844 | end 845 | 846 | def clustering 847 | config[:clustering] 848 | end 849 | 850 | def job_status_polling_interval 851 | @job_status_polling_interval ||= config[:job_status_polling_interval] || 5 852 | end 853 | 854 | def job_status_max_polling_time 855 | @job_status_max_polling_time ||= config[:job_status_polling_time] || 3600 856 | end 857 | 858 | def dry_run? 859 | @opts[:dry_run] 860 | end 861 | 862 | def head 863 | dry_run? ? '(DRY-RUN) ' : '(EXECUTE) ' 864 | end 865 | end 866 | end 867 | --------------------------------------------------------------------------------
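Usage note (not part of the repository sources above): the wrapper is normally driven through the CLI and the YAML action files, but its methods can also be called directly for quick experiments. The sketch below is a minimal, hypothetical example; it assumes the class is BigqueryMigration::BigqueryWrapper and that its initializer accepts a config hash plus an options hash (inferred from how config[:dataset], config[:table], and @opts[:dry_run] are read in the file above), so verify the actual entry point in lib/bigquery_migration.rb before relying on it.

require 'bigquery_migration'

# Hypothetical direct use of the wrapper defined above; the constructor
# signature is an assumption inferred from this file, not a documented API.
config = {
  credentials_file: 'example/your-project-000.json',
  dataset: 'your_dataset_name',
  table: 'your_table_name',
}
wrapper = BigqueryMigration::BigqueryWrapper.new(config, dry_run: false)

# migrate_table diffs the existing schema against the given schema file and,
# as implemented above, either patches the table (added columns) or rebuilds
# it via copy_table + insert_select (dropped columns).
result = wrapper.migrate_table(schema_file: 'example/schema.json')
p result[:before_columns]
p result[:after_columns]

Passing dry_run: true would make the methods above log their intended requests with the '(DRY-RUN) ' prefix instead of calling the BigQuery API.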