├── .rspec ├── lib ├── real_data_tests │ ├── version.rb │ ├── engine.rb │ ├── test_data_builder.rb │ ├── data_anonymizer.rb │ ├── configuration.rb │ ├── record_collector.rb │ ├── pg_dump_generator.rb │ └── rspec_helper.rb └── real_data_tests.rb ├── bin ├── setup ├── console └── real_data_tests ├── .gitignore ├── Rakefile ├── spec ├── support │ └── database.rb ├── spec_helper.rb ├── real_data_tests │ ├── engine_spec.rb │ ├── pg_dump_generator_spec.rb │ ├── data_anonymizer_spec.rb │ ├── rspec_helper_spec.rb │ ├── record_collector_spec.rb │ ├── record_collector_polymorphic_spec.rb │ └── sql_cleaner_spec.rb └── real_data_tests_spec.rb ├── Gemfile ├── LICENSE.txt ├── real_data_tests.gemspec ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── Gemfile.lock └── README.md /.rspec: -------------------------------------------------------------------------------- 1 | --format documentation 2 | --color 3 | --require spec_helper 4 | -------------------------------------------------------------------------------- /lib/real_data_tests/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module RealDataTests 4 | VERSION = "0.3.17" 5 | end -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /_yardoc/ 4 | /coverage/ 5 | /doc/ 6 | /pkg/ 7 | /spec/reports/ 8 | /tmp/ 9 | 10 | # rspec failure tracking 11 | .rspec_status 12 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "bundler/gem_tasks" 4 | require "rspec/core/rake_task" 5 | 6 | RSpec::Core::RakeTask.new(:spec) 7 | 8 | task default: :spec 9 | -------------------------------------------------------------------------------- /spec/support/database.rb: -------------------------------------------------------------------------------- 1 | require 'active_record' 2 | 3 | ActiveRecord::Base.establish_connection( 4 | adapter: 'postgresql', 5 | database: 'real_data_tests_test', 6 | host: 'localhost' 7 | ) -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source "https://rubygems.org" 4 | 5 | # Specify your gem's dependencies in real_data_tests.gemspec 6 | gemspec 7 | 8 | gem "rake", "~> 13.0" 9 | 10 | gem "rspec", "~> 3.0" 11 | -------------------------------------------------------------------------------- /lib/real_data_tests/engine.rb: -------------------------------------------------------------------------------- 1 | require 'rails' 2 | 3 | module RealDataTests 4 | class Engine < ::Rails::Engine 5 | isolate_namespace RealDataTests 6 | 7 | config.before_configuration do 8 | RealDataTests.configuration 9 | end 10 | end 11 | end -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | require "bundler/setup" 5 | require "real_data_tests" 6 | 7 | # You can add fixtures and/or initialization code here to make experimenting 8 | # with your gem easier. You can also use a different console, if you like. 9 | 10 | # (If you use this, don't forget to add pry to your Gemfile!) 11 | # require "pry" 12 | # Pry.start 13 | 14 | require "irb" 15 | IRB.start(__FILE__) 16 | -------------------------------------------------------------------------------- /bin/real_data_tests: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "thor" 4 | require "real_data_tests" 5 | 6 | module RealDataTests 7 | class CLI < Thor 8 | desc "create_dump MODEL_NAME ID [NAME]", "Create a dump file from a record" 9 | def create_dump(model_name, id, name = nil) 10 | require "./config/environment" 11 | 12 | model = model_name.classify.constantize 13 | record = model.find(id) 14 | 15 | dump_path = RealDataTests.create_dump_file(record, name: name) 16 | puts "Created dump file: #{dump_path}" 17 | end 18 | end 19 | end 20 | 21 | RealDataTests::CLI.start(ARGV) -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2025 Kevin Dias 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/real_data_tests/test_data_builder.rb: -------------------------------------------------------------------------------- 1 | module RealDataTests 2 | class TestDataBuilder 3 | def initialize(record, name: nil) 4 | @record = record 5 | @name = name || "#{record.class.name.underscore}_#{record.id}" 6 | end 7 | 8 | def create_dump_file 9 | records = RealDataTests::RecordCollector.new(@record).collect 10 | 11 | # Only anonymize if rules are configured in the current preset 12 | if RealDataTests.configuration.current_preset.anonymization_rules.any? 13 | puts "\nAnonymizing records..." 14 | anonymizer = RealDataTests::DataAnonymizer.new(RealDataTests.configuration.current_preset) 15 | records = anonymizer.anonymize_records(records) 16 | end 17 | 18 | dump_content = RealDataTests::PgDumpGenerator.new(records).generate 19 | dump_path = dump_file_path 20 | FileUtils.mkdir_p(File.dirname(dump_path)) 21 | File.write(dump_path, dump_content) 22 | puts "\nDump file created at: #{dump_path}" 23 | dump_path 24 | end 25 | 26 | private 27 | 28 | def dump_file_path 29 | File.join(RealDataTests.configuration.dump_path, "#{@name}.sql") 30 | end 31 | end 32 | end -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | ENV['RAILS_ENV'] = 'test' 3 | 4 | require 'rails' 5 | require 'active_record' 6 | require 'real_data_tests' 7 | require 'database_cleaner/active_record' 8 | require 'support/database' 9 | 10 | RSpec.configure do |config| 11 | # Enable flags like --only-failures and --next-failure 12 | config.example_status_persistence_file_path = ".rspec_status" 13 | 14 | # Disable RSpec exposing methods globally on `Module` and `main` 15 | config.disable_monkey_patching! 16 | 17 | config.expect_with :rspec do |c| 18 | c.syntax = :expect 19 | end 20 | 21 | config.before(:suite) do 22 | # Make sure connection is established before setting up DatabaseCleaner 23 | begin 24 | ActiveRecord::Base.connection 25 | rescue ActiveRecord::NoDatabaseError 26 | system('createdb real_data_tests_test') 27 | ActiveRecord::Base.establish_connection( 28 | adapter: 'postgresql', 29 | database: 'real_data_tests_test', 30 | host: 'localhost' 31 | ) 32 | end 33 | 34 | DatabaseCleaner.strategy = :transaction 35 | DatabaseCleaner.clean_with(:truncation) 36 | end 37 | 38 | config.around(:each) do |example| 39 | DatabaseCleaner.cleaning do 40 | example.run 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /lib/real_data_tests/data_anonymizer.rb: -------------------------------------------------------------------------------- 1 | # lib/real_data_tests/data_anonymizer.rb 2 | require 'faker' 3 | 4 | module RealDataTests 5 | class DataAnonymizer 6 | def initialize(preset_config) 7 | @preset_config = preset_config 8 | end 9 | 10 | def anonymize_records(records) 11 | records.map do |record| 12 | anonymize_record(record) 13 | end 14 | end 15 | 16 | def anonymize_record(record) 17 | return record unless should_anonymize?(record) 18 | 19 | anonymization_rules = @preset_config.anonymization_rules[record.class.name] 20 | anonymization_rules.each do |attribute, anonymizer| 21 | begin 22 | new_value = case anonymizer 23 | when String 24 | process_faker_string(anonymizer) 25 | when Proc, Lambda 26 | anonymizer.call(record) 27 | else 28 | raise Error, "Unsupported anonymizer type: #{anonymizer.class}" 29 | end 30 | record.send("#{attribute}=", new_value) 31 | rescue => e 32 | raise Error, "Failed to anonymize #{attribute} using #{anonymizer.inspect}: #{e.message}" 33 | end 34 | end 35 | record 36 | end 37 | 38 | private 39 | 40 | def should_anonymize?(record) 41 | @preset_config.anonymization_rules.key?(record.class.name) 42 | end 43 | 44 | def process_faker_string(faker_method) 45 | faker_class, faker_method = faker_method.split('::')[1..].join('::').split('.') 46 | faker_class = Object.const_get("Faker::#{faker_class}") 47 | faker_class.send(faker_method) 48 | rescue => e 49 | raise Error, "Failed to process Faker method '#{faker_method}': #{e.message}" 50 | end 51 | end 52 | end -------------------------------------------------------------------------------- /real_data_tests.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative "lib/real_data_tests/version" 4 | 5 | Gem::Specification.new do |spec| 6 | spec.name = "real_data_tests" 7 | spec.version = RealDataTests::VERSION 8 | spec.authors = ["Kevin Dias"] 9 | spec.email = ["diasks2@gmail.com"] 10 | 11 | spec.summary = "Create realistic test data from local db records" 12 | spec.description = "A Ruby gem that helps create test data by analyzing and extracting real records and their associations from your database." 13 | spec.homepage = "https://github.com/diasks2/real_data_tests" 14 | spec.license = "MIT" 15 | spec.required_ruby_version = ">= 2.6.0" 16 | 17 | spec.metadata["homepage_uri"] = spec.homepage 18 | spec.metadata["source_code_uri"] = spec.homepage 19 | spec.metadata["changelog_uri"] = spec.homepage 20 | 21 | # Specify which files should be added to the gem when it is released. 22 | # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 23 | spec.files = Dir.chdir(__dir__) do 24 | `git ls-files -z`.split("\x0").reject do |f| 25 | (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)}) 26 | end 27 | end 28 | spec.bindir = "exe" 29 | spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) } 30 | spec.require_paths = ["lib"] 31 | 32 | spec.add_dependency "rails", ">= 5.0" 33 | spec.add_dependency "activerecord", ">= 5.0" 34 | spec.add_dependency "thor", "~> 1.0" 35 | spec.add_dependency "pg", ">= 1.1" 36 | spec.add_development_dependency "rake", "~> 13.0" 37 | spec.add_development_dependency "rspec", "~> 3.0" 38 | spec.add_development_dependency "database_cleaner", "~> 2.0" 39 | spec.add_development_dependency "database_cleaner-active_record" 40 | spec.add_dependency "faker", "~> 3.0" 41 | end -------------------------------------------------------------------------------- /lib/real_data_tests.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rails' 4 | require_relative 'real_data_tests/version' 5 | require_relative 'real_data_tests/configuration' 6 | require_relative 'real_data_tests/data_anonymizer' 7 | require_relative 'real_data_tests/engine' if defined?(Rails) 8 | require_relative 'real_data_tests/pg_dump_generator' 9 | require_relative 'real_data_tests/record_collector' 10 | require_relative 'real_data_tests/rspec_helper' 11 | require_relative 'real_data_tests/test_data_builder' 12 | 13 | module RealDataTests 14 | class Error < StandardError; end 15 | class ConfigurationError < Error; end 16 | class DumpFileError < Error; end 17 | 18 | class << self 19 | def configuration 20 | @configuration ||= Configuration.new 21 | end 22 | 23 | def configure 24 | yield(configuration) if block_given? 25 | configuration 26 | end 27 | 28 | def reset_configuration! 29 | @configuration = Configuration.new 30 | end 31 | 32 | def use_preset(name) 33 | configuration.use_preset(name) 34 | end 35 | 36 | def with_preset(name) 37 | previous_preset = configuration.current_preset 38 | configuration.use_preset(name) 39 | yield if block_given? 40 | ensure 41 | configuration.current_preset = previous_preset 42 | end 43 | 44 | def create_dump_file(record, name: nil) 45 | raise ConfigurationError, "Configuration not initialized" unless @configuration 46 | 47 | begin 48 | TestDataBuilder.new(record, name: name).create_dump_file 49 | rescue => e 50 | raise DumpFileError, "Failed to create dump file: #{e.message}" 51 | end 52 | end 53 | 54 | def root 55 | File.expand_path('../..', __FILE__) 56 | end 57 | 58 | def env 59 | @env ||= (ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development') 60 | end 61 | end 62 | end -------------------------------------------------------------------------------- /spec/real_data_tests/engine_spec.rb: -------------------------------------------------------------------------------- 1 | # spec/real_data_tests/engine_spec.rb 2 | require 'spec_helper' 3 | require 'rails' 4 | 5 | RSpec.describe RealDataTests::Engine do 6 | # Create a minimal Rails application for testing 7 | def create_rails_app 8 | Class.new(Rails::Application) do 9 | config.eager_load = false 10 | config.active_support.deprecation = :log 11 | config.secret_key_base = 'test' 12 | end 13 | end 14 | 15 | before(:all) do 16 | @original_rails = Rails.application if defined?(Rails.application) 17 | @app = create_rails_app 18 | Rails.application = @app 19 | end 20 | 21 | after(:all) do 22 | Rails.application = @original_rails 23 | end 24 | 25 | before(:each) do 26 | RealDataTests.reset_configuration! 27 | end 28 | 29 | it "initializes configuration when loaded" do 30 | expect(RealDataTests.configuration).not_to be_nil 31 | expect(RealDataTests.configuration.presets).to include(:default) 32 | end 33 | 34 | it "maintains preset configuration" do 35 | RealDataTests.configure do |config| 36 | config.preset(:test_preset) do |p| 37 | p.include_associations(:user, :profile) 38 | p.anonymize('User', { 39 | email: -> (_) { "anonymous@example.com" } 40 | }) 41 | end 42 | end 43 | 44 | expect(RealDataTests.configuration.presets).to include(:test_preset) 45 | preset = RealDataTests.configuration.presets[:test_preset] 46 | expect(preset.association_filter_list).to contain_exactly(:user, :profile) 47 | expect(preset.anonymization_rules['User']).to be_present 48 | end 49 | 50 | it "isolates the engine namespace" do 51 | expect(RealDataTests::Engine.isolated?).to be true 52 | end 53 | 54 | it "loads as a Rails engine" do 55 | expect(Rails.application.railties.any? { |r| r.is_a?(RealDataTests::Engine) }) 56 | .to be true 57 | end 58 | 59 | describe "configuration" do 60 | it "allows setting configuration after initialization" do 61 | RealDataTests.configure do |config| 62 | config.dump_path = "custom/path" 63 | end 64 | 65 | expect(RealDataTests.configuration.dump_path).to eq("custom/path") 66 | end 67 | 68 | it "preserves configuration across resets" do 69 | RealDataTests.configure do |config| 70 | config.preset(:test_preset) do |p| 71 | p.include_associations(:test) 72 | end 73 | end 74 | 75 | original_preset = RealDataTests.configuration.presets[:test_preset] 76 | RealDataTests.reset_configuration! 77 | 78 | # Configuration should start fresh after reset 79 | expect(RealDataTests.configuration.presets).not_to include(:test_preset) 80 | expect(RealDataTests.configuration.presets[:default]).to be_present 81 | end 82 | end 83 | end -------------------------------------------------------------------------------- /spec/real_data_tests/pg_dump_generator_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'spec_helper' 4 | 5 | RSpec.describe RealDataTests::PgDumpGenerator do 6 | describe '#generate' do 7 | context 'with JSONB fields' do 8 | before(:all) do 9 | # Define our mock class at the top level 10 | class MockTreatmentReport 11 | attr_reader :id, :service_history_log_data 12 | 13 | def initialize(id, service_history_log_data) 14 | @id = id 15 | @service_history_log_data = service_history_log_data 16 | end 17 | 18 | def self.table_name 19 | 'treatment_reports' 20 | end 21 | 22 | def self.column_names 23 | ['id', 'service_history_log_data'] 24 | end 25 | 26 | def self.columns_hash 27 | { 28 | 'id' => OpenStruct.new( 29 | name: 'id', 30 | type: :integer, 31 | sql_type: 'integer', 32 | array: false 33 | ), 34 | 'service_history_log_data' => OpenStruct.new( 35 | name: 'service_history_log_data', 36 | type: :jsonb, 37 | sql_type: 'jsonb', 38 | array: false 39 | ) 40 | } 41 | end 42 | 43 | def self.reflect_on_all_associations(macro = nil) 44 | [] 45 | end 46 | 47 | def [](name) 48 | instance_variable_get("@#{name}") 49 | end 50 | end 51 | end 52 | 53 | after(:all) do 54 | Object.send(:remove_const, :MockTreatmentReport) if defined?(MockTreatmentReport) 55 | end 56 | 57 | let(:record_with_empty_jsonb) do 58 | MockTreatmentReport.new(1, "") 59 | end 60 | 61 | let(:record_with_nil_jsonb) do 62 | MockTreatmentReport.new(1, nil) 63 | end 64 | 65 | let(:record_with_json_data) do 66 | MockTreatmentReport.new(1, { "key" => "value" }) 67 | end 68 | 69 | it 'converts empty string JSONB to empty object {}' do 70 | generator = described_class.new([record_with_empty_jsonb]) 71 | sql = generator.generate 72 | 73 | expect(sql).to include("'{}'") 74 | expect(sql).not_to include("'\"\"'") 75 | end 76 | 77 | it 'converts nil JSONB to NULL' do 78 | generator = described_class.new([record_with_nil_jsonb]) 79 | sql = generator.generate 80 | 81 | expect(sql).to include("NULL") 82 | end 83 | 84 | it 'properly handles valid JSON data' do 85 | generator = described_class.new([record_with_json_data]) 86 | sql = generator.generate 87 | 88 | expect(sql).to include('\'{"key":"value"}\'') 89 | end 90 | 91 | it 'generates valid INSERT statements' do 92 | generator = described_class.new([record_with_empty_jsonb]) 93 | sql = generator.generate 94 | 95 | # More specific expectations for the SQL statement 96 | expect(sql).to include("INSERT INTO treatment_reports") 97 | expect(sql).to include("(id, service_history_log_data)") 98 | expect(sql).to include("VALUES (1, '{}')") 99 | expect(sql).to include("ON CONFLICT (id) DO NOTHING") 100 | end 101 | end 102 | end 103 | end -------------------------------------------------------------------------------- /spec/real_data_tests/data_anonymizer_spec.rb: -------------------------------------------------------------------------------- 1 | # spec/real_data_tests/data_anonymizer_spec.rb 2 | require 'spec_helper' 3 | 4 | RSpec.describe RealDataTests::DataAnonymizer do 5 | # First define a test model 6 | before(:all) do 7 | ActiveRecord::Schema.define do 8 | create_table :test_users, force: true do |t| 9 | t.string :first_name 10 | t.string :last_name 11 | t.string :email 12 | t.timestamps 13 | end 14 | end 15 | 16 | class TestUser < ActiveRecord::Base 17 | self.table_name = 'test_users' 18 | end 19 | end 20 | 21 | let(:test_user) do 22 | TestUser.create!( 23 | first_name: "John", 24 | last_name: "Doe", 25 | email: "john@example.com" 26 | ) 27 | end 28 | 29 | before(:each) do 30 | RealDataTests.reset_configuration! 31 | end 32 | 33 | describe "anonymization with presets" do 34 | it "anonymizes data using a preset configuration" do 35 | RealDataTests.configure do |config| 36 | config.preset(:test_preset) do |p| 37 | p.anonymize('TestUser', { 38 | first_name: -> (_) { "Anonymous" }, 39 | last_name: -> (_) { "User" }, 40 | email: -> (user) { "user#{user.id}@anonymous.com" } 41 | }) 42 | end 43 | end 44 | 45 | RealDataTests.with_preset(:test_preset) do 46 | anonymizer = RealDataTests::DataAnonymizer.new(RealDataTests.configuration.current_preset) 47 | anonymized_user = anonymizer.anonymize_record(test_user) 48 | 49 | expect(anonymized_user.first_name).to eq("Anonymous") 50 | expect(anonymized_user.last_name).to eq("User") 51 | expect(anonymized_user.email).to eq("user#{test_user.id}@anonymous.com") 52 | end 53 | end 54 | 55 | it "handles Faker-based anonymization" do 56 | RealDataTests.configure do |config| 57 | config.preset(:faker_preset) do |p| 58 | p.anonymize('TestUser', { 59 | first_name: -> (_) { Faker::Name.first_name }, 60 | last_name: -> (_) { Faker::Name.last_name }, 61 | email: -> (_) { Faker::Internet.email } 62 | }) 63 | end 64 | end 65 | 66 | RealDataTests.with_preset(:faker_preset) do 67 | anonymizer = RealDataTests::DataAnonymizer.new(RealDataTests.configuration.current_preset) 68 | anonymized_user = anonymizer.anonymize_record(test_user) 69 | 70 | expect(anonymized_user.first_name).not_to eq("John") 71 | expect(anonymized_user.last_name).not_to eq("Doe") 72 | expect(anonymized_user.email).not_to eq("john@example.com") 73 | expect(anonymized_user.email).to include('@') 74 | end 75 | end 76 | 77 | it "handles multiple records" do 78 | users = 3.times.map do |i| 79 | TestUser.create!( 80 | first_name: "User#{i}", 81 | last_name: "Test#{i}", 82 | email: "user#{i}@example.com" 83 | ) 84 | end 85 | 86 | RealDataTests.configure do |config| 87 | config.preset(:batch_preset) do |p| 88 | p.anonymize('TestUser', { 89 | first_name: -> (_) { "Anon" }, 90 | last_name: -> (_) { "User" }, 91 | email: -> (user) { "anon#{user.id}@example.com" } 92 | }) 93 | end 94 | end 95 | 96 | RealDataTests.with_preset(:batch_preset) do 97 | anonymizer = RealDataTests::DataAnonymizer.new(RealDataTests.configuration.current_preset) 98 | anonymized_users = anonymizer.anonymize_records(users) 99 | 100 | anonymized_users.each do |user| 101 | expect(user.first_name).to eq("Anon") 102 | expect(user.last_name).to eq("User") 103 | expect(user.email).to match(/anon\d+@example\.com/) 104 | end 105 | end 106 | end 107 | end 108 | end -------------------------------------------------------------------------------- /spec/real_data_tests_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require 'spec_helper' 3 | 4 | RSpec.describe RealDataTests do 5 | let(:configuration) { described_class.configuration } 6 | 7 | before(:each) do 8 | described_class.reset_configuration! 9 | end 10 | 11 | it "has a version number" do 12 | expect(RealDataTests::VERSION).not_to be nil 13 | end 14 | 15 | describe "Configuration" do 16 | it "initializes with default values" do 17 | expect(configuration.dump_path).to eq('spec/fixtures/real_data_dumps') 18 | expect(configuration.presets).to include(:default) 19 | expect(configuration.current_preset).not_to be_nil 20 | end 21 | 22 | it "allows setting dump path" do 23 | configuration.dump_path = "custom/path" 24 | expect(configuration.dump_path).to eq("custom/path") 25 | end 26 | end 27 | 28 | describe "PresetConfig" do 29 | let(:preset) { RealDataTests::PresetConfig.new } 30 | 31 | it "starts with empty configuration" do 32 | expect(preset.association_filter_mode).to be_nil 33 | expect(preset.association_filter_list).to be_empty 34 | expect(preset.model_specific_associations).to be_empty 35 | end 36 | 37 | it "handles included associations" do 38 | preset.include_associations(:user, :profile) 39 | expect(preset.association_filter_mode).to eq(:whitelist) 40 | expect(preset.association_filter_list).to contain_exactly(:user, :profile) 41 | end 42 | 43 | it "handles excluded associations" do 44 | preset.exclude_associations(:admin, :system) 45 | expect(preset.association_filter_mode).to eq(:blacklist) 46 | expect(preset.association_filter_list).to contain_exactly(:admin, :system) 47 | end 48 | 49 | it "prevents mixing include and exclude" do 50 | preset.include_associations(:user) 51 | expect { 52 | preset.exclude_associations(:admin) 53 | }.to raise_error(RealDataTests::Error) 54 | end 55 | 56 | it "handles model-specific associations" do 57 | preset.include_associations_for("User", :posts, :comments) 58 | expect(preset.model_specific_associations["User"]).to contain_exactly(:posts, :comments) 59 | end 60 | 61 | it "properly processes associations" do 62 | preset.include_associations(:profile) 63 | expect(preset.should_process_association?("User", :profile)).to be true 64 | expect(preset.should_process_association?("User", :admin)).to be false 65 | end 66 | end 67 | 68 | describe "Preset Management" do 69 | it "creates and manages presets" do 70 | configuration.preset(:test_preset) do |p| 71 | p.include_associations(:user, :profile) 72 | p.limit_association("User.posts", 5) 73 | end 74 | 75 | expect(configuration.presets).to include(:test_preset) 76 | end 77 | 78 | it "switches between presets" do 79 | configuration.preset(:preset1) { |p| p.include_associations(:user) } 80 | configuration.preset(:preset2) { |p| p.include_associations(:profile) } 81 | 82 | configuration.use_preset(:preset1) 83 | expect(configuration.current_preset.association_filter_list).to contain_exactly(:user) 84 | 85 | configuration.use_preset(:preset2) 86 | expect(configuration.current_preset.association_filter_list).to contain_exactly(:profile) 87 | end 88 | 89 | it "handles preset blocks correctly" do 90 | original_preset = configuration.current_preset 91 | 92 | # First create the preset 93 | configuration.preset(:test_preset) { |p| p.include_associations(:user) } 94 | 95 | configuration.with_preset(:test_preset) do 96 | expect(configuration.current_preset).not_to eq(original_preset) 97 | expect(configuration.current_preset.association_filter_list).to contain_exactly(:user) 98 | end 99 | 100 | # Should return to original preset after block 101 | expect(configuration.current_preset).to eq(original_preset) 102 | end 103 | 104 | it "raises error for non-existent presets" do 105 | expect { 106 | configuration.use_preset(:nonexistent) 107 | }.to raise_error(RealDataTests::Error) 108 | end 109 | end 110 | end -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [Unreleased] 2 | 3 | ## [0.3.5 - 0.3.17] - 2025-01-14 4 | ### Fixed 5 | - Enhanced SQL statement handling in native loader 6 | - Added proper UUID value quoting in VALUES clauses 7 | - Fixed string value formatting in SQL statements 8 | - Improved error reporting with detailed SQL statement context 9 | - Added robust SQL statement cleaning and normalization 10 | 11 | ## [0.3.4] - 2025-01-14 12 | ### Added 13 | - Alternative native SQL loading method for CI environments 14 | - Added `load_real_test_data_native` method that doesn't rely on system commands 15 | - Works in restricted environments like GitHub Actions 16 | - Uses ActiveRecord's native connection for SQL execution 17 | - Maintains same transaction and foreign key handling behavior 18 | 19 | ## [0.3.3] - 2025-01-14 20 | ### Fixed 21 | - Improved circular dependency handling in PgDumpGenerator for self-referential associations 22 | - Added robust checks for self-referential associations during topological sort 23 | - Updated dependency graph building to properly exclude prevented circular dependencies 24 | - Fixed model name handling in circular dependency error messages 25 | - Improved error reporting for circular dependency detection 26 | - Enhanced PresetConfiguration circular dependency prevention 27 | - Added more reliable tracking of prevented reciprocal associations using Sets 28 | - Improved handling of both class and string model names in prevention checks 29 | - Better support for multiple prevented dependencies per model 30 | - Updated record collection depth handling 31 | - Fixed max depth enforcement for nested associations 32 | - Added proper depth tracking for self-referential relationships 33 | - Improved interaction between max depth and circular dependency prevention 34 | 35 | ## [0.3.2] - 2025-01-14 36 | ### Fixed 37 | - Enhanced association statistics tracking in RecordCollector 38 | - Added separate statistics tracking method to ensure accurate counts 39 | - Stats are now tracked before circular dependency checks 40 | - Fixed parent-child relationship counting in recursive associations 41 | - Improved initialization of statistics structures for better reliability 42 | 43 | ## [0.3.1] - 2025-01-14 44 | ### Fixed 45 | - Fixed circular dependency handling in RecordCollector to correctly limit record collection 46 | - Moved prevention logic earlier in the collection process to stop circular dependencies before record collection 47 | - Improved tracking of visited associations for more accurate prevention 48 | - Added better logging for dependency prevention decisions 49 | - Fixed test case for circular dependency prevention in nested associations 50 | 51 | ## [0.3.0] - 2025-01-13 52 | ### Added 53 | - **Polymorphic Association Support**: 54 | - RecordCollector now supports tracking and collecting records from polymorphic associations. 55 | - Polymorphic `belongs_to`, `has_many`, and `has_one` associations are automatically detected and processed during data collection. 56 | - Added tracking for polymorphic types in `@collection_stats` to provide detailed insights into polymorphic relationships. 57 | - Graceful handling of missing records in polymorphic associations using error logging. 58 | 59 | ### Fixed 60 | - Improved error handling for `ActiveRecord::RecordNotFound` exceptions when loading polymorphic associations. 61 | - Correctly initializes and updates association statistics for polymorphic associations in `@collection_stats`. 62 | 63 | ## [0.2.1] - 2025-01-13 64 | ### Fixed 65 | - Fixed JSONB field handling to output '{}' instead of empty string for blank values 66 | - Added test coverage for JSONB field handling in PgDumpGenerator 67 | 68 | ## [0.2.0] - 2025-01-13 69 | ### Added 70 | - New preset system for managing different test data configurations 71 | - Added `preset`, `use_preset`, and `with_preset` methods for configuration 72 | - Support for multiple named configuration presets 73 | - Added documentation for using presets 74 | - New PresetConfig class to handle preset-specific configurations 75 | 76 | ### Changed 77 | - Refactored Configuration class to use preset-based approach 78 | - Moved configuration methods into PresetConfig class 79 | - Updated documentation with preset usage examples and best practices 80 | 81 | ## [0.1.0] - 2025-01-11 82 | - Initial release -------------------------------------------------------------------------------- /spec/real_data_tests/rspec_helper_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | RSpec.describe RealDataTests::RSpecHelper do 4 | let(:helper) { Class.new { include RealDataTests::RSpecHelper }.new } 5 | 6 | describe 'SqlBlock' do 7 | let(:sql_block) { helper.send(:parse_sql_blocks, sql_content).first } 8 | 9 | context 'with INSERT statements' do 10 | let(:sql_content) do 11 | <<~SQL 12 | INSERT INTO organizations 13 | (id, dba_name, legal_name, slug, about, settings, approved, deleted) 14 | VALUES ('e50d8052-4481-4246-9502-7f8e5659abcb', 'Ratke Group', 15 | 'Terry-Carroll', 'r1coul335sza439x', NULL, 16 | '{"billing":{"claim_submission":""},"print_settings":{"hide_logo_in_header":"0"}}', 17 | false, false) 18 | ON CONFLICT (id) DO NOTHING; 19 | SQL 20 | end 21 | 22 | it 'correctly identifies block type' do 23 | expect(sql_block.type).to eq(:insert) 24 | end 25 | 26 | it 'extracts table name' do 27 | expect(sql_block.table_name).to eq('organizations') 28 | end 29 | 30 | it 'preserves ON CONFLICT clause' do 31 | expect(sql_block.content).to include('ON CONFLICT (id) DO NOTHING') 32 | end 33 | 34 | it 'maintains proper spacing around ON CONFLICT' do 35 | expect(sql_block.content).to match(/\)\s+ON CONFLICT/) 36 | end 37 | end 38 | 39 | context 'with complex INSERT statements containing multiple parentheses' do 40 | let(:sql_content) do 41 | <<~SQL 42 | INSERT INTO organizations ( 43 | id, settings, timezone 44 | ) VALUES ( 45 | 'abc-123', 46 | '{"time_settings": {"zone": "Eastern Time (US & Canada)"}}', 47 | 'Eastern Time (US & Canada)' 48 | ) ON CONFLICT (id) DO NOTHING; 49 | SQL 50 | end 51 | 52 | it 'correctly preserves nested parentheses in values' do 53 | expect(sql_block.content).to include('Eastern Time (US & Canada)') 54 | end 55 | 56 | it 'maintains proper structure of JSON with parentheses' do 57 | expect(sql_block.content).to include('"zone": "Eastern Time (US & Canada)"') 58 | end 59 | end 60 | 61 | context 'with multiple INSERT statements' do 62 | let(:sql_content) do 63 | <<~SQL 64 | INSERT INTO organizations (id, name) VALUES ('org-1', 'Org 1') ON CONFLICT (id) DO NOTHING; 65 | INSERT INTO users (id, org_id) VALUES ('user-1', 'org-1') ON CONFLICT (id) DO NOTHING; 66 | SQL 67 | end 68 | 69 | it 'correctly splits multiple statements' do 70 | blocks = helper.send(:parse_sql_blocks, sql_content) 71 | expect(blocks.length).to eq(2) 72 | expect(blocks[0].table_name).to eq('organizations') 73 | expect(blocks[1].table_name).to eq('users') 74 | end 75 | 76 | it 'preserves ON CONFLICT clauses for each statement' do 77 | blocks = helper.send(:parse_sql_blocks, sql_content) 78 | blocks.each do |block| 79 | expect(block.content).to include('ON CONFLICT') 80 | expect(block.content).to include('DO NOTHING') 81 | end 82 | end 83 | end 84 | 85 | context 'with different ON CONFLICT actions' do 86 | let(:sql_content) do 87 | <<~SQL 88 | INSERT INTO config (key, value) 89 | VALUES ('setting1', 'value1') 90 | ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value; 91 | SQL 92 | end 93 | 94 | it 'preserves DO UPDATE clauses' do 95 | expect(sql_block.content).to include('DO UPDATE SET value = EXCLUDED.value') 96 | end 97 | 98 | it 'maintains proper spacing around complex ON CONFLICT clauses' do 99 | expect(sql_block.content).to match(/\)\s+ON CONFLICT/) 100 | expect(sql_block.content).to match(/DO UPDATE SET/) 101 | end 102 | end 103 | 104 | context 'with COPY statements' do 105 | let(:sql_content) do 106 | [ 107 | "COPY public.organizations (id, name) FROM stdin;", 108 | "abc-123\tOrg Name", 109 | "def-456\tOrg 2", 110 | "\\.", 111 | "" 112 | ].map { |line| line + "\n" }.join 113 | end 114 | 115 | let(:expected_content) do 116 | [ 117 | "COPY public.organizations (id, name) FROM stdin;", 118 | "abc-123\tOrg Name", 119 | "def-456\tOrg 2", 120 | "\\." 121 | ].join("\n") 122 | end 123 | 124 | it 'identifies COPY blocks' do 125 | expect(sql_block.type).to eq(:copy) 126 | end 127 | 128 | it 'preserves COPY content including terminator' do 129 | expect(sql_block.content).to eq(expected_content) 130 | end 131 | 132 | it 'preserves tab characters in COPY data' do 133 | expect(sql_block.content).to include("abc-123\tOrg Name") 134 | end 135 | 136 | it 'includes the complete COPY block' do 137 | expect(sql_block.content).to eq(expected_content) 138 | end 139 | end 140 | end 141 | end -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 8 | 9 | ## Our Standards 10 | 11 | Examples of behavior that contributes to a positive environment for our community include: 12 | 13 | * Demonstrating empathy and kindness toward other people 14 | * Being respectful of differing opinions, viewpoints, and experiences 15 | * Giving and gracefully accepting constructive feedback 16 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience 17 | * Focusing on what is best not just for us as individuals, but for the overall community 18 | 19 | Examples of unacceptable behavior include: 20 | 21 | * The use of sexualized language or imagery, and sexual attention or 22 | advances of any kind 23 | * Trolling, insulting or derogatory comments, and personal or political attacks 24 | * Public or private harassment 25 | * Publishing others' private information, such as a physical or email 26 | address, without their explicit permission 27 | * Other conduct which could reasonably be considered inappropriate in a 28 | professional setting 29 | 30 | ## Enforcement Responsibilities 31 | 32 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. 33 | 34 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. 35 | 36 | ## Scope 37 | 38 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 39 | 40 | ## Enforcement 41 | 42 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at diasks2@gmail.com. All complaints will be reviewed and investigated promptly and fairly. 43 | 44 | All community leaders are obligated to respect the privacy and security of the reporter of any incident. 45 | 46 | ## Enforcement Guidelines 47 | 48 | Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: 49 | 50 | ### 1. Correction 51 | 52 | **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. 53 | 54 | **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. 55 | 56 | ### 2. Warning 57 | 58 | **Community Impact**: A violation through a single incident or series of actions. 59 | 60 | **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. 61 | 62 | ### 3. Temporary Ban 63 | 64 | **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. 65 | 66 | **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. 67 | 68 | ### 4. Permanent Ban 69 | 70 | **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. 71 | 72 | **Consequence**: A permanent ban from any sort of public interaction within the community. 73 | 74 | ## Attribution 75 | 76 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, 77 | available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 78 | 79 | Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). 80 | 81 | [homepage]: https://www.contributor-covenant.org 82 | 83 | For answers to common questions about this code of conduct, see the FAQ at 84 | https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. 85 | -------------------------------------------------------------------------------- /lib/real_data_tests/configuration.rb: -------------------------------------------------------------------------------- 1 | module RealDataTests 2 | class Configuration 3 | attr_accessor :dump_path, :current_preset 4 | attr_reader :presets 5 | 6 | def initialize 7 | @dump_path = 'spec/fixtures/real_data_dumps' 8 | @presets = {} 9 | @current_preset = nil 10 | create_preset(:default) # Always have a default preset 11 | end 12 | 13 | private def create_preset(name) 14 | @presets[name] = PresetConfig.new 15 | @current_preset = @presets[name] 16 | end 17 | 18 | def get_association_limit(record_class, association_name) 19 | current_preset&.get_association_limit(record_class, association_name) 20 | end 21 | 22 | def prevent_reciprocal?(record_class, association_name) 23 | current_preset&.prevent_reciprocal?(record_class, association_name) 24 | end 25 | 26 | def preset(name, &block) 27 | name = name.to_sym 28 | @presets[name] = PresetConfig.new 29 | @current_preset = @presets[name] 30 | yield(@current_preset) if block_given? 31 | @current_preset = @presets[:default] 32 | end 33 | 34 | def use_preset(name) 35 | name = name.to_sym 36 | raise Error, "Preset '#{name}' not found" unless @presets.key?(name) 37 | @current_preset = @presets[name] 38 | end 39 | 40 | def with_preset(name) 41 | previous_preset = @current_preset 42 | use_preset(name) 43 | yield if block_given? 44 | ensure 45 | @current_preset = previous_preset 46 | end 47 | 48 | def method_missing(method_name, *args, &block) 49 | if @current_preset.respond_to?(method_name) 50 | @current_preset.public_send(method_name, *args, &block) 51 | else 52 | super 53 | end 54 | end 55 | 56 | def respond_to_missing?(method_name, include_private = false) 57 | @current_preset.respond_to?(method_name) || super 58 | end 59 | end 60 | 61 | class PresetConfig 62 | attr_reader :association_filter_mode, :association_filter_list, 63 | :model_specific_associations, :association_limits, 64 | :prevent_reciprocal_loading, :anonymization_rules, 65 | :prevented_reciprocals 66 | 67 | attr_accessor :max_depth, :max_self_ref_depth 68 | 69 | def initialize 70 | @association_filter_mode = nil 71 | @association_filter_list = [] 72 | @model_specific_associations = {} 73 | @association_limits = {} 74 | @prevent_reciprocal_loading = {} 75 | @anonymization_rules = {} 76 | @prevented_reciprocals = Set.new 77 | @max_depth = 10 78 | @max_self_ref_depth = 2 79 | end 80 | 81 | def prevent_circular_dependency(klass, association_name) 82 | key = if klass.is_a?(String) 83 | "#{klass}:#{association_name}" 84 | else 85 | "#{klass.name}:#{association_name}" 86 | end 87 | @prevented_reciprocals << key 88 | end 89 | 90 | def max_self_ref_depth=(depth) 91 | @max_self_ref_depth = depth 92 | end 93 | 94 | def get_max_self_ref_depth(model) 95 | @max_self_ref_depth 96 | end 97 | 98 | def has_circular_dependency?(klass, association_name) 99 | key = if klass.is_a?(String) 100 | "#{klass}:#{association_name}" 101 | else 102 | "#{klass.name}:#{association_name}" 103 | end 104 | @prevented_reciprocals.include?(key) 105 | end 106 | 107 | def include_associations(*associations) 108 | if @association_filter_mode == :blacklist 109 | raise Error, "Cannot set included_associations when excluded_associations is already set" 110 | end 111 | @association_filter_mode = :whitelist 112 | @association_filter_list = associations.flatten 113 | end 114 | 115 | def exclude_associations(*associations) 116 | if @association_filter_mode == :whitelist 117 | raise Error, "Cannot set excluded_associations when included_associations is already set" 118 | end 119 | @association_filter_mode = :blacklist 120 | @association_filter_list = associations.flatten 121 | end 122 | 123 | def include_associations_for(model, *associations) 124 | model_name = model.is_a?(String) ? model : model.name 125 | @model_specific_associations[model_name] = associations.flatten 126 | end 127 | 128 | def limit_association(path, limit) 129 | @association_limits[path.to_s] = limit 130 | end 131 | 132 | def get_association_limit(record_class, association_name) 133 | path = "#{record_class.name}.#{association_name}" 134 | @association_limits[path] 135 | end 136 | 137 | def set_association_limit(model_name, association_name, limit) 138 | path = "#{model_name}.#{association_name}" 139 | @association_limits[path] = limit 140 | end 141 | 142 | def prevent_reciprocal?(record_class, association_name) 143 | path = "#{record_class.name}.#{association_name}" 144 | @prevent_reciprocal_loading[path] || has_circular_dependency?(record_class, association_name) 145 | end 146 | 147 | def prevent_reciprocal(path) 148 | @prevent_reciprocal_loading[path.to_s] = true 149 | end 150 | 151 | def anonymize(model_name, mappings = {}) 152 | @anonymization_rules[model_name.to_s] = mappings 153 | end 154 | 155 | def should_process_association?(model, association_name) 156 | model_name = model.is_a?(Class) ? model.name : model.class.name 157 | 158 | if @model_specific_associations.key?(model_name) 159 | return @model_specific_associations[model_name].include?(association_name) 160 | end 161 | 162 | case @association_filter_mode 163 | when :whitelist 164 | @association_filter_list.include?(association_name) 165 | when :blacklist 166 | !@association_filter_list.include?(association_name) 167 | else 168 | true 169 | end 170 | end 171 | end 172 | end -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: . 3 | specs: 4 | real_data_tests (0.2.1) 5 | activerecord (>= 5.0) 6 | faker (~> 3.0) 7 | pg (>= 1.1) 8 | rails (>= 5.0) 9 | thor (~> 1.0) 10 | 11 | GEM 12 | remote: https://rubygems.org/ 13 | specs: 14 | actioncable (7.2.2.1) 15 | actionpack (= 7.2.2.1) 16 | activesupport (= 7.2.2.1) 17 | nio4r (~> 2.0) 18 | websocket-driver (>= 0.6.1) 19 | zeitwerk (~> 2.6) 20 | actionmailbox (7.2.2.1) 21 | actionpack (= 7.2.2.1) 22 | activejob (= 7.2.2.1) 23 | activerecord (= 7.2.2.1) 24 | activestorage (= 7.2.2.1) 25 | activesupport (= 7.2.2.1) 26 | mail (>= 2.8.0) 27 | actionmailer (7.2.2.1) 28 | actionpack (= 7.2.2.1) 29 | actionview (= 7.2.2.1) 30 | activejob (= 7.2.2.1) 31 | activesupport (= 7.2.2.1) 32 | mail (>= 2.8.0) 33 | rails-dom-testing (~> 2.2) 34 | actionpack (7.2.2.1) 35 | actionview (= 7.2.2.1) 36 | activesupport (= 7.2.2.1) 37 | nokogiri (>= 1.8.5) 38 | racc 39 | rack (>= 2.2.4, < 3.2) 40 | rack-session (>= 1.0.1) 41 | rack-test (>= 0.6.3) 42 | rails-dom-testing (~> 2.2) 43 | rails-html-sanitizer (~> 1.6) 44 | useragent (~> 0.16) 45 | actiontext (7.2.2.1) 46 | actionpack (= 7.2.2.1) 47 | activerecord (= 7.2.2.1) 48 | activestorage (= 7.2.2.1) 49 | activesupport (= 7.2.2.1) 50 | globalid (>= 0.6.0) 51 | nokogiri (>= 1.8.5) 52 | actionview (7.2.2.1) 53 | activesupport (= 7.2.2.1) 54 | builder (~> 3.1) 55 | erubi (~> 1.11) 56 | rails-dom-testing (~> 2.2) 57 | rails-html-sanitizer (~> 1.6) 58 | activejob (7.2.2.1) 59 | activesupport (= 7.2.2.1) 60 | globalid (>= 0.3.6) 61 | activemodel (7.2.2.1) 62 | activesupport (= 7.2.2.1) 63 | activerecord (7.2.2.1) 64 | activemodel (= 7.2.2.1) 65 | activesupport (= 7.2.2.1) 66 | timeout (>= 0.4.0) 67 | activestorage (7.2.2.1) 68 | actionpack (= 7.2.2.1) 69 | activejob (= 7.2.2.1) 70 | activerecord (= 7.2.2.1) 71 | activesupport (= 7.2.2.1) 72 | marcel (~> 1.0) 73 | activesupport (7.2.2.1) 74 | base64 75 | benchmark (>= 0.3) 76 | bigdecimal 77 | concurrent-ruby (~> 1.0, >= 1.3.1) 78 | connection_pool (>= 2.2.5) 79 | drb 80 | i18n (>= 1.6, < 2) 81 | logger (>= 1.4.2) 82 | minitest (>= 5.1) 83 | securerandom (>= 0.3) 84 | tzinfo (~> 2.0, >= 2.0.5) 85 | base64 (0.2.0) 86 | benchmark (0.4.0) 87 | bigdecimal (3.1.9) 88 | builder (3.3.0) 89 | concurrent-ruby (1.3.4) 90 | connection_pool (2.5.0) 91 | crass (1.0.6) 92 | database_cleaner (2.1.0) 93 | database_cleaner-active_record (>= 2, < 3) 94 | database_cleaner-active_record (2.2.0) 95 | activerecord (>= 5.a) 96 | database_cleaner-core (~> 2.0.0) 97 | database_cleaner-core (2.0.1) 98 | date (3.4.1) 99 | diff-lcs (1.5.1) 100 | drb (2.2.1) 101 | erubi (1.13.1) 102 | faker (3.5.1) 103 | i18n (>= 1.8.11, < 2) 104 | globalid (1.2.1) 105 | activesupport (>= 6.1) 106 | i18n (1.14.6) 107 | concurrent-ruby (~> 1.0) 108 | io-console (0.8.0) 109 | irb (1.14.3) 110 | rdoc (>= 4.0.0) 111 | reline (>= 0.4.2) 112 | logger (1.6.5) 113 | loofah (2.24.0) 114 | crass (~> 1.0.2) 115 | nokogiri (>= 1.12.0) 116 | mail (2.8.1) 117 | mini_mime (>= 0.1.1) 118 | net-imap 119 | net-pop 120 | net-smtp 121 | marcel (1.0.4) 122 | mini_mime (1.1.5) 123 | minitest (5.25.4) 124 | net-imap (0.5.5) 125 | date 126 | net-protocol 127 | net-pop (0.1.2) 128 | net-protocol 129 | net-protocol (0.2.2) 130 | timeout 131 | net-smtp (0.5.0) 132 | net-protocol 133 | nio4r (2.7.4) 134 | nokogiri (1.18.1-x86_64-darwin) 135 | racc (~> 1.4) 136 | pg (1.5.9) 137 | psych (5.2.2) 138 | date 139 | stringio 140 | racc (1.8.1) 141 | rack (3.1.8) 142 | rack-session (2.1.0) 143 | base64 (>= 0.1.0) 144 | rack (>= 3.0.0) 145 | rack-test (2.2.0) 146 | rack (>= 1.3) 147 | rackup (2.2.1) 148 | rack (>= 3) 149 | rails (7.2.2.1) 150 | actioncable (= 7.2.2.1) 151 | actionmailbox (= 7.2.2.1) 152 | actionmailer (= 7.2.2.1) 153 | actionpack (= 7.2.2.1) 154 | actiontext (= 7.2.2.1) 155 | actionview (= 7.2.2.1) 156 | activejob (= 7.2.2.1) 157 | activemodel (= 7.2.2.1) 158 | activerecord (= 7.2.2.1) 159 | activestorage (= 7.2.2.1) 160 | activesupport (= 7.2.2.1) 161 | bundler (>= 1.15.0) 162 | railties (= 7.2.2.1) 163 | rails-dom-testing (2.2.0) 164 | activesupport (>= 5.0.0) 165 | minitest 166 | nokogiri (>= 1.6) 167 | rails-html-sanitizer (1.6.2) 168 | loofah (~> 2.21) 169 | nokogiri (>= 1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0) 170 | railties (7.2.2.1) 171 | actionpack (= 7.2.2.1) 172 | activesupport (= 7.2.2.1) 173 | irb (~> 1.13) 174 | rackup (>= 1.0.0) 175 | rake (>= 12.2) 176 | thor (~> 1.0, >= 1.2.2) 177 | zeitwerk (~> 2.6) 178 | rake (13.2.1) 179 | rdoc (6.10.0) 180 | psych (>= 4.0.0) 181 | reline (0.6.0) 182 | io-console (~> 0.5) 183 | rspec (3.13.0) 184 | rspec-core (~> 3.13.0) 185 | rspec-expectations (~> 3.13.0) 186 | rspec-mocks (~> 3.13.0) 187 | rspec-core (3.13.2) 188 | rspec-support (~> 3.13.0) 189 | rspec-expectations (3.13.3) 190 | diff-lcs (>= 1.2.0, < 2.0) 191 | rspec-support (~> 3.13.0) 192 | rspec-mocks (3.13.2) 193 | diff-lcs (>= 1.2.0, < 2.0) 194 | rspec-support (~> 3.13.0) 195 | rspec-support (3.13.2) 196 | securerandom (0.4.1) 197 | stringio (3.1.2) 198 | thor (1.3.2) 199 | timeout (0.4.3) 200 | tzinfo (2.0.6) 201 | concurrent-ruby (~> 1.0) 202 | useragent (0.16.11) 203 | websocket-driver (0.7.7) 204 | base64 205 | websocket-extensions (>= 0.1.0) 206 | websocket-extensions (0.1.5) 207 | zeitwerk (2.6.18) 208 | 209 | PLATFORMS 210 | x86_64-darwin-21 211 | 212 | DEPENDENCIES 213 | database_cleaner (~> 2.0) 214 | database_cleaner-active_record 215 | rake (~> 13.0) 216 | real_data_tests! 217 | rspec (~> 3.0) 218 | 219 | BUNDLED WITH 220 | 2.3.27 221 | -------------------------------------------------------------------------------- /spec/real_data_tests/record_collector_spec.rb: -------------------------------------------------------------------------------- 1 | # spec/real_data_tests/record_collector_spec.rb 2 | require 'spec_helper' 3 | 4 | RSpec.describe RealDataTests::RecordCollector do 5 | # Mock classes to simulate ActiveRecord behavior 6 | class MockAssociation 7 | attr_reader :name, :macro, :options, :klass 8 | 9 | def initialize(name, macro, options = {}) 10 | @name = name 11 | @macro = macro 12 | @options = options 13 | @klass = Object.const_get(options[:class_name]) if options[:class_name] 14 | end 15 | 16 | def belongs_to? 17 | macro == :belongs_to 18 | end 19 | 20 | def polymorphic? 21 | options[:polymorphic] 22 | end 23 | end 24 | 25 | class MockRecord 26 | attr_reader :id, :class_name 27 | attr_accessor :associations 28 | 29 | def initialize(id, class_name) 30 | @id = id 31 | @class_name = class_name 32 | @associations = {} 33 | end 34 | 35 | def self.name 36 | to_s 37 | end 38 | 39 | def self.reflect_on_all_associations(_type = nil) 40 | @associations ||= [] 41 | end 42 | 43 | def self.reflect_on_association(name) 44 | reflect_on_all_associations.find { |a| a.name.to_sym == name.to_sym } 45 | end 46 | 47 | def public_send(method_name) 48 | associations[method_name] 49 | end 50 | end 51 | 52 | class MockServiceRate < MockRecord 53 | @associations = [ 54 | MockAssociation.new(:parent_rate, :belongs_to, class_name: 'MockServiceRate'), 55 | MockAssociation.new(:child_rates, :has_many, class_name: 'MockServiceRate') 56 | ] 57 | 58 | def self.reflect_on_all_associations(_type = nil) 59 | @associations 60 | end 61 | end 62 | 63 | # Test setup 64 | let(:configuration) { RealDataTests.configuration } 65 | 66 | let!(:parent_rate) { MockServiceRate.new(1, 'MockServiceRate') } 67 | let!(:child_rate) { MockServiceRate.new(2, 'MockServiceRate') } 68 | let!(:grandchild_rate) { MockServiceRate.new(3, 'MockServiceRate') } 69 | 70 | before do 71 | # Set up relationships after objects are created to avoid recursion 72 | parent_rate.associations = { 73 | parent_rate: nil, 74 | child_rates: [child_rate] 75 | } 76 | 77 | child_rate.associations = { 78 | parent_rate: parent_rate, 79 | child_rates: [grandchild_rate] 80 | } 81 | 82 | grandchild_rate.associations = { 83 | parent_rate: child_rate, 84 | child_rates: [] 85 | } 86 | end 87 | 88 | before(:each) do 89 | RealDataTests.configure do |config| 90 | config.preset :test do |p| 91 | p.include_associations('MockServiceRate', :parent_rate, :child_rates) 92 | end 93 | end 94 | end 95 | 96 | describe '#collect' do 97 | context 'when handling circular dependencies' do 98 | it 'collects records without infinite recursion' do 99 | collector = described_class.new(parent_rate) 100 | collected_records = collector.collect 101 | 102 | expect(collected_records).to include(parent_rate) 103 | expect(collected_records).to include(child_rate) 104 | expect(collected_records).to include(grandchild_rate) 105 | 106 | # Verify each record appears exactly once 107 | service_rates = collected_records.select { |r| r.is_a?(MockServiceRate) } 108 | expect(service_rates.count).to eq(3) 109 | end 110 | 111 | it 'maintains correct association statistics' do 112 | collector = described_class.new(parent_rate) 113 | collector.collect 114 | stats = collector.collection_stats 115 | 116 | expect(stats['MockServiceRate'][:associations]['parent_rate']).to eq(2) 117 | expect(stats['MockServiceRate'][:associations]['child_rates']).to be > 0 118 | end 119 | end 120 | 121 | context 'with circular dependency prevention configured' do 122 | before do 123 | RealDataTests.configure do |config| 124 | config.preset :test do |p| 125 | p.include_associations_for 'MockServiceRate', :parent_rate, :child_rates 126 | p.prevent_circular_dependency(MockServiceRate, :parent_rate) 127 | p.max_depth = 10 # Ensure depth isn't the limiting factor 128 | end 129 | end 130 | RealDataTests.configuration.use_preset(:test) 131 | end 132 | 133 | it 'respects prevention configuration' do 134 | collector = described_class.new(parent_rate) 135 | collected_records = collector.collect 136 | 137 | # Should collect immediate relationships but prevent deep recursion 138 | expect(collected_records).to include(parent_rate) 139 | expect(collected_records).to include(child_rate) 140 | 141 | # Verify prevention of deep circular dependencies 142 | expect(collected_records.count { |r| r.is_a?(MockServiceRate) }).to eq(3) 143 | end 144 | end 145 | 146 | context 'with max depth configuration' do 147 | before do 148 | RealDataTests.configure do |config| 149 | config.preset :test do |p| 150 | p.include_associations_for 'MockServiceRate', :parent_rate, :child_rates 151 | p.max_depth = 1 # Only allow one level of depth 152 | end 153 | end 154 | RealDataTests.configuration.use_preset(:test) 155 | end 156 | 157 | it 'respects max depth setting' do 158 | collector = described_class.new(parent_rate) 159 | collected_records = collector.collect 160 | 161 | expect(collected_records).to include(parent_rate) 162 | expect(collected_records).to include(child_rate) 163 | expect(collected_records).not_to include(grandchild_rate) 164 | end 165 | end 166 | end 167 | 168 | describe '#should_process_association?' do 169 | let(:collector) { described_class.new(parent_rate) } 170 | 171 | it 'detects self-referential associations' do 172 | association = MockServiceRate.reflect_on_association(:parent_rate) 173 | result = collector.send(:self_referential_association?, MockServiceRate, association) 174 | expect(result).to be true 175 | end 176 | 177 | it 'prevents processing same association multiple times' do 178 | association = MockServiceRate.reflect_on_association(:parent_rate) 179 | 180 | first_attempt = collector.send(:should_process_association?, parent_rate, association) 181 | expect(first_attempt).to be true 182 | 183 | second_attempt = collector.send(:should_process_association?, parent_rate, association) 184 | expect(second_attempt).to be false 185 | end 186 | end 187 | end -------------------------------------------------------------------------------- /spec/real_data_tests/record_collector_polymorphic_spec.rb: -------------------------------------------------------------------------------- 1 | # spec/real_data_tests/record_collector_polymorphic_spec.rb 2 | 3 | require 'spec_helper' 4 | require 'active_record' 5 | 6 | RSpec.describe RealDataTests::RecordCollector do 7 | # Set up test models 8 | before(:all) do 9 | ActiveRecord::Schema.define do 10 | create_table :payments do |t| 11 | t.references :billable, polymorphic: true 12 | t.decimal :amount 13 | t.timestamps 14 | end 15 | 16 | create_table :insurance_companies do |t| 17 | t.string :name 18 | t.timestamps 19 | end 20 | 21 | create_table :patients do |t| 22 | t.string :name 23 | t.timestamps 24 | end 25 | 26 | create_table :facilities do |t| 27 | t.string :name 28 | t.timestamps 29 | end 30 | 31 | create_table :comments do |t| 32 | t.references :commentable, polymorphic: true 33 | t.text :content 34 | t.timestamps 35 | end 36 | end 37 | 38 | class Payment < ActiveRecord::Base 39 | belongs_to :billable, polymorphic: true 40 | end 41 | 42 | class InsuranceCompany < ActiveRecord::Base 43 | has_many :payments, as: :billable 44 | has_many :comments, as: :commentable 45 | end 46 | 47 | class Patient < ActiveRecord::Base 48 | has_many :payments, as: :billable 49 | has_one :comment, as: :commentable 50 | end 51 | 52 | class Facility < ActiveRecord::Base 53 | has_many :payments, as: :billable 54 | has_many :comments, as: :commentable 55 | end 56 | 57 | class Comment < ActiveRecord::Base 58 | belongs_to :commentable, polymorphic: true 59 | end 60 | end 61 | 62 | after(:all) do 63 | ActiveRecord::Base.connection.drop_table(:payments) 64 | ActiveRecord::Base.connection.drop_table(:insurance_companies) 65 | ActiveRecord::Base.connection.drop_table(:patients) 66 | ActiveRecord::Base.connection.drop_table(:facilities) 67 | ActiveRecord::Base.connection.drop_table(:comments) 68 | end 69 | 70 | let(:insurance_company) { InsuranceCompany.create!(name: "Test Insurance") } 71 | let(:patient) { Patient.create!(name: "John Doe") } 72 | let(:facility) { Facility.create!(name: "Test Facility") } 73 | 74 | # Set up base configuration before each test 75 | before(:each) do 76 | RealDataTests.configure do |config| 77 | config.preset :test_preset do |p| 78 | p.include_associations_for 'Payment', :billable 79 | p.include_associations_for 'InsuranceCompany', :payments, :comments 80 | p.include_associations_for 'Patient', :payments, :comment 81 | p.include_associations_for 'Facility', :payments, :comments 82 | p.include_associations_for 'Comment', :commentable 83 | end 84 | config.use_preset(:test_preset) 85 | end 86 | end 87 | 88 | describe "polymorphic belongs_to associations" do 89 | it "collects records from polymorphic belongs_to relationships" do 90 | payment = Payment.create!(billable: insurance_company, amount: 100) 91 | collector = described_class.new(payment) 92 | collected_records = collector.collect 93 | 94 | expect(collected_records).to include(payment) 95 | expect(collected_records).to include(insurance_company) 96 | end 97 | 98 | it "tracks polymorphic types in collection stats" do 99 | # Let's add some debug output 100 | puts "\nCurrent preset: #{RealDataTests.configuration.current_preset}" 101 | 102 | # Create our test data 103 | insurance_company = InsuranceCompany.create!(name: "Test Insurance") 104 | payment1 = Payment.create!(billable: insurance_company, amount: 100) 105 | 106 | # Let's verify the payment data 107 | puts "\nPayment1 data:" 108 | puts " billable_type: #{payment1.billable_type}" 109 | puts " billable_id: #{payment1.billable_id}" 110 | 111 | # Create and run collector 112 | collector = described_class.new(payment1) 113 | collected_records = collector.collect 114 | 115 | # Get stats and inspect them 116 | stats = collector.collection_stats 117 | puts "\nFull collection stats:" 118 | puts JSON.pretty_generate(stats.transform_values(&:to_h)) 119 | 120 | # Check the specific value we're looking for 121 | polymorphic_types = stats['Payment'][:polymorphic_types][:billable] 122 | puts "\nPolymorphic types for Payment.billable:" 123 | puts " #{polymorphic_types.inspect}" 124 | 125 | # Run our expectation 126 | puts "Stats at assertion point: #{stats['Payment'][:polymorphic_types]}" 127 | 128 | expect(polymorphic_types).to include('InsuranceCompany') 129 | end 130 | end 131 | 132 | describe "polymorphic has_many associations" do 133 | it "collects records from polymorphic has_many relationships" do 134 | payment1 = Payment.create!(billable: insurance_company, amount: 100) 135 | payment2 = Payment.create!(billable: insurance_company, amount: 200) 136 | 137 | collector = described_class.new(insurance_company) 138 | collected_records = collector.collect 139 | 140 | expect(collected_records).to include(insurance_company) 141 | expect(collected_records).to include(payment1) 142 | expect(collected_records).to include(payment2) 143 | end 144 | 145 | it "respects configured limits for polymorphic has_many associations" do 146 | 5.times { Payment.create!(billable: insurance_company, amount: 100) } 147 | 148 | RealDataTests.configure do |config| 149 | config.preset :limit_test_preset do |p| 150 | # Include all the base associations 151 | p.include_associations_for 'Payment', :billable 152 | p.include_associations_for 'InsuranceCompany', :payments, :comments 153 | p.include_associations_for 'Patient', :payments, :comment 154 | p.include_associations_for 'Facility', :payments, :comments 155 | p.include_associations_for 'Comment', :commentable 156 | 157 | # Add the limit configuration 158 | p.set_association_limit 'InsuranceCompany', :payments, 2 159 | end 160 | config.use_preset(:limit_test_preset) 161 | end 162 | 163 | collector = described_class.new(insurance_company) 164 | collected_records = collector.collect 165 | 166 | payment_count = collected_records.count { |r| r.is_a?(Payment) } 167 | expect(payment_count).to eq(2) 168 | end 169 | end 170 | 171 | describe "polymorphic has_one associations" do 172 | it "collects records from polymorphic has_one relationships" do 173 | comment = Comment.create!(commentable: patient, content: "Test comment") 174 | 175 | collector = described_class.new(patient) 176 | collected_records = collector.collect 177 | 178 | expect(collected_records).to include(patient) 179 | expect(collected_records).to include(comment) 180 | end 181 | end 182 | 183 | describe "nested polymorphic associations" do 184 | it "handles nested polymorphic relationships" do 185 | payment = Payment.create!(billable: insurance_company, amount: 100) 186 | comment = Comment.create!(commentable: insurance_company, content: "Test comment") 187 | 188 | collector = described_class.new(payment) 189 | collected_records = collector.collect 190 | 191 | expect(collected_records).to include(payment) 192 | expect(collected_records).to include(insurance_company) 193 | expect(collected_records).to include(comment) 194 | end 195 | end 196 | 197 | describe "error handling" do 198 | it "gracefully handles errors in polymorphic association loading" do 199 | payment = Payment.create!(billable: insurance_company, amount: 100) 200 | allow(payment).to receive(:billable).and_raise(ActiveRecord::RecordNotFound) 201 | 202 | collector = described_class.new(payment) 203 | expect { collector.collect }.not_to raise_error 204 | end 205 | end 206 | 207 | describe "collection statistics" do 208 | it "provides detailed statistics for polymorphic associations" do 209 | payment1 = Payment.create!(billable: insurance_company, amount: 100) 210 | payment2 = Payment.create!(billable: patient, amount: 200) 211 | comment = Comment.create!(commentable: insurance_company, content: "Test") 212 | 213 | collector = described_class.new(payment1) 214 | collector.collect 215 | 216 | stats = collector.instance_variable_get(:@collection_stats) 217 | expect(stats['Payment'][:polymorphic_types][:billable]).to include('InsuranceCompany') 218 | expect(stats['InsuranceCompany'][:associations]['comments']).to eq(1) 219 | end 220 | end 221 | end -------------------------------------------------------------------------------- /lib/real_data_tests/record_collector.rb: -------------------------------------------------------------------------------- 1 | module RealDataTests 2 | class RecordCollector 3 | attr_reader :collection_stats, :collected_records 4 | 5 | def initialize(record) 6 | @record = record 7 | @collected_records = Set.new 8 | @collection_stats = {} 9 | @processed_associations = Set.new 10 | @association_path = [] 11 | @current_depth = 0 12 | @visited_associations = {} 13 | @processed_self_refs = Hash.new { |h, k| h[k] = Set.new } 14 | 15 | init_collection_stats(record) 16 | end 17 | 18 | def collect 19 | puts "\nStarting record collection from: #{@record.class.name}##{@record.id}" 20 | filter_mode = RealDataTests.configuration.current_preset.association_filter_mode 21 | filter_list = RealDataTests.configuration.current_preset.association_filter_list 22 | puts "Using #{filter_mode || 'no'} filter with #{filter_list.any? ? filter_list.join(', ') : 'no associations'}" 23 | collect_record(@record, 0) 24 | print_collection_stats 25 | @collected_records.to_a 26 | end 27 | 28 | private 29 | 30 | def init_collection_stats(record) 31 | @collection_stats[record.class.name] = { 32 | count: 0, 33 | associations: Hash.new(0), 34 | polymorphic_types: {} 35 | } 36 | 37 | record.class.reflect_on_all_associations(:belongs_to).each do |assoc| 38 | if assoc.polymorphic? 39 | @collection_stats[record.class.name][:polymorphic_types][assoc.name.to_s] ||= Set.new 40 | end 41 | end 42 | end 43 | 44 | def collect_record(record, depth) 45 | return if @collected_records.include?(record) 46 | return unless record # Guard against nil records 47 | return if depth > RealDataTests.configuration.current_preset.max_depth 48 | 49 | puts "\nCollecting record: #{record.class.name}##{record.id}" 50 | @collected_records.add(record) 51 | 52 | # Initialize stats structure 53 | @collection_stats[record.class.name] ||= { 54 | count: 0, 55 | associations: {}, 56 | polymorphic_types: {} 57 | } 58 | @collection_stats[record.class.name][:count] += 1 59 | 60 | # Track types for polymorphic belongs_to associations 61 | record.class.reflect_on_all_associations(:belongs_to).each do |assoc| 62 | next unless assoc.polymorphic? 63 | 64 | type = record.public_send("#{assoc.name}_type") 65 | @collection_stats[record.class.name][:polymorphic_types][assoc.name.to_sym] ||= Set.new 66 | 67 | begin 68 | associated_record = record.public_send(assoc.name) 69 | if associated_record 70 | puts " Adding polymorphic type '#{type}' for #{assoc.name}" 71 | @collection_stats[record.class.name][:polymorphic_types][assoc.name.to_sym] << associated_record.class.name 72 | else 73 | puts " Skipping polymorphic type for #{assoc.name} due to missing associated record" 74 | end 75 | rescue StandardError => e 76 | puts " Error loading polymorphic association #{assoc.name}: #{e.message}" 77 | end 78 | end 79 | 80 | collect_associations(record, depth) 81 | end 82 | 83 | def collect_associations(record, depth) 84 | return unless record.class.respond_to?(:reflect_on_all_associations) 85 | return if depth >= RealDataTests.configuration.current_preset.max_depth 86 | 87 | associations = record.class.reflect_on_all_associations 88 | puts "\nProcessing associations for: #{record.class.name}##{record.id}" 89 | puts "Found #{associations.length} associations" 90 | 91 | associations.each do |association| 92 | association_key = "#{record.class.name}##{record.id}:#{association.name}" 93 | puts " Checking if should process: #{association_key}" 94 | 95 | if RealDataTests.configuration.current_preset.prevent_reciprocal?(record.class, association.name) 96 | track_key = "#{record.class.name}:#{association.name}" 97 | @visited_associations[track_key] ||= Set.new 98 | 99 | # Skip if we've already processed this association type for this class 100 | if @visited_associations[track_key].any? 101 | puts " Skipping prevented reciprocal association: #{track_key}" 102 | next 103 | end 104 | @visited_associations[track_key].add(record.id) 105 | end 106 | 107 | next unless should_process_association?(record, association, depth) 108 | 109 | puts " Processing #{association.macro} #{association.polymorphic? ? 'polymorphic ' : ''}association: #{association.name}" 110 | process_association(record, association, depth) 111 | end 112 | end 113 | 114 | def should_process_association?(record, association, depth = 0) 115 | return false if depth >= RealDataTests.configuration.current_preset.max_depth 116 | 117 | # Handle self-referential associations 118 | if self_referential_association?(record.class, association) 119 | track_key = "#{record.class.name}:#{association.name}" 120 | return false if @processed_self_refs[track_key].include?(record.id) 121 | @processed_self_refs[track_key].add(record.id) 122 | end 123 | 124 | association_key = "#{record.class.name}##{record.id}:#{association.name}" 125 | return false if @processed_associations.include?(association_key) 126 | 127 | # Check if the association is allowed by configuration 128 | should_process = RealDataTests.configuration.current_preset.should_process_association?(record, association.name) 129 | 130 | if should_process 131 | @processed_associations.add(association_key) 132 | true 133 | else 134 | false 135 | end 136 | end 137 | 138 | def process_association(record, association, depth) 139 | @association_path.push(association.name) 140 | 141 | begin 142 | related_records = fetch_related_records(record, association) 143 | count = related_records.length 144 | 145 | # Track statistics even if we're going to skip processing 146 | track_association_stats(record.class.name, association.name, count) 147 | 148 | # Check for circular dependency after getting the related records 149 | if detect_circular_dependency?(record, association) 150 | puts " Skipping circular dependency for #{association.name} on #{record.class.name}##{record.id}" 151 | return 152 | end 153 | 154 | # For self-referential associations, check depth 155 | if self_referential_association?(record.class, association) 156 | max_self_ref_depth = 2 # Default max depth for self-referential associations 157 | if depth >= max_self_ref_depth 158 | puts " Reached max self-referential depth for #{association.name}" 159 | return 160 | end 161 | end 162 | 163 | related_records.each { |related_record| collect_record(related_record, depth + 1) } 164 | rescue => e 165 | puts " Error processing association #{association.name}: #{e.message}" 166 | ensure 167 | @association_path.pop 168 | end 169 | end 170 | 171 | def track_association_stats(class_name, association_name, count) 172 | # Initialize stats for this class if not already done 173 | @collection_stats[class_name] ||= { 174 | count: 0, 175 | associations: Hash.new(0), 176 | polymorphic_types: {} 177 | } 178 | 179 | # Update the association count 180 | @collection_stats[class_name][:associations][association_name.to_s] ||= 0 181 | @collection_stats[class_name][:associations][association_name.to_s] += count 182 | end 183 | 184 | def self_referential_association?(klass, association) 185 | return false unless association.options[:class_name] 186 | return false if association.polymorphic? 187 | 188 | target_class_name = if association.options[:class_name].is_a?(String) 189 | association.options[:class_name] 190 | else 191 | association.options[:class_name].name 192 | end 193 | 194 | klass.name == target_class_name 195 | end 196 | 197 | def detect_circular_dependency?(record, association) 198 | return false unless association.belongs_to? 199 | return false if association.polymorphic? 200 | 201 | target_class = association.klass 202 | return false unless target_class 203 | 204 | if self_referential_association?(record.class, association) 205 | track_key = "#{target_class.name}:#{association.name}" 206 | return @processed_self_refs[track_key].include?(record.id) 207 | end 208 | 209 | path_key = "#{target_class.name}:#{association.name}" 210 | visited_count = @association_path.count { |assoc| "#{target_class.name}:#{assoc}" == path_key } 211 | 212 | visited_count > 1 213 | end 214 | 215 | def fetch_related_records(record, association) 216 | case association.macro 217 | when :belongs_to, :has_one 218 | Array(record.public_send(association.name)).compact 219 | when :has_many, :has_and_belongs_to_many 220 | relation = record.public_send(association.name) 221 | 222 | if limit = RealDataTests.configuration.current_preset.get_association_limit(record.class, association.name) 223 | puts " Applying configured limit of #{limit} records for #{record.class.name}.#{association.name}" 224 | relation = relation[0...limit] 225 | end 226 | 227 | relation 228 | else 229 | [] 230 | end 231 | end 232 | 233 | def print_collection_stats 234 | puts "\n=== Collection Statistics ===" 235 | @collection_stats.each do |model, stats| 236 | puts "\n#{model}:" 237 | puts " Total records: #{stats[:count]}" 238 | 239 | if stats[:associations].any? 240 | puts " Associations:" 241 | stats[:associations].each do |assoc_name, count| 242 | puts " #{assoc_name}: #{count} records" 243 | end 244 | end 245 | 246 | if stats[:polymorphic_types].any? 247 | puts " Polymorphic Types:" 248 | stats[:polymorphic_types].each do |assoc_name, types| 249 | puts " #{assoc_name}: #{types.to_a.join(', ')}" 250 | end 251 | end 252 | end 253 | puts "\nTotal unique records collected: #{@collected_records.size}" 254 | puts "==============================\n" 255 | end 256 | end 257 | end -------------------------------------------------------------------------------- /lib/real_data_tests/pg_dump_generator.rb: -------------------------------------------------------------------------------- 1 | require 'csv' 2 | require 'tmpdir' 3 | require 'fileutils' 4 | require 'json' 5 | require 'set' 6 | 7 | module RealDataTests 8 | class PgDumpGenerator 9 | def initialize(records) 10 | @records = records 11 | end 12 | 13 | def generate 14 | sorted_records = sort_by_dependencies(@records) 15 | insert_statements = collect_inserts(sorted_records) 16 | insert_statements.join("\n") 17 | end 18 | 19 | private 20 | 21 | def sort_by_dependencies(records) 22 | # Group records by their model class 23 | tables_with_records = records.group_by(&:class) 24 | 25 | # Build dependency graph directly from the models we have 26 | dependencies = build_dependency_graph(tables_with_records.keys) 27 | 28 | # Sort models based on dependencies 29 | sorted_models = topological_sort(dependencies) 30 | 31 | # Map back to the actual records in dependency order 32 | sorted_models.flat_map { |model| tables_with_records[model] || [] } 33 | end 34 | 35 | def build_dependency_graph(models) 36 | models.each_with_object({}) do |model, deps| 37 | # Get direct dependencies from belongs_to associations 38 | direct_dependencies = model.reflect_on_all_associations(:belongs_to) 39 | .reject(&:polymorphic?) # Skip polymorphic associations 40 | .reject do |assoc| 41 | # Skip self-referential associations that are configured to prevent circular deps 42 | assoc.klass == model && 43 | RealDataTests.configuration.current_preset.prevent_reciprocal?(model, assoc.name) 44 | end 45 | .map(&:klass) 46 | .select { |klass| models.include?(klass) } 47 | .uniq 48 | 49 | # Handle HABTM associations 50 | habtm_dependencies = model.reflect_on_all_associations(:has_and_belongs_to_many) 51 | .map { |assoc| assoc.join_table_model } 52 | .compact 53 | .select { |join_model| models.include?(join_model) } 54 | .uniq 55 | 56 | deps[model] = (direct_dependencies + habtm_dependencies).uniq 57 | end 58 | end 59 | 60 | def topological_sort(dependencies) 61 | sorted = [] 62 | visited = Set.new 63 | temporary = Set.new 64 | 65 | dependencies.each_key do |model| 66 | visit_model(model, dependencies, sorted, visited, temporary) unless visited.include?(model) 67 | end 68 | 69 | sorted 70 | end 71 | 72 | def visit_model(model, dependencies, sorted, visited, temporary) 73 | return if visited.include?(model) 74 | 75 | if temporary.include?(model) 76 | # Only raise if this isn't a prevented self-reference 77 | unless RealDataTests.configuration.current_preset.prevent_reciprocal?(model, model.model_name.singular) 78 | cycle = detect_cycle(model, dependencies, temporary) 79 | raise "Circular dependency detected: #{cycle.map(&:name).join(' -> ')}" 80 | end 81 | return 82 | end 83 | 84 | temporary.add(model) 85 | 86 | (dependencies[model] || []).each do |dependency| 87 | visit_model(dependency, dependencies, sorted, visited, temporary) unless visited.include?(dependency) 88 | end 89 | 90 | temporary.delete(model) 91 | visited.add(model) 92 | sorted << model 93 | end 94 | 95 | def detect_cycle(start_model, dependencies, temporary) 96 | cycle = [start_model] 97 | current = dependencies[start_model]&.find { |dep| temporary.include?(dep) } 98 | 99 | while current && current != start_model 100 | cycle << current 101 | current = dependencies[current]&.find { |dep| temporary.include?(dep) } 102 | end 103 | 104 | cycle << start_model if current == start_model 105 | cycle 106 | end 107 | 108 | def collect_inserts(records) 109 | records.map do |record| 110 | table_name = record.class.table_name 111 | columns = record.class.column_names 112 | 113 | values = columns.map do |column| 114 | if record.class.respond_to?(:defined_enums) && record.class.defined_enums.key?(column) 115 | raw_value = record.read_attribute_before_type_cast(column) 116 | raw_value.nil? ? 'NULL' : raw_value.to_s 117 | else 118 | quote_value(record[column], get_column_info(record.class, column)) 119 | end 120 | end 121 | 122 | <<~SQL.strip 123 | INSERT INTO #{table_name} 124 | (#{columns.join(', ')}) 125 | VALUES (#{values.join(', ')}) 126 | ON CONFLICT (id) DO NOTHING; 127 | SQL 128 | end 129 | end 130 | 131 | def get_column_info(model, column_name) 132 | column = model.columns_hash[column_name] 133 | { 134 | type: column.type, 135 | sql_type: column.sql_type, 136 | array: column.array 137 | } 138 | end 139 | 140 | def quote_value(value, column_info) 141 | return 'NULL' if value.nil? 142 | 143 | case column_info[:type] 144 | when :integer, :decimal, :float 145 | value.to_s 146 | when :boolean 147 | value.to_s 148 | when :jsonb, :json 149 | if value.blank? 150 | "'{}'" # Return empty JSON object for blank JSONB/JSON fields 151 | else 152 | sanitize_string(value.is_a?(String) ? value : value.to_json) 153 | end 154 | when :array 155 | parse_and_format_array(value, column_info[:sql_type]) 156 | else 157 | if column_info[:array] 158 | parse_and_format_array(value, column_info[:sql_type]) 159 | else 160 | sanitize_string(value.to_s) 161 | end 162 | end 163 | end 164 | 165 | def parse_and_format_special_type(value, column_info) 166 | if column_info[:array] || column_info[:type] == :array 167 | parse_and_format_array(value, column_info[:sql_type]) 168 | else 169 | # Handle JSON/JSONB 170 | json_value = value.is_a?(String) ? value : value.to_json 171 | sanitize_string(json_value) 172 | end 173 | end 174 | 175 | def parse_and_format_array(value, sql_type) 176 | # Always cast empty or string representations of empty arrays to proper type 177 | if value.nil? || value == '[]' || value == '{}' || (value.is_a?(Array) && value.empty?) 178 | base_type = extract_base_type(sql_type) 179 | return "'{}'" + "::#{base_type}[]" 180 | end 181 | 182 | # Parse the array if it's a string 183 | array_value = case value 184 | when String 185 | begin 186 | JSON.parse(value) 187 | rescue JSON::ParserError 188 | value.gsub(/[{}"]/, '').split(',') 189 | end 190 | when Array 191 | value 192 | else 193 | [value] 194 | end 195 | 196 | # Format the array elements 197 | elements = array_value.map do |element| 198 | case element 199 | when String 200 | sanitize_string(element) 201 | when Numeric 202 | element.to_s 203 | when nil 204 | 'NULL' 205 | else 206 | sanitize_string(element.to_s) 207 | end 208 | end 209 | 210 | base_type = extract_base_type(sql_type) 211 | "ARRAY[#{elements.join(',')}]::#{base_type}[]" 212 | end 213 | 214 | def extract_base_type(sql_type) 215 | case sql_type 216 | when /character varying\[\]/i, /varchar\[\]/i 217 | 'varchar' 218 | when /text\[\]/i 219 | 'text' 220 | when /integer\[\]/i 221 | 'integer' 222 | when /bigint\[\]/i 223 | 'bigint' 224 | when /jsonb\[\]/i 225 | 'jsonb' 226 | when /json\[\]/i 227 | 'json' 228 | else 229 | sql_type.sub(/\[\]$/, '') 230 | end 231 | end 232 | 233 | def format_array(value, column_info) 234 | # Handle empty arrays 235 | if value.nil? || value == '[]' || value == '{}' || (value.is_a?(Array) && value.empty?) 236 | return "'{}'" + "::character varying[]" if column_info[:type] == :string 237 | return "'{}'" + "::#{extract_base_type(column_info[:sql_type])}[]" 238 | end 239 | 240 | # Parse the array if it's a string 241 | array_value = case value 242 | when String 243 | begin 244 | JSON.parse(value) 245 | rescue JSON::ParserError 246 | value.gsub(/[{}"]/, '').split(',') 247 | end 248 | when Array 249 | value 250 | else 251 | [value] 252 | end 253 | 254 | # Format array elements 255 | elements = array_value.map do |element| 256 | case element 257 | when String 258 | sanitize_string(element) 259 | when Numeric 260 | element.to_s 261 | when nil 262 | 'NULL' 263 | else 264 | sanitize_string(element.to_s) 265 | end 266 | end 267 | 268 | # Use character varying[] for string arrays 269 | array_type = if column_info[:type] == :string 270 | 'character varying[]' 271 | else 272 | "#{extract_base_type(column_info[:sql_type])}[]" 273 | end 274 | 275 | "ARRAY[#{elements.join(',')}]::#{array_type}" 276 | end 277 | 278 | def extract_base_type(sql_type) 279 | case sql_type 280 | when /character varying\[\]/i, /varchar\[\]/i 281 | 'character varying' 282 | when /text\[\]/i 283 | 'text' 284 | when /integer\[\]/i 285 | 'integer' 286 | when /bigint\[\]/i 287 | 'bigint' 288 | when /jsonb\[\]/i 289 | 'jsonb' 290 | when /json\[\]/i 291 | 'json' 292 | else 293 | sql_type.sub(/\[\]$/, '') 294 | end 295 | end 296 | 297 | def sanitize_string(str) 298 | "'#{str.gsub("'", "''")}'" 299 | end 300 | 301 | def connection_options 302 | config = if ActiveRecord::Base.respond_to?(:connection_db_config) 303 | ActiveRecord::Base.connection_db_config.configuration_hash 304 | else 305 | ActiveRecord::Base.connection_config 306 | end 307 | 308 | options = [] 309 | options << "-h #{config[:host]}" if config[:host] 310 | options << "-p #{config[:port]}" if config[:port] 311 | options << "-U #{config[:username]}" if config[:username] 312 | options << "-d #{config[:database]}" 313 | options << "-q" # Run quietly 314 | options.join(" ") 315 | end 316 | end 317 | end -------------------------------------------------------------------------------- /lib/real_data_tests/rspec_helper.rb: -------------------------------------------------------------------------------- 1 | module RealDataTests 2 | module RSpecHelper 3 | class SqlBlock 4 | attr_reader :type, :content, :table_name 5 | 6 | def initialize(content) 7 | @content = content.strip 8 | @type = determine_block_type 9 | @table_name = extract_table_name if @type == :insert 10 | end 11 | 12 | private 13 | 14 | def determine_block_type 15 | case @content 16 | when /\AINSERT INTO/i 17 | :insert 18 | when /\ACOPY.*FROM stdin/i 19 | :copy 20 | when /\AALTER TABLE/i 21 | :alter 22 | when /\ASET/i 23 | :set 24 | else 25 | :other 26 | end 27 | end 28 | 29 | def extract_table_name 30 | if @content =~ /INSERT INTO\s+"?([^\s"(]+)"?\s/i 31 | $1 32 | end 33 | end 34 | end 35 | 36 | def load_real_test_data(name) 37 | dump_path = File.join(RealDataTests.configuration.dump_path, "#{name}.sql") 38 | raise Error, "Test data file not found: #{dump_path}" unless File.exist?(dump_path) 39 | ActiveRecord::Base.transaction do 40 | # Disable foreign key checks 41 | ActiveRecord::Base.connection.execute('SET session_replication_role = replica;') 42 | begin 43 | # Load the SQL dump quietly 44 | result = system("psql #{connection_options} -q < #{dump_path}") 45 | raise Error, "Failed to load test data: #{dump_path}" unless result 46 | ensure 47 | # Re-enable foreign key checks 48 | ActiveRecord::Base.connection.execute('SET session_replication_role = DEFAULT;') 49 | end 50 | end 51 | end 52 | 53 | def load_real_test_data_native(name) 54 | dump_path = File.join(RealDataTests.configuration.dump_path, "#{name}.sql") 55 | raise Error, "Test data file not found: #{dump_path}" unless File.exist?(dump_path) 56 | 57 | sql_content = File.read(dump_path) 58 | blocks = parse_sql_blocks(sql_content) 59 | 60 | ActiveRecord::Base.transaction do 61 | connection = ActiveRecord::Base.connection 62 | 63 | # Disable foreign key checks 64 | connection.execute('SET session_replication_role = replica;') 65 | 66 | begin 67 | blocks.each_with_index do |block, index| 68 | execute_block(block, index + 1, blocks.length) 69 | end 70 | ensure 71 | connection.execute('SET session_replication_role = DEFAULT;') 72 | end 73 | end 74 | end 75 | 76 | private 77 | 78 | def connection_options 79 | config = if ActiveRecord::Base.respond_to?(:connection_db_config) 80 | ActiveRecord::Base.connection_db_config.configuration_hash 81 | else 82 | ActiveRecord::Base.connection_config 83 | end 84 | options = [] 85 | options << "-h #{config[:host]}" if config[:host] 86 | options << "-p #{config[:port]}" if config[:port] 87 | options << "-U #{config[:username]}" if config[:username] 88 | options << "-d #{config[:database]}" 89 | options << "-q" 90 | options.join(" ") 91 | end 92 | 93 | class SqlBlock 94 | attr_reader :type, :content, :table_name 95 | 96 | def initialize(content) 97 | @content = content.strip 98 | @type = determine_block_type 99 | @table_name = extract_table_name if @type == :insert 100 | end 101 | 102 | private 103 | 104 | def determine_block_type 105 | if @content.match?(/\AINSERT INTO/i) 106 | :insert 107 | elsif @content.match?(/\ACOPY.*FROM stdin/i) 108 | :copy 109 | elsif @content.match?(/\AALTER TABLE/i) 110 | :alter 111 | elsif @content.match?(/\ASET/i) 112 | :set 113 | else 114 | :other 115 | end 116 | end 117 | 118 | def extract_table_name 119 | if @content =~ /INSERT INTO\s+"?([^\s"(]+)"?\s/i 120 | $1 121 | end 122 | end 123 | end 124 | 125 | def parse_sql_blocks(content) 126 | blocks = [] 127 | current_block = [] 128 | in_copy_block = false 129 | 130 | content.each_line do |line| 131 | line = line.chomp 132 | 133 | # Skip empty lines and comments unless in COPY block 134 | next if !in_copy_block && (line.empty? || line.start_with?('--')) 135 | 136 | # Handle start of COPY block 137 | if !in_copy_block && line.upcase.match?(/\ACOPY.*FROM stdin/i) 138 | current_block = [line] 139 | in_copy_block = true 140 | next 141 | end 142 | 143 | # Handle end of COPY block 144 | if in_copy_block && line == '\\.' 145 | current_block << line 146 | blocks << SqlBlock.new(current_block.join("\n")) 147 | current_block = [] 148 | in_copy_block = false 149 | next 150 | end 151 | 152 | # Accumulate lines in COPY block 153 | if in_copy_block 154 | current_block << line 155 | next 156 | end 157 | 158 | # Handle regular SQL statements 159 | current_block << line 160 | if line.end_with?(';') 161 | blocks << SqlBlock.new(current_block.join("\n")) 162 | current_block = [] 163 | end 164 | end 165 | 166 | # Handle any remaining block 167 | blocks << SqlBlock.new(current_block.join("\n")) unless current_block.empty? 168 | blocks 169 | end 170 | 171 | def execute_block(block, index, total) 172 | case block.type 173 | when :insert 174 | execute_insert_block(block, index, total) 175 | when :copy 176 | execute_copy_block(block, index, total) 177 | else 178 | execute_regular_block(block, index, total) 179 | end 180 | end 181 | 182 | def execute_insert_block(block, index, total) 183 | # puts "Executing INSERT block #{index}/#{total} for table: #{block.table_name}" 184 | # Don't modify statements that already end with semicolon 185 | statement = if block.content.strip.end_with?(';') 186 | block.content 187 | else 188 | "#{block.content};" 189 | end 190 | 191 | begin 192 | ActiveRecord::Base.connection.execute(statement) 193 | rescue ActiveRecord::StatementInvalid => e 194 | if e.message.include?('syntax error at or near "ON"') 195 | # Try alternative formatting for ON CONFLICT 196 | modified_statement = statement.gsub(/\)\s+ON\s+CONFLICT/, ') ON CONFLICT') 197 | ActiveRecord::Base.connection.execute(modified_statement) 198 | else 199 | raise 200 | end 201 | end 202 | end 203 | 204 | def execute_copy_block(block, index, total) 205 | # puts "Executing COPY block #{index}/#{total}" 206 | ActiveRecord::Base.connection.execute(block.content) 207 | end 208 | 209 | def execute_regular_block(block, index, total) 210 | # puts "Executing block #{index}/#{total} of type: #{block.type}" 211 | ActiveRecord::Base.connection.execute(block.content) 212 | end 213 | 214 | def normalize_insert_statement(statement) 215 | # First clean up any excess whitespace around parentheses 216 | statement = statement.gsub(/\(\s+/, '(') 217 | .gsub(/\s+\)/, ')') 218 | .gsub(/\)\s+ON\s+CONFLICT/, ') ON CONFLICT') 219 | 220 | # Ensure proper spacing around ON CONFLICT 221 | if statement =~ /(.*?)\s*ON\s+CONFLICT\s+(.*?)\s*(?:DO\s+.*?)?\s*;\s*\z/i 222 | base = $1.strip 223 | conflict_part = $2.strip 224 | action_part = $3&.strip || 'DO NOTHING' 225 | 226 | # Rebuild the statement with consistent formatting 227 | "#{base} ON CONFLICT #{conflict_part} #{action_part};" 228 | else 229 | # If no ON CONFLICT clause, just clean up the spacing 230 | statement.strip.sub(/;?\s*$/, ';') 231 | end 232 | end 233 | 234 | def split_sql_statements(sql) 235 | statements = [] 236 | current_statement = '' 237 | in_string = false 238 | escaped = false 239 | 240 | sql.each_char do |char| 241 | if char == '\\' 242 | escaped = !escaped 243 | elsif char == "'" && !escaped 244 | in_string = !in_string 245 | elsif char == ';' && !in_string 246 | # Add the completed statement 247 | statements << current_statement.strip unless current_statement.strip.empty? 248 | current_statement = '' 249 | next 250 | end 251 | escaped = false 252 | current_statement << char 253 | end 254 | 255 | # Add the last statement if it exists 256 | statements << current_statement.strip unless current_statement.strip.empty? 257 | 258 | # Normalize `ON CONFLICT` clauses 259 | statements = statements.each_with_object([]) do |stmt, result| 260 | if stmt.strip.upcase.start_with?('ON CONFLICT') 261 | result[-1] = "#{result.last.strip} #{stmt.strip}" 262 | else 263 | result << stmt.strip 264 | end 265 | end 266 | 267 | # Ensure semicolons and spacing 268 | statements.map! do |stmt| 269 | stmt = stmt.gsub(/\)\s*ON CONFLICT/, ') ON CONFLICT') # Normalize spacing 270 | stmt.strip.end_with?(';') ? stmt.strip : "#{stmt.strip};" 271 | end 272 | 273 | statements 274 | end 275 | 276 | def extract_conflict_clause(statement) 277 | # Use a more precise regex that handles multiple closing parentheses 278 | if statement =~ /(.+?\))\s*(ON\s+CONFLICT\s+.*?)(?:;?\s*$)/i 279 | [$1, $2.strip] 280 | else 281 | [statement.sub(/;?\s*$/, ''), nil] 282 | end 283 | end 284 | 285 | def clean_sql_statement(statement) 286 | # Match either INSERT INTO...VALUES or just VALUES 287 | if statement =~ /(?:INSERT INTO.*)?VALUES\s*\(/i 288 | # Split the statement into parts, being careful with the ending 289 | if statement =~ /(.*?VALUES\s*\()(.*)(\)\s*(?:ON CONFLICT.*)?;?\s*$)/i 290 | pre_values = $1 291 | values_content = $2 292 | post_values = $3 293 | 294 | # Clean the values content while preserving complex JSON 295 | cleaned_values = clean_complex_values(values_content) 296 | 297 | # Reassemble the statement, ensuring exactly one semicolon at the end 298 | statement = "#{pre_values}#{cleaned_values}#{post_values}" 299 | statement = statement.gsub(/;*\s*$/, '') # Remove any trailing semicolons and whitespace 300 | statement += ";" 301 | end 302 | end 303 | statement 304 | end 305 | 306 | def clean_complex_values(values_str) 307 | current_value = '' 308 | values = [] 309 | in_quotes = false 310 | in_json = false 311 | json_brace_count = 0 312 | escaped = false 313 | 314 | chars = values_str.chars 315 | i = 0 316 | while i < chars.length 317 | char = chars[i] 318 | 319 | case char 320 | when '\\' 321 | current_value << char 322 | escaped = !escaped 323 | when "'" 324 | if !escaped 325 | in_quotes = !in_quotes 326 | end 327 | escaped = false 328 | current_value << char 329 | when '{' 330 | if !in_quotes 331 | in_json = true 332 | json_brace_count += 1 333 | end 334 | current_value << char 335 | when '}' 336 | if !in_quotes 337 | json_brace_count -= 1 338 | in_json = json_brace_count > 0 339 | end 340 | current_value << char 341 | when ',' 342 | if !in_quotes && !in_json 343 | values << clean_value(current_value.strip) 344 | current_value = '' 345 | else 346 | current_value << char 347 | end 348 | else 349 | escaped = false 350 | current_value << char 351 | end 352 | i += 1 353 | end 354 | 355 | # Add the last value 356 | values << clean_value(current_value.strip) unless current_value.strip.empty? 357 | 358 | values.join(', ') 359 | end 360 | 361 | def clean_value(value) 362 | return value if value.start_with?("'") # Already quoted 363 | return value if value.start_with?("'{") # JSON object 364 | return 'NULL' if value.upcase == 'NULL' 365 | return value.downcase if ['true', 'false'].include?(value.downcase) 366 | return value if value.match?(/^\d+$/) # Numbers 367 | 368 | if value.match?(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i) 369 | "'#{value}'" # UUID 370 | else 371 | # Handle any other string value, including those with commas 372 | "'#{value}'" # Other strings 373 | end 374 | end 375 | end 376 | end -------------------------------------------------------------------------------- /spec/real_data_tests/sql_cleaner_spec.rb: -------------------------------------------------------------------------------- 1 | # spec/lib/real_data_tests/sql_cleaner_spec.rb 2 | require 'spec_helper' 3 | 4 | RSpec.describe RealDataTests::RSpecHelper do 5 | let(:helper) { Class.new { include RealDataTests::RSpecHelper }.new } 6 | let(:complex_json_settings) do 7 | '{"billing":{"claim_submission":"","automatic_59_modifier":"1"},' \ 8 | '"print_settings":{"hide_logo_in_header":"0"},' \ 9 | '"patient_portal_settings":{"patient_invoices":"none"},' \ 10 | '"preferred_payment_types":["private-commercial-insurance","credit-card"]}' 11 | end 12 | 13 | def remove_whitespace(sql) 14 | sql.gsub(/\s+/, ' ').strip 15 | end 16 | 17 | describe '#clean_sql_statement' do 18 | it 'handles complex INSERT with JSON and ON CONFLICT' do 19 | sql = <<~SQL 20 | INSERT INTO organizations (id, name, settings, active, timezone, verified) 21 | VALUES ('e50d8052-4481-4246-9502-7f8e5659abcb', 'Test Org', '#{complex_json_settings}', false, 'Eastern Time (US & Canada)', true) 22 | ON CONFLICT (id) DO NOTHING; 23 | SQL 24 | 25 | cleaned = helper.send(:clean_sql_statement, sql) 26 | expect(remove_whitespace(cleaned)).to include("'e50d8052-4481-4246-9502-7f8e5659abcb'") 27 | expect(remove_whitespace(cleaned)).to include("'Test Org'") 28 | expect(remove_whitespace(cleaned)).to include(complex_json_settings) 29 | expect(remove_whitespace(cleaned)).to include("'Eastern Time (US & Canada)'") 30 | expect(remove_whitespace(cleaned)).to match(/true\)\s+ON CONFLICT/) 31 | expect(remove_whitespace(cleaned)).to match(/DO NOTHING;$/) # Changed this line 32 | end 33 | 34 | it 'preserves nested JSON with commas and quotes' do 35 | json_with_commas = '{"values":["first,value", "second,value"]}' 36 | sql = <<~SQL 37 | INSERT INTO config (id, data) 38 | VALUES (1, '#{json_with_commas}') 39 | ON CONFLICT (id) DO NOTHING; 40 | SQL 41 | 42 | cleaned = helper.send(:clean_sql_statement, sql) 43 | expect(cleaned).to include(json_with_commas) 44 | end 45 | 46 | it 'handles multiple complex values with various types' do 47 | sql = <<~SQL 48 | INSERT INTO organizations 49 | (id, name, active, config, created_at, count, uuid) 50 | VALUES 51 | ('1', 'Company, Inc.', true, '{"key": "value"}', '2025-01-14 10:00:00', 42, 'abc-123') 52 | ON CONFLICT (id) DO NOTHING; 53 | SQL 54 | 55 | cleaned = helper.send(:clean_sql_statement, sql) 56 | expect(cleaned).to include("'Company, Inc.'") 57 | expect(cleaned).to include("true") 58 | expect(cleaned).to include("'2025-01-14 10:00:00'") 59 | expect(cleaned).to include("42") 60 | expect(cleaned).to include("'abc-123'") 61 | end 62 | 63 | it 'handles NULL values correctly in complex statements' do 64 | sql = <<~SQL 65 | INSERT INTO organizations 66 | (id, name, parent_id, config) 67 | VALUES 68 | ('1', 'Test Corp', NULL, '{"setting": null}') 69 | ON CONFLICT (id) DO NOTHING; 70 | SQL 71 | 72 | cleaned = helper.send(:clean_sql_statement, sql) 73 | expect(cleaned).to include("NULL") 74 | expect(cleaned).to include("'Test Corp'") 75 | expect(cleaned).to include('{"setting": null}') 76 | end 77 | 78 | it 'preserves spacing in complex JSON strings' do 79 | json_with_spaces = '{"description": "This is a test with spaces and, commas"}' 80 | sql = "INSERT INTO data (id, config) VALUES (1, '#{json_with_spaces}');" 81 | 82 | cleaned = helper.send(:clean_sql_statement, sql) 83 | expect(cleaned).to include(json_with_spaces) 84 | end 85 | 86 | it 'handles actual production SQL with complex settings' do 87 | sql = <<~SQL 88 | VALUES ('e50d8052-4481-4246-9502-7f8e5659abcb', 'Hettinger, Stiedemann and White', 89 | 'Wuckert-Bartoletti', 'fd7pfbe3je79fpp0', NULL, '', 90 | '7c4ab8dc-66ef-4617-a7c7-8a0bd49ae909', '761e96fa-ebf8-40b3-842b-8c47901519e0', 91 | false, false, 92 | '{"billing":{"claim_submission":"","automatic_59_modifier":"1"},"print_settings":{"hide_logo_in_header":"0"},"preferred_payment_types":["private-commercial-insurance","credit-card"]}', 93 | false, false, false, 'http://leannon.test/burl_pfeffer', 94 | '2023-10-04 16:33:02 UTC', '2024-12-09 17:28:00 UTC', '', 95 | 'ebumlenoinivyghb.com', false, 'Eastern Time (US & Canada)', true) 96 | ON CONFLICT (id) DO NOTHING; 97 | SQL 98 | 99 | cleaned = helper.send(:clean_sql_statement, sql) 100 | expect(remove_whitespace(cleaned)).to include("'Hettinger, Stiedemann and White'") 101 | expect(remove_whitespace(cleaned)).to include("'Eastern Time (US & Canada)'") 102 | expect(remove_whitespace(cleaned)).to match(/true\)\s+ON CONFLICT/) 103 | expect(remove_whitespace(cleaned)).to match(/DO NOTHING;$/) # Changed this line 104 | end 105 | 106 | it 'handles VALUES statement without INSERT INTO' do 107 | sql = "VALUES (e50d8052-4481-4246-9502-7f8e5659abcb, Lebsack, Glover, false);" 108 | cleaned = helper.send(:clean_sql_statement, sql) 109 | expect(remove_whitespace(cleaned)).to eq( 110 | "VALUES ('e50d8052-4481-4246-9502-7f8e5659abcb', 'Lebsack', 'Glover', false);" 111 | ) 112 | end 113 | 114 | it 'properly quotes UUIDs in bare VALUES statements' do 115 | sql = "VALUES (e50d8052-4481-4246-9502-7f8e5659abcb, 'Test');" 116 | cleaned = helper.send(:clean_sql_statement, sql) 117 | expect(remove_whitespace(cleaned)).to eq( 118 | "VALUES ('e50d8052-4481-4246-9502-7f8e5659abcb', 'Test');" 119 | ) 120 | end 121 | end 122 | 123 | describe '#clean_complex_values' do 124 | it 'handles values with nested JSON correctly' do 125 | values = "'id123', 'name', '{\"key\": \"value\"}', true" 126 | result = helper.send(:clean_complex_values, values) 127 | expect(result).to eq("'id123', 'name', '{\"key\": \"value\"}', true") 128 | end 129 | 130 | it 'preserves complex JSON structures' do 131 | values = "'id', '#{complex_json_settings}', false" 132 | result = helper.send(:clean_complex_values, values) 133 | expect(result).to include(complex_json_settings) 134 | expect(result).to end_with(", false") 135 | end 136 | end 137 | 138 | describe '#clean_sql_statement' do 139 | it 'handles boolean values correctly at the end of VALUES clause' do 140 | sql = <<~SQL 141 | INSERT INTO organizations (id, name, enabled) 142 | VALUES ('123', 'Test Org', true) 143 | ON CONFLICT (id) DO NOTHING; 144 | SQL 145 | cleaned = helper.send(:clean_sql_statement, sql) 146 | expect(remove_whitespace(cleaned)).to include("VALUES ('123', 'Test Org', true)") 147 | expect(remove_whitespace(cleaned)).to match(/true\) ON CONFLICT/) 148 | end 149 | 150 | it 'preserves boolean false values without quotes' do 151 | sql = "INSERT INTO table (active) VALUES (false);" 152 | cleaned = helper.send(:clean_sql_statement, sql) 153 | expect(remove_whitespace(cleaned)).to include("VALUES (false)") 154 | end 155 | 156 | it 'handles complex JSON settings' do 157 | sql = "INSERT INTO settings (id, config) VALUES (1, '#{complex_json_settings}');" 158 | cleaned = helper.send(:clean_sql_statement, sql) 159 | expect(cleaned).to include(complex_json_settings) 160 | end 161 | 162 | it 'properly quotes UUIDs' do 163 | sql = "INSERT INTO table (id) VALUES (123e4567-e89b-12d3-a456-426614174000);" 164 | cleaned = helper.send(:clean_sql_statement, sql) 165 | expect(cleaned).to include("'123e4567-e89b-12d3-a456-426614174000'") 166 | end 167 | 168 | it 'preserves NULL values without quotes' do 169 | sql = "INSERT INTO table (id, name) VALUES (1, NULL);" 170 | cleaned = helper.send(:clean_sql_statement, sql) 171 | expect(cleaned).to include("NULL") 172 | end 173 | 174 | it 'preserves numeric values without quotes' do 175 | sql = "INSERT INTO table (id, count) VALUES (1, 42);" 176 | cleaned = helper.send(:clean_sql_statement, sql) 177 | expect(cleaned).to include("42") 178 | end 179 | 180 | it 'handles ON CONFLICT clause correctly' do 181 | sql = "INSERT INTO table (id) VALUES (1) ON CONFLICT (id) DO NOTHING;" 182 | cleaned = helper.send(:clean_sql_statement, sql) 183 | expect(remove_whitespace(cleaned)).to match(/\) ON CONFLICT \(id\) DO NOTHING;$/) 184 | end 185 | end 186 | 187 | describe '#clean_sql_statement' do 188 | it 'handles complex INSERT with multiple closing parentheses and ON CONFLICT' do 189 | sql = <<~SQL 190 | INSERT INTO users (email, active, timezone, verified) 191 | VALUES ('test@example.com', false, 'Eastern Time (US & Canada)', true) 192 | ON CONFLICT (email) DO NOTHING; 193 | SQL 194 | cleaned = helper.send(:clean_sql_statement, sql) 195 | expect(remove_whitespace(cleaned)).to include("'Eastern Time (US & Canada)'") 196 | expect(remove_whitespace(cleaned)).to match(/true\)\s+ON CONFLICT/) 197 | end 198 | 199 | it 'preserves spacing around ON CONFLICT clause' do 200 | sql = "INSERT INTO users (id) VALUES (1) ON CONFLICT (id) DO NOTHING;" 201 | cleaned = helper.send(:clean_sql_statement, sql) 202 | expect(cleaned).to match(/\)\s+ON CONFLICT/) 203 | end 204 | end 205 | 206 | describe '#split_sql_statements' do 207 | it 'correctly handles multi-line INSERT statements with ON CONFLICT clauses' do 208 | sql = <<~SQL 209 | INSERT INTO organizations 210 | (id, dba_name, legal_name, settings) 211 | VALUES ('abc-123', 'Test Org', 'Legal Name', '{"setting":"value"}') 212 | ON CONFLICT (id) DO NOTHING; 213 | INSERT INTO patients 214 | (id, first_name, last_name, settings) 215 | VALUES ('def-456', 'John', 'Doe', '{"key":"value"}') 216 | ON CONFLICT (id) DO NOTHING; 217 | INSERT INTO simple_table (id) VALUES (1); 218 | SQL 219 | 220 | statements = helper.send(:split_sql_statements, sql) 221 | 222 | # Should have exactly 3 statements 223 | expect(statements.length).to eq(3) 224 | 225 | # First statement should be a complete INSERT with ON CONFLICT 226 | expect(statements[0]).to include('INSERT INTO organizations') 227 | expect(statements[0]).to include('VALUES') 228 | expect(statements[0]).to include('ON CONFLICT (id) DO NOTHING;') 229 | expect(statements[0].scan(';').length).to eq(1) 230 | 231 | # Second statement should also be complete 232 | expect(statements[1]).to include('INSERT INTO patients') 233 | expect(statements[1]).to include('VALUES') 234 | expect(statements[1]).to include('ON CONFLICT (id) DO NOTHING;') 235 | expect(statements[1].scan(';').length).to eq(1) 236 | 237 | # Third statement should be a simple INSERT 238 | expect(statements[2]).to eq('INSERT INTO simple_table (id) VALUES (1);') 239 | 240 | # No statements should start with ON CONFLICT 241 | expect(statements.none? { |stmt| stmt.strip.upcase.start_with?('ON CONFLICT') }).to be true 242 | 243 | # Each statement should contain proper spacing around ON CONFLICT 244 | statements.each do |stmt| 245 | if stmt.include?('ON CONFLICT') 246 | expect(stmt).to match(/\) ON CONFLICT/) # Space after closing parenthesis 247 | expect(stmt).to match(/DO NOTHING;$/) # Proper ending 248 | end 249 | end 250 | end 251 | end 252 | 253 | describe '#split_sql_statements' do 254 | it 'keeps ON CONFLICT clauses with their INSERT statements' do 255 | sql = <<~SQL 256 | INSERT INTO table1 (id, name) VALUES (1, 'test1') ON CONFLICT (id) DO NOTHING; 257 | INSERT INTO table2 (id) VALUES (2); 258 | INSERT INTO table3 (id, name) VALUES (3, 'test3') ON CONFLICT (id) DO UPDATE SET name = excluded.name; 259 | SQL 260 | 261 | statements = helper.send(:split_sql_statements, sql) 262 | 263 | expect(statements.length).to eq(3) 264 | expect(statements[0]).to include('INSERT INTO table1') 265 | expect(statements[0]).to include('ON CONFLICT (id) DO NOTHING') 266 | expect(statements[1]).to eq('INSERT INTO table2 (id) VALUES (2);') 267 | expect(statements[2]).to include('INSERT INTO table3') 268 | expect(statements[2]).to include('ON CONFLICT (id) DO UPDATE SET name = excluded.name') 269 | end 270 | end 271 | 272 | describe '#clean_complex_values' do 273 | let(:helper) { Class.new { include RealDataTests::RSpecHelper }.new } 274 | 275 | it 'correctly handles values with spaces' do 276 | values = "value1, 'Ratke Group', value3" 277 | result = helper.send(:clean_complex_values, values) 278 | expect(result).to eq("'value1', 'Ratke Group', 'value3'") 279 | end 280 | 281 | it 'handles complex multi-line INSERT statements with ON CONFLICT clauses' do 282 | sql = <<~SQL 283 | INSERT INTO organizations (id, name, active) 284 | VALUES ('abc-123', 'Test Org', true) 285 | ON CONFLICT (id) DO NOTHING; 286 | INSERT INTO users (id, email) VALUES ('user-1', 'test@example.com'); 287 | SQL 288 | 289 | statements = helper.send(:split_sql_statements, sql) 290 | 291 | expect(statements.length).to eq(2) 292 | expect(statements[0]).to include('ON CONFLICT (id) DO NOTHING;') 293 | expect(statements[1]).to include('INSERT INTO users') 294 | end 295 | 296 | it 'preserves quoted strings with commas' do 297 | values = "value1, 'string, with comma', value3" 298 | result = helper.send(:clean_complex_values, values) 299 | expect(result).to eq("'value1', 'string, with comma', 'value3'") 300 | end 301 | 302 | it 'handles nested JSON objects' do 303 | values = "value1, '{\"key\": \"value, with comma\"}', value3" 304 | result = helper.send(:clean_complex_values, values) 305 | expect(result).to eq("'value1', '{\"key\": \"value, with comma\"}', 'value3'") 306 | end 307 | 308 | it 'preserves boolean values without quotes' do 309 | values = "true, false, 'string'" 310 | result = helper.send(:clean_complex_values, values) 311 | expect(result).to eq("true, false, 'string'") 312 | end 313 | end 314 | end -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Real Data Tests 2 | 3 | Create realistic test data in your Rails applications by extracting real records and their associations from your PostgreSQL database. 4 | 5 | > **Note**: This gem currently only supports PostgreSQL databases. MySQL and other database adapters are not supported. 6 | 7 | ## Why use Real Data Tests? 8 | 9 | Testing with realistic data is crucial for catching edge cases and ensuring your application works with real-world data structures. However, creating complex test fixtures that accurately represent your data relationships can be time-consuming and error-prone. 10 | 11 | Real Data Tests solves this by: 12 | - Automatically analyzing and extracting real records and their associations 13 | - Creating reusable SQL dumps that can be committed to your repository 14 | - Making it easy to load realistic test data in your specs 15 | - Supporting data anonymization for sensitive information 16 | 17 | ## Requirements 18 | 19 | - Rails 5.0 or higher 20 | - PostgreSQL database 21 | - `pg_dump` command-line utility installed and accessible 22 | - Database user needs sufficient permissions to run `pg_dump` 23 | 24 | ## Installation 25 | 26 | Add this line to your application's Gemfile: 27 | 28 | ```ruby 29 | gem 'real_data_tests' 30 | ``` 31 | 32 | And then execute: 33 | ```bash 34 | $ bundle install 35 | ``` 36 | 37 | Or install it yourself as: 38 | ```bash 39 | $ gem install real_data_tests 40 | ``` 41 | 42 | ## Configuration 43 | 44 | Create an initializer in your Rails application: 45 | 46 | ```ruby 47 | # config/initializers/real_data_tests.rb 48 | Rails.application.config.after_initialize do 49 | RealDataTests.configure do |config| 50 | # Directory where SQL dumps will be stored 51 | config.dump_path = 'spec/fixtures/real_data_dumps' 52 | 53 | # Define a preset for collecting patient visit data 54 | config.preset :patient_visits do |p| 55 | p.include_associations( 56 | :visit_note_type, 57 | :patient_status 58 | ) 59 | 60 | p.include_associations_for 'Patient', 61 | :visit_notes, 62 | :treatment_reports 63 | 64 | p.prevent_reciprocal 'VisitNoteType.visit_notes' 65 | 66 | p.anonymize 'Patient', { 67 | first_name: -> (_) { Faker::Name.first_name }, 68 | last_name: -> (_) { Faker::Name.last_name } 69 | } 70 | end 71 | 72 | # Define a preset for organization structure 73 | config.preset :org_structure do |p| 74 | p.include_associations( 75 | :organization, 76 | :user 77 | ) 78 | 79 | p.include_associations_for 'Department', 80 | :employees, 81 | :managers 82 | 83 | p.limit_association 'Department.employees', 100 84 | 85 | p.anonymize 'User', { 86 | email: -> (user) { Faker::Internet.email(name: "user#{user.id}") } 87 | } 88 | end 89 | end 90 | end 91 | ``` 92 | 93 | ## Polymorphic Association Support 94 | 95 | Real Data Tests supports collecting records through polymorphic associations. This feature allows you to: 96 | - Automatically detect and collect records for polymorphic `belongs_to`, `has_many`, and `has_one` associations. 97 | - Track and report the types of records collected through polymorphic associations in detailed collection statistics. 98 | 99 | ### Example 100 | If your model includes a polymorphic association like this: 101 | 102 | ```ruby 103 | class Payment < ApplicationRecord 104 | belongs_to :billable, polymorphic: true 105 | end 106 | ``` 107 | 108 | Real Data Tests will: 109 | 1. Collect the associated `billable` records regardless of their type (e.g., `InsuranceCompany`, `Patient`). 110 | 2. Include the `billable_type` in the collection statistics for transparency and reporting. 111 | 112 | ### Configuration for Polymorphic Associations 113 | Polymorphic associations are automatically handled based on your existing configuration. You can also explicitly include or limit polymorphic associations, like so: 114 | 115 | ```ruby 116 | RealDataTests.configure do |config| 117 | config.include_associations_for 'Payment', :billable 118 | config.limit_association 'Payment.billable', 5 119 | end 120 | ``` 121 | 122 | This ensures a robust and flexible way to handle even the most complex relationships in your data. 123 | 124 | ## Using Presets 125 | 126 | Real Data Tests allows you to define multiple configuration presets for different data extraction needs. This is particularly useful when you need different association rules and anonymization settings for different testing scenarios. 127 | 128 | ### Defining Presets 129 | 130 | You can define presets in your configuration: 131 | 132 | ```ruby 133 | RealDataTests.configure do |config| 134 | # Define a preset for patient data 135 | config.preset :patient_data do |p| 136 | p.include_associations(:patient_status, :visit_note_type) 137 | p.include_associations_for 'Patient', :visit_notes 138 | p.limit_association 'Patient.visit_notes', 10 139 | end 140 | 141 | # Define another preset for billing data 142 | config.preset :billing_data do |p| 143 | p.include_associations(:payment_method, :insurance_provider) 144 | p.include_associations_for 'Invoice', :line_items, :payments 145 | p.anonymize 'PaymentMethod', { 146 | account_number: -> (_) { Faker::Finance.credit_card } 147 | } 148 | end 149 | end 150 | ``` 151 | 152 | ### Using Presets in Your Code 153 | 154 | You can use presets in several ways: 155 | 156 | ```ruby 157 | # Create dump file using a specific preset 158 | RealDataTests.with_preset(:patient_data) do 159 | RealDataTests.create_dump_file(patient, name: "patient_with_visits") 160 | end 161 | 162 | # Switch to a different preset 163 | RealDataTests.use_preset(:billing_data) 164 | RealDataTests.create_dump_file(invoice, name: "invoice_with_payments") 165 | 166 | # Use in tests 167 | RSpec.describe "Patient Visits" do 168 | it "loads visit data correctly" do 169 | RealDataTests.with_preset(:patient_data) do 170 | load_real_test_data("patient_with_visits") 171 | # Your test code here 172 | end 173 | end 174 | end 175 | ``` 176 | 177 | ### Benefits of Using Presets 178 | 179 | - **Organized Configuration**: Keep related association rules and anonymization settings together 180 | - **Reusability**: Define configurations once and reuse them across different tests 181 | - **Clarity**: Make it clear what data is being extracted for each testing scenario 182 | - **Flexibility**: Easily switch between different data extraction rules 183 | - **Maintainability**: Update all related settings in one place 184 | 185 | ### Best Practices for Presets 186 | 187 | 1. **Descriptive Names**: Use clear, purpose-indicating names for your presets 188 | 2. **Single Responsibility**: Each preset should focus on a specific testing scenario 189 | 3. **Documentation**: Comment your presets to explain their purpose and usage 190 | 4. **Composition**: Group related models and their associations in the same preset 191 | 5. **Version Control**: Keep preset definitions with your test code for easy reference 192 | 193 | ## Usage 194 | 195 | ### 1. Preparing Test Data 196 | 197 | You can create SQL dumps from your development or production database in two ways: 198 | 199 | **From Rails console:** 200 | ```ruby 201 | # Find a record you want to use as test data 202 | user = User.find(1) 203 | 204 | # Create a dump file including the user and all related records 205 | RealDataTests.create_dump_file(user, name: "active_user_with_posts") 206 | ``` 207 | 208 | **Or from command line:** 209 | ```bash 210 | $ bundle exec real_data_tests create_dump User 1 active_user_with_posts 211 | ``` 212 | 213 | This will: 214 | 1. Find the specified User record 215 | 2. Collect all associated records based on your configuration 216 | 3. Apply any configured anonymization rules 217 | 4. Generate a SQL dump file in your configured dump_path 218 | 219 | ### 2. Using in Tests 220 | 221 | First, include the helper in your test setup: 222 | 223 | ```ruby 224 | # spec/rails_helper.rb or spec/spec_helper.rb 225 | require 'real_data_tests' 226 | 227 | RSpec.configure do |config| 228 | config.include RealDataTests::RSpecHelper 229 | end 230 | ``` 231 | 232 | Then use it in your tests: 233 | 234 | ```ruby 235 | RSpec.describe "Blog" do 236 | it "displays user's posts correctly" do 237 | # Load the previously created dump file 238 | load_real_test_data("active_user_with_posts") 239 | 240 | # Your test code here - the database now contains 241 | # the user and all their associated records 242 | visit user_posts_path(User.first) 243 | expect(page).to have_content("My First Post") 244 | end 245 | end 246 | ``` 247 | 248 | ## Association Control 249 | 250 | Real Data Tests provides several ways to control how associations are collected and loaded. 251 | 252 | ### Global Association Filtering 253 | 254 | You can control which associations are collected globally using either whitelist or blacklist mode: 255 | 256 | ```ruby 257 | # Whitelist Mode - ONLY collect these associations 258 | config.include_associations( 259 | :user, 260 | :organization, 261 | :profile 262 | ) 263 | 264 | # OR Blacklist Mode - collect all EXCEPT these associations 265 | config.exclude_associations( 266 | :very_large_association, 267 | :unused_association 268 | ) 269 | ``` 270 | 271 | ### Model-Specific Associations 272 | 273 | For more granular control, you can specify which associations should be collected for specific models: 274 | 275 | ```ruby 276 | RealDataTests.configure do |config| 277 | # Global associations that apply to all models 278 | config.include_associations( 279 | :organization, 280 | :user 281 | ) 282 | 283 | # Model-specific associations 284 | config.include_associations_for 'Patient', 285 | :visit_notes, 286 | :treatment_reports, 287 | :patient_status 288 | 289 | config.include_associations_for 'Discipline', 290 | :organization, # Will collect this even though it's in global associations 291 | :credentials, 292 | :specialty_types 293 | end 294 | ``` 295 | 296 | This is particularly useful when: 297 | - Different models need different association rules 298 | - The same association name means different things on different models 299 | - You want to collect an association from one model but not another 300 | - You need to maintain a clean separation of concerns in your test data 301 | 302 | ### Polymorphic Associations 303 | Polymorphic associations are fully supported. Include and configure them as needed: 304 | 305 | ```ruby 306 | RealDataTests.configure do |config| 307 | config.include_associations_for 'Payment', :billable 308 | end 309 | ``` 310 | 311 | You can also limit or prevent reciprocal loading for polymorphic associations: 312 | 313 | ```ruby 314 | config.limit_association 'Payment.billable', 10 315 | config.prevent_reciprocal 'Payment.billable' 316 | ``` 317 | 318 | ### Association Loading Control 319 | 320 | You can further refine how associations are loaded using limits and reciprocal prevention: 321 | 322 | ```ruby 323 | RealDataTests.configure do |config| 324 | # Limit the number of records loaded for specific associations 325 | config.limit_association 'Patient.visit_notes', 10 326 | 327 | # Prevent loading associations in the reverse direction 328 | config.prevent_reciprocal 'VisitNoteType.visit_notes' 329 | end 330 | ``` 331 | 332 | ### Best Practices for Association Control 333 | 334 | 1. **Start with Global Rules**: Define global association rules that apply to most models 335 | 2. **Add Model-Specific Rules**: Use `include_associations_for` when you need different rules for specific models 336 | 3. **Control Data Volume**: Use `limit_association` for has_many relationships that could return large numbers of records 337 | 4. **Prevent Cycles**: Use `prevent_reciprocal` to break circular references in your association chain 338 | 5. **Monitor Performance**: Watch the size of your dump files and adjust your association rules as needed 339 | 340 | ## Association Filtering 341 | 342 | Real Data Tests provides two mutually exclusive approaches to control which associations are collected: 343 | 344 | ### Whitelist Mode 345 | Use this when you want to ONLY collect specific associations: 346 | ```ruby 347 | RealDataTests.configure do |config| 348 | config.include_associations( 349 | :user, 350 | :profile, 351 | :posts, 352 | :comments 353 | ) 354 | end 355 | ``` 356 | 357 | ### Blacklist Mode 358 | Use this when you want to collect all associations EXCEPT specific ones: 359 | ```ruby 360 | RealDataTests.configure do |config| 361 | config.exclude_associations( 362 | :large_association, 363 | :unused_association 364 | ) 365 | end 366 | ``` 367 | 368 | > **Note**: You must choose either blacklist or whitelist mode, not both. Attempting to use both will raise an error. 369 | 370 | ## Data Anonymization 371 | 372 | Real Data Tests uses lambdas with the Faker gem for flexible data anonymization. Each anonymization rule receives the record as an argument, allowing for dynamic value generation: 373 | 374 | ```ruby 375 | RealDataTests.configure do |config| 376 | config.anonymize 'User', { 377 | # Simple value replacement 378 | first_name: -> (_) { Faker::Name.first_name }, 379 | 380 | # Dynamic value based on record 381 | email: -> (user) { Faker::Internet.email(name: "user#{user.id}") }, 382 | 383 | # Custom anonymization logic 384 | full_name: -> (user) { 385 | "#{Faker::Name.first_name} #{Faker::Name.last_name}" 386 | } 387 | } 388 | end 389 | ``` 390 | 391 | ### Common Faker Examples 392 | 393 | ```ruby 394 | { 395 | name: -> (_) { Faker::Name.name }, 396 | username: -> (_) { Faker::Internet.username }, 397 | email: -> (_) { Faker::Internet.email }, 398 | phone: -> (_) { Faker::PhoneNumber.phone_number }, 399 | address: -> (_) { Faker::Address.street_address }, 400 | company: -> (_) { Faker::Company.name }, 401 | description: -> (_) { Faker::Lorem.paragraph } 402 | } 403 | ``` 404 | 405 | See the [Faker documentation](https://github.com/faker-ruby/faker) for a complete list of available generators. 406 | 407 | ## Database Cleaner Integration 408 | 409 | If you're using DatabaseCleaner with models that have foreign key constraints, you'll need to handle the cleanup order carefully. 410 | 411 | ### Disable Foreign Key Constraints During Cleanup 412 | Add this to your DatabaseCleaner configuration: 413 | 414 | ```ruby 415 | config.append_after(:suite) do 416 | # Disable foreign key constraints 417 | ActiveRecord::Base.connection.execute('SET session_replication_role = replica;') 418 | begin 419 | # Your cleanup code here 420 | SKIP_MODELS.each { |model| model.delete_all } 421 | ensure 422 | # Re-enable foreign key constraints 423 | ActiveRecord::Base.connection.execute('SET session_replication_role = DEFAULT;') 424 | end 425 | end 426 | ``` 427 | 428 | ## How It Works 429 | 430 | 1. **Record Collection**: The gem analyzes your ActiveRecord associations to find all related records. 431 | 2. **Dump Generation**: It creates a PostgreSQL dump file containing only the necessary records. 432 | 3. **Test Loading**: During tests, it loads the dump file into your test database. 433 | 434 | ## Best Practices 435 | 436 | 1. **Version Control**: Commit your SQL dumps to version control so all developers have access to the same test data. 437 | 2. **Meaningful Names**: Use descriptive names for your dump files that indicate the scenario they represent. 438 | 3. **Data Privacy**: Always use anonymization for sensitive data before creating dumps. 439 | 4. **Association Control**: Use association filtering to keep dumps focused and maintainable. 440 | 5. **Unique Identifiers**: Use record IDs in anonymized data to maintain uniqueness (e.g., emails). 441 | 442 | ## Development 443 | 444 | After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. 445 | 446 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org). 447 | 448 | ## Contributing 449 | 450 | Bug reports and pull requests are welcome on GitHub at https://github.com/diasks2/real_data_tests. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/diasks2/real_data_tests/blob/main/CODE_OF_CONDUCT.md). 451 | 452 | ## License 453 | 454 | The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). 455 | 456 | ## Code of Conduct 457 | 458 | Everyone interacting in the Real Data Tests project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/diasks2/real_data_tests/blob/main/CODE_OF_CONDUCT.md). --------------------------------------------------------------------------------