├── .coveralls.yml ├── .documentup.json ├── .gitignore ├── .rspec ├── .ruby-gemset ├── .ruby-version ├── .travis.yml ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── bin └── datanon ├── commands.txt ├── data-anonymization.gemspec ├── examples ├── blacklist_dsl.rb ├── mongodb_blacklist_dsl.rb ├── mongodb_whitelist_dsl.rb ├── whitelist_dsl.rb └── whitelist_dsl_threads.rb ├── lib ├── core │ ├── database.rb │ ├── dsl.rb │ ├── field.rb │ ├── fields_missing_strategy.rb │ └── table_errors.rb ├── data-anonymization.rb ├── parallel │ └── table.rb ├── strategy │ ├── base.rb │ ├── blacklist.rb │ ├── field │ │ ├── anonymize_array.rb │ │ ├── anonymous.rb │ │ ├── contact │ │ │ ├── geojson_base.rb │ │ │ ├── random_address.rb │ │ │ ├── random_city.rb │ │ │ ├── random_phone_number.rb │ │ │ ├── random_province.rb │ │ │ └── random_zipcode.rb │ │ ├── datetime │ │ │ ├── anonymize_date.rb │ │ │ ├── anonymize_datetime.rb │ │ │ ├── anonymize_time.rb │ │ │ ├── date_delta.rb │ │ │ ├── date_time_delta.rb │ │ │ └── time_delta.rb │ │ ├── default_anon.rb │ │ ├── email │ │ │ ├── gmail_template.rb │ │ │ ├── random_email.rb │ │ │ └── random_mailinator_email.rb │ │ ├── fields.rb │ │ ├── name │ │ │ ├── random_first_name.rb │ │ │ ├── random_full_name.rb │ │ │ ├── random_last_name.rb │ │ │ └── random_user_name.rb │ │ ├── number │ │ │ ├── random_big_decimal_delta.rb │ │ │ ├── random_float.rb │ │ │ ├── random_float_delta.rb │ │ │ ├── random_integer.rb │ │ │ └── random_integer_delta.rb │ │ ├── random_boolean.rb │ │ ├── string │ │ │ ├── formatted_string_numbers.rb │ │ │ ├── lorem_ipsum.rb │ │ │ ├── random_formatted_string.rb │ │ │ ├── random_string.rb │ │ │ ├── random_url.rb │ │ │ ├── select_from_database.rb │ │ │ ├── select_from_file.rb │ │ │ ├── select_from_list.rb │ │ │ └── string_template.rb │ │ └── whitelist.rb │ ├── mongodb │ │ ├── anonymize_field.rb │ │ ├── blacklist.rb │ │ └── whitelist.rb │ ├── strategies.rb │ └── whitelist.rb ├── tasks │ └── rake_tasks.rb ├── thor │ ├── helpers │ │ ├── 
mongodb_dsl_generator.rb │ │ └── rdbms_dsl_generator.rb │ └── templates │ │ ├── mongodb_whitelist_template.erb │ │ └── whitelist_template.erb ├── utils │ ├── database.rb │ ├── geojson_parser.rb │ ├── logging.rb │ ├── parallel_progress_bar.rb │ ├── progress_bar.rb │ ├── random_float.rb │ ├── random_int.rb │ ├── random_string.rb │ ├── random_string_chars_only.rb │ ├── resource.rb │ └── template_helper.rb └── version.rb ├── resources ├── UK_addresses.geojson ├── US_addresses.geojson ├── first_names.txt └── last_names.txt ├── sample-data ├── chinook.sqlite ├── chinook_data.sql ├── chinook_schema.sql └── mongo │ ├── plans.json │ └── users.json └── spec ├── acceptance ├── mongodb_blacklist_spec.rb ├── mongodb_whitelist_spec.rb ├── rdbms_blacklist_spec.rb ├── rdbms_whitelist_spec.rb └── rdbms_whitelist_with_primary_key_spec.rb ├── core └── fields_missing_strategy_spec.rb ├── resource └── sample.geojson ├── spec_helper.rb ├── strategy ├── field │ ├── contact │ │ ├── random_address_spec.rb │ │ ├── random_city_spec.rb │ │ ├── random_phone_number_spec.rb │ │ ├── random_province_spec.rb │ │ └── random_zipcode_spec.rb │ ├── datetime │ │ ├── anonymize_date_spec.rb │ │ ├── anonymize_datetime_spec.rb │ │ ├── anonymize_time_spec.rb │ │ ├── date_delta_spec.rb │ │ ├── date_time_delta_spec.rb │ │ └── time_delta_spec.rb │ ├── default_anon_spec.rb │ ├── email │ │ ├── gmail_template_spec.rb │ │ ├── random_email_spec.rb │ │ └── random_mailinator_email_spec.rb │ ├── name │ │ ├── random_first_name_spec.rb │ │ ├── random_full_name_spec.rb │ │ ├── random_last_name_spec.rb │ │ └── random_user_name_spec.rb │ ├── number │ │ ├── random_big_decimal_delta_spec.rb │ │ ├── random_float_delta_spec.rb │ │ ├── random_float_spec.rb │ │ ├── random_integer_delta_spec.rb │ │ └── random_integer_spec.rb │ ├── random_boolean_spec.rb │ ├── string │ │ ├── formatted_string_numbers_spec.rb │ │ ├── lorem_ipsum_spec.rb │ │ ├── random_formatted_string_spec.rb │ │ ├── random_string_spec.rb │ │ ├── random_url_spec.rb │ 
│ ├── select_from_database_spec.rb │ │ ├── select_from_file_spec.rb │ │ ├── select_from_list_spec.rb │ │ └── string_template_spec.rb │ └── whitelist_spec.rb └── mongodb │ └── anonymize_field_spec.rb ├── support └── customer_sample.rb └── utils ├── database_spec.rb ├── geojson_parser_spec.rb ├── random_float_spec.rb ├── random_int_spec.rb ├── random_string_char_only_spec.rb ├── random_string_spec.rb └── template_helper_spec.rb /.coveralls.yml: -------------------------------------------------------------------------------- 1 | repo_token: iq3YwsHsWi20COgLsNkJMLsbXin813TLt -------------------------------------------------------------------------------- /.documentup.json: -------------------------------------------------------------------------------- 1 | { 2 | "repo": "sunitparekh/data-anonymization", 3 | "name": "Data Anonymization", 4 | "theme": "v1", 5 | "color": "#336699", 6 | "travis": true, 7 | "twitter": ["dataanon"], 8 | "google_analytics":"UA-34000799-1" 9 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | .idea 19 | sample-data/chinook-empty.sqlite 20 | tmp 21 | examples/mongodb_whitelist_generated.rb 22 | data -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | #--profile 3 | #--format documentation -------------------------------------------------------------------------------- /.ruby-gemset: -------------------------------------------------------------------------------- 1 | data-anon 2 | 
-------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | ruby-3.2.1 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | services: 3 | - mongodb 4 | before_install: gem install bundler 5 | before_script: rake empty_dest 6 | rvm: 7 | - 2.7.7 8 | - 3.0.5 9 | - 3.1.3 10 | - 3.2.1 11 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | 5 | group :development, :test do 6 | gem 'foreman' 7 | gem 'rake' 8 | gem 'rspec' 9 | gem 'pry' 10 | gem 'sqlite3' 11 | gem 'mongo' 12 | gem 'coveralls', require: false 13 | end 14 | 15 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Sunit Parekh 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data::Anonymization 2 | Afraid of using production data due to privacy issues? Data Anonymization is a tool that helps you build anonymized production data dumps which you can use for performance testing, security testing, debugging and development. 3 | 4 | ## Java/Kotlin version 5 | 6 | Java/Kotlin version of tool supporting RDBMS databases is available with similar easy to use DSL. 
7 | * [Kotlin/Java Data Anonymization Tool](https://github.com/dataanon/data-anon) 8 | * [Kotlin Maven Sample Project](https://github.com/dataanon/dataanon-kotlin-sample) 9 | * [Java Maven Sample Project](https://github.com/dataanon/dataanon-java-sample) 10 | 11 | 12 | ---------------------- 13 | 14 | 15 | [](http://travis-ci.org/sunitparekh/data-anonymization) 16 | [](https://gemnasium.com/sunitparekh/data-anonymization) 17 | [](https://codeclimate.com/github/sunitparekh/data-anonymization) 18 | [![Coverage Status](https://coveralls.io/repos/sunitparekh/data-anonymization/badge.png?branch=master)](https://coveralls.io/r/sunitparekh/data-anonymization?branch=master) 19 | [![Gem Version](https://badge.fury.io/rb/data-anonymization.svg)](http://badge.fury.io/rb/data-anonymization) 20 | 21 | ## Getting started 22 | 23 | Install gem using: 24 | 25 | $ gem install data-anonymization 26 | 27 | Install required database adapter library for active record: 28 | 29 | $ gem install sqlite3 30 | 31 | Create ruby program using data-anonymization DSL as following `my_dsl.rb`: 32 | 33 | ```ruby 34 | require 'data-anonymization' 35 | 36 | database 'DatabaseName' do 37 | strategy DataAnon::Strategy::Blacklist # whitelist (default) or blacklist 38 | 39 | # database config as active record connection hash 40 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite' 41 | 42 | # User -> table name (case sensitive) 43 | table 'User' do 44 | # id, DateOfBirth, FirstName, LastName, UserName, Password -> table column names (case sensitive) 45 | primary_key 'id' # composite key is also supported 46 | anonymize 'DateOfBirth','FirstName','LastName' # uses default anonymization based on data types 47 | anonymize('UserName').using FieldStrategy::StringTemplate.new('user#{row_number}') 48 | anonymize('Password') { |field| "password" } 49 | end 50 | 51 | ... 52 | 53 | end 54 | ``` 55 | 56 | Run using: 57 | 58 | $ ruby my_dsl.rb 59 | 60 | Liked it? 
please share 61 | 62 | [](https://twitter.com/share?text=A+simple+ruby+DSL+based+data+anonymization&url=http:%2F%2Fsunitparekh.github.com%2Fdata-anonymization&via=dataanon&hashtags=dataanon) 63 | 64 | ## Examples 65 | 66 | SQLite database 67 | 68 | 1. [Whitelist](https://github.com/sunitparekh/data-anonymization/blob/master/examples/whitelist_dsl.rb) 69 | 2. [Blacklist](https://github.com/sunitparekh/data-anonymization/blob/master/examples/blacklist_dsl.rb) 70 | 71 | MongoDB 72 | 73 | 1. [Whitelist](https://github.com/sunitparekh/data-anonymization/blob/master/examples/mongodb_whitelist_dsl.rb) 74 | 2. [Blacklist](https://github.com/sunitparekh/data-anonymization/blob/master/examples/mongodb_blacklist_dsl.rb) 75 | 76 | Postgresql database having **composite primary key** 77 | 78 | 1. [Whitelist](https://github.com/sunitparekh/test-anonymization/blob/master/dell_whitelist.rb) 79 | 2. [Blacklist](https://github.com/sunitparekh/test-anonymization/blob/master/dell_blacklist.rb) 80 | 81 | 82 | ## Changelog 83 | 84 | #### 0.8.7 (Jan 14, 2022) 85 | 1. Upgraded to rails 7.x 86 | 87 | #### 0.8.5 (May 28, 2020) 88 | 1. Upgraded to rails 6.x 89 | 90 | #### 0.8.1 (Aug 19, 2017) 91 | 1. Multi-threading support added by [stanislav-tyutin](https://github.com/stanislav-tyutin) using Pull Request. 92 | 2. Fixed to work with Ruby 2.4.x, issue with Integer data type 93 | 94 | #### 0.8.0 (Oct 31, 2016) 95 | 1. Upgraded to rails 5.x 96 | 97 | #### 0.7.4 (Oct 29, 2016) 98 | 1. Continue to work on rails 4.x. Minor changes based on feedback. 99 | 100 | #### 0.8.0.rc1 (Sep 5, 2016) 101 | 1. Upgraded to rails 5.0, please report any issue or use case not working. 102 | 103 | #### 0.7.3 (Feb 5, 2016) 104 | 1. Fixed issue with batchsize. Thanks to [Jan Raasch](https://github.com/janraasch) for sending pull request. 105 | 106 | #### 0.7.2 (Sep 26, 2015) 107 | 1. Upgraded MongoDB to latest gem version 2.1.0 and tested with MongoDB 3.x version. 108 | 2. Upgraded gems to latest version 109 | 3. 
Adding limit functionality - Merge pull request #27 from yanismydj/master 110 | 111 | #### 0.7.1 (Jun 13, 2015) 112 | 1. Fixed issues with empty array data for MongoDB 113 | 2. Added feature to skip and continue records during anonymization, this is useful to apply different strategies for different types of records. 114 | 115 | 116 | #### 0.7.0 (Mar 9, 2015) 117 | 1. Removed downcase from field name since it was causing issues with upper case field names. So now for database where case matters field name case should be maintained. 118 | 2. Upgraded gems to latest version 119 | 120 | 121 | #### 0.6.7 (Jan 17, 2015) 122 | 1. Upgraded gems to latest version including activerecord to 4.2. Please try it out and provide feedback. 123 | 124 | 125 | #### 0.6.6 (Oct 31, 2014) 126 | 1. Upgraded gems to latest version. 127 | 128 | 129 | #### 0.6.5 (Jun 02, 2014) 130 | 1. Upgraded most of the gems to latest version. major change is rails activerecord gem to latest versions 4.1.1, please provide feedback. 131 | 132 | #### 0.6.0 (Dec 09, 2013) 133 | 1. Upgraded rails activerecord gem to latest versions 4.0.2, please provide feedback. 134 | 135 | #### 0.5.5 (Dec 4, 2013) 136 | 1. Upgraded gems to latest versions 137 | 138 | 139 | #### 0.5.2 (Jan 29, 2013) 140 | 141 | 1. Fixed [issue #17](https://github.com/sunitparekh/data-anonymization/issues/17) 142 | 2. Upgraded Thor dependency to latest version 143 | 144 | 145 | #### 0.5.2 (Jan 20, 2013) 146 | 147 | 1. Upgraded all gem to latest and greatest including Rails activerecord and activesupport. 148 | 149 | #### 0.5.1 (Oct 26, 2012) 150 | 151 | 1. Minor fixes release, no major functionality or feature added. 152 | 153 | Please see the [Github 0.5.1 milestone page](https://github.com/sunitparekh/data-anonymization/issues?milestone=3&state=open) for more details on changes/fixes in release 0.5.1 154 | 155 | #### 0.5.0 (Sep 28, 2012) 156 | 157 | Major changes: 158 | 159 | 1. MongoDB support 160 | 2. 
Command line utility to generate whitelist DSL for RDBMS & MongoDB (reduces pain for writing whitelist dsl) 161 | 3. Added support for reporting fields missing mapping in case of whitelist 162 | 4. Errors reported at the end of process. Job doesn't fail for a single error, it fails if more than 100 records failed during anonymization. 163 | 164 | 165 | Please see the [Github 0.5.0 milestone page](https://github.com/sunitparekh/data-anonymization/issues?milestone=2&state=open) for more details on changes/fixes in release 0.5.0 166 | 167 | #### 0.3.0 (Sep 4, 2012) 168 | 169 | Major changes: 170 | 171 | 1. Added support for Parallel table execution 172 | 2. Change in default String strategy from LoremIpsum to RandomString based on end user feedback. 173 | 3. Fixed issue with table column name 'type' as this is default name for STI in activerecord. 174 | 175 | Please see the [Github 0.3.0 milestone page](https://github.com/sunitparekh/data-anonymization/issues?milestone=1&state=closed) for more details on changes/fixes in release 0.3.0 176 | 177 | #### 0.2.0 (August 16, 2012) 178 | 179 | 1. Added the progress bar using 'powerbar' gem. Which also shows the ETA for each table. 180 | 2. Added More strategies 181 | 3. Fixed default anonymization strategies for boolean and integer values 182 | 4. Added support for composite primary key 183 | 184 | #### 0.1.2 (August 14, 2012) 185 | 186 | 1. First initial release 187 | 188 | ## Roadmap 189 | 190 | MVP done. Fix defects and support queries, suggestions, enhancements logged in Github issues :-) 191 | 192 | ## Share feedback 193 | 194 | Please use Github [issues](https://github.com/sunitparekh/data-anonymization/issues) to share feedback, feature suggestions and report issues. 195 | 196 | ## What is data anonymization? 197 | 198 | For almost all projects there is a need for production data dump in order to run performance tests, rehearse production releases and debug production issues. 
199 | However, getting production data and using it is not feasible due to multiple reasons, primary being privacy concerns for user data. And thus the need for data anonymization. 200 | This tool helps you to get anonymized production data dump using either Blacklist or Whitelist strategies. 201 | 202 | Read more about [data anonymization here](http://sunitspace.blogspot.in/2012/09/data-anonymization.html) 203 | 204 | ## Anonymization Strategies 205 | 206 | ### Blacklist 207 | This approach essentially leaves all fields unchanged with the exception of those specified by the user, which are scrambled/anonymized (hence the name blacklist). 208 | For `Blacklist` create a copy of prod database and chooses the fields to be anonymized like e.g. username, password, email, name, geo location etc. based on user specification. Most of the fields have different rules e.g. password should be set to same value for all users, email needs to be valid. 209 | 210 | The problem with this approach is that when new fields are added they will not be anonymized by default. Human error in omitting users personal data could be damaging. 211 | 212 | ```ruby 213 | database 'DatabaseName' do 214 | strategy DataAnon::Strategy::Blacklist 215 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite' 216 | ... 217 | end 218 | ``` 219 | 220 | ### Whitelist 221 | This approach, by default scrambles/anonymizes all fields except a list of fields which are allowed to copied as is. Hence the name whitelist. 222 | By default all data needs to be anonymized. So from production database data is sanitized record by record and inserted as anonymized data into destination database. Source database needs to be readonly. 223 | All fields would be anonymized using default anonymization strategy which is based on the datatype, unless a special anonymization strategy is specified. For instance special strategies could be used for emails, passwords, usernames etc. 
224 | A whitelisted field implies that it's okay to copy the data as is and anonymization isn't required. 225 | This way any new field will be anonymized by default and if we need them as is, add it to the whitelist explicitly. This prevents any human error and protects sensitive information. 226 | 227 | ```ruby 228 | database 'DatabaseName' do 229 | strategy DataAnon::Strategy::Whitelist 230 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite' 231 | destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite' 232 | ... 233 | end 234 | ``` 235 | 236 | Read more about [blacklist and whitelist here](http://sunitspace.blogspot.in/2012/09/data-anonymization-blacklist-whitelist.html) 237 | 238 | 239 | ## Tips 240 | 241 | 1. In Whitelist approach make source database connection READONLY. 242 | 2. Change [default field strategies](#default-field-strategies) to avoid using same strategy again and again in your DSL. 243 | 3. To run anonymization in parallel at Table level, provided no FK constraint on tables use DataAnon::Parallel::Table strategy 244 | 4. For large table to load them in batches from table set 'batch_size' and it will use RoR's batch mode processing. Checkout [example](https://github.com/sunitparekh/data-anonymization/blob/master/examples/whitelist_dsl.rb) on how to use batch processing. 245 | 5. Make sure to give proper case for fields and table names. 246 | 6. Use skip and continue to apply different strategies for records. 247 | 7. Use 'limit' to limit the number of rows that will be imported in whitelist 248 | 8. RDBMS databases utilizing schemas can be specified via `schema_search_path`: `source_db { ... schema_search_path: 'public,my_special_schema' }` 249 | 250 | ## DSL Generation 251 | 252 | We provide a command line tool to generate whitelist scripts for RDBMS and NoSQL databases. The user needs to supply the connection details to the database and a script is generated by analyzing the schema. 
Below are examples of how to use the tool to generate the scripts for RDBMS and NoSQL datastores 253 | 254 | When you install the data-anonymization tool, the **datanon** command becomes available on the terminal. If you type **datanon --help** and execute you should see the below 255 | 256 | ``` 257 | Tasks: 258 | 259 | datanon generate_mongo_dsl -d, --database=DATABASE -h, --host=HOST # Generates a base anonymization script(whitelist strategy) for a Mongo DB using the database schema 260 | datanon generate_rdbms_dsl -a, --adapter=ADAPTER -d, --database=DATABASE -h, --host=HOST # Generates a base anonymization script(whitelist strategy) for a RDBMS database using the database schema 261 | datanon help [TASK] # Describe available tasks or one specific task 262 | 263 | ``` 264 | 265 | ### RDBMS whitelist generation 266 | 267 | The gem uses ActiveRecord(AR) abstraction to connect to relational databases. You can generate a whitelist script in seconds for any relational database supported by Active Record. To do so use the following command 268 | 269 | ``` 270 | datanon generate_rdbms_dsl [options] 271 | 272 | ``` 273 | 274 | The options available are : 275 | 276 | 1. adapter(-a) : The activerecord adapter to use to connect to the database (eg. mysql2, postgresql) 277 | 2. host(-h) : DB host name or IP address 278 | 3. database(-d) : The name of the database to generate the whitelist script for 279 | 4. username(-u) : Username for DB authentication 280 | 5. password(-w) : Password for DB authentication 281 | 6. port(-p) : The port the database service is running on. Default port provided by AR will be used if nothing is specified. 282 | 283 | The adapter, host and database options are mandatory. The others are optional. 
284 | 285 | A few examples of the command are shown below 286 | 287 | ``` 288 | datanon generate_rdbms_dsl -a mysql2 -h db.host.com -p 3306 -d production_db -u root -w password 289 | 290 | datanon generate_rdbms_dsl -a postgresql -h 123.456.7.8 -d production_db 291 | 292 | ``` 293 | 294 | The relevant db gems must be installed so that AR has the adapters required to establish the connection to the databases. The script generates a file named **rdbms_whitelist_generated.rb** in the same location as the project. 295 | 296 | ### MongoDB whitelist generation 297 | 298 | Similar to the relational databases, a whitelist script for mongo db can be generated by analysing the database structure 299 | 300 | ``` 301 | datanon generate_mongo_dsl [options] 302 | 303 | ``` 304 | 305 | The options available are : 306 | 307 | 1. host(-h) : DB host name or IP address 308 | 2. database(-d) : The name of the database to generate the whitelist script for 309 | 3. username(-u) : Username for DB authentication 310 | 4. password(-w) : Password for DB authentication 311 | 5. port(-p) : The port the database service is running on. 312 | 6. whitelist patterns(-r): A regex expression which can be used to match records in the database to list as whitelisted fields in the generated script. 313 | 314 | The host and database options are mandatory. The others are optional. 315 | 316 | A few examples of the command are shown below 317 | 318 | ``` 319 | datanon generate_mongo_dsl -h db.host.com -d production_db -u root -w password 320 | 321 | datanon generate_mongo_dsl -h 123.456.7.8 -d production_db 322 | 323 | ``` 324 | 325 | The **mongo** gem is required in order to install the mongo db drivers. The script generates a file named **mongodb_whitelist_generated.rb** in the same location as the project. 326 | 327 | 328 | 329 | ## Running in Parallel 330 | Currently provides capability of running anonymization in parallel at table level provided no FK constraints on tables. 
331 | It uses [Parallel gem](https://github.com/grosser/parallel) provided by Michael Grosser. 332 | By default it starts multiple parallel ruby processes processing table one by one. 333 | 334 | ```ruby 335 | database 'DellStore' do 336 | strategy DataAnon::Strategy::Whitelist 337 | execution_strategy DataAnon::Parallel::Table # by default sequential table processing 338 | ... 339 | end 340 | ``` 341 | 342 | 343 | ## DataAnon::Core::Field 344 | The object that gets passed along with the field strategies. 345 | 346 | has following attribute accessor 347 | 348 | - `name` current field/column name 349 | - `value` current field/column value 350 | - `row_number` current row number 351 | - `ar_record` active record of the current row under processing 352 | 353 | ## Field Strategies 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 |
ContentNameDescription
TextLoremIpsumGenerates a random Lorem Ipsum String
TextRandomStringGenerates a random string of equal length
TextStringTemplateGenerates a string based on provided template
TextSelectFromListRandomly selects a string from a provided list
TextSelectFromFileRandomly selects a string from a provided file
TextFormattedStringNumberRandomize digits in a string while maintaining the format
TextSelectFromDatabaseSelects randomly from the result of a query on a database
TextRandomUrlAnonymizes a URL while maintaining the structure
403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 |
ContentNameDescription
NumberRandomIntegerGenerates a random integer between provided limits (default 0 to 100)
NumberRandomIntegerDeltaGenerates a random integer within -delta and delta of original integer
NumberRandomFloatGenerates a random float between provided limits (default 0.0 to 100.0)
NumberRandomFloatDeltaGenerates a random float within -delta and delta of original float
NumberRandomBigDecimalDeltaSimilar to previous but creates a big decimal object
434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 |
ContentNameDescription
AddressRandomAddressRandomly selects an address from a geojson flat file [Default US address]
CityRandomCitySimilar to address, picks a random city from a geojson flat file [Default US cities]
ProvinceRandomProvinceSimilar to address, picks a random province from a geojson flat file [Default US provinces]
Zip codeRandomZipcodeSimilar to address, picks a random zipcode from a geojson flat file [Default US zipcodes]
Phone numberRandomPhoneNumberRandomizes a phone number while preserving locale specific formatting
465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 |
ContentNameDescription
DateTimeAnonymizeDateTimeAnonymizes each field (except year and seconds) within natural range of the field depending on true/false flag provided
TimeAnonymizeTimeExactly similar to above except returned object is of type 'Time'
DateAnonymizeDateAnonymizes day and month within natural ranges based on true/false flag
DateTimeDeltaDateTimeDeltaShifts data randomly within given range. Default shifts date within 10 days + or - and shifts time within 30 minutes.
TimeDeltaTimeDeltaExactly similar to above except returned object is of type 'Time'
DateDeltaDateDeltaShifts date randomly within given delta range. Default shifts date within 10 days + or -
501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 |
ContentNameDescription
EmailRandomEmailGenerates email randomly using the given HOSTNAME and TLD.
EmailGmailTemplateGenerates a valid unique gmail address by taking advantage of the gmail + strategy
EmailRandomMailinatorEmailGenerates random email using mailinator hostname.
522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 |
ContentNameDescription
First nameRandomFirstNameRandomly picks up first name from the predefined list in the file. Default file is part of the gem.
Last nameRandomLastNameRandomly picks up last name from the predefined list in the file. Default file is part of the gem.
Full NameRandomFullNameGenerates full name using the RandomFirstName and RandomLastName strategies.
User nameRandomUserNameGenerates random user name of same length as original user name.
548 | 549 | 550 | ## Write you own field strategies 551 | field parameter in following code is [DataAnon::Core::Field](#dataanon-core-field) 552 | 553 | ```ruby 554 | class MyFieldStrategy 555 | 556 | # method anonymize is what required 557 | def anonymize field 558 | # write your code here 559 | end 560 | 561 | end 562 | ``` 563 | 564 | write your own anonymous field strategies within DSL, 565 | 566 | ```ruby 567 | table 'User' do 568 | anonymize('Password') { |field| "password" } 569 | anonymize('email') do |field| 570 | "test+#{field.row_number}@gmail.com" 571 | end 572 | end 573 | ``` 574 | 575 | ## Default field strategies 576 | 577 | ```ruby 578 | DEFAULT_STRATEGIES = {:string => FieldStrategy::RandomString.new, 579 | :fixnum => FieldStrategy::RandomIntegerDelta.new(5), 580 | :bignum => FieldStrategy::RandomIntegerDelta.new(5000), 581 | :float => FieldStrategy::RandomFloatDelta.new(5.0), 582 | :bigdecimal => FieldStrategy::RandomBigDecimalDelta.new(500.0), 583 | :datetime => FieldStrategy::DateTimeDelta.new, 584 | :time => FieldStrategy::TimeDelta.new, 585 | :date => FieldStrategy::DateDelta.new, 586 | :trueclass => FieldStrategy::RandomBoolean.new, 587 | :falseclass => FieldStrategy::RandomBoolean.new 588 | } 589 | ``` 590 | 591 | Overriding default field strategies & can be used to provide default strategy for missing data type. 592 | 593 | ```ruby 594 | database 'Chinook' do 595 | ... 596 | default_field_strategies :string => FieldStrategy::RandomString.new 597 | ... 598 | end 599 | ``` 600 | 601 | ## Logging 602 | 603 | How do I switch off the progress bar? 604 | 605 | ```ruby 606 | # add following line in your ruby file 607 | ENV['show_progress'] = 'false' 608 | ``` 609 | 610 | `Logger` provides debug level messages including database queries of active record. 
611 | 612 | ```ruby 613 | DataAnon::Utils::Logging.logger.level = Logger::INFO 614 | ``` 615 | 616 | ## Skip and Continue records 617 | 618 | *Skip* is used to skip records during anonymization when condition returns true. This records are ignored, 619 | in blacklist it remains as it is in database and in case of whitelist this records will not be copied to destination database. 620 | 621 | ```ruby 622 | table 'customers' do 623 | skip { |index, record| record['age'] < 18 } 624 | 625 | primary_key 'cust_id' 626 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 627 | anonymize 'terms_n_condition', 'age' 628 | end 629 | ``` 630 | 631 | 632 | *Continue* is exactly opposite of Skip and it continue with anonymization only if given condition returns true. 633 | In case of blacklist records are anonymized for matching conditions and for whitelist records are anonymized and copied 634 | to new database for matching conditions. 635 | 636 | ```ruby 637 | table 'customers' do 638 | continue { |index, record| record['age'] > 18 } 639 | 640 | primary_key 'cust_id' 641 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 642 | anonymize 'terms_n_condition', 'age' 643 | end 644 | ``` 645 | 646 | 647 | ## Want to contribute? 648 | 649 | 1. Fork it 650 | 2. Create your feature branch (`git checkout -b my-new-feature`) 651 | 3. Commit your changes (`git commit -am 'Add some feature'`) 652 | 4. Push to the branch (`git push origin my-new-feature`) 653 | 5. Create new Pull Request 654 | 655 | ## License 656 | 657 | [MIT License](https://github.com/sunitparekh/data-anonymization/blob/master/LICENSE.txt) 658 | 659 | ## Credits 660 | 661 | - [ThoughtWorks Inc](http://www.thoughtworks.com), for allowing us to build this tool and make it open source. 662 | - [Panda](https://twitter.com/sarbashrestha) for reviewing the documentation. 
663 | - [Dan Abel](http://www.linkedin.com/pub/dan-abel/0/61b/9b0) for introducing me to Blacklist and Whitelist approach for data anonymization. 664 | - [Chirga Doshi](https://twitter.com/chiragsdoshi) for encouraging me to get this done. 665 | - [Aditya Karle](https://twitter.com/adityakarle) for the Logo. (Coming Soon...) 666 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler' 2 | Bundler.setup(:default, :test) 3 | require 'rspec/core/rake_task' 4 | require 'tasks/rake_tasks' 5 | 6 | Bundler::GemHelper.install_tasks 7 | RSpec::Core::RakeTask.new(:spec) 8 | DataAnonymization::RakeTasks.new 9 | 10 | task :default => :spec 11 | -------------------------------------------------------------------------------- /bin/datanon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # encoding: UTF-8 3 | 4 | require 'thor' 5 | require 'data-anonymization' 6 | 7 | class AnonymizationCLI < Thor 8 | 9 | include Thor::Actions 10 | 11 | desc "generate_rdbms_dsl", "Generates a base anonymization script(whitelist strategy) for a RDBMS database using the database schema" 12 | 13 | def generate_rdbms_dsl 14 | 15 | configuration_hash = {:adapter => options["adapter"], 16 | :host => options["host"], 17 | :port => options["port"], 18 | :database => options["database"], 19 | :username => options["username"], 20 | :password => options["password"] 21 | } 22 | create_file "rdbms_whitelist_generated.rb" 23 | DataAnon::ThorHelpers::RDBMSDSLGenerator.new.generate_whitelist_script(configuration_hash) 24 | end 25 | 26 | method_option :adapter, :required => true, :aliases => "-a", :desc => "Activerecord database adapter to be used [required]", :for => :generate_rdbms_dsl 27 | method_option :host, :required => true, :aliases => "-h", :desc => "Source Database host [required]", :for => 
:generate_rdbms_dsl 28 | method_option :database, :required => true, :aliases => "-d", :desc => "Database name [required]", :for => :generate_rdbms_dsl 29 | method_option :port, :aliases => "-p", :desc => "Port to connect to. If not provided default port provided by AR will be used", :for => :generate_rdbms_dsl 30 | method_option :username, :aliases => "-u", :desc => "Username", :for => :generate_rdbms_dsl 31 | method_option :password, :aliases => "-w", :desc => "Password", :for => :generate_rdbms_dsl 32 | 33 | desc "generate_mongo_dsl", "Generates a base anonymization script(whitelist strategy) for a Mongo DB using the database schema" 34 | 35 | def generate_mongo_dsl 36 | 37 | configuration_hash = {:host => options["host"], 38 | :port => options["port"], 39 | :database => options["database"], 40 | :username => options["username"], 41 | :password => options["password"] 42 | } 43 | 44 | create_file "mongodb_whitelist_generated.rb" 45 | DataAnon::ThorHelpers::MongoDBDSLGenerator.new(configuration_hash, options["whitelist_patterns"]).generate 46 | end 47 | 48 | method_option :host, :required => true, :aliases => "-h", :desc => "Source Database host [required]", :for => :generate_mongo_dsl 49 | method_option :database, :required => true, :aliases => "-d", :desc => "Database name [required]", :for => :generate_mongo_dsl 50 | method_option :port, :aliases => "-p", :desc => "Port to connect to. 
If not provided default port will be used", :for => :generate_mongo_dsl 51 | method_option :username, :aliases => "-u", :desc => "Username", :for => :generate_mongo_dsl 52 | method_option :password, :aliases => "-w", :desc => "Password", :for => :generate_mongo_dsl 53 | method_option :whitelist_patterns, :aliases => "-r", :desc => "Whitelist Patterns", :for => :generate_mongo_dsl 54 | 55 | end 56 | 57 | AnonymizationCLI.start 58 | -------------------------------------------------------------------------------- /commands.txt: -------------------------------------------------------------------------------- 1 | rbenv exec bundle update 2 | rbenv exec bundle outdated 3 | 4 | rbenv exec bundle exec rake 5 | -------------------------------------------------------------------------------- /data-anonymization.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'version' 5 | 6 | Gem::Specification.new do |gem| 7 | gem.name = 'data-anonymization' 8 | gem.version = DataAnonymization::VERSION 9 | gem.authors = ['Sunit Parekh', 'Anand Agrawal', 'Satyam Agarwala'] 10 | gem.email = %w(parekh.sunit@gmail.com anand.agrawal84@gmail.com satyamag@gmail.com) 11 | gem.description = %q{Data anonymization tool for RDBMS and MongoDB databases} 12 | gem.summary = %q{Tool to create anonymized production data dump to use for performance and testing environments.} 13 | gem.homepage = 'http://sunitparekh.github.com/data-anonymization' 14 | gem.license = 'MIT' 15 | 16 | 17 | gem.files = `git ls-files`.split($/).select { |f| !f.match(/^sample-data/) } 18 | gem.executables = 'datanon' 19 | gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) 20 | gem.require_paths = ['lib'] 21 | 22 | gem.add_dependency('activerecord', '~> 7.0') 23 | gem.add_dependency('activesupport', '~> 7.0') 24 | 
gem.add_dependency('composite_primary_keys', '~> 14.0') 25 | gem.add_dependency('parallel', '~> 1.21') 26 | gem.add_dependency('powerbar', '~> 2.0') 27 | gem.add_dependency('rgeo', '~> 2.4.0') 28 | gem.add_dependency('rgeo-geojson', '~> 2.1.1') 29 | gem.add_dependency('thor', '~> 1.2.1') 30 | end 31 | -------------------------------------------------------------------------------- /examples/blacklist_dsl.rb: -------------------------------------------------------------------------------- 1 | system 'bundle exec ruby examples/whitelist_dsl.rb' 2 | 3 | require 'data-anonymization' 4 | 5 | DataAnon::Utils::Logging.logger.level = Logger::INFO 6 | 7 | database 'Chinook' do 8 | strategy DataAnon::Strategy::Blacklist 9 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite' 10 | 11 | table 'Employee' do 12 | primary_key 'EmployeeId' 13 | anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1) 14 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new 15 | anonymize('LastName').using FieldStrategy::RandomLastName.new 16 | anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0) 17 | anonymize('Address').using FieldStrategy::RandomAddress.region_US 18 | anonymize('City').using FieldStrategy::RandomCity.region_US 19 | anonymize('State').using FieldStrategy::RandomProvince.region_US 20 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US 21 | anonymize('Country') {|field| 'USA'} 22 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new 23 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new 24 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 25 | end 26 | 27 | table 'Customer' do 28 | primary_key 'CustomerId' 29 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new 30 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new 31 | anonymize('LastName').using FieldStrategy::RandomLastName.new 32 | anonymize('Address').using 
FieldStrategy::RandomAddress.region_US 33 | anonymize('City').using FieldStrategy::RandomCity.region_US 34 | anonymize('State').using FieldStrategy::RandomProvince.region_US 35 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US 36 | anonymize('Country') {|field| 'USA'} 37 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new 38 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 39 | end 40 | 41 | end 42 | 43 | -------------------------------------------------------------------------------- /examples/mongodb_blacklist_dsl.rb: -------------------------------------------------------------------------------- 1 | require 'data-anonymization' 2 | require 'mongo' 3 | 4 | DataAnon::Utils::Logging.logger.level = Logger::INFO 5 | Mongo::Logger.logger.level = Logger::WARN 6 | 7 | Mongo::Client.new('mongodb://localhost/test').database.drop 8 | system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json' 9 | system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json' 10 | 11 | 12 | database 'test' do 13 | strategy DataAnon::Strategy::MongoDB::Blacklist 14 | source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test' 15 | 16 | collection 'users' do 17 | anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30) 18 | anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}') 19 | anonymize('email').using FieldStrategy::RandomMailinatorEmail.new 20 | anonymize('password') { |field| 'password'} 21 | anonymize('first_name').using FieldStrategy::RandomFirstName.new 22 | anonymize('last_name').using FieldStrategy::RandomLastName.new 23 | end 24 | 25 | collection 'plans' do 26 | anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(%w(Free Team Business Paid))) 27 | anonymize 'public_sharing','photo_sharing' 28 | 29 | document 'features' 
do 30 | anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200]) 31 | 32 | document 'users' do 33 | anonymize 'max', 'additional' 34 | end 35 | end 36 | 37 | end 38 | 39 | end 40 | 41 | -------------------------------------------------------------------------------- /examples/mongodb_whitelist_dsl.rb: -------------------------------------------------------------------------------- 1 | require 'data-anonymization' 2 | require 'mongo' 3 | 4 | DataAnon::Utils::Logging.logger.level = Logger::INFO 5 | Mongo::Logger.logger.level = Logger::WARN 6 | 7 | 8 | Mongo::Client.new('mongodb://localhost/test').database.drop 9 | Mongo::Client.new('mongodb://localhost/dest').database.drop 10 | system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json' 11 | system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json' 12 | 13 | database 'test' do 14 | strategy DataAnon::Strategy::MongoDB::Whitelist 15 | source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test' 16 | destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest' 17 | 18 | collection 'users' do 19 | whitelist '_id','failed_attempts','updated_at' 20 | anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30) 21 | anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}') 22 | anonymize('email').using FieldStrategy::RandomMailinatorEmail.new 23 | anonymize('password') { |field| 'password'} 24 | anonymize('first_name').using FieldStrategy::RandomFirstName.new 25 | anonymize('last_name').using FieldStrategy::RandomLastName.new 26 | anonymize 'password_reset_answer','password_reset_question' 27 | end 28 | 29 | collection 'plans' do 30 | whitelist '_id', 'name','term', 'created_at' 31 | anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(%w(Free Team Business Paid))) 32 | 
anonymize 'public_sharing','photo_sharing' 33 | 34 | collection 'features' do 35 | anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200]) 36 | whitelist 'type' 37 | 38 | document 'users' do 39 | anonymize 'max', 'additional' 40 | end 41 | end 42 | 43 | end 44 | 45 | end 46 | 47 | -------------------------------------------------------------------------------- /examples/whitelist_dsl.rb: -------------------------------------------------------------------------------- 1 | system 'rake empty_dest' # clean destination database on every call 2 | 3 | require 'data-anonymization' 4 | 5 | DataAnon::Utils::Logging.logger.level = Logger::INFO 6 | 7 | database 'Chinook' do 8 | strategy DataAnon::Strategy::Whitelist 9 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite' 10 | destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite' 11 | 12 | default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh') 13 | 14 | table 'Genre' do 15 | primary_key 'GenreId' 16 | whitelist 'GenreId' 17 | anonymize 'Name' do |field| 18 | field.value + ' test' 19 | end 20 | end 21 | 22 | table 'MediaType' do 23 | primary_key 'MediaTypeId' 24 | anonymize('MediaTypeId') { |field| field.value } # same as whitelist 25 | anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}') 26 | 27 | end 28 | 29 | table 'Customer' do 30 | primary_key 'CustomerId' 31 | batch_size 5 # batch_size works only if the primary_key is defined for the table 32 | limit 10 # will only take last 10 records 33 | 34 | whitelist 'CustomerId', 'SupportRepId', 'Company' 35 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new 36 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new 37 | anonymize('LastName').using FieldStrategy::RandomLastName.new 38 | anonymize('Address').using FieldStrategy::RandomAddress.region_US 39 | anonymize('City').using 
FieldStrategy::RandomCity.region_US 40 | anonymize('State').using FieldStrategy::RandomProvince.region_US 41 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US 42 | anonymize('Country') {|field| 'USA'} 43 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new 44 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 45 | end 46 | 47 | table 'Employee' do 48 | batch_size 5 # this won't work since there is no 'primary_key' defined 49 | 50 | whitelist 'EmployeeId', 'ReportsTo', 'Title' 51 | anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1) 52 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new 53 | anonymize('LastName').using FieldStrategy::RandomLastName.new 54 | anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0) 55 | anonymize('Address').using FieldStrategy::RandomAddress.region_US 56 | anonymize('City').using FieldStrategy::RandomCity.region_US 57 | anonymize('State').using FieldStrategy::RandomProvince.region_US 58 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US 59 | anonymize('Country') {|field| 'USA'} 60 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new 61 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new 62 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 63 | end 64 | 65 | 66 | 67 | end 68 | -------------------------------------------------------------------------------- /examples/whitelist_dsl_threads.rb: -------------------------------------------------------------------------------- 1 | system 'rake empty_dest' # clean destination database on every call 2 | 3 | require 'data-anonymization' 4 | 5 | DataAnon::Utils::Logging.logger.level = Logger::INFO 6 | 7 | database 'Chinook' do 8 | strategy DataAnon::Strategy::Whitelist 9 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite' 10 | destination_db :adapter => 'sqlite3', :database => 
'sample-data/chinook-empty.sqlite' 11 | 12 | default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh') 13 | 14 | table 'Genre' do 15 | primary_key 'GenreId' 16 | whitelist 'GenreId' 17 | anonymize 'Name' do |field| 18 | field.value + ' test' 19 | end 20 | end 21 | 22 | table 'MediaType' do 23 | primary_key 'MediaTypeId' 24 | anonymize('MediaTypeId') { |field| field.value } # same as whitelist 25 | anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}') 26 | 27 | end 28 | 29 | table 'Customer' do 30 | primary_key 'CustomerId' 31 | thread_num 5 # thread_num 32 | 33 | whitelist 'CustomerId', 'SupportRepId', 'Company' 34 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new 35 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new 36 | anonymize('LastName').using FieldStrategy::RandomLastName.new 37 | anonymize('Address').using FieldStrategy::RandomAddress.region_US 38 | anonymize('City').using FieldStrategy::RandomCity.region_US 39 | anonymize('State').using FieldStrategy::RandomProvince.region_US 40 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US 41 | anonymize('Country') {|field| 'USA'} 42 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new 43 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 44 | end 45 | 46 | table 'Employee' do 47 | thread_num 5 # thread_num 48 | 49 | whitelist 'EmployeeId', 'ReportsTo', 'Title' 50 | anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1) 51 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new 52 | anonymize('LastName').using FieldStrategy::RandomLastName.new 53 | anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0) 54 | anonymize('Address').using FieldStrategy::RandomAddress.region_US 55 | anonymize('City').using FieldStrategy::RandomCity.region_US 56 | anonymize('State').using FieldStrategy::RandomProvince.region_US 57 | 
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US 58 | anonymize('Country') {|field| 'USA'} 59 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new 60 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new 61 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 62 | end 63 | 64 | 65 | 66 | end 67 | -------------------------------------------------------------------------------- /lib/core/database.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Core 3 | 4 | class Database 5 | include Utils::Logging 6 | 7 | def initialize name 8 | @name = name 9 | @strategy = DataAnon::Strategy::Whitelist 10 | @user_defaults = {} 11 | @tables = [] 12 | @execution_strategy = DataAnon::Core::Sequential 13 | ENV['parallel_execution'] = 'false' 14 | I18n.enforce_available_locales = false 15 | end 16 | 17 | def strategy strategy 18 | @strategy = strategy 19 | end 20 | 21 | def execution_strategy execution_strategy 22 | @execution_strategy = execution_strategy 23 | ENV['parallel_execution'] = 'true' if execution_strategy == DataAnon::Parallel::Table 24 | end 25 | 26 | def source_db connection_spec 27 | @source_database = connection_spec 28 | end 29 | 30 | def destination_db connection_spec 31 | @destination_database = connection_spec 32 | end 33 | 34 | def default_field_strategies default_strategies 35 | @user_defaults = default_strategies 36 | end 37 | 38 | def table (name, &block) 39 | table = @strategy.new(@source_database, @destination_database, name, @user_defaults).process_fields(&block) 40 | @tables << table 41 | end 42 | alias :collection :table 43 | 44 | def anonymize 45 | begin 46 | @execution_strategy.new.anonymize @tables 47 | rescue => e 48 | logger.error "\n#{e.message} \n #{e.backtrace}" 49 | end 50 | if @strategy.whitelist? 51 | @tables.each do |table| 52 | if table.fields_missing_strategy.present? 
53 | logger.info('Fields missing the anonymization strategy:') 54 | table.fields_missing_strategy.print 55 | end 56 | end 57 | end 58 | 59 | @tables.each { |table| table.errors.print } 60 | end 61 | 62 | end 63 | 64 | class Sequential 65 | def anonymize tables 66 | tables.each do |table| 67 | begin 68 | table.process 69 | rescue => e 70 | logger.error "\n#{e.message} \n #{e.backtrace}" 71 | end 72 | end 73 | end 74 | end 75 | 76 | end 77 | end -------------------------------------------------------------------------------- /lib/core/dsl.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Core 3 | module DSL 4 | include Utils::Logging 5 | 6 | def database(name, &block) 7 | logger.debug "Processing Database: #{name}" 8 | database = DataAnon::Core::Database.new(name) 9 | database.instance_eval &block 10 | database.anonymize 11 | end 12 | 13 | end 14 | end 15 | end 16 | 17 | include DataAnon::Core::DSL 18 | 19 | -------------------------------------------------------------------------------- /lib/core/field.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Core 3 | 4 | class Field 5 | 6 | def initialize name, value, row_number, ar_record, table_name = 'unknown' 7 | @name = name 8 | @value = value 9 | @row_number = row_number 10 | @ar_record = ar_record 11 | @table_name = table_name 12 | end 13 | 14 | attr_accessor :name, :value, :row_number, :ar_record, :table_name 15 | 16 | alias :collection_name :table_name 17 | 18 | end 19 | 20 | end 21 | end -------------------------------------------------------------------------------- /lib/core/fields_missing_strategy.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Core 3 | 4 | class FieldsMissingStrategy 5 | include Utils::Logging 6 | 7 | def initialize table_name 8 | @table_name = table_name 9 | @fields_missing_strategy = [] 10 | 
end 11 | 12 | def missing field_name 13 | return if @fields_missing_strategy.include? field_name 14 | @fields_missing_strategy << field_name 15 | end 16 | 17 | def fields_missing_strategy 18 | @fields_missing_strategy 19 | end 20 | 21 | def print 22 | @fields_missing_strategy.each do |field_name| 23 | logger.info("#{@table_name}.#{field_name}") 24 | end 25 | end 26 | 27 | def present? 28 | fields_missing_strategy.size > 0 29 | end 30 | 31 | end 32 | 33 | end 34 | end -------------------------------------------------------------------------------- /lib/core/table_errors.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Core 3 | 4 | class TableErrors 5 | include Utils::Logging 6 | 7 | def initialize table_name 8 | @table_name = table_name 9 | @errors = [] 10 | end 11 | 12 | def log_error record, exception 13 | @errors << { :record => record, :exception => exception} 14 | raise 'Reached limit of error for a table' if @errors.length > 100 15 | end 16 | 17 | def errors 18 | @errors 19 | end 20 | 21 | def print 22 | return if @errors.length == 0 23 | logger.error("Errors while processing table '#{@table_name}':") 24 | @errors.each do |error| 25 | logger.error(error[:exception]) 26 | logger.error(error[:exception].backtrace.join("\n\t")) 27 | end 28 | end 29 | 30 | end 31 | 32 | end 33 | end -------------------------------------------------------------------------------- /lib/data-anonymization.rb: -------------------------------------------------------------------------------- 1 | require 'version' 2 | 3 | require 'utils/logging' 4 | require 'utils/random_int' 5 | require 'utils/random_float' 6 | require 'utils/random_string' 7 | require 'utils/random_string_chars_only' 8 | require 'utils/geojson_parser' 9 | require 'utils/progress_bar' 10 | require 'utils/parallel_progress_bar' 11 | require 'utils/resource' 12 | require 'utils/template_helper' 13 | require 'parallel/table' 14 | require 'core/database' 15 
| require 'core/fields_missing_strategy' 16 | require 'thor/helpers/rdbms_dsl_generator' 17 | require 'core/field' 18 | require 'core/table_errors' 19 | require 'strategy/strategies' 20 | require 'utils/database' 21 | require 'core/dsl' 22 | 23 | begin 24 | require 'mongo' 25 | require 'thor/helpers/mongodb_dsl_generator' 26 | rescue LoadError 27 | 'Ignoring the mongodb specific libraries if monog driver is not specified in gem' 28 | end 29 | -------------------------------------------------------------------------------- /lib/parallel/table.rb: -------------------------------------------------------------------------------- 1 | require 'parallel' 2 | 3 | module DataAnon 4 | module Parallel 5 | class Table 6 | 7 | def anonymize tables 8 | ::Parallel.each(tables) do |table| 9 | begin 10 | table.progress_bar_class DataAnon::Utils::ParallelProgressBar 11 | table.process 12 | rescue => e 13 | logger.error "\n#{e.message} \n #{e.backtrace}" 14 | end 15 | end 16 | end 17 | 18 | end 19 | end 20 | end -------------------------------------------------------------------------------- /lib/strategy/base.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | class Base 4 | include Utils::Logging 5 | 6 | attr_accessor :fields, :user_strategies, :fields_missing_strategy, :errors 7 | 8 | def initialize source_database, destination_database, name, user_strategies 9 | @name = name 10 | @user_strategies = user_strategies 11 | @fields = {} 12 | @source_database = source_database 13 | @destination_database = destination_database 14 | @fields_missing_strategy = DataAnon::Core::FieldsMissingStrategy.new name 15 | @errors = DataAnon::Core::TableErrors.new(@name) 16 | @primary_keys = [] 17 | end 18 | 19 | def self.whitelist? 
20 | false 21 | end 22 | 23 | def process_fields &block 24 | self.instance_eval &block 25 | self 26 | end 27 | 28 | def primary_key *fields 29 | @primary_keys = fields 30 | end 31 | 32 | def batch_size size 33 | @batch_size = size 34 | end 35 | 36 | def limit limit 37 | @limit = limit 38 | end 39 | 40 | def thread_num thread_num 41 | @thread_num = thread_num 42 | end 43 | 44 | def whitelist *fields 45 | fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Whitelist.new } 46 | end 47 | 48 | def skip &block 49 | @skip_block = block 50 | end 51 | 52 | def continue &block 53 | @continue_block = block 54 | end 55 | 56 | def anonymize *fields, &block 57 | if block.nil? 58 | fields.each { |f| @fields[f] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) } 59 | temp = self 60 | return Class.new do 61 | @temp_fields = fields 62 | @table_fields = temp.fields 63 | def self.using field_strategy 64 | @temp_fields.each { |f| @table_fields[f] = field_strategy } 65 | end 66 | end 67 | else 68 | fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Anonymous.new(&block) } 69 | end 70 | end 71 | 72 | def is_primary_key? field 73 | @primary_keys.select { |key| field == key }.length > 0 74 | end 75 | 76 | def default_strategy field_name 77 | @fields_missing_strategy.missing field_name 78 | DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) 79 | end 80 | 81 | def dest_table 82 | return @dest_table unless @dest_table.nil? 83 | table_klass = Utils::DestinationTable.create @name, @primary_keys 84 | table_klass.establish_connection @destination_database if @destination_database 85 | @dest_table = table_klass 86 | end 87 | 88 | def source_table 89 | return @source_table unless @source_table.nil? 
90 | table_klass = Utils::SourceTable.create @name, @primary_keys 91 | table_klass.establish_connection @source_database 92 | @source_table = table_klass 93 | end 94 | 95 | def process 96 | logger.debug "Processing table #{@name} with fields strategies #{@fields}" 97 | total = source_table.count 98 | if total > 0 99 | progress = progress_bar.new(@name, total) 100 | if @primary_keys.empty? || !@batch_size.present? 101 | process_table progress 102 | elsif @thread_num.present? 103 | process_table_in_threads progress 104 | else 105 | process_table_in_batches progress 106 | end 107 | progress.close 108 | end 109 | if source_table.respond_to?('clear_all_connections!') 110 | source_table.clear_all_connections! 111 | end 112 | end 113 | 114 | def process_table progress 115 | index = 0 116 | 117 | source_table_limited.each do |record| 118 | index += 1 119 | begin 120 | process_record_if index, record 121 | rescue => exception 122 | @errors.log_error record, exception 123 | end 124 | progress.show index 125 | end 126 | end 127 | 128 | def process_table_in_batches progress 129 | logger.info "Processing table #{@name} records in batch size of #{@batch_size}" 130 | index = 0 131 | 132 | source_table_limited.find_each(:batch_size => @batch_size) do |record| 133 | index += 1 134 | begin 135 | process_record_if index, record 136 | rescue => exception 137 | @errors.log_error record, exception 138 | end 139 | progress.show index 140 | end 141 | end 142 | 143 | def process_table_in_threads progress 144 | logger.info "Processing table #{@name} records in batch size of #{@batch_size} [THREADS]" 145 | 146 | index = 0 147 | threads = [] 148 | 149 | source_table.find_in_batches(batch_size: @batch_size) do |records| 150 | until threads.count(&:alive?) 
<= @thread_num 151 | thr = threads.delete_at 0 152 | thr.join 153 | progress.show index 154 | end 155 | 156 | thr = Thread.new { 157 | records.each do |record| 158 | begin 159 | process_record_if index, record 160 | index += 1 161 | rescue => exception 162 | puts exception.inspect 163 | @errors.log_error record, exception 164 | end 165 | end 166 | } 167 | threads << thr 168 | end 169 | 170 | until threads.empty? 171 | thr = threads.delete_at 0 172 | thr.join 173 | progress.show index 174 | end 175 | end 176 | 177 | def source_table_limited 178 | @source_table_limited ||= begin 179 | if @limit.present? 180 | source_table.all.limit(@limit).order(created_at: :desc) 181 | else 182 | source_table.all 183 | end 184 | end 185 | end 186 | 187 | def process_record_if index, record 188 | return if @skip_block && @skip_block.call(index, record) 189 | return if @continue_block && !@continue_block.call(index, record) 190 | 191 | process_record index, record 192 | end 193 | 194 | def progress_bar 195 | @progress_bar || DataAnon::Utils::ProgressBar 196 | end 197 | 198 | def progress_bar_class progress_bar 199 | @progress_bar = progress_bar 200 | end 201 | 202 | 203 | end 204 | end 205 | end 206 | -------------------------------------------------------------------------------- /lib/strategy/blacklist.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | class Blacklist < DataAnon::Strategy::Base 4 | 5 | def process_record index, record 6 | updates = {} 7 | @fields.each do |field, strategy| 8 | database_field_name = record.attributes.select { |k,v| k == field }.keys[0] 9 | field_value = record.attributes[database_field_name] 10 | unless field_value.nil? 
|| is_primary_key?(database_field_name) 11 | field = DataAnon::Core::Field.new(database_field_name, field_value, index, record, @name) 12 | updates[database_field_name] = strategy.anonymize(field) 13 | end 14 | end 15 | record.update_columns(updates) if updates.any? 16 | end 17 | 18 | end 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /lib/strategy/field/anonymize_array.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | module Field 4 | 5 | 6 | class AnonymizeArray 7 | 8 | def self.user_defaults user_defaults 9 | @@user_defaults = user_defaults 10 | end 11 | 12 | def initialize strategy 13 | @strategy = strategy 14 | end 15 | 16 | def anonymize field 17 | field.value.collect do |v| 18 | strategy = @strategy || @@user_defaults[v.class.to_s.downcase.to_sym] 19 | strategy.anonymize DataAnon::Core::Field.new(field.name, v, field.row_number, field.ar_record, field.table_name) 20 | end 21 | end 22 | 23 | end 24 | 25 | 26 | end 27 | end 28 | end -------------------------------------------------------------------------------- /lib/strategy/field/anonymous.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | module Field 4 | 5 | 6 | class Anonymous 7 | 8 | def initialize &block 9 | @block = block 10 | end 11 | 12 | def anonymize field 13 | @block.call field 14 | end 15 | 16 | end 17 | 18 | 19 | end 20 | end 21 | end -------------------------------------------------------------------------------- /lib/strategy/field/contact/geojson_base.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | module Field 4 | class GeojsonBase 5 | 6 | def self.region_US 7 | self.new DataAnon::Utils::Resource.file('US_addresses.geojson') 8 | end 9 | 10 | def self.region_UK 11 | self.new 
DataAnon::Utils::Resource.file('UK_addresses.geojson') 12 | end 13 | 14 | def initialize file_path 15 | raise "Load and set the @values member variable in constructor" 16 | end 17 | 18 | def anonymize field 19 | @values.sample 20 | end 21 | end 22 | end 23 | end 24 | end -------------------------------------------------------------------------------- /lib/strategy/field/contact/random_address.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | module Field 4 | 5 | # Generates address using the [geojson](http://www.geojson.org/geojson-spec.html) format file. The default US/UK file chooses randomly from 300 addresses. 6 | # The large data set can be downloaded from [here](http://www.infochimps.com/datasets/simplegeo-places-dump) 7 | # 8 | # !!!ruby 9 | # anonymize('Address').using FieldStrategy::RandomAddress.region_US 10 | # 11 | # !!!ruby 12 | # anonymize('Address').using FieldStrategy::RandomAddress.region_UK 13 | # 14 | # !!!ruby 15 | # # get your own geo_json file and use it 16 | # anonymize('Address').using FieldStrategy::RandomAddress.new('my_geo_json.json') 17 | 18 | class RandomAddress < GeojsonBase 19 | 20 | def initialize file_path 21 | @values = DataAnon::Utils::GeojsonParser.address(file_path) 22 | end 23 | 24 | end 25 | 26 | 27 | end 28 | end 29 | end -------------------------------------------------------------------------------- /lib/strategy/field/contact/random_city.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | module Field 4 | 5 | # Similar to RandomAddress, generates city using the [geojson](http://www.geojson.org/geojson-spec.html) format file. The default US/UK file chooses randomly from 300 addresses. 
# /lib/strategy/field/contact/random_city.rb:
module DataAnon
  module Strategy
    module Field

      # City counterpart of RandomAddress: the replacement value is picked at random
      # from the cities found in a [geojson](http://www.geojson.org/geojson-spec.html) format file.
      # The default US/UK file chooses randomly from 300 addresses.
      # The large data set can be downloaded from [here](http://www.infochimps.com/datasets/simplegeo-places-dump)
      #
      #    !!!ruby
      #    anonymize('City').using FieldStrategy::RandomCity.region_US
      #
      #    !!!ruby
      #    anonymize('City').using FieldStrategy::RandomCity.region_UK
      #
      #    !!!ruby
      #    # get your own geo_json file and use it
      #    anonymize('City').using FieldStrategy::RandomCity.new('my_geo_json.json')
      class RandomCity < GeojsonBase

        # file_path - geojson file passed to DataAnon::Utils::GeojsonParser.city; whatever
        #             the parser returns is stored in @values, the pool the inherited
        #             GeojsonBase#anonymize samples from.
        def initialize(file_path)
          @values = DataAnon::Utils::GeojsonParser.city(file_path)
        end

      end

    end
  end
end

# /lib/strategy/field/contact/random_phone_number.rb:
module DataAnon
  module Strategy
    module Field

      # Keeps the format of the original value and swaps every digit for a random digit.
      # All behaviour is inherited from FormattedStringNumber; this class only gives the
      # strategy a phone-number specific name.
      #
      #    !!!ruby
      #    anonymize('PhoneNumber').using FieldStrategy::RandomPhoneNumber.new
      class RandomPhoneNumber < FormattedStringNumber
      end

    end
  end
end
# /lib/strategy/field/contact/random_province.rb:
module DataAnon
  module Strategy
    module Field

      # Province counterpart of RandomAddress: the replacement value is picked at random
      # from the provinces found in a [geojson](http://www.geojson.org/geojson-spec.html) format file.
      # The default US/UK file chooses randomly from 300 addresses.
      # The large data set can be downloaded from [here](http://www.infochimps.com/datasets/simplegeo-places-dump)
      #
      #    !!!ruby
      #    anonymize('Province').using FieldStrategy::RandomProvince.region_US
      #
      #    !!!ruby
      #    anonymize('Province').using FieldStrategy::RandomProvince.region_UK
      #
      #    !!!ruby
      #    # get your own geo_json file and use it
      #    anonymize('Province').using FieldStrategy::RandomProvince.new('my_geo_json.json')
      class RandomProvince < GeojsonBase

        # file_path - geojson file passed to DataAnon::Utils::GeojsonParser.province; the
        #             parser's result is stored in @values, the pool the inherited
        #             GeojsonBase#anonymize samples from.
        def initialize(file_path)
          @values = DataAnon::Utils::GeojsonParser.province(file_path)
        end

      end

    end
  end
end
# /lib/strategy/field/contact/random_zipcode.rb:
module DataAnon
  module Strategy
    module Field

      # Zipcode counterpart of RandomAddress: the replacement value is picked at random
      # from the zipcodes found in a [geojson](http://www.geojson.org/geojson-spec.html) format file.
      # The default US/UK file chooses randomly from 300 addresses.
      # The large data set can be downloaded from [here](http://www.infochimps.com/datasets/simplegeo-places-dump)
      #
      #    !!!ruby
      #    anonymize('Zipcode').using FieldStrategy::RandomZipcode.region_US
      #
      #    !!!ruby
      #    anonymize('Zipcode').using FieldStrategy::RandomZipcode.region_UK
      #
      #    !!!ruby
      #    # get your own geo_json file and use it
      #    anonymize('Zipcode').using FieldStrategy::RandomZipcode.new('my_geo_json.json')
      class RandomZipcode < GeojsonBase

        # file_path - geojson file passed to DataAnon::Utils::GeojsonParser.zipcode; the
        #             parser's result is stored in @values, the pool the inherited
        #             GeojsonBase#anonymize samples from.
        def initialize(file_path)
          @values = DataAnon::Utils::GeojsonParser.zipcode(file_path)
        end

      end

    end
  end
end
# /lib/strategy/field/datetime/anonymize_date.rb:
require 'date'

module DataAnon
  module Strategy
    module Field

      # Anonymizes the day and month of a date within their natural ranges, based on a
      # true/false flag per field. By default both fields are anonymized. The year is
      # never changed.
      #
      #    !!!ruby
      #    # anonymizes month and leaves day unchanged
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDate.new(true,false)
      #
      # In addition to customizing which fields you want anonymized, there are some helper
      # methods which allow for quick anonymization
      #
      #    !!!ruby
      #    # anonymizes only the month field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDate.only_month
      #    # anonymizes only the day field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDate.only_day
      class AnonymizeDate

        # Flag value used when the caller does not say whether a field is anonymized.
        DEFAULT_ANONYMIZATION = true

        # Strategy that randomizes the month only.
        def self.only_month
          new true, false
        end

        # Strategy that randomizes the day only.
        def self.only_day
          new false, true
        end

        # anonymize_month - true to replace the month with a random one (1..12)
        # anonymize_day   - true to replace the day with a random day of the resulting month
        # Both flags default to DEFAULT_ANONYMIZATION, matching the documented default.
        def initialize(anonymize_month = DEFAULT_ANONYMIZATION, anonymize_day = DEFAULT_ANONYMIZATION)
          @anonymize_month = anonymize_month
          @anonymize_day = anonymize_day
        end

        # Returns a new Date built from field.value (anything responding to year, month
        # and day) with the selected parts randomized.
        def anonymize(field)
          original = field.value

          year = original.year
          month = @anonymize_month ? DataAnon::Utils::RandomInt.generate(1, 12) : original.month
          # Day -1 is the last day of the month, so this is the month length (leap years included).
          days_in_month = Date.new(year, month, -1).day
          day = if @anonymize_day
                  DataAnon::Utils::RandomInt.generate(1, days_in_month)
                else
                  # A kept day may not exist in a randomized month (e.g. the 31st in
                  # February); clamp it instead of letting Date.new raise.
                  [original.day, days_in_month].min
                end

          Date.new(year, month, day)
        end

      end
    end
  end
end
# /lib/strategy/field/datetime/anonymize_datetime.rb:
module DataAnon
  module Strategy
    module Field

      # DateTime flavour of AnonymizeTime. Field selection and randomization are inherited
      # unchanged; only the type of the returned object differs (DateTime instead of Time).
      # Each of month, day, hour and minute is anonymized or kept according to its
      # true/false flag; year and seconds are never changed.
      #
      #    !!!ruby
      #    # anonymizes month and hour fields, leaving the day and minute fields untouched
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.new(true,false,true,false)
      #
      # In addition to customizing which fields you want anonymized, there are some helper
      # methods which allow for quick anonymization
      #
      #    !!!ruby
      #    # anonymizes only the month field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.only_month
      #    # anonymizes only the day field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.only_day
      #    # anonymizes only the hour field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.only_hour
      #    # anonymizes only the minute field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.only_minute
      class AnonymizeDateTime < AnonymizeTime

        private

        # Hook called by AnonymizeTime#anonymize with the final field values.
        # Returns them as a DateTime.
        def create_object(year, month, day, hour, min, sec)
          DateTime.civil(year, month, day, hour, min, sec)
        end

      end
    end
  end
end
# /lib/strategy/field/datetime/anonymize_time.rb:
require 'date'

module DataAnon
  module Strategy
    module Field

      # Anonymizes each field (except year and seconds) within its natural range
      # (month 1-12, day within the month, hour 0-23, minute 0-59) based on a true/false
      # flag per field. By default, all fields are anonymized.
      #
      #    !!!ruby
      #    # anonymizes month and hour fields, leaving the day and minute fields untouched
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.new(true,false,true,false)
      #
      # In addition to customizing which fields you want anonymized, there are some helper
      # methods which allow for quick anonymization
      #
      #    !!!ruby
      #    # anonymizes only the month field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.only_month
      #    # anonymizes only the day field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.only_day
      #    # anonymizes only the hour field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.only_hour
      #    # anonymizes only the minute field
      #    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.only_minute
      class AnonymizeTime

        # Flag value used when the caller does not say whether a field is anonymized.
        DEFAULT_ANONYMIZATION = true

        # Strategy that randomizes the month only.
        def self.only_month
          new true, false, false, false
        end

        # Strategy that randomizes the day only.
        def self.only_day
          new false, true, false, false
        end

        # Strategy that randomizes the hour only.
        def self.only_hour
          new false, false, true, false
        end

        # Strategy that randomizes the minute only.
        def self.only_minute
          new false, false, false, true
        end

        # Each flag says whether the corresponding field is replaced by a random value.
        # All flags default to DEFAULT_ANONYMIZATION, matching the documented default.
        def initialize(anonymize_month = DEFAULT_ANONYMIZATION,
                       anonymize_day = DEFAULT_ANONYMIZATION,
                       anonymize_hour = DEFAULT_ANONYMIZATION,
                       anonymize_min = DEFAULT_ANONYMIZATION)
          @anonymize_month = anonymize_month
          @anonymize_day = anonymize_day
          @anonymize_hour = anonymize_hour
          @anonymize_min = anonymize_min
        end

        # Returns a new object (see create_object) built from field.value with the
        # selected parts randomized. field.value must respond to year, month, day,
        # hour, min and sec.
        def anonymize(field)
          original_time = field.value

          year = original_time.year
          month = @anonymize_month ? DataAnon::Utils::RandomInt.generate(1, 12) : original_time.month
          # Day -1 is the last day of the month, so this is the month length (leap years included).
          days_in_month = Date.new(year, month, -1).day
          day = if @anonymize_day
                  DataAnon::Utils::RandomInt.generate(1, days_in_month)
                else
                  # A kept day may not exist in a randomized month. Time.new would roll it
                  # over into the following month and DateTime would raise, so clamp it.
                  [original_time.day, days_in_month].min
                end
          hour = @anonymize_hour ? DataAnon::Utils::RandomInt.generate(0, 23) : original_time.hour
          min = @anonymize_min ? DataAnon::Utils::RandomInt.generate(0, 59) : original_time.min
          sec = original_time.sec

          create_object(year, month, day, hour, min, sec)
        end

        private

        # Builds the returned value; subclasses override this to return another type.
        def create_object(year, month, day, hour, min, sec)
          Time.new(year, month, day, hour, min, sec)
        end

      end
    end
  end
end

# /lib/strategy/field/datetime/date_delta.rb:
module DataAnon
  module Strategy
    module Field

      # Shifts date randomly within given delta range. Default shifts date within 10 days + or -
      #
      #    !!!ruby
      #    anonymize('DateOfBirth').using FieldStrategy::DateDelta.new
      #
      #    !!!ruby
      #    # shifts date within 25 days
      #    anonymize('DateOfBirth').using FieldStrategy::DateDelta.new(25)
      #
      class DateDelta

        DEFAULT_DAY_DELTA = 10

        # day_delta - largest shift, in days, applied in either direction.
        def initialize(day_delta = DEFAULT_DAY_DELTA)
          @day_delta = day_delta
        end

        # Returns field.value moved by a random whole number of days in
        # [-day_delta, +day_delta]. Relies on Integer#days, which is not core Ruby
        # (presumably ActiveSupport, loaded elsewhere in the project).
        def anonymize(field)
          day_adjustment = DataAnon::Utils::RandomInt.generate(-@day_delta, @day_delta)
          field.value + day_adjustment.days
        end

      end
    end
  end
end
# /lib/strategy/field/datetime/date_time_delta.rb:
module DataAnon
  module Strategy
    module Field

      # Moves a date-time value by a random amount. The shift is made of a random number
      # of days and a random number of minutes, each drawn from [-delta, +delta].
      # Defaults: 10 days and 30 minutes.
      #
      #    !!!ruby
      #    anonymize('DateOfBirth').using FieldStrategy::DateTimeDelta.new
      #
      #    !!!ruby
      #    # shifts date within 20 days and time within 50 minutes
      #    anonymize('DateOfBirth').using FieldStrategy::DateTimeDelta.new(20, 50)
      class DateTimeDelta

        DEFAULT_DAY_DELTA = 10
        DEFAULT_MINUTE_DELTA = 30

        # day_delta    - largest day shift in either direction
        # minute_delta - largest minute shift in either direction
        def initialize(day_delta = DEFAULT_DAY_DELTA, minute_delta = DEFAULT_MINUTE_DELTA)
          @day_delta = day_delta
          @minute_delta = minute_delta
        end

        # Returns field.value plus the combined random shift. Relies on Integer#days and
        # Integer#minutes, which are not core Ruby (presumably ActiveSupport, loaded
        # elsewhere in the project).
        def anonymize(field)
          days_shift = DataAnon::Utils::RandomInt.generate(-@day_delta, @day_delta)
          minutes_shift = DataAnon::Utils::RandomInt.generate(-@minute_delta, @minute_delta)
          total_shift = days_shift.days + minutes_shift.minutes
          field.value + total_shift
        end

      end
    end
  end
end
# /lib/strategy/field/datetime/time_delta.rb:
module DataAnon
  module Strategy
    module Field

      # Time-named variant of DateTimeDelta; it inherits everything and adds nothing.
      # Default shifts the value within 10 days + or - and within 30 minutes.
      #
      #    !!!ruby
      #    anonymize('DateOfBirth').using FieldStrategy::TimeDelta.new
      #
      #    !!!ruby
      #    # shifts date within 20 days and time within 50 minutes
      #    anonymize('DateOfBirth').using FieldStrategy::TimeDelta.new(20, 50)
      class TimeDelta < DateTimeDelta
      end

    end
  end
end

# /lib/strategy/field/default_anon.rb:
module DataAnon
  module Strategy
    module Field

      # Fallback strategy that dispatches on the runtime class of the value: the class
      # name is downcased, turned into a symbol and looked up in a strategy table.
      class DefaultAnon

        # Built-in table, keyed by downcased class name.
        DEFAULT_STRATEGIES = {
          :string           => FieldStrategy::RandomString.new,
          :integer          => FieldStrategy::RandomIntegerDelta.new(5),
          :fixnum           => FieldStrategy::RandomIntegerDelta.new(5),
          :bignum           => FieldStrategy::RandomIntegerDelta.new(5000),
          :float            => FieldStrategy::RandomFloatDelta.new(5.0),
          :bigdecimal       => FieldStrategy::RandomBigDecimalDelta.new(500.0),
          :datetime         => FieldStrategy::DateTimeDelta.new,
          :time             => FieldStrategy::TimeDelta.new,
          :date             => FieldStrategy::DateDelta.new,
          :array            => FieldStrategy::AnonymizeArray.new(nil),
          :trueclass        => FieldStrategy::RandomBoolean.new,
          :"bson::objectid" => FieldStrategy::Whitelist.new,
          :falseclass       => FieldStrategy::RandomBoolean.new
        }

        # user_defaults - table entries that override or extend DEFAULT_STRATEGIES.
        # The merged table is also handed to AnonymizeArray.user_defaults, which is a
        # class-level setting and therefore shared by every AnonymizeArray.
        def initialize(user_defaults = {})
          @user_defaults = DEFAULT_STRATEGIES.merge user_defaults
          FieldStrategy::AnonymizeArray.user_defaults @user_defaults
        end

        # Delegates to the strategy registered for the value's class.
        # Raises RuntimeError when the table has no entry for that class.
        def anonymize(field)
          type_key = field.value.class.to_s.downcase.to_sym
          strategy = @user_defaults[type_key]
          return strategy.anonymize(field) if strategy

          raise "No strategy defined for datatype #{field.value.class}. Use 'default_field_strategies' option in your script. Refer to http://sunitparekh.github.com/data-anonymization/#default-field-strategies for more details. #{field.inspect}"
        end

      end

    end
  end
end

# /lib/strategy/field/email/gmail_template.rb:
module DataAnon
  module Strategy
    module Field

      # Generates a valid unique gmail address by taking advantage of the gmail + strategy.
      # Takes in a valid gmail username and generates emails of the form
      # username+<row number>@gmail.com
      #
      #    !!!ruby
      #    anonymize('Email').using FieldStrategy::GmailTemplate.new('username')
      #
      class GmailTemplate

        # username - local part placed before the "+" in every generated address.
        def initialize(username = 'someusername')
          @username = username
        end

        # Returns the address for this record; field.row_number supplies the part after "+".
        def anonymize(field)
          row_number = field.row_number
          "#{@username}+#{row_number}@gmail.com"
        end

      end
    end
  end
end
# /lib/strategy/field/email/random_email.rb:
module DataAnon
  module Strategy
    module Field

      # Builds a random email address. The user name is always random; the host name and
      # the TLD are random too unless fixed values are passed to the constructor.
      #
      #    !!!ruby
      #    anonymize('Email').using FieldStrategy::RandomEmail.new('thoughtworks','com')
      #
      class RandomEmail

        # TLDs a random address may end with when none is given.
        TLDS = %w[com org net edu gov mil biz info]

        # hostname - fixed host name, or nil for a random one
        # tld      - fixed top-level domain, or nil for a random entry of TLDS
        def initialize(hostname = nil, tld = nil)
          @hostname = hostname
          @tld = tld
        end

        # Returns "<user>@<host>.<tld>". The user name is 5-15 characters long and a
        # random host name is 2-10 characters long. The field argument is not read.
        def anonymize(field)
          # Both lengths are drawn up front, even when the host name is fixed.
          user_size = DataAnon::Utils::RandomInt.generate(5, 15)
          host_size = DataAnon::Utils::RandomInt.generate(2, 10)

          user = DataAnon::Utils::RandomString.generate(user_size)
          host = @hostname || DataAnon::Utils::RandomString.generate(host_size)
          suffix = @tld || TLDS[rand(TLDS.length)]

          user + "@" + host + "." + suffix
        end

      end
    end

  end
end
# /lib/strategy/field/email/random_mailinator_email.rb:
module DataAnon
  module Strategy
    module Field

      # Generates random email using mailinator hostname. e.g. <randomstring>@mailinator.com
      #
      #    !!!ruby
      #    anonymize('Email').using FieldStrategy::RandomMailinatorEmail.new
      class RandomMailinatorEmail

        # Prepares a RandomEmail strategy pinned to the host "mailinator" and TLD "com".
        def initialize
          @email_anonymizer = RandomEmail.new('mailinator', 'com')
        end

        # Delegates to the pinned RandomEmail strategy and returns its result.
        def anonymize(field)
          @email_anonymizer.anonymize(field)
        end

      end
    end

  end
end

# /lib/strategy/field/fields.rb:
# Loads every field strategy. The order matters where one strategy subclasses another
# (e.g. formatted_string_numbers before random_phone_number, geojson_base before the
# geojson strategies, anonymize_time before anonymize_datetime).
require 'strategy/field/whitelist'
require 'strategy/field/random_boolean'

require 'strategy/field/anonymous'

#array
require 'strategy/field/anonymize_array'

# string
require 'strategy/field/string/lorem_ipsum'
require 'strategy/field/string/string_template'
require 'strategy/field/string/random_string'
require 'strategy/field/string/random_url'
require 'strategy/field/string/formatted_string_numbers'
require 'strategy/field/string/random_formatted_string'

require 'strategy/field/string/select_from_file'
require 'strategy/field/string/select_from_list'
require 'strategy/field/string/select_from_database'

# number
require 'strategy/field/number/random_integer'
require 'strategy/field/number/random_float'
require 'strategy/field/number/random_integer_delta'
require 'strategy/field/number/random_float_delta'
require 'strategy/field/number/random_big_decimal_delta'

# contact
require 'strategy/field/contact/geojson_base'
require 'strategy/field/contact/random_phone_number'
require 'strategy/field/contact/random_address'
require 'strategy/field/contact/random_zipcode'
require 'strategy/field/contact/random_city'
require 'strategy/field/contact/random_province'

# datetime
require 'strategy/field/datetime/anonymize_time'
require 'strategy/field/datetime/anonymize_datetime'
require 'strategy/field/datetime/anonymize_date'
require 'strategy/field/datetime/date_time_delta'
require 'strategy/field/datetime/time_delta'
require 'strategy/field/datetime/date_delta'

# email
require 'strategy/field/email/random_email'
require 'strategy/field/email/gmail_template'
require 'strategy/field/email/random_mailinator_email'

# name
require 'strategy/field/name/random_first_name'
require 'strategy/field/name/random_last_name'
require 'strategy/field/name/random_full_name'
require 'strategy/field/name/random_user_name'



# Short alias used throughout the DSL examples and by default_anon below.
FieldStrategy = DataAnon::Strategy::Field

# Must come after the alias: DefaultAnon builds its table through FieldStrategy.
require 'strategy/field/default_anon'
# /lib/strategy/field/name/random_first_name.rb:
module DataAnon
  module Strategy
    module Field

      # Randomly picks up first name from the predefined list in the file. Default [file](https://raw.github.com/sunitparekh/data-anonymization/master/resources/first_names.txt) is part of the gem.
      # File should contain first name on each line.
      #
      #    !!!ruby
      #    anonymize('FirstName').using FieldStrategy::RandomFirstName.new
      #
      #    !!!ruby
      #    anonymize('FirstName').using FieldStrategy::RandomFirstName.new('my_first_names.txt')
      #
      class RandomFirstName < SelectFromFile

        # file_path - file with the candidate names; when nil (or false) the bundled
        #             'first_names.txt' resource is used instead.
        def initialize(file_path = nil)
          names_file = file_path || DataAnon::Utils::Resource.file('first_names.txt')
          super(names_file)
        end

      end
    end
  end
end

# /lib/strategy/field/name/random_full_name.rb:
module DataAnon
  module Strategy
    module Field

      # Generates full name using the RandomFirstName and RandomLastName strategies.
      # The result has as many words as the original value: one random first name
      # followed by one random last name for every further word.
      #
      #    !!!ruby
      #    anonymize('FullName').using FieldStrategy::RandomFullName.new
      #
      #    !!!ruby
      #    anonymize('FullName').using FieldStrategy::RandomFullName.new('my_first_names.txt', 'my_last_names.txt')
      class RandomFullName

        # first_names / last_names - optional name files, passed straight through to
        # RandomFirstName and RandomLastName.
        def initialize(first_names = nil, last_names = nil)
          @first_name_anonymizer = RandomFirstName.new(first_names)
          @last_name_anonymizer = RandomLastName.new(last_names)
        end

        # Returns the generated name. field.value must be a String; it is split on
        # whitespace only to count its words.
        def anonymize(field)
          word_count = field.value.split(' ').size

          # The first name is drawn before any last name.
          first_name = @first_name_anonymizer.anonymize(field)

          remaining = [word_count - 1, 0].max
          last_names = Array.new(remaining) { " " + @last_name_anonymizer.anonymize(field) }.join

          first_name + last_names
        end

      end
    end
  end
end
# /lib/strategy/field/name/random_last_name.rb:
module DataAnon
  module Strategy
    module Field

      # Randomly picks up last name from the predefined list in the file. Default [file](https://raw.github.com/sunitparekh/data-anonymization/master/resources/last_names.txt) is part of the gem.
      # File should contain last name on each line.
      #
      #    !!!ruby
      #    anonymize('LastName').using FieldStrategy::RandomLastName.new
      #
      #    !!!ruby
      #    anonymize('LastName').using FieldStrategy::RandomLastName.new('my_last_names.txt')
      class RandomLastName < SelectFromFile

        # file_path - file with the candidate names; when nil (or false) the bundled
        #             'last_names.txt' resource is used instead.
        def initialize(file_path = nil)
          names_file = file_path || DataAnon::Utils::Resource.file('last_names.txt')
          super(names_file)
        end

      end
    end
  end
end

# /lib/strategy/field/name/random_user_name.rb:
module DataAnon
  module Strategy
    module Field

      # Generates a random user name whose length is itself random, between min_length
      # and max_length (5 and 10 by default). The original value is not consulted.
      #
      #    !!!ruby
      #    anonymize('Username').using FieldStrategy::RandomUserName.new
      #
      class RandomUserName

        DEFAULT_MIN_LENGTH = 5
        DEFAULT_MAX_LENGTH = 10

        # min_length / max_length - bounds handed to RandomInt.generate for the length.
        def initialize(min_length = DEFAULT_MIN_LENGTH, max_length = DEFAULT_MAX_LENGTH)
          @min_length = min_length
          @max_length = max_length
        end

        # Returns the generated user name. The field argument is not read.
        def anonymize(field)
          length = DataAnon::Utils::RandomInt.generate(@min_length, @max_length)
          DataAnon::Utils::RandomString.generate(length)
        end

      end
    end
  end
end
# /lib/strategy/field/number/random_big_decimal_delta.rb:
require 'bigdecimal'

module DataAnon
  module Strategy
    module Field

      # Shifts the current value randomly within given delta + and -. Default is 100.0
      #
      #    !!!ruby
      #    anonymize('points').using FieldStrategy::RandomBigDecimalDelta.new(2.5)
      class RandomBigDecimalDelta

        # delta - largest shift applied in either direction.
        def initialize(delta = 100.0)
          @delta = delta
        end

        # Returns a BigDecimal: the shifted value is rendered as a string and parsed back.
        def anonymize(field)
          shift = DataAnon::Utils::RandomFloat.generate(-@delta, +@delta)
          shifted = field.value + shift
          BigDecimal("#{shifted}")
        end

      end
    end
  end
end

# /lib/strategy/field/number/random_float.rb:
module DataAnon
  module Strategy
    module Field

      # Generates random float number between given two numbers. Default range is 0.0 to 100.0
      #
      #    !!!ruby
      #    anonymize('points').using FieldStrategy::RandomFloat.new(3.0,5.0)
      class RandomFloat

        # from / to - bounds handed to DataAnon::Utils::RandomFloat.generate.
        def initialize(from = 0.0, to = 100.0)
          @from, @to = from, to
        end

        # Returns the generated number. The field argument is not read.
        def anonymize(field)
          DataAnon::Utils::RandomFloat.generate(@from, @to)
        end

      end


    end
  end
end
# /lib/strategy/field/number/random_float_delta.rb:
module DataAnon
  module Strategy
    module Field

      # Shifts the current value randomly within given delta + and -. Default is 10.0
      #
      #    !!!ruby
      #    anonymize('points').using FieldStrategy::RandomFloatDelta.new(2.5)
      class RandomFloatDelta

        # delta - largest shift applied in either direction.
        def initialize(delta = 10.0)
          @delta = delta
        end

        # Returns field.value plus a random shift drawn from [-delta, +delta].
        def anonymize(field)
          shift = DataAnon::Utils::RandomFloat.generate(-@delta, +@delta)
          field.value + shift
        end

      end
    end
  end
end

# /lib/strategy/field/number/random_integer.rb:
module DataAnon
  module Strategy
    module Field

      # Generates random integer number between given two numbers. Default range is 0 to 100.
      #
      #    !!!ruby
      #    anonymize('Age').using FieldStrategy::RandomInteger.new(18,70)
      class RandomInteger

        # from / to - bounds handed to DataAnon::Utils::RandomInt.generate.
        def initialize(from = 0, to = 100)
          @from, @to = from, to
        end

        # Returns the generated number. The field argument is not read.
        def anonymize(field)
          DataAnon::Utils::RandomInt.generate(@from, @to)
        end

      end


    end
  end
end
Default is 10 6 | # 7 | # !!!ruby 8 | # anonymize('Age').using FieldStrategy::RandomIntegerDelta.new(2) 9 | 10 | class RandomIntegerDelta 11 | 12 | def initialize delta = 10 13 | @delta = delta 14 | end 15 | 16 | def anonymize field 17 | adjustment = DataAnon::Utils::RandomInt.generate(-@delta,@delta) 18 | return field.value + adjustment 19 | end 20 | end 21 | 22 | end 23 | end 24 | end -------------------------------------------------------------------------------- /lib/strategy/field/random_boolean.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | module Field 4 | 5 | 6 | class RandomBoolean 7 | 8 | BOOL_VALUES = [true,false] 9 | 10 | def anonymize field 11 | BOOL_VALUES.sample 12 | end 13 | 14 | end 15 | 16 | 17 | end 18 | end 19 | end -------------------------------------------------------------------------------- /lib/strategy/field/string/formatted_string_numbers.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | module Field 4 | 5 | # Keeping the format same it changes each digit in the string with random digit. 6 | # 7 | # !!!ruby 8 | # anonymize('CreditCardNumber').using FieldStrategy::FormattedStringNumber.new 9 | 10 | class FormattedStringNumber 11 | 12 | def anonymize field 13 | @original_string = field.value 14 | @anonymized_string = "" 15 | @original_string.each_char do |char| 16 | if /\d/.match(char).nil? 
17 | @anonymized_string += char 18 | else 19 | @anonymized_string += DataAnon::Utils::RandomInt.generate(0,9).to_s 20 | end 21 | end 22 | 23 | @anonymized_string 24 | end 25 | 26 | end 27 | 28 | 29 | end 30 | end 31 | end -------------------------------------------------------------------------------- /lib/strategy/field/string/lorem_ipsum.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | module Field 4 | 5 | # Default anonymization strategy for `string` content. Uses default 'Lorem ipsum...' text or text supplied in strategy to generate same length string. 6 | # !!!ruby 7 | # anonymize('UserName').using FieldStrategy::LoremIpsum.new 8 | # 9 | # !!!ruby 10 | # anonymize('UserName').using FieldStrategy::LoremIpsum.new("very large string....") 11 | # 12 | # !!!ruby 13 | # anonymize('UserName').using FieldStrategy::LoremIpsum.new(File.read('my_file.txt')) 14 | 15 | class LoremIpsum 16 | 17 | DEFAULT_TEXT = <<-default 18 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed quis nulla quis ligula bibendum dignissim. Nullam elementum convallis mauris, at ultrices odio dignissim dapibus. Etiam vitae neque lorem, a luctus purus. In at diam mi, sit amet dapibus magna. Maecenas tincidunt tortor id dolor tristique dictum. Morbi pulvinar odio ut lorem gravida ac varius orci ultrices. Nulla id arcu dui, sit amet commodo augue. Curabitur elit elit, semper quis tincidunt at, auctor et tortor. 19 | Quisque ut enim arcu. Praesent orci mi, tincidunt non sodales a, blandit ac nunc. Phasellus sed erat a nibh suscipit molestie sed a augue. Aliquam pretium ultricies nibh. Sed sit amet accumsan sapien. Pellentesque urna orci, iaculis eu lacinia ac, consequat vel elit. Suspendisse aliquet tortor et urna varius non ullamcorper augue tempus. Phasellus pretium, nulla eu adipiscing viverra, purus est fermentum enim, ut fringilla ligula lectus quis est. Phasellus quis scelerisque ligula. 
Cras accumsan lobortis egestas. Ut quis orci sem, sed gravida orci. 20 | Vestibulum eget odio nisl, nec ornare ante. Aenean tristique, nisl eget lacinia aliquam, neque lectus lacinia enim, id ullamcorper nisl lorem vitae enim. Sed vulputate condimentum convallis. Ut viverra tincidunt arcu ac egestas. Quisque ut neque nec quam suscipit ornare a ornare est. Nulla facilisi. Mauris facilisis eleifend neque eget egestas. Vestibulum egestas dui eleifend urna pharetra a hendrerit quam sagittis. Duis ut turpis convallis diam interdum congue. In hac habitasse platea dictumst. Nulla a erat eget tortor tempor consectetur. Fusce euismod congue risus in feugiat. Sed rutrum vehicula lectus et vehicula. In porttitor malesuada sem at auctor. 21 | Maecenas lacinia placerat augue quis posuere. Cras eu augue quam, eu malesuada sem. Proin facilisis iaculis lectus, vel hendrerit nulla tristique quis. Donec risus mauris, vulputate tristique feugiat nec, imperdiet sed sapien. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Aenean vitae aliquam magna. Donec tempor, ipsum non dapibus elementum, est sem hendrerit nulla, scelerisque sollicitudin lacus mauris eu libero. Vivamus turpis justo, ullamcorper sed ullamcorper quis, tempor in elit. Sed nisl erat, laoreet at adipiscing quis, lobortis et est. Duis congue iaculis mollis. Curabitur ligula turpis, malesuada non feugiat vitae, ullamcorper non nibh. Aliquam adipiscing pellentesque leo nec molestie. Donec tempor eleifend libero, at rutrum velit semper a. Sed tincidunt dictum lorem eu egestas. 22 | Sed at iaculis risus. Nulla aliquet vulputate nulla, nec euismod sem porta quis. Aliquam erat volutpat. Sed tincidunt pharetra metus, in facilisis nunc suscipit ut. Nunc placerat vulputate sapien, elementum varius mi viverra eget. Nam hendrerit felis et arcu ultrices vehicula. Phasellus condimentum ornare orci sed placerat. Sed vel rutrum lorem. Fusce id bibendum ipsum. 
module DataAnon
  module Strategy
    module Field

      # Keeps the format of the original value: every digit is replaced with a random digit and
      # every ASCII letter with a random letter of the same case. All other characters are copied as is.
      #
      #    !!!ruby
      #    anonymize('PhoneNumber').using FieldStrategy::RandomFormattedString.new
      #    anonymize('Email').using FieldStrategy::RandomFormattedString.new

      class RandomFormattedString

        # Replacement alphabets; note that they intentionally do not contain every letter.
        SMALL_CHARS = "abcdefghjkmnpqrstuvwxyz"
        CAPS_CHARS = "ABCDEFGHJKLMNPQRSTUVWXYZ"

        # Returns a new string with the same length and character classes as field.value.
        # Only local variables are used, so no per-call state is left on the strategy instance.
        def anonymize field
          field.value.each_char.map do |char|
            case char
            when /\d/ then DataAnon::Utils::RandomInt.generate(0, 9).to_s
            when /[a-z]/ then SMALL_CHARS[rand(SMALL_CHARS.length)]
            when /[A-Z]/ then CAPS_CHARS[rand(CAPS_CHARS.length)]
            else char # punctuation, whitespace and non-ASCII characters keep the format intact
            end
          end.join
        end

      end

    end
  end
end
module DataAnon
  module Strategy
    module Field

      # Generates random string of same length.
      #
      #    !!!ruby
      #    anonymize('UserName').using FieldStrategy::RandomString.new

      class RandomString

        # Replaces every run of non-whitespace characters in field.value with a random string
        # of the same length. Whitespace is copied unchanged, so the result always has exactly
        # the same length as the original (including repeated, leading and trailing whitespace).
        def anonymize field
          field.value.gsub(/\S+/) { |word| DataAnon::Utils::RandomString.generate(word.length) }
        end

      end

    end
  end
end
module DataAnon
  module Strategy
    module Field

      # Similar to SelectFromList only difference is the list of values are picked up from file. Classical usage is like states field anonymization.
      #
      #    !!!ruby
      #    anonymize('State').using FieldStrategy::SelectFromFile.new('states.txt')
      #
      # By default the file content is split on any whitespace. Pass a separator to keep
      # multi-word values such as 'New York' together:
      #
      #    !!!ruby
      #    anonymize('State').using FieldStrategy::SelectFromFile.new('states.txt', "\n")

      class SelectFromFile

        # file_path - file holding the candidate values.
        # separator - String or Regexp handed to String#split; nil (the default) keeps the
        #             original behaviour of splitting on whitespace.
        def initialize file_path, separator = nil
          @values = File.read(file_path).split(separator)
        end

        # Returns one random value, or for an Array field a random sample with as many
        # elements as the field has (never more than the number of available values).
        def anonymize field
          return @values.sample(field.value.length) if field.value.kind_of? Array
          @values.sample
        end

      end
    end
  end
end
module DataAnon
  module Strategy
    module MongoDB

      # Anonymizes a single field of a document. Hash values and arrays of embedded documents
      # are handed back to the owning anonymization strategy; everything else goes through
      # the field strategy (or the owner's default strategy when none was given).
      class AnonymizeField

        def initialize field, field_strategy, anonymization_strategy
          @field, @field_strategy, @anonymization_strategy = field, field_strategy, anonymization_strategy
        end

        # Dispatches on the shape of the field value and returns the anonymized value.
        def anonymize
          if sub_document?
            return @anonymization_strategy.anonymize_document(@field.value, @field.row_number, @field_strategy)
          end
          return anonymize_sub_documents if sub_documents?

          anonymize_field
        end

        # Anonymizes each element of an array field as its own document.
        def anonymize_sub_documents
          @field.value.map do |sub_document|
            @anonymization_strategy.anonymize_document(sub_document, @field.row_number, @field_strategy)
          end
        end

        # Applies the field strategy, falling back to the owner's default for this field name.
        # Raises when the resulting strategy object cannot anonymize.
        def anonymize_field
          @field_strategy ||= @anonymization_strategy.default_strategy(@field.name)
          unless @field_strategy.respond_to?(:anonymize)
            raise "Improper fields strategy defined for '#{@field.name}' within document \n #{@field.ar_record}"
          end
          @field_strategy.anonymize(@field)
        end

        # True for an Array value when either a Hash of strategies was supplied or the
        # first element is a Hash.
        def sub_documents?
          values = @field.value
          return false unless values.kind_of?(Array)

          @field_strategy.kind_of?(Hash) || values[0].kind_of?(Hash)
        end

        # True when the value itself is a Hash.
        def sub_document?
          @field.value.kind_of?(Hash)
        end

      end
    end
  end
end
19 | field = DataAnon::Core::Field.new(field_name, field_value, index, document, @name) 20 | document[field.name] = AnonymizeField.new(field, field_strategy, self).anonymize 21 | end 22 | end 23 | document 24 | end 25 | 26 | end 27 | 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/strategy/mongodb/whitelist.rb: -------------------------------------------------------------------------------- 1 | require 'mongo' 2 | 3 | class Mongo::Collection 4 | alias :all :find 5 | end 6 | 7 | module DataAnon 8 | module Strategy 9 | module MongoDB 10 | class Whitelist < DataAnon::Strategy::Base 11 | 12 | def self.whitelist? 13 | true 14 | end 15 | 16 | def collection field, &block 17 | whitelist = self.class.new @source_database, @destination_database, @name, @user_strategies 18 | whitelist.process_fields &block 19 | @fields[field] = whitelist.fields 20 | end 21 | 22 | alias :document :collection 23 | 24 | def mongo_collection(database) 25 | options = database[:options] || {} 26 | Mongo::Client.new(database[:mongodb_uri], options).database.collection(@name) 27 | end 28 | 29 | def dest_collection 30 | database = @destination_database 31 | @dest_collection ||= mongo_collection(database) 32 | end 33 | 34 | def source_collection 35 | @source_collection ||= mongo_collection(@source_database) 36 | end 37 | 38 | alias :source_table :source_collection 39 | alias :dest_table :dest_collection 40 | 41 | def process_record index, document 42 | dest_collection.insert_one anonymize_document(document, index, @fields) 43 | end 44 | 45 | def anonymize_document document, index, field_strategies = {} 46 | anonymized_document = {} 47 | document.each do |field_name, field_value| 48 | field_strategy = field_strategies[field_name] if field_strategies.kind_of?(Hash) 49 | unless field_value.nil? 
50 | field = DataAnon::Core::Field.new(field_name, field_value, index, document, @name) 51 | anonymized_document[field.name] = AnonymizeField.new(field, field_strategy, self).anonymize 52 | end 53 | end 54 | anonymized_document 55 | end 56 | 57 | 58 | end 59 | 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /lib/strategy/strategies.rb: -------------------------------------------------------------------------------- 1 | require 'strategy/base' 2 | require 'strategy/whitelist' 3 | require 'strategy/blacklist' 4 | require 'strategy/field/fields' 5 | 6 | begin 7 | require 'mongo' 8 | require 'strategy/mongodb/anonymize_field' 9 | require 'strategy/mongodb/whitelist' 10 | require 'strategy/mongodb/blacklist' 11 | rescue LoadError 12 | 'Ignoring the mongodb specific libraries if monog driver is not specified in gem' 13 | end 14 | -------------------------------------------------------------------------------- /lib/strategy/whitelist.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Strategy 3 | class Whitelist < DataAnon::Strategy::Base 4 | 5 | def self.whitelist? 6 | true 7 | end 8 | 9 | def process_record(index, record) 10 | dest_record_map = {} 11 | record.attributes.each do |field_name, field_value| 12 | unless field_value.nil? || is_primary_key?(field_name) 13 | field = DataAnon::Core::Field.new(field_name, field_value, index, record, @name) 14 | field_strategy = @fields[field_name] || default_strategy(field_name) 15 | dest_record_map[field_name] = field_strategy.anonymize(field) 16 | end 17 | end 18 | dest_record = dest_table.new dest_record_map 19 | @primary_keys.each do |key| 20 | dest_record[key] = record[key] 21 | end 22 | dest_record.save! 
module DataAnon
  module ThorHelpers

    # Generates a whitelist DSL script ('mongodb_whitelist_generated.rb') for a MongoDB database
    # by looking at the first document of every non-system collection.
    class MongoDBDSLGenerator

      def self.source_root
        File.dirname(__FILE__)
      end

      # configuration_hash - connection settings; :database is read here, the rest is passed to TemplateHelper.mongo_uri.
      # whitelist_patterns - regexps for field names that are whitelisted instead of anonymized
      #                      (defaults to names starting with '_' or ending in _at/_id/_type).
      def initialize(configuration_hash, whitelist_patterns)
        @mongodb_uri = DataAnon::Utils::TemplateHelper.mongo_uri(configuration_hash)
        @whitelist_patterns = whitelist_patterns || [/^_/,/_at$/,/_id$/,/_type$/]
        @configuration_hash = configuration_hash
        @output = []
      end

      # Collects one `collection ... do/end` section per collection and renders the ERB template
      # into 'mongodb_whitelist_generated.rb' in the current directory.
      def generate
        db = Mongo::Client.new(@mongodb_uri, :database => @configuration_hash[:database])
        db.collections.each do |collection|
          next if collection.name.start_with?('system.')

          @output << "\tcollection '#{collection.name}' do"
          process_document(2, collection.find({}).first)
          @output << "\tend\n"
        end

        # File.read closes the template file itself; the generator's own source_root is used
        # so this class does not depend on RDBMSDSLGenerator being loaded.
        template = File.read(MongoDBDSLGenerator.source_root + "/../templates/mongodb_whitelist_template.erb")
        # NOTE(review): positional safe_level/trim_mode arguments are deprecated in newer Ruby versions -
        # confirm the supported Ruby range before switching to the trim_mode: keyword.
        erb = ERB.new(template, nil, '-')
        File.open('mongodb_whitelist_generated.rb', 'w') { |f| f.write erb.result(binding) }
      end

      # Appends one DSL line per field of the document to @output, indented with `depth` tabs.
      # Embedded documents and arrays of documents become nested `document`/`collection` blocks
      # built from the embedded document (for arrays: from the first element).
      def process_document(depth, document)
        return if document.nil?

        indent = "\t" * depth
        document.each do |key, value|
          if value.kind_of?(Hash)
            @output << "#{indent}document '#{key}' do"
            process_document depth + 1, value
            @output << "#{indent}end"
          elsif value.kind_of?(Array) && value[0].kind_of?(Hash)
            @output << "#{indent}collection '#{key}' do"
            process_document depth + 1, value[0]
            @output << "#{indent}end"
          elsif @whitelist_patterns.any? { |pattern| key.match(pattern) }
            @output << "#{indent}whitelist '#{key}'"
          else
            @output << "#{indent}anonymize '#{key}'"
          end
        end
      end

    end
  end
end
Please check the configuration options and try again.\e[0m" 29 | puts e.backtrace 30 | end 31 | 32 | end 33 | end 34 | end 35 | 36 | 37 | -------------------------------------------------------------------------------- /lib/thor/templates/mongodb_whitelist_template.erb: -------------------------------------------------------------------------------- 1 | require 'data-anonymization' 2 | require 'mongo' 3 | 4 | DataAnon::Utils::Logging.logger.level = Logger::INFO 5 | 6 | database 'test' do 7 | 8 | strategy DataAnon::Strategy::MongoDB::Whitelist 9 | source_db <%= DataAnon::Utils::TemplateHelper.source_connection_specs_mongo @configuration_hash %> 10 | destination_db <%= DataAnon::Utils::TemplateHelper.destination_connection_specs_mongo %> 11 | 12 | <%= @output.join("\n") %> 13 | 14 | end 15 | 16 | -------------------------------------------------------------------------------- /lib/thor/templates/whitelist_template.erb: -------------------------------------------------------------------------------- 1 | require 'data-anonymization' 2 | 3 | DataAnon::Utils::Logging.logger.level = Logger::INFO 4 | 5 | database 'Template' do 6 | 7 | strategy DataAnon::Strategy::Whitelist 8 | source_db <%= DataAnon::Utils::TemplateHelper.source_connection_specs_rdbms @configuration_hash %> 9 | destination_db <%= DataAnon::Utils::TemplateHelper.destination_connection_specs_rdbms @configuration_hash %> 10 | 11 | <% @tables.each do |table| %> 12 | table '<%= table %>' do 13 | primary_key '<%= @ar_object.connection.primary_key("#{table}").nil? ? 
"" : @ar_object.connection.primary_key("#{table}") %>' 14 | <%- @ar_object.connection.indexes("#{table}").each do |index| -%> 15 | whitelist '<%= index.columns.first %>' 16 | <%- end -%> 17 | end 18 | <% end %> 19 | 20 | end 21 | 22 | -------------------------------------------------------------------------------- /lib/utils/database.rb: -------------------------------------------------------------------------------- 1 | require 'active_record' 2 | require 'composite_primary_keys' 3 | require 'logger' 4 | 5 | module DataAnon 6 | module Utils 7 | 8 | class TempDatabase < ActiveRecord::Base 9 | self.abstract_class = true 10 | end 11 | 12 | class DisableReferentialIntegrityDatabase < ActiveRecord::Base 13 | self.abstract_class = true 14 | end 15 | 16 | class SourceDatabase < ActiveRecord::Base 17 | self.abstract_class = true 18 | end 19 | 20 | class DestinationDatabase < ActiveRecord::Base 21 | self.abstract_class = true 22 | end 23 | 24 | class BaseTable 25 | 26 | def self.create_table database, table_name, primary_keys = [] 27 | klass_name = table_name.to_s.downcase.capitalize 28 | return database.const_get(klass_name, false) if database.const_defined?(klass_name, false) 29 | database.const_set(klass_name, Class.new(database) do 30 | self.table_name = table_name 31 | self.primary_keys = primary_keys if primary_keys.length > 1 32 | self.primary_key = primary_keys[0] if primary_keys.length == 1 33 | self.primary_key = nil if primary_keys.length == 0 34 | self.inheritance_column = :_type_disabled 35 | end 36 | ) 37 | end 38 | 39 | end 40 | 41 | class SourceTable < BaseTable 42 | 43 | def self.create table_name, primary_key = [] 44 | create_table SourceDatabase, table_name, primary_key 45 | end 46 | 47 | end 48 | 49 | class DestinationTable < BaseTable 50 | 51 | def self.create table_name, primary_key = [] 52 | create_table DestinationDatabase, table_name, primary_key 53 | end 54 | 55 | end 56 | 57 | end 58 | end 
module DataAnon
  module Utils

    # Reads a file that is split into one entry per line, decodes every entry with
    # RGeo::GeoJSON.decode and collects a single property from each decoded entry.
    class GeojsonParser

      class << self
        # Each helper returns the list of values of one property for all entries in the file.
        def address(file_path)
          new(file_path).parse('address')
        end

        def zipcode(file_path)
          new(file_path).parse('postcode')
        end

        def province(file_path)
          new(file_path).parse('province')
        end

        def city(file_path)
          new(file_path).parse('city')
        end

        def country(file_path)
          new(file_path).parse('country')
        end
      end

      def initialize(file_path)
        @places = File.read(file_path).split(/\n/)
      end

      # Returns the value stored under `property` for every line of the file, in file order.
      def parse(property)
        @places.map do |place|
          RGeo::GeoJSON.decode(place, :json_parser => :json)[property]
        end
      end

    end
  end
end
@table_name = table_name 12 | end 13 | 14 | protected 15 | 16 | def show_progress index 17 | suffix = started(index) ? "STARTED" : (complete(index) ? "COMPLETE" : "") 18 | logger.info("%-30s [ %7d/%-7d ] %s" % [@table_name, index, @total, suffix]) 19 | end 20 | 21 | end 22 | 23 | end 24 | end -------------------------------------------------------------------------------- /lib/utils/progress_bar.rb: -------------------------------------------------------------------------------- 1 | require 'powerbar' 2 | 3 | module DataAnon 4 | module Utils 5 | 6 | class ProgressBar 7 | 8 | def initialize table_name, total 9 | @total = total 10 | @table_name = table_name 11 | @power_bar = PowerBar.new if show_progress_env 12 | apply_power_bar_settings if show_progress_env 13 | end 14 | 15 | def apply_power_bar_settings 16 | @power_bar.settings.tty.finite.template.main = \ 17 | "${} ${ }\e[0m${/s} \e[33;1m${%} " + 18 | "\e[36;1m${}\e[31;1m${ ETA: }" 19 | @power_bar.settings.tty.finite.template.padchar = "\e[0m\u2589" 20 | @power_bar.settings.tty.finite.template.barchar = "\e[34;1m\u2589" 21 | @power_bar.settings.tty.finite.template.exit = "\e[?25h\e[0m" # clean up after us 22 | @power_bar.settings.tty.finite.template.close = "\e[?25h\e[0m\n" # clean up after us 23 | @power_bar.settings.tty.finite.output = Proc.new { |s| $stderr.print s } 24 | end 25 | 26 | def show index 27 | if show_progress? index 28 | show_progress index 29 | end 30 | end 31 | 32 | def close 33 | @power_bar.close if @power_bar 34 | end 35 | 36 | protected 37 | 38 | def show_progress? index 39 | show_progress_env && (started(index) || regular_interval(index) || complete(index)) 40 | end 41 | 42 | def show_progress_env 43 | ENV['show_progress'] == "false" ? 
false : true 44 | end 45 | 46 | def show_progress counter 47 | sleep 0.1 48 | msg = "%-20s [%6d/%-6d]" % [@table_name, counter, @total] 49 | @power_bar.show({:msg => msg, :done => counter, :total => @total}) 50 | end 51 | 52 | def complete index 53 | index == @total 54 | end 55 | 56 | def regular_interval index 57 | (index % 1000) == 0 58 | end 59 | 60 | def started index 61 | index == 1 62 | end 63 | 64 | 65 | end 66 | 67 | end 68 | end -------------------------------------------------------------------------------- /lib/utils/random_float.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Utils 3 | class RandomFloat 4 | 5 | def self.generate min, max 6 | return 0 if (min == 0.0 && max == 0.0) 7 | Random.new.rand * (max-min) + min 8 | end 9 | 10 | end 11 | end 12 | end -------------------------------------------------------------------------------- /lib/utils/random_int.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Utils 3 | class RandomInt 4 | 5 | def self.generate min, max 6 | return 0 if (min == 0 && max == 0) 7 | Random.new.rand min..max 8 | end 9 | end 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /lib/utils/random_string.rb: -------------------------------------------------------------------------------- 1 | module DataAnon 2 | module Utils 3 | class RandomString 4 | 5 | RANDOM_STRING_CHARS = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ0123456789' 6 | 7 | def self.generate length = nil, chars = RANDOM_STRING_CHARS 8 | length ||= Random.new.rand 5...15 9 | random_string = '' 10 | length.times { random_string << chars[rand(chars.size)] } 11 | random_string 12 | end 13 | end 14 | end 15 | end -------------------------------------------------------------------------------- /lib/utils/random_string_chars_only.rb: 
module DataAnon
  module Utils

    # Builds the connection-spec fragments and MongoDB URIs that are interpolated into the
    # generated DSL scripts.
    class TemplateHelper

      # Renders the non-nil String and Integer entries of config_hash as `:key => value` pairs
      # joined by ', '. Entries of any other type are left out, so the result never contains
      # empty segments.
      def self.source_connection_specs_rdbms config_hash
        config_hash.map { |key, value|
          case value
          when String then ":#{key} => '#{single_quoted(value)}'"
          when Integer then ":#{key} => #{value}"
          end
        }.compact.join ', '
      end

      # Renders every key of config_hash with an empty string value, to be filled in by the user.
      def self.destination_connection_specs_rdbms config_hash
        config_hash.keys.map { |key| ":#{key} => ''" }.join ', '
      end

      def self.source_connection_specs_mongo config_hash
        ":mongodb_uri => '#{single_quoted(mongo_uri(config_hash))}', :database => '#{single_quoted(config_hash[:database].to_s)}'"
      end

      def self.destination_connection_specs_mongo
        ":mongodb_uri => '', :database => ''"
      end

      # Builds mongodb://[user:password@]host[:port]/database from config_hash.
      # The user name is read from :username, falling back to :user. Credentials are inserted
      # verbatim, so reserved URI characters in them must already be percent-encoded.
      def self.mongo_uri config_hash
        host = config_hash[:host]
        port = config_hash[:port]
        address = port.nil? ? "#{host}" : "#{host}:#{port}"

        user = config_hash[:username] || config_hash[:user]
        # In a connection string the credentials come before the host, separated by '@'.
        credentials = user.nil? ? "" : "#{user}:#{config_hash[:password]}@"

        "mongodb://#{credentials}#{address}/#{config_hash[:database]}"
      end

      # Escapes backslashes and single quotes so the value can sit inside a single-quoted
      # Ruby string literal in the generated script.
      def self.single_quoted value
        value.gsub(/[\\']/) { |char| "\\#{char}" }
      end
      private_class_method :single_quoted

    end
  end
end
Danna 120 | Vanna 121 | Joy 122 | Sharonda 123 | Reid 124 | Aundrea 125 | Gabrielle 126 | Karyn 127 | Rashida 128 | Benita 129 | Margherita 130 | Yan 131 | Ling 132 | Melda 133 | Gerda 134 | Flor 135 | Ferne 136 | Dorinda 137 | Ella 138 | Cornelius 139 | Malisa 140 | Ned 141 | Odelia 142 | Era 143 | Brittany 144 | Babara 145 | Shelby 146 | Myesha 147 | Sanjuana 148 | Margarite 149 | Roosevelt 150 | Bette 151 | Roger 152 | Lawanda 153 | Alan 154 | Shiloh 155 | Calvin 156 | Bell 157 | Merlin 158 | Louis 159 | Branda 160 | Magen 161 | Augustina 162 | Caroyln 163 | Milan 164 | Ardella 165 | Adrianna 166 | Justine 167 | Rosamond 168 | Arianne 169 | Cortez 170 | Fredrick 171 | Evelyne 172 | Christina 173 | Tania 174 | Loretta 175 | Franchesca 176 | Jules 177 | Fernanda 178 | Kaitlin 179 | Melva 180 | Leeanna 181 | Nathalie 182 | Fermin 183 | Evelia 184 | Lucius 185 | Kit 186 | Barabara 187 | Paz 188 | Han 189 | Shantay 190 | Tyson 191 | Debroah 192 | Angle 193 | Jodie 194 | Bettyann 195 | Ocie 196 | Sidney 197 | Delphia 198 | Monet 199 | Bess 200 | George 201 | Jamaal 202 | Alix 203 | Louise 204 | Aurora 205 | Tommye 206 | Cornelia 207 | Michele 208 | Isa 209 | Long 210 | Cheree 211 | Ofelia 212 | Sonya 213 | Reita 214 | Mayme 215 | Shila 216 | Robyn 217 | Zana 218 | Rolf 219 | Elise 220 | Whitley 221 | Leota 222 | Elizebeth 223 | Emilie 224 | Joellen 225 | Yasmin 226 | Emogene 227 | Rosalinda 228 | Rosalind 229 | Margarette 230 | Omar 231 | Earl 232 | Irma 233 | Tawanna 234 | Ivy 235 | Burton 236 | Patience 237 | Nelda 238 | Emery 239 | Maryrose 240 | Carri 241 | Dominique 242 | Essie 243 | Ria 244 | Corinne 245 | Zulma 246 | Terresa 247 | Everett 248 | Petra 249 | Phyllis 250 | Dyan 251 | Machelle 252 | Willian 253 | Cruz 254 | Adah 255 | Ashlyn 256 | Verena 257 | Briana 258 | Sherill 259 | Adina 260 | Chi 261 | Domenic 262 | Kirby 263 | Gracia 264 | Rickie 265 | Charity 266 | Terina 267 | Rutha 268 | Jeramy 269 | Tempie 270 | Emerald 271 | Candice 272 | Marcelina 273 
| Luis 274 | Laurinda 275 | Kenia 276 | Lyman 277 | Merle 278 | Tonja 279 | Valrie 280 | Julietta 281 | Evelyn 282 | Angelique 283 | Shirlene 284 | Amada 285 | Bernice 286 | Kimberlee 287 | Tomasa 288 | Min 289 | Joane 290 | Kathleen 291 | Jessika 292 | Mamie 293 | Tonia 294 | Anjanette 295 | Hassan 296 | Darby 297 | Lyn 298 | Lloyd 299 | Helena 300 | Tashina 301 | Suzanne 302 | Sherley 303 | Katherina 304 | Shaniqua 305 | Dolly 306 | Kara 307 | Gwendolyn 308 | Noah 309 | Chastity 310 | Marlyn 311 | Veronique 312 | Donny 313 | Lahoma 314 | Nathan 315 | Kristen 316 | Adelina 317 | Kaleigh 318 | Myrta 319 | Carmina 320 | Bryan 321 | Shamika 322 | Wilmer 323 | Arletta 324 | Bulah 325 | Carl 326 | Marian 327 | Cyndi 328 | Wen 329 | Melony 330 | Johana 331 | Iraida 332 | Marc 333 | Leighann 334 | Elenora 335 | Golda 336 | Donovan 337 | Annika 338 | Lavinia 339 | Delena 340 | Pattie 341 | Carmelita 342 | Katherin 343 | Deidre 344 | Andria 345 | Silvana 346 | Bianca 347 | Dayna 348 | Milo 349 | Ricky 350 | Caterina 351 | Valery 352 | Gisele 353 | Maren 354 | Tena 355 | Lexie 356 | Tabatha 357 | Myrna 358 | Kizzie 359 | Particia 360 | Gena 361 | Ming 362 | Allen 363 | Lauran 364 | Virgie 365 | Myra 366 | Jim 367 | Arlena 368 | Marvel 369 | Roseanna 370 | Otto 371 | Pearlene 372 | Katrice 373 | Kecia 374 | Monte 375 | Matilde 376 | Tamisha 377 | Bethany 378 | Elbert 379 | Danae 380 | Shane 381 | Hank 382 | Marcia 383 | Neoma 384 | Edwina 385 | Anita 386 | Sharilyn 387 | Emely 388 | Christa 389 | Tressa 390 | Idalia 391 | Franklin 392 | Iesha 393 | Kyong 394 | Janella 395 | Lili 396 | Belen 397 | Arnetta 398 | Christiana 399 | Verda 400 | Suzann 401 | Carola 402 | Katharina 403 | Molly 404 | Josefine 405 | Danika 406 | Shirely 407 | Cinda 408 | Marla 409 | Jonah 410 | Tom 411 | Zenobia 412 | Katheleen 413 | Jacklyn 414 | Beryl 415 | Shan 416 | Jeannie 417 | Saturnina 418 | Ellen 419 | Jarod 420 | Kelsi 421 | Freida 422 | Hal 423 | Merri 424 | Nia 425 | Amee 426 | Illa 427 | 
Sherrill 428 | Elene 429 | Tomi 430 | Amberly 431 | Forrest 432 | Corinna 433 | Marleen 434 | Clarissa 435 | Dee 436 | Lucy 437 | Carlo 438 | Erma 439 | Curt 440 | Lavada 441 | Shyla 442 | Maia 443 | Margorie 444 | Pricilla 445 | Abby 446 | Catalina 447 | Mui 448 | Ellamae 449 | Everette 450 | Jeremiah 451 | Pamula 452 | Malena 453 | Golden 454 | Elza 455 | Joi 456 | Frankie 457 | Noemi 458 | Josefina 459 | Rubye 460 | Shirly 461 | Mika 462 | Shira 463 | Sharita 464 | Lucile 465 | Anthony 466 | Thomasine 467 | Angelia 468 | Kizzy 469 | Mona 470 | Jaye 471 | Wilda 472 | Aaron 473 | Ervin 474 | Karleen 475 | Danuta 476 | Keneth 477 | Fonda 478 | Walton 479 | Wanetta 480 | Charlott 481 | Elvina 482 | Marin 483 | Jessi 484 | Alfred 485 | Darin 486 | Chloe 487 | Ardelle 488 | Madlyn 489 | Terence 490 | Concetta 491 | Debbi 492 | Teddy 493 | Leopoldo 494 | Charla 495 | Kristin 496 | Emmanuel 497 | Porsche 498 | Kiesha 499 | Ruth 500 | Kasey -------------------------------------------------------------------------------- /resources/last_names.txt: -------------------------------------------------------------------------------- 1 | Kunst 2 | Higuera 3 | Suire 4 | Cozad 5 | Verner 6 | Paik 7 | Gatton 8 | Leitner 9 | Confer 10 | Kwiecien 11 | Rempel 12 | Mccolley 13 | Bjork 14 | Dudney 15 | Mccartney 16 | Cargo 17 | Gonzalas 18 | Genest 19 | Lembo 20 | Fraga 21 | Frisbie 22 | Hilger 23 | Horrocks 24 | Nale 25 | Buescher 26 | Creegan 27 | Schreiber 28 | Deputy 29 | States 30 | Abee 31 | Ciulla 32 | Macha 33 | Giddens 34 | Traverso 35 | Hassen 36 | Whiteley 37 | Hagen 38 | Kovar 39 | Rothrock 40 | Meyerson 41 | Maddy 42 | Neiman 43 | Villafane 44 | Addison 45 | Herzog 46 | Driggers 47 | Leverett 48 | Schwebach 49 | Seiber 50 | Okelley 51 | Donahue 52 | Crumley 53 | Ridenhour 54 | Witty 55 | Kitzman 56 | Icenhour 57 | Giorgio 58 | Hargett 59 | Wolters 60 | Medrano 61 | Hake 62 | Boggess 63 | Lavallie 64 | Bassham 65 | Powers 66 | Watterson 67 | Reedy 68 | Heidel 69 | Quinney 70 
| Stoker 71 | Hatfield 72 | Krall 73 | Ivey 74 | Slaybaugh 75 | Marksberry 76 | Delucia 77 | Vess 78 | Vanhoose 79 | Noe 80 | Dillow 81 | Gabel 82 | Alcaraz 83 | Fannin 84 | Stradley 85 | Bushnell 86 | Mccleskey 87 | Rising 88 | Rudd 89 | Jourdan 90 | Mcarthur 91 | Casados 92 | Karim 93 | Delong 94 | Szabo 95 | Tedeschi 96 | Mcdermott 97 | Leber 98 | Duhart 99 | Heinz 100 | Lefler 101 | Barajas 102 | Tuohy 103 | Dorfman 104 | Bolz 105 | Heitzman 106 | Ingham 107 | Chaplin 108 | Grabert 109 | Sonntag 110 | Gathers 111 | Carasco 112 | Kohut 113 | Pereyra 114 | Mudd 115 | Gonce 116 | Eskridge 117 | Orum 118 | Lyles 119 | Freese 120 | Casselman 121 | Strachan 122 | Jorge 123 | Chasteen 124 | Macky 125 | Vuong 126 | Stoecker 127 | Meehan 128 | Caroll 129 | Eiler 130 | Vansant 131 | Steidl 132 | Devitt 133 | Cromwell 134 | Wilcox 135 | Kring 136 | Llanes 137 | Abadie 138 | Korman 139 | Otts 140 | Lapp 141 | Flemming 142 | Wadkins 143 | Brill 144 | Penna 145 | Eckler 146 | Sok 147 | Mazzotta 148 | Nalley 149 | Mclamb 150 | Wheatley 151 | Musgrave 152 | Pasternak 153 | Rameau 154 | Peters 155 | Hiser 156 | Guynn 157 | Guyette 158 | Gaulke 159 | Cavanaugh 160 | Decaro 161 | Terpstra 162 | Sevigny 163 | Gabaldon 164 | Figgins 165 | Duerr 166 | Burghardt 167 | Ackley 168 | Hofmeister 169 | Gibbs 170 | Bobo 171 | Seifried 172 | Hunger 173 | Fraley 174 | Moffatt 175 | Osbourn 176 | Mcglothlin 177 | Shriner 178 | Smead 179 | Mercure 180 | Whitehead 181 | Salvador 182 | Gravitt 183 | Marko 184 | Droz 185 | Lykes 186 | Reys 187 | Dick 188 | Gilbertson 189 | Flanagan 190 | Kroh 191 | Lafortune 192 | Greear 193 | Moloney 194 | Shockey 195 | Bakos 196 | Esper 197 | Darcy 198 | Hawthorn 199 | Bones 200 | Iacovelli 201 | Hurn 202 | Harries 203 | Lines 204 | Blind 205 | Bainter 206 | Woolbright 207 | Klumpp 208 | Rehberg 209 | Manrique 210 | Hu 211 | Case 212 | Bosworth 213 | Chesnut 214 | Santora 215 | Key 216 | Valerio 217 | Cupps 218 | Mak 219 | Loya 220 | Larabee 221 | Mckie 222 | 
Clukey 223 | Belton 224 | Roberie 225 | Guinyard 226 | Werts 227 | Chaffin 228 | Tourville 229 | Ridout 230 | Lichtenstein 231 | Penrod 232 | Houk 233 | Facey 234 | Redner 235 | Ritzman 236 | Konkel 237 | Pogue 238 | Rother 239 | Eilers 240 | Tallmadge 241 | Fox 242 | Cotten 243 | Mckown 244 | Kestner 245 | Sienkiewicz 246 | Baier 247 | Tjaden 248 | Steely 249 | Russom 250 | Nunemaker 251 | Motz 252 | Vibbert 253 | Bostrom 254 | Mcclard 255 | Cantara 256 | Almonte 257 | Keating 258 | Gerald 259 | Bloodworth 260 | Rowlette 261 | Carrico 262 | Clem 263 | Croce 264 | Melia 265 | Marlar 266 | Callihan 267 | Conyers 268 | Burgos 269 | Mcadoo 270 | Hoppe 271 | Myers 272 | German 273 | Lush 274 | Storck 275 | Brindley 276 | Marsch 277 | Hendershott 278 | Mickey 279 | Miron 280 | Points 281 | Webber 282 | Yerian 283 | Chaudhry 284 | Nottingham 285 | Nicoll 286 | Stillings 287 | Babst 288 | Wein 289 | Osbourne 290 | Schier 291 | Ragsdale 292 | Ackerley 293 | Dedmon 294 | Lorusso 295 | Brugger 296 | Mun 297 | Hymel 298 | Zehr 299 | Caudillo 300 | Mcmath 301 | Badilla 302 | Menjivar 303 | Towell 304 | Croom 305 | Ritenour 306 | Angles 307 | Robson 308 | Medlin 309 | Waldrep 310 | Searls 311 | Ruley 312 | Lamberton 313 | Francoeur 314 | Damron 315 | Wareham 316 | Vigliotti 317 | Huitt 318 | Febres 319 | Sipe 320 | Grover 321 | Moschella 322 | Kyker 323 | Titcomb 324 | Kreitzer 325 | Rademacher 326 | Boxx 327 | Harger 328 | Pabon 329 | Nunez 330 | Montrose 331 | Banta 332 | Spaeth 333 | Ferrel 334 | Mcmanis 335 | Arceo 336 | Lemmons 337 | Edgemon 338 | Bremer 339 | Busbee 340 | Hight 341 | Honda 342 | Torrez 343 | Hanford 344 | Poteete 345 | Courchesne 346 | Breeding 347 | Kincannon 348 | Pirtle 349 | Bundick 350 | Marquardt 351 | Flannigan 352 | Raynor 353 | Sizemore 354 | Amador 355 | Teeter 356 | Swicegood 357 | Saleem 358 | Rolon 359 | Bomar 360 | Rodi 361 | Wierenga 362 | Bednar 363 | Music 364 | Smoot 365 | Latham 366 | Alex 367 | Luper 368 | Merriman 369 | Ackles 370 | 
Seawell 371 | Crider 372 | Stennett 373 | Westgate 374 | Sharpton 375 | Ring 376 | Pinkowski 377 | Feucht 378 | Pillai 379 | Ballantyne 380 | Railey 381 | Shiffer 382 | Tsui 383 | Lucero 384 | Wilmer 385 | Weight 386 | Romans 387 | Cora 388 | Hummer 389 | Foskey 390 | Dangerfield 391 | Konrad 392 | Raulerson 393 | Bernard 394 | Kirker 395 | Woodside 396 | Gwaltney 397 | Berber 398 | Mandel 399 | Voyles 400 | Hohman 401 | Vitale 402 | Nichol 403 | Lurie 404 | Landrum 405 | Patten 406 | Jamerson 407 | Radosevich 408 | Hunsicker 409 | Boser 410 | Roan 411 | Cervantez 412 | Holland 413 | Heinemann 414 | Meacham 415 | Boozer 416 | Edmund 417 | Hurlburt 418 | Tow 419 | Thelen 420 | Dora 421 | Mercado 422 | Gatlin 423 | Tye 424 | Hahne 425 | Highsmith 426 | Giampaolo 427 | Gatto 428 | Donaghy 429 | Keef 430 | Ma 431 | Beckett 432 | Urso 433 | Edge 434 | Gebhard 435 | Shadduck 436 | Schade 437 | Crumble 438 | Ellender 439 | Tsang 440 | Bramlett 441 | Rhames 442 | Dunmire 443 | Oney 444 | Southward 445 | Dunnington 446 | Fiorenza 447 | Grable 448 | Reiff 449 | Sieck 450 | Kersh 451 | Mutter 452 | Hiner 453 | Wadsworth 454 | Aceuedo 455 | Chaloux 456 | Sthilaire 457 | Goble 458 | Waldron 459 | Cranford 460 | Tumlinson 461 | Bourgeois 462 | Krenz 463 | Blystone 464 | Grissom 465 | Payan 466 | Mullet 467 | Lorenzana 468 | Buttars 469 | Stoltenberg 470 | Chiarello 471 | Aleman 472 | Schnur 473 | Flack 474 | Lovering 475 | Sak 476 | Havard 477 | Astorga 478 | Thames 479 | Mcclurg 480 | Tenaglia 481 | Crenshaw 482 | Strickland 483 | Hagler 484 | Newcomb 485 | Schenkel 486 | Pilkington 487 | Boze 488 | Gowers 489 | Chism 490 | Urena 491 | Endo 492 | Goldstein 493 | Beason 494 | Hartford 495 | Redden 496 | Regan 497 | Linsley 498 | Mcgrew 499 | Kitchens 500 | Lowder -------------------------------------------------------------------------------- /sample-data/chinook.sqlite: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/thoughtbot/data-anonymization/78de64c1638723ca6c4e49a4d698aeaec505ba89/sample-data/chinook.sqlite -------------------------------------------------------------------------------- /sample-data/chinook_schema.sql: -------------------------------------------------------------------------------- 1 |  2 | /******************************************************************************* 3 | Chinook Database - Version 1.3 4 | Script: Chinook_Sqlite.sql 5 | Description: Creates and populates the Chinook database. 6 | DB Server: Sqlite 7 | Author: Luis Rocha 8 | License: http://www.codeplex.com/ChinookDatabase/license 9 | ********************************************************************************/ 10 | 11 | /******************************************************************************* 12 | Drop Foreign Keys Constraints 13 | ********************************************************************************/ 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | /******************************************************************************* 38 | Drop Tables 39 | ********************************************************************************/ 40 | DROP TABLE IF EXISTS [Album]; 41 | 42 | DROP TABLE IF EXISTS [Artist]; 43 | 44 | DROP TABLE IF EXISTS [Customer]; 45 | 46 | DROP TABLE IF EXISTS [Employee]; 47 | 48 | DROP TABLE IF EXISTS [Genre]; 49 | 50 | DROP TABLE IF EXISTS [Invoice]; 51 | 52 | DROP TABLE IF EXISTS [InvoiceLine]; 53 | 54 | DROP TABLE IF EXISTS [MediaType]; 55 | 56 | DROP TABLE IF EXISTS [Playlist]; 57 | 58 | DROP TABLE IF EXISTS [PlaylistTrack]; 59 | 60 | DROP TABLE IF EXISTS [Track]; 61 | 62 | 63 | /******************************************************************************* 64 | Create Tables 65 | ********************************************************************************/ 66 | CREATE TABLE [Album] 67 | ( 68 | [AlbumId] INTEGER NOT NULL, 69 | [Title] 
NVARCHAR(160) NOT NULL, 70 | [ArtistId] INTEGER NOT NULL, 71 | CONSTRAINT [PK_Album] PRIMARY KEY ([AlbumId]), 72 | FOREIGN KEY ([ArtistId]) REFERENCES [Artist] ([ArtistId]) 73 | ON DELETE NO ACTION ON UPDATE NO ACTION 74 | ); 75 | 76 | CREATE TABLE [Artist] 77 | ( 78 | [ArtistId] INTEGER NOT NULL, 79 | [Name] NVARCHAR(120), 80 | CONSTRAINT [PK_Artist] PRIMARY KEY ([ArtistId]) 81 | ); 82 | 83 | CREATE TABLE [Customer] 84 | ( 85 | [CustomerId] INTEGER NOT NULL, 86 | [FirstName] NVARCHAR(40) NOT NULL, 87 | [LastName] NVARCHAR(20) NOT NULL, 88 | [Company] NVARCHAR(80), 89 | [Address] NVARCHAR(70), 90 | [City] NVARCHAR(40), 91 | [State] NVARCHAR(40), 92 | [Country] NVARCHAR(40), 93 | [PostalCode] NVARCHAR(10), 94 | [Phone] NVARCHAR(24), 95 | [Fax] NVARCHAR(24), 96 | [Email] NVARCHAR(60) NOT NULL, 97 | [SupportRepId] INTEGER, 98 | CONSTRAINT [PK_Customer] PRIMARY KEY ([CustomerId]), 99 | FOREIGN KEY ([SupportRepId]) REFERENCES [Employee] ([EmployeeId]) 100 | ON DELETE NO ACTION ON UPDATE NO ACTION 101 | ); 102 | 103 | CREATE TABLE [Employee] 104 | ( 105 | [EmployeeId] INTEGER NOT NULL, 106 | [LastName] NVARCHAR(20) NOT NULL, 107 | [FirstName] NVARCHAR(20) NOT NULL, 108 | [Title] NVARCHAR(30), 109 | [ReportsTo] INTEGER, 110 | [BirthDate] DATETIME, 111 | [HireDate] DATETIME, 112 | [Address] NVARCHAR(70), 113 | [City] NVARCHAR(40), 114 | [State] NVARCHAR(40), 115 | [Country] NVARCHAR(40), 116 | [PostalCode] NVARCHAR(10), 117 | [Phone] NVARCHAR(24), 118 | [Fax] NVARCHAR(24), 119 | [Email] NVARCHAR(60), 120 | CONSTRAINT [PK_Employee] PRIMARY KEY ([EmployeeId]), 121 | FOREIGN KEY ([ReportsTo]) REFERENCES [Employee] ([EmployeeId]) 122 | ON DELETE NO ACTION ON UPDATE NO ACTION 123 | ); 124 | 125 | CREATE TABLE [Genre] 126 | ( 127 | [GenreId] INTEGER NOT NULL, 128 | [Name] NVARCHAR(120), 129 | CONSTRAINT [PK_Genre] PRIMARY KEY ([GenreId]) 130 | ); 131 | 132 | CREATE TABLE [Invoice] 133 | ( 134 | [InvoiceId] INTEGER NOT NULL, 135 | [CustomerId] INTEGER NOT NULL, 136 | 
[InvoiceDate] DATETIME NOT NULL, 137 | [BillingAddress] NVARCHAR(70), 138 | [BillingCity] NVARCHAR(40), 139 | [BillingState] NVARCHAR(40), 140 | [BillingCountry] NVARCHAR(40), 141 | [BillingPostalCode] NVARCHAR(10), 142 | [Total] NUMERIC(10,2) NOT NULL, 143 | CONSTRAINT [PK_Invoice] PRIMARY KEY ([InvoiceId]), 144 | FOREIGN KEY ([CustomerId]) REFERENCES [Customer] ([CustomerId]) 145 | ON DELETE NO ACTION ON UPDATE NO ACTION 146 | ); 147 | 148 | CREATE TABLE [InvoiceLine] 149 | ( 150 | [InvoiceLineId] INTEGER NOT NULL, 151 | [InvoiceId] INTEGER NOT NULL, 152 | [TrackId] INTEGER NOT NULL, 153 | [UnitPrice] NUMERIC(10,2) NOT NULL, 154 | [Quantity] INTEGER NOT NULL, 155 | CONSTRAINT [PK_InvoiceLine] PRIMARY KEY ([InvoiceLineId]), 156 | FOREIGN KEY ([InvoiceId]) REFERENCES [Invoice] ([InvoiceId]) 157 | ON DELETE NO ACTION ON UPDATE NO ACTION, 158 | FOREIGN KEY ([TrackId]) REFERENCES [Track] ([TrackId]) 159 | ON DELETE NO ACTION ON UPDATE NO ACTION 160 | ); 161 | 162 | CREATE TABLE [MediaType] 163 | ( 164 | [MediaTypeId] INTEGER NOT NULL, 165 | [Name] NVARCHAR(120), 166 | CONSTRAINT [PK_MediaType] PRIMARY KEY ([MediaTypeId]) 167 | ); 168 | 169 | CREATE TABLE [Playlist] 170 | ( 171 | [PlaylistId] INTEGER NOT NULL, 172 | [Name] NVARCHAR(120), 173 | CONSTRAINT [PK_Playlist] PRIMARY KEY ([PlaylistId]) 174 | ); 175 | 176 | CREATE TABLE [PlaylistTrack] 177 | ( 178 | [PlaylistId] INTEGER NOT NULL, 179 | [TrackId] INTEGER NOT NULL, 180 | CONSTRAINT [PK_PlaylistTrack] PRIMARY KEY ([PlaylistId], [TrackId]), 181 | FOREIGN KEY ([PlaylistId]) REFERENCES [Playlist] ([PlaylistId]) 182 | ON DELETE NO ACTION ON UPDATE NO ACTION, 183 | FOREIGN KEY ([TrackId]) REFERENCES [Track] ([TrackId]) 184 | ON DELETE NO ACTION ON UPDATE NO ACTION 185 | ); 186 | 187 | CREATE TABLE [Track] 188 | ( 189 | [TrackId] INTEGER NOT NULL, 190 | [Name] NVARCHAR(200) NOT NULL, 191 | [AlbumId] INTEGER, 192 | [MediaTypeId] INTEGER NOT NULL, 193 | [GenreId] INTEGER, 194 | [Composer] NVARCHAR(220), 195 | 
[Milliseconds] INTEGER NOT NULL, 196 | [Bytes] INTEGER, 197 | [UnitPrice] NUMERIC(10,2) NOT NULL, 198 | CONSTRAINT [PK_Track] PRIMARY KEY ([TrackId]), 199 | FOREIGN KEY ([AlbumId]) REFERENCES [Album] ([AlbumId]) 200 | ON DELETE NO ACTION ON UPDATE NO ACTION, 201 | FOREIGN KEY ([GenreId]) REFERENCES [Genre] ([GenreId]) 202 | ON DELETE NO ACTION ON UPDATE NO ACTION, 203 | FOREIGN KEY ([MediaTypeId]) REFERENCES [MediaType] ([MediaTypeId]) 204 | ON DELETE NO ACTION ON UPDATE NO ACTION 205 | ); 206 | 207 | 208 | /******************************************************************************* 209 | Create Primary Key Unique Indexes 210 | ********************************************************************************/ 211 | CREATE UNIQUE INDEX [IPK_Album] ON [Album]([AlbumId]); 212 | 213 | CREATE UNIQUE INDEX [IPK_Artist] ON [Artist]([ArtistId]); 214 | 215 | CREATE UNIQUE INDEX [IPK_Customer] ON [Customer]([CustomerId]); 216 | 217 | CREATE UNIQUE INDEX [IPK_Employee] ON [Employee]([EmployeeId]); 218 | 219 | CREATE UNIQUE INDEX [IPK_Genre] ON [Genre]([GenreId]); 220 | 221 | CREATE UNIQUE INDEX [IPK_Invoice] ON [Invoice]([InvoiceId]); 222 | 223 | CREATE UNIQUE INDEX [IPK_InvoiceLine] ON [InvoiceLine]([InvoiceLineId]); 224 | 225 | CREATE UNIQUE INDEX [IPK_MediaType] ON [MediaType]([MediaTypeId]); 226 | 227 | CREATE UNIQUE INDEX [IPK_Playlist] ON [Playlist]([PlaylistId]); 228 | 229 | CREATE UNIQUE INDEX [IPK_PlaylistTrack] ON [PlaylistTrack]([PlaylistId], [TrackId]); 230 | 231 | CREATE UNIQUE INDEX [IPK_Track] ON [Track]([TrackId]); 232 | 233 | 234 | /******************************************************************************* 235 | Create Foreign Keys 236 | ********************************************************************************/ 237 | CREATE INDEX [IFK_AlbumArtistId] ON [Album] ([ArtistId]); 238 | 239 | CREATE INDEX [IFK_CustomerSupportRepId] ON [Customer] ([SupportRepId]); 240 | 241 | CREATE INDEX [IFK_EmployeeReportsTo] ON [Employee] ([ReportsTo]); 242 | 243 | 
CREATE INDEX [IFK_InvoiceCustomerId] ON [Invoice] ([CustomerId]); 244 | 245 | CREATE INDEX [IFK_InvoiceLineInvoiceId] ON [InvoiceLine] ([InvoiceId]); 246 | 247 | CREATE INDEX [IFK_InvoiceLineTrackId] ON [InvoiceLine] ([TrackId]); 248 | 249 | CREATE INDEX [IFK_PlaylistTrackTrackId] ON [PlaylistTrack] ([TrackId]); 250 | 251 | CREATE INDEX [IFK_TrackAlbumId] ON [Track] ([AlbumId]); 252 | 253 | CREATE INDEX [IFK_TrackGenreId] ON [Track] ([GenreId]); 254 | 255 | CREATE INDEX [IFK_TrackMediaTypeId] ON [Track] ([MediaTypeId]); 256 | 257 | 258 | 259 | 260 | 261 | -------------------------------------------------------------------------------- /sample-data/mongo/plans.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name":"Free", 4 | "features":[ 5 | { 6 | "max_storage": 21474836480, 7 | "type":"AmazonS3", 8 | "users": { "max" : 1, "additional" : false } 9 | }, 10 | { 11 | "max_storage": 21474836480, 12 | "type":"DropBox", 13 | "users": { "max" : 1, "additional" : false } 14 | } 15 | ], 16 | "term":"month", 17 | "public_sharing": false, 18 | "photo_sharing": true, 19 | "created_at":{ "$date":1346740765000 } 20 | }, 21 | { 22 | "name":"Team", 23 | "plan_aliases":["Business","Paid"], 24 | "features":[ 25 | { 26 | "max_storage": 53687091200, 27 | "type":"AmazonS3", 28 | "users": { "max" : 5, "additional" : true } 29 | }, 30 | { 31 | "max_storage": 53687091200, 32 | "type":"DropBox", 33 | "users": { "max" : 5, "additional" : true } 34 | } 35 | ], 36 | "term":"month", 37 | "public_sharing": true, 38 | "photo_sharing": true, 39 | "created_at":{ "$date":1346740765000 } 40 | }, 41 | { 42 | "name":"Team", 43 | "plan_aliases":[], 44 | "features":[], 45 | "term":"month", 46 | "public_sharing": true, 47 | "photo_sharing": true, 48 | "created_at":{ "$date":1346740765000 } 49 | } 50 | ] 51 | -------------------------------------------------------------------------------- /sample-data/mongo/users.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "user_id": "sunitparekh", 4 | "date_of_birth": { "$date":1346740765000 }, 5 | "email":"parekh.sunit@gmail.com", 6 | "password":"TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@", 7 | "failed_attempts":0, 8 | "first_name":"Sunit", 9 | "last_name":"Parekh", 10 | "password_reset_answer":"manza", 11 | "password_reset_question":"My new car modal?", 12 | "nick_names" : ["sUnit","Mr S", "Parekh"], 13 | "updated_at":{ "$date":1346740767000 } 14 | }, 15 | { 16 | "user_id": "satyamag", 17 | "date_of_birth":{ "$date":1346740765000 }, 18 | "email":"satyamag@gmail.com", 19 | "password":"$2a$10$2YTfqIK8Pd8GlbMDFZCvGOcJYLkQs7Hlpal4YF99iSh9yhnWPggZG", 20 | "failed_attempts":1, 21 | "first_name":"Satyam", 22 | "last_name":"Agarwal", 23 | "password_reset_answer":"iphone", 24 | "password_reset_question":"My phone?", 25 | "updated_at":{ "$date":1346740767000 } 26 | }, 27 | { 28 | "user_id": "anandagrawal", 29 | "date_of_birth":{ "$date":1346740765000 }, 30 | "email":"anandagrawal84@gmail.com", 31 | "password":"Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum", 32 | "failed_attempts":0, 33 | "first_name":"Anand", 34 | "last_name":"Agrawal", 35 | "password_reset_answer":"android", 36 | "password_reset_question":"My phone?", 37 | "updated_at":{ "$date":1346740767000 } 38 | } 39 | ] 40 | -------------------------------------------------------------------------------- /spec/acceptance/mongodb_blacklist_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'mongo' 3 | 4 | describe 'End 2 End MongoDB Blacklist Acceptance Test' do 5 | 6 | before(:each) do 7 | Mongo::Client.new('mongodb://localhost/test').database().drop() 8 | users = [ 9 | { 10 | '_id' => 1, 11 | 'USER_ID' => 'sunitparekh', 12 | 'date_of_birth' => Time.new(2012, 7, 14, 13, 1, 0), 13 | 'email' => 'parekh-sunit@mailinator.com', 14 | 'password' => 
'TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@', 15 | 'failed_attempts' => 0, 16 | 'first_name' => 'Sunit', 17 | 'last_name' => 'Parekh', 18 | 'password_reset_answer' => 'manza', 19 | 'password_reset_question' => 'My new car modal?', 20 | 'updated_at' => Time.new(2012, 8, 15, 13, 1, 0), 21 | 'alternate_emails' => ['abc@test.com', 'abc2@test.com'] 22 | 23 | }, 24 | { 25 | '_id' => 2, 26 | 'USER_ID' => 'anandagrawal', 27 | 'date_of_birth' => Time.new(2011, 8, 11, 13, 1, 0), 28 | 'email' => 'anand-agrawal@mailinator.com', 29 | 'password' => 'Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum', 30 | 'failed_attempts' => 0, 31 | 'first_name' => 'Anand', 32 | 'last_name' => 'Agrawal', 33 | 'password_reset_answer' => 'android', 34 | 'password_reset_question' => 'My phone?', 35 | 'updated_at' => Time.new(2012, 2, 11, 13, 1, 0), 36 | 'alternate_emails' => ['abc@test.com', 'abc2@test.com'] 37 | } 38 | ] 39 | users_coll = Mongo::Client.new('mongodb://localhost/test').database().collection('users') 40 | users.each { |p| users_coll.insert_one p } 41 | end 42 | 43 | it 'should anonymize plans collection' do 44 | 45 | database 'test' do 46 | strategy DataAnon::Strategy::MongoDB::Blacklist 47 | source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test' 48 | 49 | collection 'users' do 50 | anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30) 51 | anonymize('USER_ID').using FieldStrategy::StringTemplate.new('user-#{row_number}') 52 | anonymize('email').using FieldStrategy::RandomMailinatorEmail.new 53 | anonymize('password') { |field| 'password'} 54 | anonymize('first_name').using FieldStrategy::RandomFirstName.new 55 | anonymize('last_name').using FieldStrategy::RandomLastName.new 56 | anonymize('alternate_emails').using FieldStrategy::AnonymizeArray.new(FieldStrategy::RandomMailinatorEmail.new) 57 | end 58 | 59 | end 60 | 61 | users_coll = Mongo::Client.new('mongodb://localhost/test').database().collection('users') 62 | 
users_coll.find.count.to_int.should be 2 63 | user = users_coll.find({'_id' => 1}).to_a[0] 64 | 65 | user['_id'].should == 1 66 | user['USER_ID'].should == 'user-1' 67 | user['date_of_birth'].to_i.should_not == Time.new(2012, 7, 14, 13, 1, 0).to_i 68 | user['email'].should_not == 'parekh-sunit@mailinator.com' 69 | user['password'].should == 'password' 70 | user['failed_attempts'].should == 0 71 | user['first_name'].should_not be 'Sunit' 72 | user['last_name'].should_not be 'Parekh' 73 | user['password_reset_answer'].should == 'manza' 74 | user['password_reset_question'].should == 'My new car modal?' 75 | user['updated_at'].to_i.should == Time.new(2012, 8, 15, 13, 1, 0).to_i 76 | user['alternate_emails'].length.should == 2 77 | user['alternate_emails'][0].should_not == 'abc@test.com' 78 | user['alternate_emails'][1].should_not == 'abc2@test.com' 79 | 80 | 81 | end 82 | end -------------------------------------------------------------------------------- /spec/acceptance/mongodb_whitelist_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'mongo' 3 | 4 | describe 'End 2 End MongoDB Whitelist Acceptance Test' do 5 | 6 | before(:each) do 7 | Mongo::Client.new('mongodb://localhost/test').database.drop 8 | Mongo::Client.new('mongodb://localhost/dest').database.drop 9 | plans = [ 10 | { 11 | '_id' => 1, 12 | 'name' => 'Free', 13 | 'nick_names' => ['Name1', 'Name2'], 14 | 'features' => [ 15 | { 16 | 'max_storage' => 21474836480, 17 | 'type' => 'AmazonS3', 18 | 'users' => {'max' => 1, 'additional' => false} 19 | }, 20 | { 21 | 'max_storage' => 21474836480, 22 | 'type' => 'DropBox', 23 | 'users' => {'max' => 1, 'additional' => false} 24 | } 25 | ], 26 | 'term' => 'month', 27 | 'public_sharing' => false, 28 | 'photo_sharing' => true, 29 | 'created_at' => Time.new(2012, 6, 21, 13, 30, 0) 30 | }, 31 | { 32 | '_id' => 2, 33 | 'name' => 'Team', 34 | 'plan_aliases' => ['Business', 'Paid'], 35 | 'features' 
=> [ 36 | { 37 | 'max_storage' => 53687091200, 38 | 'type' => 'AmazonS3', 39 | 'users' => {'max' => 5, 'additional' => true} 40 | }, 41 | { 42 | 'max_storage' => 53687091200, 43 | 'type' => 'DropBox', 44 | 'users' => {'max' => 5, 'additional' => true} 45 | } 46 | ], 47 | 'term' => 'month', 48 | 'public_sharing' => true, 49 | 'photo_sharing' => true, 50 | 'created_at' => Time.new(2012, 8, 11, 13, 1, 0) 51 | } 52 | ] 53 | plans_coll = Mongo::Client.new('mongodb://localhost/test').database.collection('plans') 54 | plans.each { |p| plans_coll.insert_one p } 55 | end 56 | 57 | it 'should anonymize plans collection' do 58 | 59 | database 'dest' do 60 | strategy DataAnon::Strategy::MongoDB::Whitelist 61 | source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test' 62 | destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest' 63 | 64 | collection 'plans' do 65 | whitelist '_id', 'name', 'term', 'created_at' 66 | anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(%w(Free Team Business Paid)) 67 | anonymize 'public_sharing', 'photo_sharing' 68 | 69 | collection 'features' do 70 | anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240, 21474836480, 53687091200]) 71 | whitelist 'type' 72 | 73 | document 'users' do 74 | anonymize 'max', 'additional' 75 | end 76 | end 77 | end 78 | 79 | end 80 | 81 | plans_coll = Mongo::Client.new('mongodb://localhost/dest').database.collection('plans') 82 | plans_coll.find.count.to_int.should be 2 83 | plan = plans_coll.find({ '_id' => 1}).to_a[0] 84 | 85 | plan['_id'].should == 1 86 | plan['name'].should == 'Free' 87 | plan['nick_names'][0].should_not == 'Name1' 88 | plan['nick_names'][1].should_not == 'Name2' 89 | plan['term'].should == 'month' 90 | plan['created_at'].should == Time.new(2012, 6, 21, 13, 30, 0) 91 | plan['plan_aliases'].should be_nil 92 | [true,false].should include(plan['public_sharing']) 93 | [true,false].should include(plan['photo_sharing']) 94 | 
plan['features'].length.should == 2 95 | feature1 = plan['features'][0] 96 | [10737418240, 21474836480, 53687091200].should include(feature1['max_storage']) 97 | feature1['type'].should == 'AmazonS3' 98 | feature1['users']['max'].should be_kind_of(Integer) 99 | [true,false].should include(feature1['users']['additional']) 100 | 101 | 102 | plan = plans_coll.find({ '_id' => 2}).to_a[0] 103 | plan['plan_aliases'].length.should == 2 104 | ['Free', 'Team', 'Business', 'Paid'].should include(plan['plan_aliases'][0]) 105 | ['Free', 'Team', 'Business', 'Paid'].should include(plan['plan_aliases'][1]) 106 | end 107 | end -------------------------------------------------------------------------------- /spec/acceptance/rdbms_blacklist_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'End 2 End RDBMS Blacklist Acceptance Test using SQLite database' do 4 | connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'} 5 | 6 | before(:each) do 7 | CustomerSample.clean 8 | CustomerSample.create_schema connection_spec 9 | CustomerSample.insert_record connection_spec, CustomerSample::SAMPLE_DATA[0] 10 | end 11 | 12 | it 'should anonymize customer table record ' do 13 | 14 | database 'Customer' do 15 | strategy DataAnon::Strategy::Blacklist 16 | source_db connection_spec 17 | 18 | table 'customers' do 19 | primary_key 'cust_id' 20 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 21 | anonymize 'terms_n_condition', 'age' 22 | end 23 | end 24 | 25 | DataAnon::Utils::SourceDatabase.establish_connection connection_spec 26 | source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id'] 27 | new_rec = source.find(CustomerSample::SAMPLE_DATA[0][:cust_id]) 28 | new_rec['email'].should == 'test+1@gmail.com' 29 | 30 | end 31 | 32 | it 'should skip anonymization of the record if condition in skip is true' do 33 | database 'Customer' do 34 | 
strategy DataAnon::Strategy::Blacklist 35 | source_db connection_spec 36 | 37 | table 'customers' do 38 | skip { |index, record| record['age'] > 18 } 39 | 40 | primary_key 'cust_id' 41 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 42 | anonymize 'terms_n_condition', 'age' 43 | end 44 | end 45 | 46 | DataAnon::Utils::SourceDatabase.establish_connection connection_spec 47 | source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id'] 48 | new_rec = source.find(CustomerSample::SAMPLE_DATA[0][:cust_id]) 49 | new_rec['email'].should_not == 'test+1@gmail.com' 50 | 51 | end 52 | 53 | it 'should continue with anonymization of the record if condition in skip is true' do 54 | database 'Customer' do 55 | strategy DataAnon::Strategy::Blacklist 56 | source_db connection_spec 57 | 58 | table 'customers' do 59 | continue { |index, record| record['age'] > 18 } 60 | 61 | primary_key 'cust_id' 62 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 63 | anonymize 'terms_n_condition', 'age' 64 | end 65 | end 66 | 67 | DataAnon::Utils::SourceDatabase.establish_connection connection_spec 68 | source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id'] 69 | new_rec = source.find(CustomerSample::SAMPLE_DATA[0][:cust_id]) 70 | new_rec['email'].should == 'test+1@gmail.com' 71 | 72 | 73 | 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /spec/acceptance/rdbms_whitelist_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'End 2 End RDBMS Whitelist Acceptance Test using SQLite database' do 4 | 5 | source_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'} 6 | dest_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer-dest.sqlite'} 7 | 8 | before(:each) do 9 | CustomerSample.clean 10 | CustomerSample.create_schema 
source_connection_spec 11 | CustomerSample.insert_records source_connection_spec, CustomerSample::SAMPLE_DATA 12 | 13 | CustomerSample.create_schema dest_connection_spec 14 | end 15 | 16 | it 'should anonymize customer table record ' do 17 | 18 | database 'Customer' do 19 | strategy DataAnon::Strategy::Whitelist 20 | source_db source_connection_spec 21 | destination_db dest_connection_spec 22 | 23 | table 'customers' do 24 | whitelist 'cust_id', 'address', 'zipcode', 'blog_url' 25 | anonymize('first_name').using FieldStrategy::RandomFirstName.new 26 | anonymize('last_name').using FieldStrategy::RandomLastName.new 27 | anonymize('state').using FieldStrategy::SelectFromList.new(['Gujrat','Karnataka']) 28 | anonymize('phone').using FieldStrategy::RandomPhoneNumber.new 29 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 30 | anonymize 'terms_n_condition', 'age', 'longitude' 31 | anonymize('latitude').using FieldStrategy::RandomFloatDelta.new(2.0) 32 | whitelist 'created_at','updated_at' 33 | end 34 | end 35 | 36 | DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec 37 | dest_table = DataAnon::Utils::DestinationTable.create 'customers' 38 | dest_table.count.should == 2 39 | new_rec = dest_table.where('cust_id' => CustomerSample::SAMPLE_DATA[0][:cust_id]).first 40 | new_rec.first_name.should_not be('Sunit') 41 | new_rec.last_name.should_not be('Parekh') 42 | new_rec.birth_date.should_not be(Date.new(1977,7,8)) 43 | new_rec.address.should == 'F 501 Shanti Nagar' 44 | %w(Gujrat Karnataka).should include(new_rec.state) 45 | new_rec.zipcode.should == '411048' 46 | new_rec.phone.should_not be '9923700662' 47 | new_rec.email.should == 'test+1@gmail.com' 48 | [true,false].should include(new_rec.terms_n_condition) 49 | new_rec.age.should be_between(0,100) 50 | new_rec.latitude.should be_between( 38.689060, 42.689060) 51 | new_rec.longitude.should be_between( -84.044636, -64.044636) 52 | 
new_rec.created_at.should == Time.new(2010,10,10) 53 | new_rec.updated_at.should == Time.new(2010,5,5) 54 | end 55 | 56 | describe 'batch_size' do 57 | it 'processes all records in batches' do 58 | database 'Customer' do 59 | strategy DataAnon::Strategy::Whitelist 60 | source_db source_connection_spec 61 | destination_db dest_connection_spec 62 | 63 | table 'customers' do 64 | batch_size 1 65 | whitelist 'first_name' 66 | end 67 | end 68 | 69 | DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec 70 | dest_table = DataAnon::Utils::DestinationTable.create 'customers' 71 | dest_table.count.should == 2 72 | first_rec = dest_table.first 73 | first_rec.first_name.should eq('Sunit') 74 | second_rec = dest_table.second 75 | second_rec.first_name.should eq('Rohit') 76 | end 77 | end 78 | 79 | describe 'limiting' do 80 | it 'returns only last record' do 81 | database 'Customer' do 82 | strategy DataAnon::Strategy::Whitelist 83 | source_db source_connection_spec 84 | destination_db dest_connection_spec 85 | 86 | table 'customers' do 87 | limit 1 88 | whitelist 'cust_id', 'first_name', 'created_at','updated_at' 89 | end 90 | end 91 | 92 | DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec 93 | dest_table = DataAnon::Utils::DestinationTable.create 'customers' 94 | dest_table.count.should == 1 95 | new_rec = dest_table.first 96 | new_rec.first_name.should eq('Rohit') 97 | end 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /spec/acceptance/rdbms_whitelist_with_primary_key_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'End 2 End RDBMS Whitelist Acceptance Test using SQLite database' do 4 | 5 | source_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'} 6 | dest_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer-dest.sqlite'} 7 | 8 | 
before(:each) do 9 | CustomerSample.clean 10 | CustomerSample.create_schema source_connection_spec 11 | CustomerSample.insert_record source_connection_spec, CustomerSample::SAMPLE_DATA[0] 12 | 13 | CustomerSample.create_schema dest_connection_spec 14 | end 15 | 16 | it 'should anonymize customer table record ' do 17 | 18 | database 'Customer' do 19 | strategy DataAnon::Strategy::Whitelist 20 | source_db source_connection_spec 21 | destination_db dest_connection_spec 22 | 23 | table 'customers' do 24 | primary_key 'cust_id' 25 | batch_size 1 26 | 27 | whitelist 'cust_id', 'address', 'zipcode', 'blog_url' 28 | anonymize('first_name').using FieldStrategy::RandomFirstName.new 29 | anonymize('last_name').using FieldStrategy::RandomLastName.new 30 | anonymize('state').using FieldStrategy::SelectFromList.new(['Gujrat','Karnataka']) 31 | anonymize('phone').using FieldStrategy::RandomPhoneNumber.new 32 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com') 33 | anonymize 'terms_n_condition', 'age', 'longitude' 34 | anonymize('latitude').using FieldStrategy::RandomFloatDelta.new(2.0) 35 | end 36 | end 37 | 38 | DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec 39 | dest_table = DataAnon::Utils::DestinationTable.create 'customers' 40 | new_rec = dest_table.where('cust_id' => CustomerSample::SAMPLE_DATA[0][:cust_id]).first 41 | new_rec.first_name.should_not be('Sunit') 42 | new_rec.last_name.should_not be('Parekh') 43 | new_rec.birth_date.should_not be(Date.new(1977,7,8)) 44 | new_rec.address.should == 'F 501 Shanti Nagar' 45 | ['Gujrat','Karnataka'].should include(new_rec.state) 46 | new_rec.zipcode.should == '411048' 47 | new_rec.phone.should_not be '9923700662' 48 | new_rec.email.should == 'test+1@gmail.com' 49 | [true,false].should include(new_rec.terms_n_condition) 50 | new_rec.age.should be_between(0,100) 51 | new_rec.latitude.should be_between( 38.689060, 42.689060) 52 | new_rec.longitude.should 
be_between( -84.044636, -64.044636) 53 | 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /spec/core/fields_missing_strategy_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe DataAnon::Core::FieldsMissingStrategy do 4 | 5 | FMS = DataAnon::Core::FieldsMissingStrategy 6 | 7 | it 'should be able to add field for new table that doesnot exist' do 8 | users = FMS.new('users') 9 | users.missing('confirm_email') 10 | users.fields_missing_strategy.should == ['confirm_email'] 11 | end 12 | 13 | it 'should be able to take care for same field appearing multiple time' do 14 | users = FMS.new('users') 15 | users.missing('confirm_email') 16 | users.missing('confirm_email') 17 | users.fields_missing_strategy.should == ['confirm_email'] 18 | end 19 | 20 | it 'should be able to add multiple fields for table' do 21 | users = FMS.new('users') 22 | users.missing('confirm_email') 23 | users.missing('password_reset') 24 | users.fields_missing_strategy.should == %w(confirm_email password_reset) 25 | end 26 | end -------------------------------------------------------------------------------- /spec/resource/sample.geojson: -------------------------------------------------------------------------------- 1 | {"geometry": {"type": "Point", "coordinates": [-134.412039, 58.30057]}, "type": "Feature", "id": "SG_5xejzYOfDRcyoVHXqvO2hB_58.300570_-134.412039@1293731153", "properties": {"province": "AK", "city": "Juneau", "name": "Purchasing Dept", "tags": ["state"], "country": "US", "classifiers": [{"category": "Government", "type": "Public Place", "subcategory": "Office"}], "phone": "+1 907 465 2250", "href": "http://api.simplegeo.com/1.0/features/SG_5xejzYOfDRcyoVHXqvO2hB_58.300570_-134.412039@1293731153.json", "address": "333 Willoughby Ave", "owner": "simplegeo", "postcode": "99801"}} 2 | 
-------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'rspec' 2 | require "pry" 3 | require 'coveralls' 4 | 5 | Coveralls.wear! 6 | 7 | require 'data-anonymization' 8 | 9 | ENV['show_progress'] = 'false' 10 | 11 | Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f} 12 | 13 | DataAnon::Utils::Logging.logger.level = Logger::WARN 14 | Mongo::Logger.logger.level = Logger::WARN 15 | 16 | RSpec.configure do |config| 17 | config.expect_with :rspec do |c| 18 | c.syntax = [:should, :expect] 19 | end 20 | 21 | config.mock_with :rspec do |c| 22 | c.syntax = [:should, :expect] 23 | end 24 | 25 | config.before(:suite) do 26 | end 27 | 28 | config.before(:each) do 29 | end 30 | 31 | config.after(:suite) do 32 | end 33 | end 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /spec/strategy/field/contact/random_address_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe DataAnon::Strategy::Field::RandomAddress do 4 | 5 | RandomAddress = DataAnon::Strategy::Field::RandomAddress 6 | let(:field) {DataAnon::Core::Field.new('address','1 Infinite Loop',1,nil)} 7 | 8 | describe 'anonymized address should be different from original address' do 9 | let(:anonymized_address) {RandomAddress.region_US.anonymize(field)} 10 | it {anonymized_address.should_not eq('1 Infinite Loop')} 11 | end 12 | end -------------------------------------------------------------------------------- /spec/strategy/field/contact/random_city_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe DataAnon::Strategy::Field::RandomCity do 4 | 5 | RandomCity = DataAnon::Strategy::Field::RandomCity 6 | let(:field) { DataAnon::Core::Field.new('city', 'Atlanta', 
1, nil) } 7 | 8 | describe 'anonymized city should be different from original city' do 9 | let(:anonymized_city) { RandomCity.region_US.anonymize(field) } 10 | 11 | it { anonymized_city.should_not be_nil } 12 | it { anonymized_city.should_not eq("Atlanta") } 13 | end 14 | end -------------------------------------------------------------------------------- /spec/strategy/field/contact/random_phone_number_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomPhoneNumber do 4 | 5 | RandomPhoneNumber = FieldStrategy::RandomPhoneNumber 6 | let(:field) { DataAnon::Core::Field.new('phone_number', "+0 (123) 456-7890", 1, nil) } 7 | 8 | describe 'anonymized phone number preserving the format' do 9 | let(:anonymized_number) { RandomPhoneNumber.new().anonymize(field) } 10 | 11 | it { anonymized_number.should_not equal field.value } 12 | it { anonymized_number.should match /\+\d\ \(\d{3}\)\ \d{3}-\d{4}$/ } 13 | end 14 | 15 | 16 | end -------------------------------------------------------------------------------- /spec/strategy/field/contact/random_province_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe DataAnon::Strategy::Field::RandomProvince do 4 | 5 | RandomProvince = DataAnon::Strategy::Field::RandomProvince 6 | let(:field) { DataAnon::Core::Field.new('province', 'Atlanta', 1, nil) } 7 | 8 | describe 'anonymized province should be different from original province' do 9 | let(:anonymized_province) { RandomProvince.region_US.anonymize(field) } 10 | 11 | it { anonymized_province.should_not be_nil } 12 | it { anonymized_province.should_not eq("Atlanta") } 13 | end 14 | end -------------------------------------------------------------------------------- /spec/strategy/field/contact/random_zipcode_spec.rb: -------------------------------------------------------------------------------- 1 | 
require "spec_helper" 2 | 3 | describe DataAnon::Strategy::Field::RandomZipcode do 4 | 5 | RandomZipcode = DataAnon::Strategy::Field::RandomZipcode 6 | let(:field) { DataAnon::Core::Field.new('zipcode', '12345', 1, nil) } 7 | 8 | describe 'anonymized zipcode should be different from original zipcode' do 9 | let(:anonymized_zipcode) { RandomZipcode.region_US.anonymize(field) } 10 | 11 | it { anonymized_zipcode.should_not be_nil } 12 | it { anonymized_zipcode.should_not eq("12345") } 13 | end 14 | end -------------------------------------------------------------------------------- /spec/strategy/field/datetime/anonymize_date_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::AnonymizeDate do 4 | 5 | AnonymizeDate = FieldStrategy::AnonymizeDate 6 | let(:field) { DataAnon::Core::Field.new('date', Date.new(2011,7,5), 1, nil) } 7 | 8 | describe 'providing true only for month should randomize only the month field' do 9 | 10 | let(:anonymized_time) { AnonymizeDate.only_month.anonymize(field) } 11 | 12 | it { anonymized_time.should be_kind_of Date} 13 | it { anonymized_time.year.should be 2011 } 14 | it { anonymized_time.month.should be_between(1,12)} 15 | it { anonymized_time.day.should be 5 } 16 | end 17 | 18 | describe 'providing true only for date should randomize only the date field' do 19 | 20 | let(:anonymized_time) { AnonymizeDate.only_day.anonymize(field) } 21 | 22 | it { anonymized_time.year.should be 2011 } 23 | it { anonymized_time.month.should be 7} 24 | it { anonymized_time.day.should be_between(1,31) } 25 | end 26 | 27 | end -------------------------------------------------------------------------------- /spec/strategy/field/datetime/anonymize_datetime_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::AnonymizeDateTime do 4 | 5 | AnonymizeDateTime = 
FieldStrategy::AnonymizeDateTime 6 | let(:field) { DataAnon::Core::Field.new('date', DateTime.new(2000,1,1,12,12,12), 1, nil) } 7 | 8 | describe 'providing true only for month should randomize only the month field' do 9 | 10 | let(:anonymized_time) { AnonymizeDateTime.only_month.anonymize(field) } 11 | 12 | it { anonymized_time.should be_kind_of DateTime} 13 | it { anonymized_time.year.should be 2000 } 14 | it { anonymized_time.month.should be_between(1,12)} 15 | it { anonymized_time.day.should be 1 } 16 | it { anonymized_time.hour.should be 12} 17 | it { anonymized_time.min.should be 12} 18 | it { anonymized_time.sec.should be 12} 19 | end 20 | 21 | describe 'providing true only for date should randomize only the date field' do 22 | 23 | let(:anonymized_time) { AnonymizeDateTime.only_day.anonymize(field) } 24 | 25 | it { anonymized_time.year.should be 2000 } 26 | it { anonymized_time.month.should be 1} 27 | it { anonymized_time.day.should be_between(1,31) } 28 | it { anonymized_time.hour.should be 12} 29 | it { anonymized_time.min.should be 12} 30 | it { anonymized_time.sec.should be 12} 31 | end 32 | 33 | describe 'providing true only for hour should randomize only the hour field' do 34 | 35 | let(:anonymized_time) { AnonymizeDateTime.only_hour.anonymize(field) } 36 | 37 | it { anonymized_time.year.should be 2000 } 38 | it { anonymized_time.month.should be 1} 39 | it { anonymized_time.day.should be 1 } 40 | it { anonymized_time.hour.should be_between(1,24)} 41 | it { anonymized_time.min.should be 12} 42 | it { anonymized_time.sec.should be 12} 43 | end 44 | 45 | describe 'providing true only for minute should randomize only the minute field' do 46 | 47 | let(:anonymized_time) { AnonymizeDateTime.only_minute.anonymize(field) } 48 | 49 | it { anonymized_time.year.should be 2000 } 50 | it { anonymized_time.month.should be 1} 51 | it { anonymized_time.day.should be 1 } 52 | it { anonymized_time.hour.should be 12} 53 | it { anonymized_time.min.should be_between(1,60)} 
54 | it { anonymized_time.sec.should be 12} 55 | end 56 | 57 | end -------------------------------------------------------------------------------- /spec/strategy/field/datetime/anonymize_time_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::AnonymizeTime do 4 | 5 | AnonymizeTime = FieldStrategy::AnonymizeTime 6 | let(:field) { DataAnon::Core::Field.new('date', Time.new(2000,1,1,12,12,12), 1, nil) } 7 | 8 | describe 'providing true only for month should randomize only the month field' do 9 | 10 | let(:anonymized_time) { AnonymizeTime.only_month.anonymize(field) } 11 | 12 | it { anonymized_time.should be_kind_of Time} 13 | it { anonymized_time.year.should be 2000 } 14 | it { anonymized_time.month.should be_between(1,12)} 15 | it { anonymized_time.day.should be 1 } 16 | it { anonymized_time.hour.should be 12} 17 | it { anonymized_time.min.should be 12} 18 | it { anonymized_time.sec.should be 12} 19 | end 20 | 21 | describe 'providing true only for date should randomize only the date field' do 22 | 23 | let(:anonymized_time) { AnonymizeTime.only_day.anonymize(field) } 24 | 25 | it { anonymized_time.year.should be 2000 } 26 | it { anonymized_time.month.should be 1} 27 | it { anonymized_time.day.should be_between(1,31) } 28 | it { anonymized_time.hour.should be 12} 29 | it { anonymized_time.min.should be 12} 30 | it { anonymized_time.sec.should be 12} 31 | end 32 | 33 | describe 'providing true only for hour should randomize only the hour field' do 34 | 35 | let(:anonymized_time) { AnonymizeTime.only_hour.anonymize(field) } 36 | 37 | it { anonymized_time.year.should be 2000 } 38 | it { anonymized_time.month.should be 1} 39 | it { anonymized_time.day.should be 1 } 40 | it { anonymized_time.hour.should be_between(1,24)} 41 | it { anonymized_time.min.should be 12} 42 | it { anonymized_time.sec.should be 12} 43 | end 44 | 45 | describe 'providing true only for minute should 
randomize only the minute field' do 46 | 47 | let(:anonymized_time) { AnonymizeTime.only_minute.anonymize(field) } 48 | 49 | it { anonymized_time.year.should be 2000 } 50 | it { anonymized_time.month.should be 1} 51 | it { anonymized_time.day.should be 1 } 52 | it { anonymized_time.hour.should be 12} 53 | it { anonymized_time.min.should be_between(1,60)} 54 | it { anonymized_time.sec.should be 12} 55 | end 56 | 57 | end -------------------------------------------------------------------------------- /spec/strategy/field/datetime/date_delta_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::DateDelta do 4 | 5 | DateDelta = FieldStrategy::DateDelta 6 | let(:field) { DataAnon::Core::Field.new('date', Date.new(2011,4,7), 1, nil) } 7 | 8 | describe 'date should not remain the same' do 9 | 10 | let(:anonymized_value) { DateDelta.new().anonymize(field) } 11 | let(:date_difference) {anonymized_value - field.value} 12 | 13 | it { anonymized_value.should be_kind_of Date} 14 | it { date_difference.should be_between(-10.days, 10.days) } 15 | end 16 | 17 | describe 'date should not change when provided with 0 delta for both date and time' do 18 | 19 | let(:anonymized_date) { DateDelta.new(0).anonymize(field) } 20 | 21 | it {anonymized_date.should == Date.new(2011,4,7) } 22 | 23 | end 24 | 25 | describe 'date should be anonymized within provided delta' do 26 | 27 | let(:anonymized_value) { DateDelta.new(5).anonymize(field) } 28 | let(:date_difference) {anonymized_value - field.value} 29 | 30 | it { date_difference.should be_between(-5.days, 5.days) } 31 | 32 | end 33 | 34 | 35 | end -------------------------------------------------------------------------------- /spec/strategy/field/datetime/date_time_delta_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::DateTimeDelta do 4 | 5 | 
DateTimeDelta = FieldStrategy::DateTimeDelta 6 | let(:field) { DataAnon::Core::Field.new('date', DateTime.new(2000,1,1), 1, nil) } 7 | 8 | describe 'datetime should not remain the same' do 9 | 10 | let(:anonymized_value) { DateTimeDelta.new().anonymize(field) } 11 | let(:date_difference) {anonymized_value.to_i - field.value.to_i} 12 | 13 | it { anonymized_value.should be_kind_of DateTime} 14 | it {date_difference.should_not be 0 } 15 | end 16 | 17 | describe 'datetime should not change when provided with 0 delta for both date and time' do 18 | 19 | let(:anonymized_value) { DateTimeDelta.new(0,0).anonymize(field) } 20 | let(:date_difference) {anonymized_value.to_i - field.value.to_i} 21 | 22 | it {date_difference.should be 0 } 23 | 24 | end 25 | 26 | describe 'date should be anonymized within provided delta' do 27 | 28 | let(:anonymized_value) { DateTimeDelta.new(5,0).anonymize(field) } 29 | let(:date_difference) {anonymized_value.to_i - field.value.to_i} 30 | 31 | it { date_difference.should be_between(-5.days, 5.days) } 32 | 33 | end 34 | 35 | describe 'time should be anonymized within provided delta' do 36 | 37 | let(:anonymized_value) { DateTimeDelta.new(0,10).anonymize(field) } 38 | let(:date_difference) {anonymized_value.to_i - field.value.to_i} 39 | 40 | it { date_difference.should be_between(-10.minutes, 10.minutes)} 41 | end 42 | 43 | 44 | end -------------------------------------------------------------------------------- /spec/strategy/field/datetime/time_delta_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::TimeDelta do 4 | 5 | TimeDelta = FieldStrategy::TimeDelta 6 | let(:field) { DataAnon::Core::Field.new('date', Time.new(2012,10,10,13,20,10), 1, nil) } 7 | 8 | describe 'time should not remain the same' do 9 | 10 | let(:anonymized_value) { TimeDelta.new().anonymize(field) } 11 | let(:date_difference) {anonymized_value.to_i - field.value.to_i} 12 | 13 | 
it { anonymized_value.should be_kind_of Time} 14 | it {date_difference.should_not be 0 } 15 | end 16 | 17 | describe 'time should not change when provided with 0 delta for both date and time' do 18 | 19 | let(:anonymized_value) { TimeDelta.new(0,0).anonymize(field) } 20 | let(:date_difference) {anonymized_value.to_i - field.value.to_i} 21 | 22 | it {date_difference.should be 0 } 23 | 24 | end 25 | 26 | describe 'date should be anonymized within provided delta' do 27 | 28 | let(:anonymized_value) { TimeDelta.new(5,0).anonymize(field) } 29 | let(:date_difference) {anonymized_value.to_i - field.value.to_i} 30 | 31 | it { date_difference.should be_between(-5.days, 5.days) } 32 | 33 | end 34 | 35 | describe 'time should be anonymized within provided delta' do 36 | 37 | let(:anonymized_value) { TimeDelta.new(0,10).anonymize(field) } 38 | let(:date_difference) {anonymized_value.to_i - field.value.to_i} 39 | 40 | it { date_difference.should be_between(-10.minutes, 10.minutes)} 41 | end 42 | 43 | 44 | end -------------------------------------------------------------------------------- /spec/strategy/field/default_anon_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe FieldStrategy::DefaultAnon do 4 | 5 | DefaultAnon = FieldStrategy::DefaultAnon 6 | 7 | describe 'anonymized boolean true value' do 8 | let(:field) {DataAnon::Core::Field.new('boolean_field',true,1,nil)} 9 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)} 10 | 11 | it { [TrueClass,FalseClass].should include(anonymized_value.class) } 12 | end 13 | 14 | describe 'anonymized boolean false value' do 15 | let(:field) {DataAnon::Core::Field.new('boolean_field',false,1,nil)} 16 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)} 17 | 18 | it { [TrueClass,FalseClass].should include(anonymized_value.class) } 19 | end 20 | 21 | describe 'anonymized float value' do 22 | let(:field) 
{DataAnon::Core::Field.new('float_field',2.0,1,nil)} 23 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)} 24 | 25 | it { anonymized_value.should be_kind_of Float } 26 | end 27 | 28 | describe 'anonymized int value' do 29 | let(:field) {DataAnon::Core::Field.new('int_field',2,1,nil)} 30 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)} 31 | 32 | it { anonymized_value.should be_kind_of Integer } 33 | end 34 | 35 | describe 'anonymized bignum value' do 36 | let(:field) {DataAnon::Core::Field.new('int_field',2348723489723847382947,1,nil)} 37 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)} 38 | 39 | it { anonymized_value.should be_kind_of Integer } 40 | end 41 | 42 | describe 'anonymized string value' do 43 | let(:field) {DataAnon::Core::Field.new('string_field','String',1,nil)} 44 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)} 45 | 46 | it { anonymized_value.should be_kind_of String } 47 | end 48 | 49 | end -------------------------------------------------------------------------------- /spec/strategy/field/email/gmail_template_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::GmailTemplate do 4 | 5 | GmailTemplate = FieldStrategy::GmailTemplate 6 | let(:field) { DataAnon::Core::Field.new('email', 'user@company.com', 456, nil) } 7 | 8 | describe 'generated email using default username' do 9 | let(:anonymized_email) { GmailTemplate.new.anonymize(field) } 10 | it { anonymized_email.should eq('someusername+456@gmail.com') } 11 | end 12 | 13 | describe 'generated email using given username' do 14 | let(:anonymized_email) { GmailTemplate.new("fake").anonymize(field) } 15 | it { anonymized_email.should eq('fake+456@gmail.com') } 16 | end 17 | end -------------------------------------------------------------------------------- /spec/strategy/field/email/random_email_spec.rb: 
-------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomEmail do 4 | 5 | RandomEmail = FieldStrategy::RandomEmail 6 | let(:field) {DataAnon::Core::Field.new('email','real@email.com',1,nil)} 7 | 8 | 9 | describe 'anonymized email must be different from original email' do 10 | 11 | let(:anonymized_value) {RandomEmail.new.anonymize(field)} 12 | 13 | it {anonymized_value.should_not equal field.value} 14 | it {anonymized_value.should match '^\S+@\S+\.\S+$'} 15 | 16 | end 17 | 18 | end -------------------------------------------------------------------------------- /spec/strategy/field/email/random_mailinator_email_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomMailinatorEmail do 4 | 5 | RandomMailinatorEmail = FieldStrategy::RandomMailinatorEmail 6 | let(:field) {DataAnon::Core::Field.new('email','user@company.com',1,nil)} 7 | 8 | describe 'anonymized email should not be the same as original email' do 9 | let(:anonymized_email) {RandomMailinatorEmail.new.anonymize(field)} 10 | 11 | it {anonymized_email.should_not equal field.value} 12 | it {anonymized_email.should match '^\S+@\mailinator\.com$'} 13 | end 14 | end -------------------------------------------------------------------------------- /spec/strategy/field/name/random_first_name_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomFirstName do 4 | 5 | RandomFirstName = FieldStrategy::RandomFirstName 6 | let(:field) {DataAnon::Core::Field.new('firstname','fakeFirstName',1,nil)} 7 | 8 | describe 'anonymized name must not be the same as provided name' do 9 | let(:anonymized_value) {RandomFirstName.new().anonymize(field)} 10 | 11 | it {anonymized_value.should_not equal field.value} 12 | end 13 | 14 | end 
-------------------------------------------------------------------------------- /spec/strategy/field/name/random_full_name_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomFullName do 4 | 5 | RandomFullName = FieldStrategy::RandomFullName 6 | 7 | describe 'anonymized name with just single name' do 8 | let(:field) {DataAnon::Core::Field.new('name','Fake',1,nil)} 9 | let(:anonymized_value) {RandomFullName.new().anonymize(field)} 10 | 11 | it {anonymized_value.should_not equal field.value} 12 | end 13 | 14 | describe 'anonymized name should be the same as original' do 15 | let(:field) {DataAnon::Core::Field.new('name','Fake User',1,nil)} 16 | let(:anonymized_value) {RandomFullName.new().anonymize(field)} 17 | 18 | it {anonymized_value.should_not equal field.value} 19 | end 20 | 21 | describe 'anonymized name should have same number of words as original' do 22 | let(:field) {DataAnon::Core::Field.new('name','Fake User Longer Name Test',1,nil)} 23 | let(:anonymized_value) {RandomFullName.new().anonymize(field)} 24 | 25 | it {anonymized_value.split(' ').size.should equal 5} 26 | end 27 | 28 | end -------------------------------------------------------------------------------- /spec/strategy/field/name/random_last_name_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomLastName do 4 | 5 | RandomLastName = FieldStrategy::RandomLastName 6 | let(:field) {DataAnon::Core::Field.new('lastname','fakeLastName',1,nil)} 7 | 8 | describe 'anonymized name must not be the same as provided name' do 9 | let(:anonymized_value) {RandomLastName.new().anonymize(field)} 10 | 11 | it {anonymized_value.should_not equal field.value} 12 | end 13 | 14 | end -------------------------------------------------------------------------------- /spec/strategy/field/name/random_user_name_spec.rb: 
-------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomUserName do 4 | 5 | RandomUserName = FieldStrategy::RandomUserName 6 | let(:field) {DataAnon::Core::Field.new('username','fakeUserName',1,nil)} 7 | 8 | describe 'anonymized user name should not be the same as original user name' do 9 | let(:anonymized_username) {RandomUserName.new.anonymize(field)} 10 | 11 | it {anonymized_username.should_not equal field.value} 12 | it {anonymized_username.length.should be_between(5,10)} 13 | it {anonymized_username.should match '^[a-zA-Z0-9]*$'} 14 | 15 | end 16 | 17 | describe 'different length for username' do 18 | let(:anonymized_username) {RandomUserName.new(15,20).anonymize(field)} 19 | 20 | it {anonymized_username.length.should be_between(15,20)} 21 | end 22 | 23 | end -------------------------------------------------------------------------------- /spec/strategy/field/number/random_big_decimal_delta_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | require 'bigdecimal' 3 | 4 | describe FieldStrategy::RandomBigDecimalDelta do 5 | 6 | RandomBigDecimalDelta = FieldStrategy::RandomBigDecimalDelta 7 | let(:field) {DataAnon::Core::Field.new('decimal_field',BigDecimal("53422342378687687342893.23324"),1,nil)} 8 | 9 | describe 'anonymized big decimal should not be the same as original value' do 10 | let(:anonymized_value) {RandomBigDecimalDelta.new.anonymize(field)} 11 | 12 | it {anonymized_value.should_not equal field.value} 13 | end 14 | 15 | describe 'anonymized value returned should be big decimal' do 16 | let(:anonymized_value) {RandomBigDecimalDelta.new.anonymize(field)} 17 | 18 | it { anonymized_value.should be_kind_of BigDecimal } 19 | end 20 | end -------------------------------------------------------------------------------- /spec/strategy/field/number/random_float_delta_spec.rb: 
-------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomFloatDelta do 4 | 5 | RandomFloatDelta = FieldStrategy::RandomFloatDelta 6 | let(:field) {DataAnon::Core::Field.new('float_field',5.5,1,nil)} 7 | 8 | describe 'anonymized float should not be the same as original value' do 9 | let(:anonymized_value) {RandomFloatDelta.new(5).anonymize(field)} 10 | 11 | it {anonymized_value.should_not equal field.value} 12 | end 13 | 14 | describe 'anonymized value returned should be a float' do 15 | let(:anonymized_value) {RandomFloatDelta.new(5).anonymize(field)} 16 | 17 | it { anonymized_value.should be_kind_of Float } 18 | end 19 | end -------------------------------------------------------------------------------- /spec/strategy/field/number/random_float_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomFloat do 4 | 5 | RandomFloat = FieldStrategy::RandomFloat 6 | let(:field) { DataAnon::Core::Field.new('points', 2.5, 1, nil) } 7 | 8 | describe 'verify age range between 18 and 70' do 9 | 10 | let(:anonymized_int) { RandomFloat.new(2.0, 8.0).anonymize(field) } 11 | 12 | it { anonymized_int.should >= 2.0 } 13 | it { anonymized_int.should <= 8.0 } 14 | 15 | end 16 | 17 | describe 'default range between 0 and 100' do 18 | 19 | let(:anonymized_int) { RandomFloat.new.anonymize(field) } 20 | 21 | it { anonymized_int.should >= 0.0 } 22 | it { anonymized_int.should <= 100.0 } 23 | 24 | end 25 | 26 | 27 | 28 | end -------------------------------------------------------------------------------- /spec/strategy/field/number/random_integer_delta_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomIntegerDelta do 4 | 5 | RandomIntegerDelta = FieldStrategy::RandomIntegerDelta 6 | let(:field) 
{DataAnon::Core::Field.new('integer_field',100,1,nil)} 7 | 8 | describe "anonymized value returned should be an integer" do 9 | let(:anonymized_integer) {RandomIntegerDelta.new(10).anonymize(field)} 10 | 11 | it { anonymized_integer.should be_kind_of Integer } 12 | 13 | end 14 | 15 | describe "anonymized integer should be within delta from original integer" do 16 | let(:anonymized_integer) {RandomIntegerDelta.new(10).anonymize(field)} 17 | 18 | it{anonymized_integer.should be_between(90,110)} 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /spec/strategy/field/number/random_integer_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomInteger do 4 | 5 | RandomInteger = FieldStrategy::RandomInteger 6 | let(:field) { DataAnon::Core::Field.new('age', 25, 1, nil) } 7 | 8 | describe 'verify age range between 18 and 70' do 9 | 10 | let(:anonymized_int) { RandomInteger.new(18, 70).anonymize(field) } 11 | 12 | it { anonymized_int.should >= 18 } 13 | it { anonymized_int.should <= 70 } 14 | 15 | end 16 | 17 | describe 'default range between 0 and 100' do 18 | 19 | let(:anonymized_int) { RandomInteger.new.anonymize(field) } 20 | 21 | it { anonymized_int.should >= 0 } 22 | it { anonymized_int.should <= 100 } 23 | 24 | end 25 | 26 | 27 | 28 | end -------------------------------------------------------------------------------- /spec/strategy/field/random_boolean_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe FieldStrategy::RandomBoolean do 4 | 5 | RandomBoolean = FieldStrategy::RandomBoolean 6 | let(:field) {DataAnon::Core::Field.new('boolean_field',true,1,nil)} 7 | 8 | describe 'anonymized value should be a boolean' do 9 | let(:anonymized_boolean) {RandomBoolean.new.anonymize(field)} 10 | 11 | it { 12 | is_boolean = 
anonymized_boolean.is_a?(TrueClass) || anonymized_boolean.is_a?(FalseClass) 13 | is_boolean.should be true 14 | } 15 | end 16 | end -------------------------------------------------------------------------------- /spec/strategy/field/string/formatted_string_numbers_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::FormattedStringNumber do 4 | 5 | FormattedStringNumber = FieldStrategy::FormattedStringNumber 6 | let(:field) {DataAnon::Core::Field.new('credit_card_number',"1111-2222-3333-4444",1,nil)} 7 | 8 | describe 'anonymized credit card number preserving the format' do 9 | let(:anonymized_number) {FormattedStringNumber.new.anonymize(field)} 10 | 11 | it {anonymized_number.should_not equal field.value} 12 | it { anonymized_number.should match /^\d{4}-\d{4}-\d{4}-\d{4}$/} 13 | end 14 | 15 | end -------------------------------------------------------------------------------- /spec/strategy/field/string/lorem_ipsum_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::LoremIpsum do 4 | 5 | LoremIpsum = FieldStrategy::LoremIpsum 6 | let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) } 7 | 8 | describe 'should return same length value using default text' do 9 | 10 | let(:anonymized_value) { LoremIpsum.new.anonymize(field) } 11 | 12 | it { anonymized_value.length.should_not be('New Delhi') } 13 | it { anonymized_value.length.should == 'New Delhi'.length } 14 | 15 | end 16 | 17 | describe 'should return same length value using set text' do 18 | 19 | let(:anonymized_value) { LoremIpsum.new("Sunit Parekh").anonymize(field) } 20 | 21 | it { anonymized_value.length.should_not be('New Delhi') } 22 | it { anonymized_value.should == 'Sunit Par' } 23 | 24 | end 25 | 26 | 27 | end -------------------------------------------------------------------------------- 
/spec/strategy/field/string/random_formatted_string_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomFormattedString do 4 | 5 | RandomFormattedString = FieldStrategy::RandomFormattedString 6 | 7 | describe 'anonymized credit card number preserving the format' do 8 | let(:field) {DataAnon::Core::Field.new('credit_card_number',"1111-2222-3333-4444",1,nil)} 9 | let(:anonymized_value) {RandomFormattedString.new.anonymize(field)} 10 | 11 | it {anonymized_value.should_not equal field.value} 12 | it { anonymized_value.should match /^\d{4}-\d{4}-\d{4}-\d{4}$/} 13 | end 14 | 15 | describe 'anonymized email preserving the format' do 16 | let(:field) {DataAnon::Core::Field.new('email',"parekh1.sunit@gmail.com",1,nil)} 17 | let(:anonymized_value) {RandomFormattedString.new.anonymize(field)} 18 | 19 | it {anonymized_value.should_not equal field.value} 20 | it { anonymized_value.should match /^[a-z]{6}\d\.[a-z]{5}@[a-z]{5}\.[a-z]{3}$/} 21 | end 22 | 23 | describe 'anonymized string preserving the string case & format' do 24 | let(:field) {DataAnon::Core::Field.new('email',"parekh1.SUNIT@gmail.com",1,nil)} 25 | let(:anonymized_value) {RandomFormattedString.new.anonymize(field)} 26 | 27 | it {anonymized_value.should_not equal field.value} 28 | it { anonymized_value.should match /^[a-z]{6}\d\.[A-Z]{5}@[a-z]{5}\.[a-z]{3}$/} 29 | end 30 | 31 | describe 'anonymized phone# preserving the format' do 32 | let(:field) {DataAnon::Core::Field.new('home_phone',"(020)3423-8013",1,nil)} 33 | let(:anonymized_value) {RandomFormattedString.new.anonymize(field)} 34 | 35 | it {anonymized_value.should_not equal field.value} 36 | it { anonymized_value.should match /^\(\d{3}\)\d{4}-\d{4}$/} 37 | end 38 | 39 | end -------------------------------------------------------------------------------- /spec/strategy/field/string/random_string_spec.rb: 
-------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomString do 4 | 5 | RandomString = FieldStrategy::RandomString 6 | 7 | describe 'anonymized string must not be the same as original string' do 8 | let(:field) {DataAnon::Core::Field.new('string_field','fakeString',1,nil)} 9 | let(:anonymized_string) {RandomString.new.anonymize(field)} 10 | 11 | it {anonymized_string.should_not equal field.value} 12 | it {anonymized_string.length.should equal field.value.length} 13 | end 14 | 15 | describe 'anonymized name should have same number of words as original' do 16 | 17 | let(:field) {DataAnon::Core::Field.new('string_field','Fake Longer String Test',1,nil)} 18 | let(:anonymized_string) {RandomString.new.anonymize(field)} 19 | 20 | it {anonymized_string.split(' ').size.should equal field.value.split(' ').size} 21 | 22 | end 23 | end -------------------------------------------------------------------------------- /spec/strategy/field/string/random_url_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::RandomUrl do 4 | 5 | RandomUrl = FieldStrategy::RandomUrl 6 | 7 | describe 'anonymized url must not be the same as original url' do 8 | let(:url) { 'http://example.org' } 9 | 10 | let(:field) {DataAnon::Core::Field.new('string_field',url,1,nil)} 11 | let(:anonymized_url) {RandomUrl.new.anonymize(field)} 12 | 13 | it {anonymized_url.should_not equal field.value} 14 | it {anonymized_url.should match /http:\/\/[\S]+/} 15 | 16 | context 'with https url' do 17 | let(:url) { 'https://example.org' } 18 | 19 | it {anonymized_url.should_not equal field.value} 20 | it {anonymized_url.should match /https:\/\/[\S]+/} 21 | end 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /spec/strategy/field/string/select_from_database_spec.rb: 
-------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::SelectFromDatabase do 4 | 5 | SelectFromDatabase = FieldStrategy::SelectFromDatabase 6 | let(:field) { DataAnon::Core::Field.new('name', 'Abcd', 1, nil) } 7 | let(:source) { {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'} } 8 | 9 | describe 'more than one values in predefined list' do 10 | 11 | let(:anonymized_value) { SelectFromDatabase.new('MediaType','Name', source).anonymize(field) } 12 | 13 | it { anonymized_value.should_not be('Abcd') } 14 | it { anonymized_value.should_not be_empty } 15 | 16 | end 17 | 18 | end -------------------------------------------------------------------------------- /spec/strategy/field/string/select_from_file_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::SelectFromFile do 4 | 5 | SelectFromFile = FieldStrategy::SelectFromFile 6 | 7 | describe 'anonymized name must not be the same as provided name' do 8 | let(:field) {DataAnon::Core::Field.new('firstname','fakeFirstName',1,nil)} 9 | let(:anonymized_value) {SelectFromFile.new(DataAnon::Utils::Resource.file('first_names.txt')).anonymize(field)} 10 | 11 | it {anonymized_value.should_not equal field.value} 12 | end 13 | 14 | describe 'anonymized multiple values' do 15 | let(:field) {DataAnon::Core::Field.new('firstname',['value1','value2'],1,nil)} 16 | let(:anonymized_values) {SelectFromFile.new(DataAnon::Utils::Resource.file('first_names.txt')).anonymize(field)} 17 | 18 | it {anonymized_values.length.should equal 2} 19 | end 20 | 21 | end -------------------------------------------------------------------------------- /spec/strategy/field/string/select_from_list_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::SelectFromList do 4 | 5 
| SelectFromList = FieldStrategy::SelectFromList 6 | let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) } 7 | 8 | describe 'more than one values in predefined list' do 9 | 10 | let(:states) { ['Maharashtra','Gujrat','Karnataka'] } 11 | let(:anonymized_value) { SelectFromList.new(states).anonymize(field) } 12 | 13 | it { states.should include(anonymized_value) } 14 | 15 | end 16 | 17 | describe 'only one value in list' do 18 | 19 | let(:states) { ['Maharashtra'] } 20 | let(:anonymized_value) { SelectFromList.new(states).anonymize(field) } 21 | 22 | it { anonymized_value.should == 'Maharashtra' } 23 | 24 | end 25 | 26 | describe 'string value' do 27 | 28 | let(:states) { 'Maharashtra' } 29 | let(:anonymized_value) { SelectFromList.new(states).anonymize(field) } 30 | 31 | it { anonymized_value.should == 'Maharashtra' } 32 | 33 | end 34 | 35 | 36 | end -------------------------------------------------------------------------------- /spec/strategy/field/string/string_template_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe FieldStrategy::StringTemplate do 4 | 5 | StringTemplate = FieldStrategy::StringTemplate 6 | let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 3456, nil) } 7 | 8 | describe 'should return same string value as StringTemplate' do 9 | let(:anonymized_value) { StringTemplate.new('Sunit #{row_number} Parekh').anonymize(field) } 10 | it { anonymized_value.should == 'Sunit 3456 Parekh' } 11 | end 12 | 13 | 14 | 15 | end -------------------------------------------------------------------------------- /spec/strategy/field/whitelist_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe FieldStrategy::Whitelist do 4 | 5 | Whitelist = FieldStrategy::Whitelist 6 | 7 | describe 'should return same string value as whitelist' do 8 | let(:field) { 
DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) } 9 | let(:anonymized_value) { Whitelist.new.anonymize(field) } 10 | it { anonymized_value.should == 'New Delhi' } 11 | end 12 | 13 | describe 'should return same date value as whitelist' do 14 | let(:dob) { Time.now } 15 | let(:field) { DataAnon::Core::Field.new('DateOfBirth', dob, 1, nil) } 16 | let(:anonymized_value) { Whitelist.new.anonymize(field) } 17 | it { anonymized_value.should == dob } 18 | end 19 | 20 | 21 | end -------------------------------------------------------------------------------- /spec/strategy/mongodb/anonymize_field_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe DataAnon::Strategy::MongoDB::AnonymizeField do 4 | 5 | 6 | it 'should do callback recursive in case of sub document' do 7 | sub_document = {'key' => 'value'} 8 | field_strategy = {'key' => FieldStrategy::LoremIpsum.new} 9 | anonymization_strategy = double('AnonymizationStrategy') 10 | anonymization_strategy.should_receive(:anonymize_document).with(sub_document,1,field_strategy).and_return({'key' => 'anonymized_value'}) 11 | field = DataAnon::Core::Field.new('sub_document_field', sub_document,1,nil) 12 | anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field, field_strategy,anonymization_strategy) 13 | anonymized_value = anonymize_field.anonymize 14 | anonymized_value['key'].should == 'anonymized_value' 15 | end 16 | 17 | it 'should do callback recursive multiple time in case of array of sub document' do 18 | sub_documents = [{'key' => 'value1'},{'key' => 'value2'}] 19 | field_strategy = {'key' => FieldStrategy::LoremIpsum.new} 20 | anonymization_strategy = double('AnonymizationStrategy') 21 | anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value1'},1,field_strategy).and_return({'key' => 'anonymized_value1'}) 22 | anonymization_strategy.should_receive(:anonymize_document).with({'key' => 
'value2'},1,field_strategy).and_return({'key' => 'anonymized_value2'}) 23 | field = DataAnon::Core::Field.new('sub_document_field', sub_documents,1,nil) 24 | anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field, field_strategy,anonymization_strategy) 25 | anonymized_value = anonymize_field.anonymize 26 | anonymized_value.length.should == 2 27 | anonymized_value[0]['key'].should == 'anonymized_value1' 28 | anonymized_value[1]['key'].should == 'anonymized_value2' 29 | end 30 | 31 | it 'should anonymize array field data type' do 32 | anonymization_strategy = double('AnonymizationStrategy') 33 | anonymization_strategy.should_not_receive(:anonymize_document) 34 | field = DataAnon::Core::Field.new('tags',['tag1','tag2'],1,nil) 35 | anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field,FieldStrategy::SelectFromList.new(['tag4','tag5','tag6','tag7','tag8']),anonymization_strategy) 36 | anonymized_value = anonymize_field.anonymize 37 | anonymized_value.length == 2 38 | ['tag4','tag5','tag6','tag7','tag8'].should include(anonymized_value[0]) 39 | ['tag4','tag5','tag6','tag7','tag8'].should include(anonymized_value[1]) 40 | end 41 | 42 | it 'should anonymize field and return anonymized value using passed strategy' do 43 | anonymization_strategy = double('AnonymizationStrategy') 44 | anonymization_strategy.should_not_receive(:anonymize_document) 45 | field = DataAnon::Core::Field.new('boolean_field',false,1,nil) 46 | anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field,FieldStrategy::RandomBoolean.new,anonymization_strategy) 47 | anonymized_value = anonymize_field.anonymize 48 | [true, false].should include(anonymized_value) 49 | end 50 | 51 | 52 | end -------------------------------------------------------------------------------- /spec/support/customer_sample.rb: -------------------------------------------------------------------------------- 1 | class CustomerSample 2 | 3 | class CreateCustomer < 
ActiveRecord::Migration[7.0] 4 | def up 5 | create_table :customers, :id => false, :force => true do |t| 6 | t.integer :cust_id, :primary => true 7 | t.string :first_name 8 | t.string :last_name 9 | t.date :birth_date 10 | t.string :address 11 | t.string :state 12 | t.string :zipcode 13 | t.string :phone 14 | t.string :email 15 | t.string :blog_url 16 | t.boolean :terms_n_condition 17 | t.integer :age 18 | t.float :latitude 19 | t.float :longitude 20 | 21 | t.timestamps null: false 22 | end 23 | end 24 | end 25 | 26 | def self.clean 27 | system "rm -f tmp/*.sqlite" 28 | system "mkdir -p tmp" 29 | end 30 | 31 | def self.create_schema connection_spec 32 | ActiveRecord::Migration.verbose = false 33 | ActiveRecord::Base.establish_connection connection_spec 34 | CreateCustomer.migrate :up 35 | end 36 | 37 | SAMPLE_DATA = [ 38 | { 39 | :cust_id => 100, :first_name => "Sunit", :last_name => "Parekh", 40 | :birth_date => Date.new(1977,7,8), :address => "F 501 Shanti Nagar", 41 | :state => "Maharastra", :zipcode => "411048", :phone => "9923700662", 42 | :email => "parekh.sunit@gmail.com", :terms_n_condition => true, 43 | :age => 34, :longitude => -74.044636, :latitude => +40.689060, 44 | :created_at => Time.new(2010,10,10), :updated_at => Time.new(2010,5,5) 45 | }, 46 | { 47 | :cust_id => 101, :first_name => "Rohit", :last_name => "Parekh", 48 | :birth_date => Date.new(1977,7,8), :address => "F 501 Shanti Nagar", 49 | :state => "Maharastra", :zipcode => "411048", :phone => "9923700662", 50 | :email => "parekh.sunit@gmail.com", :terms_n_condition => true, 51 | :age => 34, :longitude => -74.044636, :latitude => +40.689060, 52 | :created_at => Time.now, :updated_at => Time.now 53 | } 54 | ] 55 | 56 | def self.insert_record connection_spec, data_hash = SAMPLE_DATA[0] 57 | DataAnon::Utils::TempDatabase.establish_connection connection_spec 58 | source = DataAnon::Utils::BaseTable.create_table DataAnon::Utils::TempDatabase, 'customers', 'cust_id' 59 | cust = source.new data_hash 
60 | cust.cust_id = data_hash[:cust_id] 61 | cust.save! 62 | end 63 | 64 | def self.insert_records connection_spec, data_hash = SAMPLE_DATA 65 | DataAnon::Utils::TempDatabase.establish_connection connection_spec 66 | source = DataAnon::Utils::BaseTable.create_table DataAnon::Utils::TempDatabase, 'customers', 'cust_id' 67 | data_hash.each do |data_row| 68 | cust = source.new data_row 69 | cust.cust_id = data_row[:cust_id] 70 | cust.save! 71 | end 72 | end 73 | end 74 | -------------------------------------------------------------------------------- /spec/utils/database_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'Utils' do 4 | 5 | before(:each) do 6 | source = {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'} 7 | DataAnon::Utils::SourceDatabase.establish_connection source 8 | 9 | destination = {:adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'} 10 | DataAnon::Utils::DestinationDatabase.establish_connection destination 11 | end 12 | 13 | it 'should test the connection to source database' do 14 | album = DataAnon::Utils::SourceTable.create 'Album', ['AlbumId'] 15 | album.count.should > 0 16 | album.all.length > 0 17 | end 18 | 19 | it 'ignores inherited constants when creating a table with matching name' do 20 | conditionals = DataAnon::Utils::SourceTable.create 'Conditionals' 21 | conditionals.count.should == 0 22 | conditionals.all.length == 0 23 | end 24 | 25 | end -------------------------------------------------------------------------------- /spec/utils/geojson_parser_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe "Geo Json Parser" do 4 | 5 | SAMPLE_DATA_FILE_PATH = DataAnon::Utils::Resource.project_home+'spec/resource/sample.geojson' 6 | 7 | describe "parser should return list of addresses when address method is called" do 8 | let(:result_list) 
{DataAnon::Utils::GeojsonParser.address(SAMPLE_DATA_FILE_PATH)} 9 | 10 | it {result_list.length.should be 1} 11 | it {result_list[0].should eq("333 Willoughby Ave")} 12 | end 13 | 14 | describe "parser should return list of zip codes when zipcode method is called" do 15 | let(:result_list) {DataAnon::Utils::GeojsonParser.zipcode(SAMPLE_DATA_FILE_PATH)} 16 | 17 | it {result_list.length.should be 1} 18 | it {result_list[0].should eq("99801")} 19 | 20 | end 21 | 22 | describe "parser should return list of province when province method is called" do 23 | let(:result_list) {DataAnon::Utils::GeojsonParser.province(SAMPLE_DATA_FILE_PATH)} 24 | 25 | it {result_list.length.should be 1} 26 | it {result_list[0].should eq("AK")} 27 | 28 | end 29 | 30 | describe "parser should return list of cities when city method is called" do 31 | let(:result_list) {DataAnon::Utils::GeojsonParser.city(SAMPLE_DATA_FILE_PATH)} 32 | 33 | it {result_list.length.should be 1} 34 | it {result_list[0].should eq("Juneau")} 35 | 36 | end 37 | 38 | end -------------------------------------------------------------------------------- /spec/utils/random_float_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe "Number Utils" do 4 | 5 | describe 'should return same length value using default text' do 6 | 7 | let(:random_float) { DataAnon::Utils::RandomFloat.generate(5,10) } 8 | 9 | it { random_float.should be_between(5,10) } 10 | it { random_float.should be_a_kind_of Float } 11 | end 12 | end -------------------------------------------------------------------------------- /spec/utils/random_int_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe "Number Utils" do 4 | 5 | it "should generate random int between provided range" do 6 | random_int = DataAnon::Utils::RandomInt.generate(5,10) 7 | random_int.should be_between(5,10) 8 | end 9 | end 
-------------------------------------------------------------------------------- /spec/utils/random_string_char_only_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe "String Utils" do 4 | 5 | it "should generate random string of given length" do 6 | DataAnon::Utils::RandomStringCharsOnly.generate(10).length.should equal 10 7 | end 8 | 9 | it "should generate random string only with characters" do 10 | DataAnon::Utils::RandomStringCharsOnly.generate(10).should match /^[a-zA-Z]{10}$/ 11 | end 12 | end -------------------------------------------------------------------------------- /spec/utils/random_string_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe "String Utils" do 4 | 5 | it "should generate random string of given length" do 6 | DataAnon::Utils::RandomString.generate(10).length.should equal 10 7 | end 8 | end -------------------------------------------------------------------------------- /spec/utils/template_helper_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe 'Template Helper' do 4 | 5 | it 'should return a correctly formatted string based on input connection hash for source' do 6 | connection_hash = {adapter: 'test_adapter', port: 5000} 7 | DataAnon::Utils::TemplateHelper.source_connection_specs_rdbms(connection_hash).should eq(":adapter => 'test_adapter', :port => 5000") 8 | end 9 | 10 | it 'should return a correctly formatted string based on input connection hash for destination' do 11 | connection_hash = {adapter: 'test_adapter', port: 5000} 12 | DataAnon::Utils::TemplateHelper.destination_connection_specs_rdbms(connection_hash).should eq(":adapter => '', :port => ''") 13 | end 14 | end --------------------------------------------------------------------------------