├── .coveralls.yml
├── .documentup.json
├── .gitignore
├── .rspec
├── .ruby-gemset
├── .ruby-version
├── .travis.yml
├── Gemfile
├── LICENSE.txt
├── README.md
├── Rakefile
├── bin
└── datanon
├── commands.txt
├── data-anonymization.gemspec
├── examples
├── blacklist_dsl.rb
├── mongodb_blacklist_dsl.rb
├── mongodb_whitelist_dsl.rb
├── whitelist_dsl.rb
└── whitelist_dsl_threads.rb
├── lib
├── core
│ ├── database.rb
│ ├── dsl.rb
│ ├── field.rb
│ ├── fields_missing_strategy.rb
│ └── table_errors.rb
├── data-anonymization.rb
├── parallel
│ └── table.rb
├── strategy
│ ├── base.rb
│ ├── blacklist.rb
│ ├── field
│ │ ├── anonymize_array.rb
│ │ ├── anonymous.rb
│ │ ├── contact
│ │ │ ├── geojson_base.rb
│ │ │ ├── random_address.rb
│ │ │ ├── random_city.rb
│ │ │ ├── random_phone_number.rb
│ │ │ ├── random_province.rb
│ │ │ └── random_zipcode.rb
│ │ ├── datetime
│ │ │ ├── anonymize_date.rb
│ │ │ ├── anonymize_datetime.rb
│ │ │ ├── anonymize_time.rb
│ │ │ ├── date_delta.rb
│ │ │ ├── date_time_delta.rb
│ │ │ └── time_delta.rb
│ │ ├── default_anon.rb
│ │ ├── email
│ │ │ ├── gmail_template.rb
│ │ │ ├── random_email.rb
│ │ │ └── random_mailinator_email.rb
│ │ ├── fields.rb
│ │ ├── name
│ │ │ ├── random_first_name.rb
│ │ │ ├── random_full_name.rb
│ │ │ ├── random_last_name.rb
│ │ │ └── random_user_name.rb
│ │ ├── number
│ │ │ ├── random_big_decimal_delta.rb
│ │ │ ├── random_float.rb
│ │ │ ├── random_float_delta.rb
│ │ │ ├── random_integer.rb
│ │ │ └── random_integer_delta.rb
│ │ ├── random_boolean.rb
│ │ ├── string
│ │ │ ├── formatted_string_numbers.rb
│ │ │ ├── lorem_ipsum.rb
│ │ │ ├── random_formatted_string.rb
│ │ │ ├── random_string.rb
│ │ │ ├── random_url.rb
│ │ │ ├── select_from_database.rb
│ │ │ ├── select_from_file.rb
│ │ │ ├── select_from_list.rb
│ │ │ └── string_template.rb
│ │ └── whitelist.rb
│ ├── mongodb
│ │ ├── anonymize_field.rb
│ │ ├── blacklist.rb
│ │ └── whitelist.rb
│ ├── strategies.rb
│ └── whitelist.rb
├── tasks
│ └── rake_tasks.rb
├── thor
│ ├── helpers
│ │ ├── mongodb_dsl_generator.rb
│ │ └── rdbms_dsl_generator.rb
│ └── templates
│ │ ├── mongodb_whitelist_template.erb
│ │ └── whitelist_template.erb
├── utils
│ ├── database.rb
│ ├── geojson_parser.rb
│ ├── logging.rb
│ ├── parallel_progress_bar.rb
│ ├── progress_bar.rb
│ ├── random_float.rb
│ ├── random_int.rb
│ ├── random_string.rb
│ ├── random_string_chars_only.rb
│ ├── resource.rb
│ └── template_helper.rb
└── version.rb
├── resources
├── UK_addresses.geojson
├── US_addresses.geojson
├── first_names.txt
└── last_names.txt
├── sample-data
├── chinook.sqlite
├── chinook_data.sql
├── chinook_schema.sql
└── mongo
│ ├── plans.json
│ └── users.json
└── spec
├── acceptance
├── mongodb_blacklist_spec.rb
├── mongodb_whitelist_spec.rb
├── rdbms_blacklist_spec.rb
├── rdbms_whitelist_spec.rb
└── rdbms_whitelist_with_primary_key_spec.rb
├── core
└── fields_missing_strategy_spec.rb
├── resource
└── sample.geojson
├── spec_helper.rb
├── strategy
├── field
│ ├── contact
│ │ ├── random_address_spec.rb
│ │ ├── random_city_spec.rb
│ │ ├── random_phone_number_spec.rb
│ │ ├── random_province_spec.rb
│ │ └── random_zipcode_spec.rb
│ ├── datetime
│ │ ├── anonymize_date_spec.rb
│ │ ├── anonymize_datetime_spec.rb
│ │ ├── anonymize_time_spec.rb
│ │ ├── date_delta_spec.rb
│ │ ├── date_time_delta_spec.rb
│ │ └── time_delta_spec.rb
│ ├── default_anon_spec.rb
│ ├── email
│ │ ├── gmail_template_spec.rb
│ │ ├── random_email_spec.rb
│ │ └── random_mailinator_email_spec.rb
│ ├── name
│ │ ├── random_first_name_spec.rb
│ │ ├── random_full_name_spec.rb
│ │ ├── random_last_name_spec.rb
│ │ └── random_user_name_spec.rb
│ ├── number
│ │ ├── random_big_decimal_delta_spec.rb
│ │ ├── random_float_delta_spec.rb
│ │ ├── random_float_spec.rb
│ │ ├── random_integer_delta_spec.rb
│ │ └── random_integer_spec.rb
│ ├── random_boolean_spec.rb
│ ├── string
│ │ ├── formatted_string_numbers_spec.rb
│ │ ├── lorem_ipsum_spec.rb
│ │ ├── random_formatted_string_spec.rb
│ │ ├── random_string_spec.rb
│ │ ├── random_url_spec.rb
│ │ ├── select_from_database_spec.rb
│ │ ├── select_from_file_spec.rb
│ │ ├── select_from_list_spec.rb
│ │ └── string_template_spec.rb
│ └── whitelist_spec.rb
└── mongodb
│ └── anonymize_field_spec.rb
├── support
└── customer_sample.rb
└── utils
├── database_spec.rb
├── geojson_parser_spec.rb
├── random_float_spec.rb
├── random_int_spec.rb
├── random_string_char_only_spec.rb
├── random_string_spec.rb
└── template_helper_spec.rb
/.coveralls.yml:
--------------------------------------------------------------------------------
1 | repo_token: iq3YwsHsWi20COgLsNkJMLsbXin813TLt
--------------------------------------------------------------------------------
/.documentup.json:
--------------------------------------------------------------------------------
1 | {
2 | "repo": "sunitparekh/data-anonymization",
3 | "name": "Data Anonymization",
4 | "theme": "v1",
5 | "color": "#336699",
6 | "travis": true,
7 | "twitter": ["dataanon"],
8 | "google_analytics":"UA-34000799-1"
9 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.gem
2 | *.rbc
3 | .bundle
4 | .config
5 | .yardoc
6 | Gemfile.lock
7 | InstalledFiles
8 | _yardoc
9 | coverage
10 | doc/
11 | lib/bundler/man
12 | pkg
13 | rdoc
14 | spec/reports
15 | test/tmp
16 | test/version_tmp
17 | tmp
18 | .idea
19 | sample-data/chinook-empty.sqlite
20 | tmp
21 | examples/mongodb_whitelist_generated.rb
22 | data
--------------------------------------------------------------------------------
/.rspec:
--------------------------------------------------------------------------------
1 | --color
2 | #--profile
3 | #--format documentation
--------------------------------------------------------------------------------
/.ruby-gemset:
--------------------------------------------------------------------------------
1 | data-anon
2 |
--------------------------------------------------------------------------------
/.ruby-version:
--------------------------------------------------------------------------------
1 | ruby-3.2.1
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: ruby
2 | services:
3 | - mongodb
4 | before_install: gem install bundler
5 | before_script: rake empty_dest
6 | rvm:
7 | - 2.7.7
8 | - 3.0.5
9 | - 3.1.3
10 | - 3.2.1
11 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 |
3 | gemspec
4 |
5 | group :development, :test do
6 | gem 'foreman'
7 | gem 'rake'
8 | gem 'rspec'
9 | gem 'pry'
10 | gem 'sqlite3'
11 | gem 'mongo'
12 | gem 'coveralls', require: false
13 | end
14 |
15 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2012 Sunit Parekh
2 |
3 | MIT License
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining
6 | a copy of this software and associated documentation files (the
7 | "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish,
9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data::Anonymization
2 | Afraid of using production data due to privacy issues? Data Anonymization is a tool that helps you build anonymized production data dumps which you can use for performance testing, security testing, debugging and development.
3 |
4 | ## Java/Kotlin version
5 |
6 | Java/Kotlin version of tool supporting RDBMS databases is available with similar easy to use DSL.
7 | * [Kotlin/Java Data Anonymization Tool](https://github.com/dataanon/data-anon)
8 | * [Kotlin Maven Sample Project](https://github.com/dataanon/dataanon-kotlin-sample)
9 | * [Java Maven Sample Project](https://github.com/dataanon/dataanon-java-sample)
10 |
11 |
12 | ----------------------
13 |
14 |
15 | [
](http://travis-ci.org/sunitparekh/data-anonymization)
16 | [
](https://gemnasium.com/sunitparekh/data-anonymization)
17 | [
](https://codeclimate.com/github/sunitparekh/data-anonymization)
18 | [](https://coveralls.io/r/sunitparekh/data-anonymization?branch=master)
19 | [](http://badge.fury.io/rb/data-anonymization)
20 |
21 | ## Getting started
22 |
23 | Install gem using:
24 |
25 | $ gem install data-anonymization
26 |
27 | Install required database adapter library for active record:
28 |
29 | $ gem install sqlite3
30 |
31 | Create ruby program using data-anonymization DSL as following `my_dsl.rb`:
32 |
33 | ```ruby
34 | require 'data-anonymization'
35 |
36 | database 'DatabaseName' do
37 | strategy DataAnon::Strategy::Blacklist # whitelist (default) or blacklist
38 |
39 | # database config as active record connection hash
40 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
41 |
42 | # User -> table name (case sensitive)
43 | table 'User' do
44 | # id, DateOfBirth, FirstName, LastName, UserName, Password -> table column names (case sensitive)
45 | primary_key 'id' # composite key is also supported
46 | anonymize 'DateOfBirth','FirstName','LastName' # uses default anonymization based on data types
47 | anonymize('UserName').using FieldStrategy::StringTemplate.new('user#{row_number}')
48 | anonymize('Password') { |field| "password" }
49 | end
50 |
51 | ...
52 |
53 | end
54 | ```
55 |
56 | Run using:
57 |
58 | $ ruby my_dsl.rb
59 |
60 | Liked it? please share
61 |
62 | [
](https://twitter.com/share?text=A+simple+ruby+DSL+based+data+anonymization&url=http:%2F%2Fsunitparekh.github.com%2Fdata-anonymization&via=dataanon&hashtags=dataanon)
63 |
64 | ## Examples
65 |
66 | SQLite database
67 |
68 | 1. [Whitelist](https://github.com/sunitparekh/data-anonymization/blob/master/examples/whitelist_dsl.rb)
69 | 2. [Blacklist](https://github.com/sunitparekh/data-anonymization/blob/master/examples/blacklist_dsl.rb)
70 |
71 | MongoDB
72 |
73 | 1. [Whitelist](https://github.com/sunitparekh/data-anonymization/blob/master/examples/mongodb_whitelist_dsl.rb)
74 | 2. [Blacklist](https://github.com/sunitparekh/data-anonymization/blob/master/examples/mongodb_blacklist_dsl.rb)
75 |
76 | Postgresql database having **composite primary key**
77 |
78 | 1. [Whitelist](https://github.com/sunitparekh/test-anonymization/blob/master/dell_whitelist.rb)
79 | 2. [Blacklist](https://github.com/sunitparekh/test-anonymization/blob/master/dell_blacklist.rb)
80 |
81 |
82 | ## Changelog
83 |
84 | #### 0.8.7 (Jan 14, 2022)
85 | 1. Upgraded to rails 7.x
86 |
87 | #### 0.8.5 (May 28, 2020)
88 | 1. Upgraded to rails 6.x
89 |
90 | #### 0.8.1 (Aug 19, 2017)
91 | 1. Multi-threading support added by [stanislav-tyutin](https://github.com/stanislav-tyutin) using Pull Request.
92 | 2. Fixed to work with Ruby 2.4.x, issue with Integer data type
93 |
94 | #### 0.8.0 (Oct 31, 2016)
95 | 1. Upgraded to rails 5.x
96 |
97 | #### 0.7.4 (Oct 29, 2016)
98 | 1. Continue to work on rails 4.x. Minor changes based on feedback.
99 |
100 | #### 0.8.0.rc1 (Sep 5, 2016)
101 | 1. Upgraded to rails 5.0, please report any issue or use case not working.
102 |
103 | #### 0.7.3 (Feb 5, 2016)
104 | 1. Fixed issue with batchsize. Thanks to [Jan Raasch](https://github.com/janraasch) for sending pull request.
105 |
106 | #### 0.7.2 (Sep 26, 2015)
107 | 1. Upgraded MongoDB to latest gem version 2.1.0 and tested with MongoDB 3.x version.
108 | 2. Upgraded gems to latest version
109 | 3. Adding limit functionality - Merge pull request #27 from yanismydj/master
110 |
111 | #### 0.7.1 (Jun 13, 2015)
112 | 1. Fixed issues with empty array data for MongoDB
113 | 2. Added feature to skip and continue records during anonymization; this is useful to apply different strategies for different types of records.
114 |
115 |
116 | #### 0.7.0 (Mar 9, 2015)
117 | 1. Removed downcase from field name since it was causing issues with upper case field names. So now, for databases where case matters, field name case should be maintained.
118 | 2. Upgraded gems to latest version
119 |
120 |
121 | #### 0.6.7 (Jan 17, 2015)
122 | 1. Upgraded gems to latest version including activerecord to 4.2. Please try it out and provide feedback.
123 |
124 |
125 | #### 0.6.6 (Oct 31, 2014)
126 | 1. Upgraded gems to latest version.
127 |
128 |
129 | #### 0.6.5 (Jun 02, 2014)
130 | 1. Upgraded most of the gems to latest version. major change is rails activerecord gem to latest versions 4.1.1, please provide feedback.
131 |
132 | #### 0.6.0 (Dec 09, 2013)
133 | 1. Upgraded rails activerecord gem to latest versions 4.0.2, please provide feedback.
134 |
135 | #### 0.5.5 (Dec 4, 2013)
136 | 1. Upgraded gems to latest versions
137 |
138 |
139 | #### 0.5.2 (Jan 29, 2013)
140 |
141 | 1. Fixed [issue #17](https://github.com/sunitparekh/data-anonymization/issues/17)
142 | 2. Upgraded Thor dependency to latest version
143 |
144 |
145 | #### 0.5.2 (Jan 20, 2013)
146 |
147 | 1. Upgraded all gem to latest and greatest including Rails activerecord and activesupport.
148 |
149 | #### 0.5.1 (Oct 26, 2012)
150 |
151 | 1. Minor fixes release, no major functionality or feature added.
152 |
153 | Please see the [Github 0.5.1 milestone page](https://github.com/sunitparekh/data-anonymization/issues?milestone=3&state=open) for more details on changes/fixes in release 0.5.1
154 |
155 | #### 0.5.0 (Sep 28, 2012)
156 |
157 | Major changes:
158 |
159 | 1. MongoDB support
160 | 2. Command line utility to generate whitelist DSL for RDBMS & MongoDB (reduces pain for writing whitelist dsl)
161 | 3. Added support for reporting fields missing mapping in case of whitelist
162 | 4. Errors reported at the end of process. Job doesn't fail for a single error; it fails if more than 100 records failed during anonymization.
163 |
164 |
165 | Please see the [Github 0.5.0 milestone page](https://github.com/sunitparekh/data-anonymization/issues?milestone=2&state=open) for more details on changes/fixes in release 0.5.0
166 |
167 | #### 0.3.0 (Sep 4, 2012)
168 |
169 | Major changes:
170 |
171 | 1. Added support for Parallel table execution
172 | 2. Change in default String strategy from LoremIpsum to RandomString based on end user feedback.
173 | 3. Fixed issue with table column name 'type' as this is default name for STI in activerecord.
174 |
175 | Please see the [Github 0.3.0 milestone page](https://github.com/sunitparekh/data-anonymization/issues?milestone=1&state=closed) for more details on changes/fixes in release 0.3.0
176 |
177 | #### 0.2.0 (August 16, 2012)
178 |
179 | 1. Added the progress bar using 'powerbar' gem. Which also shows the ETA for each table.
180 | 2. Added More strategies
181 | 3. Fixed default anonymization strategies for boolean and integer values
182 | 4. Added support for composite primary key
183 |
184 | #### 0.1.2 (August 14, 2012)
185 |
186 | 1. First initial release
187 |
188 | ## Roadmap
189 |
190 | MVP done. Fix defects and support queries, suggestions, enhancements logged in Github issues :-)
191 |
192 | ## Share feedback
193 |
194 | Please use Github [issues](https://github.com/sunitparekh/data-anonymization/issues) to share feedback, feature suggestions and report issues.
195 |
196 | ## What is data anonymization?
197 |
198 | For almost all projects there is a need for production data dump in order to run performance tests, rehearse production releases and debug production issues.
199 | However, getting production data and using it is not feasible due to multiple reasons, primary being privacy concerns for user data. And thus the need for data anonymization.
200 | This tool helps you to get anonymized production data dump using either Blacklist or Whitelist strategies.
201 |
202 | Read more about [data anonymization here](http://sunitspace.blogspot.in/2012/09/data-anonymization.html)
203 |
204 | ## Anonymization Strategies
205 |
206 | ### Blacklist
207 | This approach essentially leaves all fields unchanged with the exception of those specified by the user, which are scrambled/anonymized (hence the name blacklist).
208 | For `Blacklist`, create a copy of the prod database and choose the fields to be anonymized, e.g. username, password, email, name, geo location etc., based on user specification. Most of the fields have different rules, e.g. password should be set to the same value for all users, email needs to be valid.
209 |
210 | The problem with this approach is that when new fields are added they will not be anonymized by default. Human error in omitting users personal data could be damaging.
211 |
212 | ```ruby
213 | database 'DatabaseName' do
214 | strategy DataAnon::Strategy::Blacklist
215 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
216 | ...
217 | end
218 | ```
219 |
220 | ### Whitelist
221 | This approach, by default, scrambles/anonymizes all fields except a list of fields which are allowed to be copied as is. Hence the name whitelist.
222 | By default all data needs to be anonymized. So from production database data is sanitized record by record and inserted as anonymized data into destination database. Source database needs to be readonly.
223 | All fields would be anonymized using default anonymization strategy which is based on the datatype, unless a special anonymization strategy is specified. For instance special strategies could be used for emails, passwords, usernames etc.
224 | A whitelisted field implies that it's okay to copy the data as is and anonymization isn't required.
225 | This way any new field will be anonymized by default and if we need them as is, add it to the whitelist explicitly. This prevents any human error and protects sensitive information.
226 |
227 | ```ruby
228 | database 'DatabaseName' do
229 | strategy DataAnon::Strategy::Whitelist
230 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
231 | destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
232 | ...
233 | end
234 | ```
235 |
236 | Read more about [blacklist and whitelist here](http://sunitspace.blogspot.in/2012/09/data-anonymization-blacklist-whitelist.html)
237 |
238 |
239 | ## Tips
240 |
241 | 1. In Whitelist approach make source database connection READONLY.
242 | 2. Change [default field strategies](#default-field-strategies) to avoid using same strategy again and again in your DSL.
243 | 3. To run anonymization in parallel at Table level, provided no FK constraint on tables use DataAnon::Parallel::Table strategy
244 | 4. For large tables, set 'batch_size' to load rows in batches; it will use RoR's batch mode processing. Check out the [example](https://github.com/sunitparekh/data-anonymization/blob/master/examples/whitelist_dsl.rb) on how to use batch processing.
245 | 5. Make sure to give proper case for fields and table names.
246 | 6. Use skip and continue to apply different strategies for records.
247 | 7. Use 'limit' to limit the number of rows that will be imported in whitelist
248 | 8. RDBMS databases utilizing schemas can be specified via `schema_search_path`: `source_db { ... schema_search_path: 'public,my_special_schema' }`
249 |
250 | ## DSL Generation
251 |
252 | We provide a command line tool to generate whitelist scripts for RDBMS and NoSQL databases. The user needs to supply the connection details to the database and a script is generated by analyzing the schema. Below are examples of how to use the tool to generate the scripts for RDBMS and NoSQL datastores
253 |
254 | When you install the data-anonymization tool, the **datanon** command becomes available on the terminal. If you type **datanon --help** and execute it, you should see the output below
255 |
256 | ```
257 | Tasks:
258 |
259 | datanon generate_mongo_dsl -d, --database=DATABASE -h, --host=HOST # Generates a base anonymization script(whitelist strategy) for a Mongo DB using the database schema
260 | datanon generate_rdbms_dsl -a, --adapter=ADAPTER -d, --database=DATABASE -h, --host=HOST # Generates a base anonymization script(whitelist strategy) for a RDBMS database using the database schema
261 | datanon help [TASK] # Describe available tasks or one specific task
262 |
263 | ```
264 |
265 | ### RDBMS whitelist generation
266 |
267 | The gem uses ActiveRecord(AR) abstraction to connect to relational databases. You can generate a whitelist script in seconds for any relational database supported by Active Record. To do so use the following command
268 |
269 | ```
270 | datanon generate_rdbms_dsl [options]
271 |
272 | ```
273 |
274 | The options available are :
275 |
276 | 1. adapter(-a) : The activerecord adapter to use to connect to the database (eg. mysql2, postgresql)
277 | 2. host(-h) : DB host name or IP address
278 | 3. database(-d) : The name of the database to generate the whitelist script for
279 | 4. username(-u) : Username for DB authentication
280 | 5. password(-w) : Password for DB authentication
281 | 6. port(-p) : The port the database service is running on. Default port provided by AR will be used if nothing is specified.
282 |
283 | The adapter, host and database options are mandatory. The others are optional.
284 |
285 | A few examples of the command are shown below
286 |
287 | ```
288 | datanon generate_rdbms_dsl -a mysql2 -h db.host.com -p 3306 -d production_db -u root -w password
289 |
290 | datanon generate_rdbms_dsl -a postgresql -h 123.456.7.8 -d production_db
291 |
292 | ```
293 |
294 | The relevant db gems must be installed so that AR has the adapters required to establish the connection to the databases. The script generates a file named **rdbms_whitelist_generated.rb** in the same location as the project.
295 |
296 | ### MongoDB whitelist generation
297 |
298 | Similar to the relational databases, a whitelist script for mongo db can be generated by analysing the database structure
299 |
300 | ```
301 | datanon generate_mongo_dsl [options]
302 |
303 | ```
304 |
305 | The options available are :
306 |
307 | 1. host(-h) : DB host name or IP address
308 | 2. database(-d) : The name of the database to generate the whitelist script for
309 | 3. username(-u) : Username for DB authentication
310 | 4. password(-w) : Password for DB authentication
311 | 5. port(-p) : The port the database service is running on.
312 | 6. whitelist patterns(-r): A regex expression which can be used to match records in the database to list as whitelisted fields in the generated script.
313 |
314 | The host and database options are mandatory. The others are optional.
315 |
316 | A few examples of the command are shown below
317 |
318 | ```
319 | datanon generate_mongo_dsl -h db.host.com -d production_db -u root -w password
320 |
321 | datanon generate_mongo_dsl -h 123.456.7.8 -d production_db
322 |
323 | ```
324 |
325 | The **mongo** gem is required in order to install the mongo db drivers. The script generates a file named **mongodb_whitelist_generated.rb** in the same location as the project.
326 |
327 |
328 |
329 | ## Running in Parallel
330 | Currently provides capability of running anonymization in parallel at table level provided no FK constraints on tables.
331 | It uses [Parallel gem](https://github.com/grosser/parallel) provided by Michael Grosser.
332 | By default it starts multiple parallel ruby processes processing table one by one.
333 |
334 | ```ruby
335 | database 'DellStore' do
336 | strategy DataAnon::Strategy::Whitelist
337 | execution_strategy DataAnon::Parallel::Table # by default sequential table processing
338 | ...
339 | end
340 | ```
341 |
342 |
343 | ## DataAnon::Core::Field
344 | The object that gets passed along with the field strategies.
345 |
346 | has following attribute accessor
347 |
348 | - `name` current field/column name
349 | - `value` current field/column value
350 | - `row_number` current row number
351 | - `ar_record` active record of the current row under processing
352 |
353 | ## Field Strategies
354 |
355 |
356 |
357 |
358 | Content |
359 | Name |
360 | Description |
361 |
362 |
363 | Text |
364 | LoremIpsum |
365 | Generates a random Lorem Ipsum String |
366 |
367 |
368 | Text |
369 | RandomString |
370 | Generates a random string of equal length |
371 |
372 |
373 | Text |
374 | StringTemplate |
375 | Generates a string based on provided template |
376 |
377 |
378 | Text |
379 | SelectFromList |
380 | Randomly selects a string from a provided list |
381 |
382 |
383 | Text |
384 | SelectFromFile |
385 | Randomly selects a string from a provided file |
386 |
387 |
388 | Text |
389 | FormattedStringNumber |
390 | Randomize digits in a string while maintaining the format |
391 |
392 |
393 | Text |
394 | SelectFromDatabase |
395 | Selects randomly from the result of a query on a database |
396 |
397 |
398 | Text |
399 | RandomUrl |
400 | Anonymizes a URL while maintaining the structure |
401 |
402 |
403 |
404 | Content |
405 | Name |
406 | Description |
407 |
408 |
409 | Number |
410 | RandomInteger |
411 | Generates a random integer between provided limits (default 0 to 100) |
412 |
413 |
414 | Number |
415 | RandomIntegerDelta |
416 | Generates a random integer within -delta and delta of original integer |
417 |
418 |
419 | Number |
420 | RandomFloat |
421 | Generates a random float between provided limits (default 0.0 to 100.0) |
422 |
423 |
424 | Number |
425 | RandomFloatDelta |
426 | Generates a random float within -delta and delta of original float |
427 |
428 |
429 | Number |
430 | RandomBigDecimalDelta |
431 | Similar to previous but creates a big decimal object |
432 |
433 |
434 |
435 | Content |
436 | Name |
437 | Description |
438 |
439 |
440 | Address |
441 | RandomAddress |
442 | Randomly selects an address from a geojson flat file [Default US address] |
443 |
444 |
445 | City |
446 | RandomCity |
447 | Similar to address, picks a random city from a geojson flat file [Default US cities] |
448 |
449 |
450 | Province |
451 | RandomProvince |
452 | Similar to address, picks a random province from a geojson flat file [Default US provinces] |
453 |
454 |
455 | Zip code |
456 | RandomZipcode |
457 | Similar to address, picks a random zipcode from a geojson flat file [Default US zipcodes] |
458 |
459 |
460 | Phone number |
461 | RandomPhoneNumber |
462 | Randomizes a phone number while preserving locale specific formatting |
463 |
464 |
465 |
466 | Content |
467 | Name |
468 | Description |
469 |
470 |
471 | DateTime |
472 | AnonymizeDateTime |
473 | Anonymizes each field (except year and seconds) within natural range of the field depending on true/false flag provided |
474 |
475 |
476 | Time |
477 | AnonymizeTime |
478 | Exactly similar to above except returned object is of type 'Time' |
479 |
480 |
481 | Date |
482 | AnonymizeDate |
483 | Anonymizes day and month within natural ranges based on true/false flag |
484 |
485 |
486 | DateTimeDelta |
487 | DateTimeDelta |
488 | Shifts data randomly within given range. Default shifts date within 10 days + or - and shifts time within 30 minutes. |
489 |
490 |
491 | TimeDelta |
492 | TimeDelta |
493 | Exactly similar to above except returned object is of type 'Time' |
494 |
495 |
496 | DateDelta |
497 | DateDelta |
498 | Shifts date randomly within given delta range. Default shifts date within 10 days + or - |
499 |
500 |
501 |
502 | Content |
503 | Name |
504 | Description |
505 |
506 |
507 | Email |
508 | RandomEmail |
509 | Generates email randomly using the given HOSTNAME and TLD. |
510 |
511 |
512 | Email |
513 | GmailTemplate |
514 | Generates a valid unique gmail address by taking advantage of the gmail + strategy |
515 |
516 |
517 | Email |
518 | RandomMailinatorEmail |
519 | Generates random email using mailinator hostname. |
520 |
521 |
522 |
523 | Content |
524 | Name |
525 | Description |
526 |
527 |
528 | First name |
529 | RandomFirstName |
530 | Randomly picks up first name from the predefined list in the file. Default file is part of the gem. |
531 |
532 |
533 | Last name |
534 | RandomLastName |
535 | Randomly picks up last name from the predefined list in the file. Default file is part of the gem. |
536 |
537 |
538 | Full Name |
539 | RandomFullName |
540 | Generates full name using the RandomFirstName and RandomLastName strategies. |
541 |
542 |
543 | User name |
544 | RandomUserName |
545 | Generates random user name of same length as original user name. |
546 |
547 |
548 |
549 |
550 | ## Write your own field strategies
551 | field parameter in following code is [DataAnon::Core::Field](#dataanon-core-field)
552 |
553 | ```ruby
554 | class MyFieldStrategy
555 |
556 | # method anonymize is what is required
557 | def anonymize field
558 | # write your code here
559 | end
560 |
561 | end
562 | ```
563 |
564 | write your own anonymous field strategies within DSL,
565 |
566 | ```ruby
567 | table 'User' do
568 | anonymize('Password') { |field| "password" }
569 | anonymize('email') do |field|
570 | "test+#{field.row_number}@gmail.com"
571 | end
572 | end
573 | ```
574 |
575 | ## Default field strategies
576 |
577 | ```ruby
578 | DEFAULT_STRATEGIES = {:string => FieldStrategy::RandomString.new,
579 | :fixnum => FieldStrategy::RandomIntegerDelta.new(5),
580 | :bignum => FieldStrategy::RandomIntegerDelta.new(5000),
581 | :float => FieldStrategy::RandomFloatDelta.new(5.0),
582 | :bigdecimal => FieldStrategy::RandomBigDecimalDelta.new(500.0),
583 | :datetime => FieldStrategy::DateTimeDelta.new,
584 | :time => FieldStrategy::TimeDelta.new,
585 | :date => FieldStrategy::DateDelta.new,
586 | :trueclass => FieldStrategy::RandomBoolean.new,
587 | :falseclass => FieldStrategy::RandomBoolean.new
588 | }
589 | ```
590 |
591 | Default field strategies can be overridden; this can also be used to provide a default strategy for a missing data type.
592 |
593 | ```ruby
594 | database 'Chinook' do
595 | ...
596 | default_field_strategies :string => FieldStrategy::RandomString.new
597 | ...
598 | end
599 | ```
600 |
601 | ## Logging
602 |
603 | How do I switch off the progress bar?
604 |
605 | ```ruby
606 | # add following line in your ruby file
607 | ENV['show_progress'] = 'false'
608 | ```
609 |
610 | `Logger` provides debug level messages including database queries of active record.
611 |
612 | ```ruby
613 | DataAnon::Utils::Logging.logger.level = Logger::INFO
614 | ```
615 |
616 | ## Skip and Continue records
617 |
618 | *Skip* is used to skip records during anonymization when the condition returns true. These records are ignored:
619 | in blacklist they remain as they are in the database, and in case of whitelist these records will not be copied to the destination database.
620 |
621 | ```ruby
622 | table 'customers' do
623 | skip { |index, record| record['age'] < 18 }
624 |
625 | primary_key 'cust_id'
626 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
627 | anonymize 'terms_n_condition', 'age'
628 | end
629 | ```
630 |
631 |
632 | *Continue* is exactly the opposite of Skip, and it continues with anonymization only if the given condition returns true.
633 | In case of blacklist records are anonymized for matching conditions and for whitelist records are anonymized and copied
634 | to new database for matching conditions.
635 |
636 | ```ruby
637 | table 'customers' do
638 | continue { |index, record| record['age'] > 18 }
639 |
640 | primary_key 'cust_id'
641 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
642 | anonymize 'terms_n_condition', 'age'
643 | end
644 | ```
645 |
646 |
647 | ## Want to contribute?
648 |
649 | 1. Fork it
650 | 2. Create your feature branch (`git checkout -b my-new-feature`)
651 | 3. Commit your changes (`git commit -am 'Add some feature'`)
652 | 4. Push to the branch (`git push origin my-new-feature`)
653 | 5. Create new Pull Request
654 |
655 | ## License
656 |
657 | [MIT License](https://github.com/sunitparekh/data-anonymization/blob/master/LICENSE.txt)
658 |
659 | ## Credits
660 |
661 | - [ThoughtWorks Inc](http://www.thoughtworks.com), for allowing us to build this tool and make it open source.
662 | - [Panda](https://twitter.com/sarbashrestha) for reviewing the documentation.
663 | - [Dan Abel](http://www.linkedin.com/pub/dan-abel/0/61b/9b0) for introducing me to Blacklist and Whitelist approach for data anonymization.
664 | - [Chirag Doshi](https://twitter.com/chiragsdoshi) for encouraging me to get this done.
665 | - [Aditya Karle](https://twitter.com/adityakarle) for the Logo. (Coming Soon...)
666 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require 'bundler'
2 | Bundler.setup(:default, :test)
3 | require 'rspec/core/rake_task'
4 | require 'tasks/rake_tasks'
5 |
6 | Bundler::GemHelper.install_tasks
7 | RSpec::Core::RakeTask.new(:spec)
8 | DataAnonymization::RakeTasks.new
9 |
10 | task :default => :spec
11 |
--------------------------------------------------------------------------------
/bin/datanon:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | # encoding: UTF-8
3 |
4 | require 'thor'
5 | require 'data-anonymization'
6 |
# Thor command-line interface that generates starter whitelist anonymization
# scripts, one command for RDBMS sources and one for MongoDB sources.
class AnonymizationCLI < Thor

  include Thor::Actions

  desc "generate_rdbms_dsl", "Generates a base anonymization script(whitelist strategy) for a RDBMS database using the database schema"

  # Collects the connection options into a symbol-keyed hash, creates the
  # output file and hands the hash to the RDBMS DSL generator.
  def generate_rdbms_dsl
    configuration_hash = %w[adapter host port database username password].to_h do |key|
      [key.to_sym, options[key]]
    end

    create_file "rdbms_whitelist_generated.rb"
    DataAnon::ThorHelpers::RDBMSDSLGenerator.new.generate_whitelist_script(configuration_hash)
  end

  method_option :adapter, required: true, aliases: "-a", desc: "Activerecord database adapter to be used [required]", for: :generate_rdbms_dsl
  method_option :host, required: true, aliases: "-h", desc: "Source Database host [required]", for: :generate_rdbms_dsl
  method_option :database, required: true, aliases: "-d", desc: "Database name [required]", for: :generate_rdbms_dsl
  method_option :port, aliases: "-p", desc: "Port to connect to. If not provided default port provided by AR will be used", for: :generate_rdbms_dsl
  method_option :username, aliases: "-u", desc: "Username", for: :generate_rdbms_dsl
  method_option :password, aliases: "-w", desc: "Password", for: :generate_rdbms_dsl

  desc "generate_mongo_dsl", "Generates a base anonymization script(whitelist strategy) for a Mongo DB using the database schema"

  # Collects the connection options into a symbol-keyed hash, creates the
  # output file and runs the MongoDB DSL generator with the whitelist patterns.
  def generate_mongo_dsl
    configuration_hash = %w[host port database username password].to_h do |key|
      [key.to_sym, options[key]]
    end

    create_file "mongodb_whitelist_generated.rb"
    DataAnon::ThorHelpers::MongoDBDSLGenerator.new(configuration_hash, options["whitelist_patterns"]).generate
  end

  method_option :host, required: true, aliases: "-h", desc: "Source Database host [required]", for: :generate_mongo_dsl
  method_option :database, required: true, aliases: "-d", desc: "Database name [required]", for: :generate_mongo_dsl
  method_option :port, aliases: "-p", desc: "Port to connect to. If not provided default port will be used", for: :generate_mongo_dsl
  method_option :username, aliases: "-u", desc: "Username", for: :generate_mongo_dsl
  method_option :password, aliases: "-w", desc: "Password", for: :generate_mongo_dsl
  method_option :whitelist_patterns, aliases: "-r", desc: "Whitelist Patterns", for: :generate_mongo_dsl

end
56 |
57 | AnonymizationCLI.start
58 |
--------------------------------------------------------------------------------
/commands.txt:
--------------------------------------------------------------------------------
1 | rbenv exec bundle update
2 | rbenv exec bundle outdated
3 |
4 | rbenv exec bundle exec rake
5 |
--------------------------------------------------------------------------------
/data-anonymization.gemspec:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | lib = File.expand_path('../lib', __FILE__)
3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4 | require 'version'
5 |
6 | Gem::Specification.new do |gem|
7 | gem.name = 'data-anonymization'
8 | gem.version = DataAnonymization::VERSION
9 | gem.authors = ['Sunit Parekh', 'Anand Agrawal', 'Satyam Agarwala']
10 | gem.email = %w(parekh.sunit@gmail.com anand.agrawal84@gmail.com satyamag@gmail.com)
11 | gem.description = %q{Data anonymization tool for RDBMS and MongoDB databases}
12 | gem.summary = %q{Tool to create anonymized production data dump to use for performance and testing environments.}
13 | gem.homepage = 'http://sunitparekh.github.com/data-anonymization'
14 | gem.license = 'MIT'
15 |
16 |
17 | gem.files = `git ls-files`.split($/).select { |f| !f.match(/^sample-data/) }
18 | gem.executables = 'datanon'
19 | gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
20 | gem.require_paths = ['lib']
21 |
22 | gem.add_dependency('activerecord', '~> 7.0')
23 | gem.add_dependency('activesupport', '~> 7.0')
24 | gem.add_dependency('composite_primary_keys', '~> 14.0')
25 | gem.add_dependency('parallel', '~> 1.21')
26 | gem.add_dependency('powerbar', '~> 2.0')
27 | gem.add_dependency('rgeo', '~> 2.4.0')
28 | gem.add_dependency('rgeo-geojson', '~> 2.1.1')
29 | gem.add_dependency('thor', '~> 1.2.1')
30 | end
31 |
--------------------------------------------------------------------------------
/examples/blacklist_dsl.rb:
--------------------------------------------------------------------------------
1 | system 'bundle exec ruby examples/whitelist_dsl.rb'
2 |
3 | require 'data-anonymization'
4 |
5 | DataAnon::Utils::Logging.logger.level = Logger::INFO
6 |
7 | database 'Chinook' do
8 | strategy DataAnon::Strategy::Blacklist
9 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
10 |
11 | table 'Employee' do
12 | primary_key 'EmployeeId'
13 | anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
14 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new
15 | anonymize('LastName').using FieldStrategy::RandomLastName.new
16 | anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
17 | anonymize('Address').using FieldStrategy::RandomAddress.region_US
18 | anonymize('City').using FieldStrategy::RandomCity.region_US
19 | anonymize('State').using FieldStrategy::RandomProvince.region_US
20 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
21 | anonymize('Country') {|field| 'USA'}
22 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
23 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
24 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
25 | end
26 |
27 | table 'Customer' do
28 | primary_key 'CustomerId'
29 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
30 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new
31 | anonymize('LastName').using FieldStrategy::RandomLastName.new
32 | anonymize('Address').using FieldStrategy::RandomAddress.region_US
33 | anonymize('City').using FieldStrategy::RandomCity.region_US
34 | anonymize('State').using FieldStrategy::RandomProvince.region_US
35 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
36 | anonymize('Country') {|field| 'USA'}
37 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
38 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
39 | end
40 |
41 | end
42 |
43 |
--------------------------------------------------------------------------------
/examples/mongodb_blacklist_dsl.rb:
--------------------------------------------------------------------------------
1 | require 'data-anonymization'
2 | require 'mongo'
3 |
4 | DataAnon::Utils::Logging.logger.level = Logger::INFO
5 | Mongo::Logger.logger.level = Logger::WARN
6 |
7 | Mongo::Client.new('mongodb://localhost/test').database.drop
8 | system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
9 | system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
10 |
11 |
12 | database 'test' do
13 | strategy DataAnon::Strategy::MongoDB::Blacklist
14 | source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
15 |
16 | collection 'users' do
17 | anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
18 | anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
19 | anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
20 | anonymize('password') { |field| 'password'}
21 | anonymize('first_name').using FieldStrategy::RandomFirstName.new
22 | anonymize('last_name').using FieldStrategy::RandomLastName.new
23 | end
24 |
25 | collection 'plans' do
26 | anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(%w(Free Team Business Paid)))
27 | anonymize 'public_sharing','photo_sharing'
28 |
29 | document 'features' do
30 | anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
31 |
32 | document 'users' do
33 | anonymize 'max', 'additional'
34 | end
35 | end
36 |
37 | end
38 |
39 | end
40 |
41 |
--------------------------------------------------------------------------------
/examples/mongodb_whitelist_dsl.rb:
--------------------------------------------------------------------------------
1 | require 'data-anonymization'
2 | require 'mongo'
3 |
4 | DataAnon::Utils::Logging.logger.level = Logger::INFO
5 | Mongo::Logger.logger.level = Logger::WARN
6 |
7 |
8 | Mongo::Client.new('mongodb://localhost/test').database.drop
9 | Mongo::Client.new('mongodb://localhost/dest').database.drop
10 | system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
11 | system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
12 |
13 | database 'test' do
14 | strategy DataAnon::Strategy::MongoDB::Whitelist
15 | source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
16 | destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest'
17 |
18 | collection 'users' do
19 | whitelist '_id','failed_attempts','updated_at'
20 | anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
21 | anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
22 | anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
23 | anonymize('password') { |field| 'password'}
24 | anonymize('first_name').using FieldStrategy::RandomFirstName.new
25 | anonymize('last_name').using FieldStrategy::RandomLastName.new
26 | anonymize 'password_reset_answer','password_reset_question'
27 | end
28 |
29 | collection 'plans' do
30 | whitelist '_id', 'name','term', 'created_at'
31 | anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(%w(Free Team Business Paid)))
32 | anonymize 'public_sharing','photo_sharing'
33 |
34 | collection 'features' do
35 | anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
36 | whitelist 'type'
37 |
38 | document 'users' do
39 | anonymize 'max', 'additional'
40 | end
41 | end
42 |
43 | end
44 |
45 | end
46 |
47 |
--------------------------------------------------------------------------------
/examples/whitelist_dsl.rb:
--------------------------------------------------------------------------------
1 | system 'rake empty_dest' # clean destination database on every call
2 |
3 | require 'data-anonymization'
4 |
5 | DataAnon::Utils::Logging.logger.level = Logger::INFO
6 |
7 | database 'Chinook' do
8 | strategy DataAnon::Strategy::Whitelist
9 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
10 | destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
11 |
12 | default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
13 |
14 | table 'Genre' do
15 | primary_key 'GenreId'
16 | whitelist 'GenreId'
17 | anonymize 'Name' do |field|
18 | field.value + ' test'
19 | end
20 | end
21 |
22 | table 'MediaType' do
23 | primary_key 'MediaTypeId'
24 | anonymize('MediaTypeId') { |field| field.value } # same as whitelist
25 | anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
26 |
27 | end
28 |
29 | table 'Customer' do
30 | primary_key 'CustomerId'
31 | batch_size 5 # batch_size works only if the primary_key is defined for the table
32 | limit 10 # will only take last 10 records
33 |
34 | whitelist 'CustomerId', 'SupportRepId', 'Company'
35 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
36 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new
37 | anonymize('LastName').using FieldStrategy::RandomLastName.new
38 | anonymize('Address').using FieldStrategy::RandomAddress.region_US
39 | anonymize('City').using FieldStrategy::RandomCity.region_US
40 | anonymize('State').using FieldStrategy::RandomProvince.region_US
41 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
42 | anonymize('Country') {|field| 'USA'}
43 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
44 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
45 | end
46 |
47 | table 'Employee' do
48 | batch_size 5 # this won't work since there is no 'primary_key' defined
49 |
50 | whitelist 'EmployeeId', 'ReportsTo', 'Title'
51 | anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
52 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new
53 | anonymize('LastName').using FieldStrategy::RandomLastName.new
54 | anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
55 | anonymize('Address').using FieldStrategy::RandomAddress.region_US
56 | anonymize('City').using FieldStrategy::RandomCity.region_US
57 | anonymize('State').using FieldStrategy::RandomProvince.region_US
58 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
59 | anonymize('Country') {|field| 'USA'}
60 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
61 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
62 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
63 | end
64 |
65 |
66 |
67 | end
68 |
--------------------------------------------------------------------------------
/examples/whitelist_dsl_threads.rb:
--------------------------------------------------------------------------------
1 | system 'rake empty_dest' # clean destination database on every call
2 |
3 | require 'data-anonymization'
4 |
5 | DataAnon::Utils::Logging.logger.level = Logger::INFO
6 |
7 | database 'Chinook' do
8 | strategy DataAnon::Strategy::Whitelist
9 | source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
10 | destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
11 |
12 | default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
13 |
14 | table 'Genre' do
15 | primary_key 'GenreId'
16 | whitelist 'GenreId'
17 | anonymize 'Name' do |field|
18 | field.value + ' test'
19 | end
20 | end
21 |
22 | table 'MediaType' do
23 | primary_key 'MediaTypeId'
24 | anonymize('MediaTypeId') { |field| field.value } # same as whitelist
25 | anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
26 |
27 | end
28 |
29 | table 'Customer' do
30 | primary_key 'CustomerId'
31 | thread_num 5 # thread_num
32 |
33 | whitelist 'CustomerId', 'SupportRepId', 'Company'
34 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
35 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new
36 | anonymize('LastName').using FieldStrategy::RandomLastName.new
37 | anonymize('Address').using FieldStrategy::RandomAddress.region_US
38 | anonymize('City').using FieldStrategy::RandomCity.region_US
39 | anonymize('State').using FieldStrategy::RandomProvince.region_US
40 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
41 | anonymize('Country') {|field| 'USA'}
42 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
43 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
44 | end
45 |
46 | table 'Employee' do
47 | thread_num 5 # thread_num
48 |
49 | whitelist 'EmployeeId', 'ReportsTo', 'Title'
50 | anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
51 | anonymize('FirstName').using FieldStrategy::RandomFirstName.new
52 | anonymize('LastName').using FieldStrategy::RandomLastName.new
53 | anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
54 | anonymize('Address').using FieldStrategy::RandomAddress.region_US
55 | anonymize('City').using FieldStrategy::RandomCity.region_US
56 | anonymize('State').using FieldStrategy::RandomProvince.region_US
57 | anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
58 | anonymize('Country') {|field| 'USA'}
59 | anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
60 | anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
61 | anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
62 | end
63 |
64 |
65 |
66 | end
67 |
--------------------------------------------------------------------------------
/lib/core/database.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Core
3 |
# Receiver of the `database 'name' do ... end` DSL block: records the chosen
# strategy, connection specs and tables, then runs the anonymization.
class Database
  include Utils::Logging

  # Starts with the Whitelist strategy and Sequential execution, and resets
  # the 'parallel_execution' environment flag to 'false'.
  def initialize(name)
    @name = name
    @tables = []
    @user_defaults = {}
    @strategy = DataAnon::Strategy::Whitelist
    @execution_strategy = DataAnon::Core::Sequential
    ENV['parallel_execution'] = 'false'
    I18n.enforce_available_locales = false
  end

  # Sets the table-level strategy class used by subsequent `table` calls.
  def strategy(strategy)
    @strategy = strategy
  end

  # Sets the class that drives table processing; flags parallel execution in
  # ENV when it is DataAnon::Parallel::Table.
  def execution_strategy(execution_strategy)
    @execution_strategy = execution_strategy
    ENV['parallel_execution'] = 'true' if execution_strategy == DataAnon::Parallel::Table
  end

  # Connection spec of the database records are read from.
  def source_db(connection_spec)
    @source_database = connection_spec
  end

  # Connection spec of the database records are written to.
  def destination_db(connection_spec)
    @destination_database = connection_spec
  end

  # User-supplied default field strategies, passed on to every table.
  def default_field_strategies(default_strategies)
    @user_defaults = default_strategies
  end

  # Builds a strategy instance for the named table, lets the block configure
  # its fields and queues it for processing.
  def table(name, &block)
    configured = @strategy.new(@source_database, @destination_database, name, @user_defaults)
    @tables << configured.process_fields(&block)
  end
  alias collection table

  # Runs the execution strategy over all queued tables (logging, not raising,
  # any error), then reports fields without a strategy for whitelist runs and
  # finally each table's collected errors.
  def anonymize
    begin
      @execution_strategy.new.anonymize(@tables)
    rescue => error
      logger.error "\n#{error.message} \n #{error.backtrace}"
    end

    if @strategy.whitelist?
      @tables.each do |table|
        missing = table.fields_missing_strategy
        next unless missing.present?

        logger.info('Fields missing the anonymization strategy:')
        missing.print
      end
    end

    @tables.each { |table| table.errors.print }
  end

end
63 |
# Execution strategy that processes the tables one after another.
class Sequential
  # Calls #process on every table in order. A failure in one table is logged
  # and does not stop the remaining tables.
  # NOTE(review): `logger` is not mixed in by this class; presumably it is
  # reachable through the top-level `include DataAnon::Core::DSL` - confirm.
  def anonymize(tables)
    tables.each do |table|
      table.process
    rescue => error
      logger.error "\n#{error.message} \n #{error.backtrace}"
    end
  end
end
75 |
76 | end
77 | end
--------------------------------------------------------------------------------
/lib/core/dsl.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Core
# Entry point of the anonymization DSL.
module DSL
  include Utils::Logging

  # Creates a Database for +name+, evaluates the block in its context so the
  # DSL calls configure it, then starts the anonymization.
  def database(name, &block)
    logger.debug "Processing Database: #{name}"
    db = DataAnon::Core::Database.new(name)
    db.instance_eval(&block)
    db.anonymize
  end

end
14 | end
15 | end
16 |
17 | include DataAnon::Core::DSL
18 |
19 |
--------------------------------------------------------------------------------
/lib/core/field.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Core
3 |
# Value object describing one field of one record while it is anonymized.
class Field

  attr_accessor :name, :value, :row_number, :ar_record, :table_name

  # collection_name reads the same value as table_name.
  alias_method :collection_name, :table_name

  # table_name defaults to 'unknown' when the caller does not pass one.
  def initialize(name, value, row_number, ar_record, table_name = 'unknown')
    @name, @value = name, value
    @row_number, @ar_record = row_number, ar_record
    @table_name = table_name
  end

end
19 |
20 | end
21 | end
--------------------------------------------------------------------------------
/lib/core/fields_missing_strategy.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Core
3 |
# Remembers, per table, the names of fields reported as missing a strategy
# so they can be listed in the log.
class FieldsMissingStrategy
  include Utils::Logging

  # The recorded field names, in the order they were first reported.
  attr_reader :fields_missing_strategy

  def initialize(table_name)
    @table_name = table_name
    @fields_missing_strategy = []
  end

  # Records +field_name+ once; repeated reports of the same name are ignored.
  def missing(field_name)
    @fields_missing_strategy << field_name unless @fields_missing_strategy.include?(field_name)
  end

  # Logs every recorded field as "table.field" at info level.
  def print
    @fields_missing_strategy.each { |field_name| logger.info("#{@table_name}.#{field_name}") }
  end

  # True when at least one field has been recorded.
  def present?
    !fields_missing_strategy.empty?
  end

end
32 |
33 | end
34 | end
--------------------------------------------------------------------------------
/lib/core/table_errors.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Core
3 |
# Collects the per-record errors raised while a table is processed.
class TableErrors
  include Utils::Logging

  # Array of { :record => ..., :exception => ... } hashes.
  attr_reader :errors

  def initialize(table_name)
    @table_name = table_name
    @errors = []
  end

  # Stores the failure; raises once more than 100 errors have been stored.
  def log_error(record, exception)
    @errors.push(:record => record, :exception => exception)
    raise 'Reached limit of error for a table' if @errors.length > 100
  end

  # Logs every stored exception and its backtrace; does nothing when no
  # error was recorded.
  def print
    return if @errors.empty?

    logger.error("Errors while processing table '#{@table_name}':")
    @errors.each do |entry|
      failure = entry[:exception]
      logger.error(failure)
      logger.error(failure.backtrace.join("\n\t"))
    end
  end

end
31 |
32 | end
33 | end
--------------------------------------------------------------------------------
/lib/data-anonymization.rb:
--------------------------------------------------------------------------------
1 | require 'version'
2 |
3 | require 'utils/logging'
4 | require 'utils/random_int'
5 | require 'utils/random_float'
6 | require 'utils/random_string'
7 | require 'utils/random_string_chars_only'
8 | require 'utils/geojson_parser'
9 | require 'utils/progress_bar'
10 | require 'utils/parallel_progress_bar'
11 | require 'utils/resource'
12 | require 'utils/template_helper'
13 | require 'parallel/table'
14 | require 'core/database'
15 | require 'core/fields_missing_strategy'
16 | require 'thor/helpers/rdbms_dsl_generator'
17 | require 'core/field'
18 | require 'core/table_errors'
19 | require 'strategy/strategies'
20 | require 'utils/database'
21 | require 'core/dsl'
22 |
# MongoDB support is optional: load the driver and the MongoDB DSL generator
# only when they can be required.
begin
  require 'mongo'
  require 'thor/helpers/mongodb_dsl_generator'
rescue LoadError
  # Ignore the MongoDB-specific libraries when the mongo driver is not
  # available.
end
29 |
--------------------------------------------------------------------------------
/lib/parallel/table.rb:
--------------------------------------------------------------------------------
1 | require 'parallel'
2 |
3 | module DataAnon
4 | module Parallel
# Execution strategy that hands the tables to the `parallel` gem.
class Table

  # For each table, switches it to the parallel progress bar and processes
  # it; a failure in one table is logged and does not stop the others.
  # NOTE(review): `logger` is not mixed in by this class; presumably it is
  # reachable through the top-level `include DataAnon::Core::DSL` - confirm.
  def anonymize(tables)
    ::Parallel.each(tables) do |table|
      table.progress_bar_class(DataAnon::Utils::ParallelProgressBar)
      table.process
    rescue => error
      logger.error "\n#{error.message} \n #{error.backtrace}"
    end
  end

end
19 | end
20 | end
--------------------------------------------------------------------------------
/lib/strategy/base.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
# Shared implementation for table-level strategies. It stores the field
# configuration declared in a `table`/`collection` DSL block and iterates the
# source records, handing each one to #process_record(index, record), which
# is not defined here and must be supplied by the concrete strategy.
class Base
  include Utils::Logging

  attr_accessor :fields, :user_strategies, :fields_missing_strategy, :errors

  # source_database / destination_database - connection specs given to
  #   establish_connection on the generated table classes.
  # name            - table (or collection) name.
  # user_strategies - handed to Field::DefaultAnon for fields that get the
  #   default strategy.
  def initialize(source_database, destination_database, name, user_strategies)
    @name = name
    @user_strategies = user_strategies
    @fields = {}
    @source_database = source_database
    @destination_database = destination_database
    @fields_missing_strategy = DataAnon::Core::FieldsMissingStrategy.new name
    @errors = DataAnon::Core::TableErrors.new(@name)
    @primary_keys = []
  end

  # Base strategies are not whitelist strategies.
  def self.whitelist?
    false
  end

  # Evaluates the DSL block against this instance and returns self.
  def process_fields(&block)
    self.instance_eval &block
    self
  end

  # DSL: names of the primary key column(s).
  def primary_key(*fields)
    @primary_keys = fields
  end

  # DSL: number of records fetched per batch.
  def batch_size(size)
    @batch_size = size
  end

  # DSL: maximum number of records to process (see #source_table_limited).
  def limit(limit)
    @limit = limit
  end

  # DSL: thread count for threaded processing. As #process shows, it only
  # takes effect when a primary key and a batch size are also set.
  def thread_num(thread_num)
    @thread_num = thread_num
  end

  # DSL: marks the given fields with the Whitelist field strategy.
  def whitelist(*fields)
    fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Whitelist.new }
  end

  # DSL: records for which the block returns true are not processed.
  def skip(&block)
    @skip_block = block
  end

  # DSL: only records for which the block returns true are processed.
  def continue(&block)
    @continue_block = block
  end

  # DSL: registers an anonymization strategy for the given fields.
  #
  # With a block, the block itself becomes the strategy. Without a block the
  # fields get the default strategy and an anonymous class is returned whose
  # `using(field_strategy)` replaces that default for the same fields, which
  # is what makes `anonymize('x').using Strategy.new` work.
  def anonymize(*fields, &block)
    if block.nil?
      fields.each { |f| @fields[f] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) }
      temp = self
      return Class.new do
        @temp_fields = fields
        @table_fields = temp.fields
        def self.using field_strategy
          @temp_fields.each { |f| @table_fields[f] = field_strategy }
        end
      end
    else
      fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Anonymous.new(&block) }
    end
  end

  # True when +field+ is one of the declared primary keys.
  def is_primary_key?(field)
    @primary_keys.include?(field)
  end

  # Records +field_name+ as lacking an explicit strategy and returns the
  # default strategy to use for it.
  def default_strategy(field_name)
    @fields_missing_strategy.missing field_name
    DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
  end

  # Memoized destination table class; it is connected only when a
  # destination database was configured.
  def dest_table
    return @dest_table unless @dest_table.nil?
    table_klass = Utils::DestinationTable.create @name, @primary_keys
    table_klass.establish_connection @destination_database if @destination_database
    @dest_table = table_klass
  end

  # Memoized source table class, connected to the source database.
  def source_table
    return @source_table unless @source_table.nil?
    table_klass = Utils::SourceTable.create @name, @primary_keys
    table_klass.establish_connection @source_database
    @source_table = table_klass
  end

  # Processes the whole table. Without a primary key or a batch size the
  # records are walked in one pass; otherwise threads are used when a thread
  # count is set, and plain batches when it is not.
  def process
    logger.debug "Processing table #{@name} with fields strategies #{@fields}"
    total = source_table.count
    if total > 0
      progress = progress_bar.new(@name, total)
      if @primary_keys.empty? || !@batch_size.present?
        process_table progress
      elsif @thread_num.present?
        process_table_in_threads progress
      else
        process_table_in_batches progress
      end
      progress.close
    end
    if source_table.respond_to?('clear_all_connections!')
      source_table.clear_all_connections!
    end
  end

  # Single-pass processing; row numbers start at 1. Per-record failures are
  # collected in @errors instead of aborting the table.
  def process_table(progress)
    index = 0

    source_table_limited.each do |record|
      index += 1
      begin
        process_record_if index, record
      rescue => exception
        @errors.log_error record, exception
      end
      progress.show index
    end
  end

  # Batched processing via find_each; row numbers start at 1.
  def process_table_in_batches(progress)
    logger.info "Processing table #{@name} records in batch size of #{@batch_size}"
    index = 0

    source_table_limited.find_each(:batch_size => @batch_size) do |record|
      index += 1
      begin
        process_record_if index, record
      rescue => exception
        @errors.log_error record, exception
      end
      progress.show index
    end
  end

  # Threaded processing: every batch is handled by its own thread.
  #
  # The row counter is shared by all worker threads, so each record takes its
  # number under a mutex. That keeps row numbers unique and 1-based, matching
  # the other processing modes. Failures are collected in @errors like in the
  # other modes.
  def process_table_in_threads(progress)
    logger.info "Processing table #{@name} records in batch size of #{@batch_size} [THREADS]"

    index = 0
    index_lock = Mutex.new
    threads = []

    source_table.find_in_batches(batch_size: @batch_size) do |records|
      # Wait for the oldest workers before starting another batch.
      until threads.count(&:alive?) <= @thread_num
        thr = threads.delete_at 0
        thr.join
        progress.show index
      end

      thr = Thread.new {
        records.each do |record|
          row_number = index_lock.synchronize { index += 1 }
          begin
            process_record_if row_number, record
          rescue => exception
            @errors.log_error record, exception
          end
        end
      }
      threads << thr
    end

    until threads.empty?
      thr = threads.delete_at 0
      thr.join
      progress.show index
    end
  end

  # Memoized relation over the source table; when a limit is set it is
  # restricted to that many records ordered by created_at descending.
  def source_table_limited
    @source_table_limited ||= begin
      if @limit.present?
        source_table.all.limit(@limit).order(created_at: :desc)
      else
        source_table.all
      end
    end
  end

  # Applies the skip/continue filters, then processes the record.
  def process_record_if(index, record)
    return if @skip_block && @skip_block.call(index, record)
    return if @continue_block && !@continue_block.call(index, record)

    process_record index, record
  end

  # Progress bar class in use; DataAnon::Utils::ProgressBar unless overridden.
  def progress_bar
    @progress_bar || DataAnon::Utils::ProgressBar
  end

  # Overrides the progress bar class.
  def progress_bar_class(progress_bar)
    @progress_bar = progress_bar
  end


end
204 | end
205 | end
206 |
--------------------------------------------------------------------------------
/lib/strategy/blacklist.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
# Blacklist strategy: anonymizes the configured fields of each record in
# place, leaving every other column untouched.
class Blacklist < DataAnon::Strategy::Base

  # Anonymizes one record.
  #
  # index  - row number handed to the field strategies.
  # record - object exposing #attributes and #update_columns.
  #
  # Configured fields that are not among the record's attributes, hold nil,
  # or are primary keys are skipped. The record is written only when at
  # least one field produced a new value.
  def process_record(index, record)
    # Read the attribute hash once instead of rebuilding it for every field.
    attributes = record.attributes
    updates = {}

    @fields.each do |field_name, strategy|
      next unless attributes.key?(field_name)

      field_value = attributes[field_name]
      next if field_value.nil? || is_primary_key?(field_name)

      field = DataAnon::Core::Field.new(field_name, field_value, index, record, @name)
      updates[field_name] = strategy.anonymize(field)
    end

    record.update_columns(updates) if updates.any?
  end

end
19 | end
20 | end
21 |
--------------------------------------------------------------------------------
/lib/strategy/field/anonymize_array.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 |
# Anonymizes every element of an array-valued field.
#
# Each element is wrapped in its own DataAnon::Core::Field (same name, row
# number, record and table as the array field) and passed to a strategy: the
# one given to the constructor, or, when that is nil, the entry registered via
# .user_defaults under the element's downcased class name.
class AnonymizeArray

  # Registers the per-datatype strategies used when no explicit strategy is given.
  def self.user_defaults(user_defaults)
    @@user_defaults = user_defaults
  end

  # strategy - strategy applied to every element, or nil to look one up per element.
  def initialize(strategy)
    @strategy = strategy
  end

  # Returns a new array holding the anonymized elements of field.value.
  def anonymize(field)
    field.value.map do |element|
      chosen = @strategy || @@user_defaults[element.class.to_s.downcase.to_sym]
      element_field = DataAnon::Core::Field.new(field.name, element, field.row_number, field.ar_record, field.table_name)
      chosen.anonymize element_field
    end
  end

end
24 |
25 |
26 | end
27 | end
28 | end
--------------------------------------------------------------------------------
/lib/strategy/field/anonymous.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 |
# Field strategy backed by a caller-supplied block: the block receives the
# field and its return value becomes the anonymized value.
class Anonymous

  # Captures the block that will perform the anonymization.
  def initialize(&block)
    @callable = block
  end

  # Calls the captured block with +field+ and returns its result.
  def anonymize(field)
    @callable.call(field)
  end

end
17 |
18 |
19 | end
20 | end
21 | end
--------------------------------------------------------------------------------
/lib/strategy/field/contact/geojson_base.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
# Base class for strategies that pick a random entry from values loaded out of
# a geojson file. Subclasses must override the constructor and populate @values.
class GeojsonBase

  class << self
    # Builds the strategy from the bundled 'US_addresses.geojson' resource.
    def region_US
      new(DataAnon::Utils::Resource.file('US_addresses.geojson'))
    end

    # Builds the strategy from the bundled 'UK_addresses.geojson' resource.
    def region_UK
      new(DataAnon::Utils::Resource.file('UK_addresses.geojson'))
    end
  end

  # Always raises: the base class does not load anything itself.
  def initialize(file_path)
    raise "Load and set the @values member variable in constructor"
  end

  # Returns a random element of @values; the field itself is not inspected.
  def anonymize(field)
    @values.sample
  end
end
22 | end
23 | end
24 | end
--------------------------------------------------------------------------------
/lib/strategy/field/contact/random_address.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Generates an address from a file in [geojson](http://www.geojson.org/geojson-spec.html) format.
# The default US/UK files choose randomly from 300 addresses.
# A larger data set can be downloaded from [here](http://www.infochimps.com/datasets/simplegeo-places-dump)
#
#    !!!ruby
#    anonymize('Address').using FieldStrategy::RandomAddress.region_US
#
#    !!!ruby
#    anonymize('Address').using FieldStrategy::RandomAddress.region_UK
#
#    !!!ruby
#    # get your own geo_json file and use it
#    anonymize('Address').using FieldStrategy::RandomAddress.new('my_geo_json.json')

class RandomAddress < GeojsonBase

  # Loads the candidate addresses from the geojson file at +file_path+.
  def initialize(file_path)
    @values = DataAnon::Utils::GeojsonParser.address(file_path)
  end

end
25 |
26 |
27 | end
28 | end
29 | end
--------------------------------------------------------------------------------
/lib/strategy/field/contact/random_city.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Similar to RandomAddress; generates a city from a file in [geojson](http://www.geojson.org/geojson-spec.html) format.
# The default US/UK files choose randomly from 300 addresses.
# A larger data set can be downloaded from [here](http://www.infochimps.com/datasets/simplegeo-places-dump)
#
#    !!!ruby
#    anonymize('City').using FieldStrategy::RandomCity.region_US
#
#    !!!ruby
#    anonymize('City').using FieldStrategy::RandomCity.region_UK
#
#    !!!ruby
#    # get your own geo_json file and use it
#    anonymize('City').using FieldStrategy::RandomCity.new('my_geo_json.json')

class RandomCity < GeojsonBase

  # Loads the candidate cities from the geojson file at +file_path+.
  def initialize(file_path)
    @values = DataAnon::Utils::GeojsonParser.city(file_path)
  end

end
25 |
26 |
27 | end
28 | end
29 | end
--------------------------------------------------------------------------------
/lib/strategy/field/contact/random_phone_number.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Keeps the format of the phone number and replaces each digit in the string with a random digit.
# All behaviour is inherited unchanged from FormattedStringNumber.
#
#    !!!ruby
#    anonymize('PhoneNumber').using FieldStrategy::RandomPhoneNumber.new

class RandomPhoneNumber < FormattedStringNumber; end
13 |
14 |
15 | end
16 | end
17 | end
--------------------------------------------------------------------------------
/lib/strategy/field/contact/random_province.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Similar to RandomAddress; generates a province from a file in [geojson](http://www.geojson.org/geojson-spec.html) format.
# The default US/UK files choose randomly from 300 addresses.
# A larger data set can be downloaded from [here](http://www.infochimps.com/datasets/simplegeo-places-dump)
#
#    !!!ruby
#    anonymize('Province').using FieldStrategy::RandomProvince.region_US
#
#    !!!ruby
#    anonymize('Province').using FieldStrategy::RandomProvince.region_UK
#
#    !!!ruby
#    # get your own geo_json file and use it
#    anonymize('Province').using FieldStrategy::RandomProvince.new('my_geo_json.json')

class RandomProvince < GeojsonBase

  # Loads the candidate provinces from the geojson file at +file_path+.
  def initialize(file_path)
    @values = DataAnon::Utils::GeojsonParser.province(file_path)
  end

end
25 |
26 |
27 | end
28 | end
29 | end
--------------------------------------------------------------------------------
/lib/strategy/field/contact/random_zipcode.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Similar to RandomAddress; generates a zipcode from a file in [geojson](http://www.geojson.org/geojson-spec.html) format.
# The default US/UK files choose randomly from 300 addresses.
# A larger data set can be downloaded from [here](http://www.infochimps.com/datasets/simplegeo-places-dump)
#
#    !!!ruby
#    anonymize('Zipcode').using FieldStrategy::RandomZipcode.region_US
#
#    !!!ruby
#    anonymize('Zipcode').using FieldStrategy::RandomZipcode.region_UK
#
#    !!!ruby
#    # get your own geo_json file and use it
#    anonymize('Zipcode').using FieldStrategy::RandomZipcode.new('my_geo_json.json')

class RandomZipcode < GeojsonBase

  # Loads the candidate zipcodes from the geojson file at +file_path+.
  def initialize(file_path)
    @values = DataAnon::Utils::GeojsonParser.zipcode(file_path)
  end

end
25 |
26 |
27 | end
28 | end
29 | end
--------------------------------------------------------------------------------
/lib/strategy/field/datetime/anonymize_date.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Anonymizes the month and day of a date within their natural ranges, based on a true/false flag for each part.
# By default both parts are anonymized. The year is always kept.
#
#    !!!ruby
#    # anonymizes month and leaves day unchanged
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDate.new(true,false)
#
# In addition to customizing which parts you want anonymized, there are some helper methods which allow for quick anonymization
#
#    !!!ruby
#    # anonymizes only the month field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDate.only_month
#    # anonymizes only the day field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDate.only_day

class AnonymizeDate

  DEFAULT_ANONYMIZATION = true

  # Strategy that randomizes the month and keeps the day.
  def self.only_month
    new true, false
  end

  # Strategy that randomizes the day and keeps the month.
  def self.only_day
    new false, true
  end

  # anonymize_month - truthy to replace the month with a random one (1..12)
  # anonymize_day   - truthy to replace the day with a random day of the resulting month
  # Both flags default to true, so AnonymizeDate.new anonymizes month and day.
  def initialize(anonymize_month = DEFAULT_ANONYMIZATION, anonymize_day = DEFAULT_ANONYMIZATION)
    @anonymize_month = anonymize_month
    @anonymize_day = anonymize_day
  end

  # Returns a new Date built from the year of field.value and the
  # (possibly randomized) month and day.
  def anonymize(field)
    original = field.value

    year = original.year
    month = @anonymize_month ? DataAnon::Utils::RandomInt.generate(1, 12) : original.month
    days_in_month = Time.new(year, month, 1, 1, 1, 1).end_of_month.day
    day = if @anonymize_day
            DataAnon::Utils::RandomInt.generate(1, days_in_month)
          else
            # A kept day (e.g. the 31st) may not exist in a randomly chosen
            # month; clamp it so Date.new never receives an invalid date.
            [original.day, days_in_month].min
          end

    Date.new(year, month, day)
  end

end
52 | end
53 | end
54 | end
--------------------------------------------------------------------------------
/lib/strategy/field/datetime/anonymize_datetime.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Anonymizes each part (except year and seconds) within its natural range (e.g. hour between 0-23 and day within the month)
# based on a true/false flag for that part. Works like AnonymizeTime but builds a DateTime.
#
#    !!!ruby
#    # anonymizes month and hour fields, leaving the day and minute fields untouched
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.new(true,false,true,false)
#
# In addition to customizing which parts you want anonymized, there are some helper methods which allow for quick anonymization
#
#    !!!ruby
#    # anonymizes only the month field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.only_month
#    # anonymizes only the day field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.only_day
#    # anonymizes only the hour field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.only_hour
#    # anonymizes only the minute field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeDateTime.only_minute

class AnonymizeDateTime < AnonymizeTime

  # Overrides the inherited hook so the result is a DateTime.
  def create_object(year, month, day, hour, min, sec)
    DateTime.new(year, month, day, hour, min, sec)
  end
  private :create_object

end
32 | end
33 | end
34 | end
--------------------------------------------------------------------------------
/lib/strategy/field/datetime/anonymize_time.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Anonymizes each part (except year and seconds) within its natural range (e.g. hour between 0-23 and day within the month)
# based on a true/false flag for that part. By default, all parts are anonymized.
#
#    !!!ruby
#    # anonymizes month and hour fields, leaving the day and minute fields untouched
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.new(true,false,true,false)
#
# In addition to customizing which parts you want anonymized, there are some helper methods which allow for quick anonymization
#
#    !!!ruby
#    # anonymizes only the month field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.only_month
#    # anonymizes only the day field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.only_day
#    # anonymizes only the hour field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.only_hour
#    # anonymizes only the minute field
#    anonymize('DateOfBirth').using FieldStrategy::AnonymizeTime.only_minute

class AnonymizeTime

  DEFAULT_ANONYMIZATION = true

  # Strategy that randomizes only the month.
  def self.only_month
    new true, false, false, false
  end

  # Strategy that randomizes only the day.
  def self.only_day
    new false, true, false, false
  end

  # Strategy that randomizes only the hour.
  def self.only_hour
    new false, false, true, false
  end

  # Strategy that randomizes only the minute.
  def self.only_minute
    new false, false, false, true
  end

  # Each flag, when truthy, enables randomization of the matching part.
  # Every flag defaults to DEFAULT_ANONYMIZATION, so AnonymizeTime.new
  # anonymizes month, day, hour and minute.
  def initialize(anonymize_month = DEFAULT_ANONYMIZATION,
                 anonymize_day = DEFAULT_ANONYMIZATION,
                 anonymize_hour = DEFAULT_ANONYMIZATION,
                 anonymize_min = DEFAULT_ANONYMIZATION)
    @anonymize_month = anonymize_month
    @anonymize_day = anonymize_day
    @anonymize_hour = anonymize_hour
    @anonymize_min = anonymize_min
  end

  # Returns a new object (see #create_object) built from the year and seconds
  # of field.value and the (possibly randomized) month, day, hour and minute.
  def anonymize(field)
    original_time = field.value

    year = original_time.year
    month = @anonymize_month ? DataAnon::Utils::RandomInt.generate(1, 12) : original_time.month
    days_in_month = Time.new(year, month, 1, 1, 1, 1).end_of_month.day
    day = if @anonymize_day
            DataAnon::Utils::RandomInt.generate(1, days_in_month)
          else
            # A kept day (e.g. the 31st) may not exist in a randomly chosen
            # month; clamp it so the result stays inside that month and
            # subclasses building stricter types do not get an invalid date.
            [original_time.day, days_in_month].min
          end
    hour = @anonymize_hour ? DataAnon::Utils::RandomInt.generate(0, 23) : original_time.hour
    min = @anonymize_min ? DataAnon::Utils::RandomInt.generate(0, 59) : original_time.min
    sec = original_time.sec

    create_object(year, month, day, hour, min, sec)
  end

  private

  # Hook for subclasses: builds the returned value from its parts.
  def create_object(year, month, day, hour, min, sec)
    Time.new(year, month, day, hour, min, sec)
  end

end
75 | end
76 | end
77 | end
78 |
--------------------------------------------------------------------------------
/lib/strategy/field/datetime/date_delta.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Shifts the date randomly within the given delta range. By default shifts the date within 10 days + or -
#
#    !!!ruby
#    anonymize('DateOfBirth').using FieldStrategy::DateDelta.new
#
#    !!!ruby
#    # shifts date within 25 days
#    anonymize('DateOfBirth').using FieldStrategy::DateDelta.new(25)
#

class DateDelta

  DEFAULT_DAY_DELTA = 10

  # day_delta - maximum number of days to move the date in either direction.
  def initialize(day_delta = DEFAULT_DAY_DELTA)
    @day_delta = day_delta
  end

  # Returns field.value moved by a random whole number of days drawn from
  # -day_delta..day_delta.
  def anonymize(field)
    shift = DataAnon::Utils::RandomInt.generate(-@day_delta, @day_delta)
    field.value + shift.days
  end

end
29 | end
30 | end
31 | end
--------------------------------------------------------------------------------
/lib/strategy/field/datetime/date_time_delta.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Shifts the value randomly within the given range. By default shifts the date within 10 days + or - and the time within 30 minutes.
#
#    !!!ruby
#    anonymize('DateOfBirth').using FieldStrategy::DateTimeDelta.new
#
#    !!!ruby
#    # shifts date within 20 days and time within 50 minutes
#    anonymize('DateOfBirth').using FieldStrategy::DateTimeDelta.new(20, 50)

class DateTimeDelta

  DEFAULT_DAY_DELTA = 10
  DEFAULT_MINUTE_DELTA = 30

  # day_delta    - maximum number of days to move in either direction
  # minute_delta - maximum number of minutes to move in either direction
  def initialize(day_delta = DEFAULT_DAY_DELTA, minute_delta = DEFAULT_MINUTE_DELTA)
    @day_delta = day_delta
    @minute_delta = minute_delta
  end

  # Returns field.value moved by a random number of days plus a random number
  # of minutes, each drawn from its own -delta..delta range.
  def anonymize(field)
    day_shift = DataAnon::Utils::RandomInt.generate(-@day_delta, @day_delta)
    minute_shift = DataAnon::Utils::RandomInt.generate(-@minute_delta, @minute_delta)
    total_shift = day_shift.days + minute_shift.minutes
    field.value + total_shift
  end

end
31 | end
32 | end
33 | end
--------------------------------------------------------------------------------
/lib/strategy/field/datetime/time_delta.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Shifts the value randomly within the given range. By default shifts the date within 10 days + or - and the time within 30 minutes.
# All behaviour is inherited unchanged from DateTimeDelta.
#
#    !!!ruby
#    anonymize('DateOfBirth').using FieldStrategy::TimeDelta.new
#
#    !!!ruby
#    # shifts date within 20 days and time within 50 minutes
#    anonymize('DateOfBirth').using FieldStrategy::TimeDelta.new(20, 50)

class TimeDelta < DateTimeDelta; end
16 |
17 |
18 | end
19 | end
20 | end
--------------------------------------------------------------------------------
/lib/strategy/field/default_anon.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Fallback strategy that chooses a field strategy from the Ruby class of the
# field's value. The built-in table can be extended or overridden per datatype
# through the hash passed to the constructor.
class DefaultAnon

  # Built-in strategies keyed by the downcased class name of the value.
  DEFAULT_STRATEGIES = {
    :string           => FieldStrategy::RandomString.new,
    :integer          => FieldStrategy::RandomIntegerDelta.new(5),
    :fixnum           => FieldStrategy::RandomIntegerDelta.new(5),
    :bignum           => FieldStrategy::RandomIntegerDelta.new(5000),
    :float            => FieldStrategy::RandomFloatDelta.new(5.0),
    :bigdecimal       => FieldStrategy::RandomBigDecimalDelta.new(500.0),
    :datetime         => FieldStrategy::DateTimeDelta.new,
    :time             => FieldStrategy::TimeDelta.new,
    :date             => FieldStrategy::DateDelta.new,
    :array            => FieldStrategy::AnonymizeArray.new(nil),
    :trueclass        => FieldStrategy::RandomBoolean.new,
    :"bson::objectid" => FieldStrategy::Whitelist.new,
    :falseclass       => FieldStrategy::RandomBoolean.new
  }

  # user_defaults - hash of datatype symbol => strategy; entries take
  #                 precedence over DEFAULT_STRATEGIES.
  # The merged table is also handed to AnonymizeArray for its element lookup.
  def initialize(user_defaults = {})
    @user_defaults = DEFAULT_STRATEGIES.merge(user_defaults)
    FieldStrategy::AnonymizeArray.user_defaults(@user_defaults)
  end

  # Delegates to the strategy registered for the class of field.value.
  # Raises a RuntimeError when no strategy is registered for that class.
  def anonymize(field)
    type_key = field.value.class.to_s.downcase.to_sym
    strategy = @user_defaults[type_key]
    unless strategy
      raise "No strategy defined for datatype #{field.value.class}. Use 'default_field_strategies' option in your script. Refer to http://sunitparekh.github.com/data-anonymization/#default-field-strategies for more details. #{field.inspect}"
    end

    strategy.anonymize field
  end

end
34 |
35 |
36 | end
37 | end
38 | end
--------------------------------------------------------------------------------
/lib/strategy/field/email/gmail_template.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Generates a unique gmail address per row by using the gmail "+" suffix. Takes a gmail username and
# generates emails of the form username+<row number>@gmail.com
#
#    !!!ruby
#    anonymize('Email').using FieldStrategy::GmailTemplate.new('username')
#

class GmailTemplate

  # username - local part placed before the "+"; defaults to 'someusername'.
  def initialize(username = 'someusername')
    @username = username
  end

  # Returns the address built from the username and the field's row number.
  def anonymize(field)
    row = field.row_number
    "#{@username}+#{row}@gmail.com"
  end
end
22 | end
23 | end
24 | end
--------------------------------------------------------------------------------
/lib/strategy/field/email/random_email.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Generates a random email address using the given HOSTNAME and TLD.
# By default the hostname and TLD are chosen randomly along with the email id.
#
#    !!!ruby
#    anonymize('Email').using FieldStrategy::RandomEmail.new('thoughtworks','com')
#

class RandomEmail

  TLDS = ['com','org','net','edu','gov','mil','biz','info']

  # hostname - fixed host name, or nil to generate a random one per call
  # tld      - fixed top-level domain, or nil to pick one from TLDS per call
  def initialize(hostname = nil, tld = nil)
    @hostname = hostname
    @tld = tld
  end

  # Returns "<random user>@<host>.<tld>"; the field itself is not inspected.
  def anonymize(field)
    user_size = DataAnon::Utils::RandomInt.generate(5, 15)
    host_size = DataAnon::Utils::RandomInt.generate(2, 10)

    local_part = DataAnon::Utils::RandomString.generate(user_size)
    domain = @hostname || DataAnon::Utils::RandomString.generate(host_size)
    suffix = @tld || TLDS[rand(TLDS.length)]

    local_part + "@" + domain + "." + suffix
  end
end
35 | end
36 |
37 | end
38 | end
--------------------------------------------------------------------------------
/lib/strategy/field/email/random_mailinator_email.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Generates a random email address on the mailinator host, e.g. <random>@mailinator.com
#
#    !!!ruby
#    anonymize('Email').using FieldStrategy::RandomMailinatorEmail.new

class RandomMailinatorEmail

  # Prepares a RandomEmail strategy fixed to host "mailinator" and TLD "com".
  def initialize
    @delegate = DataAnon::Strategy::Field::RandomEmail.new("mailinator", "com")
  end

  # Returns whatever the wrapped RandomEmail strategy produces for +field+.
  def anonymize(field)
    @delegate.anonymize(field)
  end
end
20 | end
21 |
22 | end
23 | end
--------------------------------------------------------------------------------
/lib/strategy/field/fields.rb:
--------------------------------------------------------------------------------
1 | require 'strategy/field/whitelist'
2 | require 'strategy/field/random_boolean'
3 |
4 | require 'strategy/field/anonymous'
5 |
6 | #array
7 | require 'strategy/field/anonymize_array'
8 |
9 | # string
10 | require 'strategy/field/string/lorem_ipsum'
11 | require 'strategy/field/string/string_template'
12 | require 'strategy/field/string/random_string'
13 | require 'strategy/field/string/random_url'
14 | require 'strategy/field/string/formatted_string_numbers'
15 | require 'strategy/field/string/random_formatted_string'
16 |
17 | require 'strategy/field/string/select_from_file'
18 | require 'strategy/field/string/select_from_list'
19 | require 'strategy/field/string/select_from_database'
20 |
21 | # number
22 | require 'strategy/field/number/random_integer'
23 | require 'strategy/field/number/random_float'
24 | require 'strategy/field/number/random_integer_delta'
25 | require 'strategy/field/number/random_float_delta'
26 | require 'strategy/field/number/random_big_decimal_delta'
27 |
28 | # contact
29 | require 'strategy/field/contact/geojson_base'
30 | require 'strategy/field/contact/random_phone_number'
31 | require 'strategy/field/contact/random_address'
32 | require 'strategy/field/contact/random_zipcode'
33 | require 'strategy/field/contact/random_city'
34 | require 'strategy/field/contact/random_province'
35 |
36 | # datetime
37 | require 'strategy/field/datetime/anonymize_time'
38 | require 'strategy/field/datetime/anonymize_datetime'
39 | require 'strategy/field/datetime/anonymize_date'
40 | require 'strategy/field/datetime/date_time_delta'
41 | require 'strategy/field/datetime/time_delta'
42 | require 'strategy/field/datetime/date_delta'
43 |
44 | # email
45 | require 'strategy/field/email/random_email'
46 | require 'strategy/field/email/gmail_template'
47 | require 'strategy/field/email/random_mailinator_email'
48 |
49 | # name
50 | require 'strategy/field/name/random_first_name'
51 | require 'strategy/field/name/random_last_name'
52 | require 'strategy/field/name/random_full_name'
53 | require 'strategy/field/name/random_user_name'
54 |
55 |
56 |
57 | FieldStrategy = DataAnon::Strategy::Field
58 |
59 | require 'strategy/field/default_anon'
60 |
61 |
--------------------------------------------------------------------------------
/lib/strategy/field/name/random_first_name.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Randomly picks a first name from the predefined list in the file. The default [file](https://raw.github.com/sunitparekh/data-anonymization/master/resources/first_names.txt) is part of the gem.
# The file should contain one first name per line.
#
#    !!!ruby
#    anonymize('FirstName').using FieldStrategy::RandomFirstName.new
#
#    !!!ruby
#    anonymize('FirstName').using FieldStrategy::RandomFirstName.new('my_first_names.txt')
#

class RandomFirstName < SelectFromFile

  # file_path - path of the names file, or nil to use the 'first_names.txt' resource.
  def initialize(file_path = nil)
    names_file = file_path || DataAnon::Utils::Resource.file('first_names.txt')
    super(names_file)
  end

end
22 | end
23 | end
24 | end
--------------------------------------------------------------------------------
/lib/strategy/field/name/random_full_name.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Generates a full name using the RandomFirstName and RandomLastName strategies.
# The result has one first name followed by one random last name for every
# additional space-separated word in the original value.
#
#    !!!ruby
#    anonymize('FullName').using FieldStrategy::RandomFullName.new
#
#    !!!ruby
#    anonymize('FullName').using FieldStrategy::RandomFullName.new('my_first_names.txt', 'my_last_names.txt')

class RandomFullName

  # first_names / last_names - optional file paths passed on to the
  # RandomFirstName and RandomLastName strategies.
  def initialize(first_names = nil, last_names = nil)
    @first_name_anonymizer = DataAnon::Strategy::Field::RandomFirstName.new(first_names)
    @last_name_anonymizer = DataAnon::Strategy::Field::RandomLastName.new(last_names)
  end

  # Returns the generated name as a single space-separated string.
  def anonymize(field)
    word_count = field.value.split(' ').size

    first_name = @first_name_anonymizer.anonymize(field)
    # one last name per word after the first; none for a single-word value
    trailing = (1...word_count).map { " " + @last_name_anonymizer.anonymize(field) }.join

    first_name + trailing
  end
end
34 | end
35 | end
36 | end
--------------------------------------------------------------------------------
/lib/strategy/field/name/random_last_name.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Randomly picks a last name from the predefined list in the file. The default [file](https://raw.github.com/sunitparekh/data-anonymization/master/resources/last_names.txt) is part of the gem.
# The file should contain one last name per line.
#
#    !!!ruby
#    anonymize('LastName').using FieldStrategy::RandomLastName.new
#
#    !!!ruby
#    anonymize('LastName').using FieldStrategy::RandomLastName.new('my_last_names.txt')

class RandomLastName < SelectFromFile

  # file_path - path of the names file, or nil to use the 'last_names.txt' resource.
  def initialize(file_path = nil)
    names_file = file_path || DataAnon::Utils::Resource.file('last_names.txt')
    super(names_file)
  end

end
21 | end
22 | end
23 | end
--------------------------------------------------------------------------------
/lib/strategy/field/name/random_user_name.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Generates a random user name whose length is chosen randomly between a minimum and a maximum (5 and 10 by default).
#
#    !!!ruby
#    anonymize('Username').using FieldStrategy::RandomUserName.new
#
class RandomUserName

  DEFAULT_MIN_LENGTH = 5
  DEFAULT_MAX_LENGTH = 10

  # min_length / max_length - bounds passed to the random length generator.
  def initialize(min_length = DEFAULT_MIN_LENGTH, max_length = DEFAULT_MAX_LENGTH)
    @min_length = min_length
    @max_length = max_length
  end

  # Returns a random string of the chosen length; the field is not inspected.
  def anonymize(field)
    length = DataAnon::Utils::RandomInt.generate(@min_length, @max_length)
    DataAnon::Utils::RandomString.generate(length)
  end
end
26 | end
27 | end
28 | end
--------------------------------------------------------------------------------
/lib/strategy/field/number/random_big_decimal_delta.rb:
--------------------------------------------------------------------------------
1 | require 'bigdecimal'
2 |
3 | module DataAnon
4 | module Strategy
5 | module Field
6 |
# Shifts the current value randomly within the given delta + and - and returns it as a BigDecimal. Default delta is 100.0
#
#    !!!ruby
#    anonymize('points').using FieldStrategy::RandomBigDecimalDelta.new(2.5)

class RandomBigDecimalDelta

  # delta - maximum amount added to or subtracted from the value.
  def initialize(delta = 100.0)
    @delta = delta
  end

  # Adds a random float from -delta..delta to field.value and converts the
  # sum to a BigDecimal through its string form.
  def anonymize(field)
    shifted = field.value + DataAnon::Utils::RandomFloat.generate(-@delta, +@delta)
    BigDecimal(shifted.to_s)
  end

end
23 | end
24 | end
25 | end
--------------------------------------------------------------------------------
/lib/strategy/field/number/random_float.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Generates a random float between the two given numbers. Default range is 0.0 to 100.0
#
#    !!!ruby
#    anonymize('points').using FieldStrategy::RandomFloat.new(3.0,5.0)

class RandomFloat

  # from / to - bounds handed to the random float generator.
  def initialize(from = 0.0, to = 100.0)
    @lower = from
    @upper = to
  end

  # Returns a freshly generated float; the field is not inspected.
  def anonymize(field)
    DataAnon::Utils::RandomFloat.generate(@lower, @upper)
  end

end
23 |
24 |
25 | end
26 | end
27 | end
--------------------------------------------------------------------------------
/lib/strategy/field/number/random_float_delta.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Shifts the current value randomly within the given delta + and -. Default is 10.0
#
#    !!!ruby
#    anonymize('points').using FieldStrategy::RandomFloatDelta.new(2.5)

class RandomFloatDelta

  # delta - maximum amount added to or subtracted from the value.
  def initialize(delta = 10.0)
    @delta = delta
  end

  # Returns field.value plus a random float drawn from -delta..delta.
  def anonymize(field)
    offset = DataAnon::Utils::RandomFloat.generate(-@delta, +@delta)
    field.value + offset
  end

end
21 | end
22 | end
23 | end
--------------------------------------------------------------------------------
/lib/strategy/field/number/random_integer.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Generates a random integer between the two given numbers. Default range is 0 to 100.
#
#    !!!ruby
#    anonymize('Age').using FieldStrategy::RandomInteger.new(18,70)

class RandomInteger

  # from / to - bounds handed to the random integer generator.
  def initialize(from = 0, to = 100)
    @lower = from
    @upper = to
  end

  # Returns a freshly generated integer; the field is not inspected.
  def anonymize(field)
    DataAnon::Utils::RandomInt.generate(@lower, @upper)
  end

end
23 |
24 |
25 | end
26 | end
27 | end
--------------------------------------------------------------------------------
/lib/strategy/field/number/random_integer_delta.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Shifts the current value randomly within the given delta + and -. Default is 10
#
#    !!!ruby
#    anonymize('Age').using FieldStrategy::RandomIntegerDelta.new(2)

class RandomIntegerDelta

  # delta - maximum amount added to or subtracted from the value.
  def initialize(delta = 10)
    @delta = delta
  end

  # Returns field.value plus a random integer drawn from -delta..delta.
  def anonymize(field)
    offset = DataAnon::Utils::RandomInt.generate(-@delta, @delta)
    field.value + offset
  end
end
21 |
22 | end
23 | end
24 | end
--------------------------------------------------------------------------------
/lib/strategy/field/random_boolean.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 |
# Replaces a boolean field with a randomly chosen true or false.
class RandomBoolean

  BOOL_VALUES = [true, false]

  # Returns a random element of BOOL_VALUES; the field is not inspected.
  def anonymize(field)
    choices = BOOL_VALUES
    choices.sample
  end

end
15 |
16 |
17 | end
18 | end
19 | end
--------------------------------------------------------------------------------
/lib/strategy/field/string/formatted_string_numbers.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
# Keeps the format of the string and replaces each digit in it with a random digit.
#
#    !!!ruby
#    anonymize('CreditCardNumber').using FieldStrategy::FormattedStringNumber.new

class FormattedStringNumber

  # Returns a copy of field.value in which every digit is replaced by a random
  # digit 0-9; all other characters are kept as they are.
  #
  # The result is built from local state only, so a single strategy instance
  # can be used for several records at once (e.g. from multiple threads)
  # without one call overwriting another's partial result.
  def anonymize(field)
    field.value.gsub(/\d/) { DataAnon::Utils::RandomInt.generate(0, 9).to_s }
  end

end
27 |
28 |
29 | end
30 | end
31 | end
--------------------------------------------------------------------------------
/lib/strategy/field/string/lorem_ipsum.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 | # Default anonymization strategy for `string` content. Uses default 'Lorem ipsum...' text or text supplied in strategy to generate same length string.
6 | # !!!ruby
7 | # anonymize('UserName').using FieldStrategy::LoremIpsum.new
8 | #
9 | # !!!ruby
10 | # anonymize('UserName').using FieldStrategy::LoremIpsum.new("very large string....")
11 | #
12 | # !!!ruby
13 | # anonymize('UserName').using FieldStrategy::LoremIpsum.new(File.read('my_file.txt'))
14 |
15 | class LoremIpsum
16 |
17 | DEFAULT_TEXT = <<-default
18 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed quis nulla quis ligula bibendum dignissim. Nullam elementum convallis mauris, at ultrices odio dignissim dapibus. Etiam vitae neque lorem, a luctus purus. In at diam mi, sit amet dapibus magna. Maecenas tincidunt tortor id dolor tristique dictum. Morbi pulvinar odio ut lorem gravida ac varius orci ultrices. Nulla id arcu dui, sit amet commodo augue. Curabitur elit elit, semper quis tincidunt at, auctor et tortor.
19 | Quisque ut enim arcu. Praesent orci mi, tincidunt non sodales a, blandit ac nunc. Phasellus sed erat a nibh suscipit molestie sed a augue. Aliquam pretium ultricies nibh. Sed sit amet accumsan sapien. Pellentesque urna orci, iaculis eu lacinia ac, consequat vel elit. Suspendisse aliquet tortor et urna varius non ullamcorper augue tempus. Phasellus pretium, nulla eu adipiscing viverra, purus est fermentum enim, ut fringilla ligula lectus quis est. Phasellus quis scelerisque ligula. Cras accumsan lobortis egestas. Ut quis orci sem, sed gravida orci.
20 | Vestibulum eget odio nisl, nec ornare ante. Aenean tristique, nisl eget lacinia aliquam, neque lectus lacinia enim, id ullamcorper nisl lorem vitae enim. Sed vulputate condimentum convallis. Ut viverra tincidunt arcu ac egestas. Quisque ut neque nec quam suscipit ornare a ornare est. Nulla facilisi. Mauris facilisis eleifend neque eget egestas. Vestibulum egestas dui eleifend urna pharetra a hendrerit quam sagittis. Duis ut turpis convallis diam interdum congue. In hac habitasse platea dictumst. Nulla a erat eget tortor tempor consectetur. Fusce euismod congue risus in feugiat. Sed rutrum vehicula lectus et vehicula. In porttitor malesuada sem at auctor.
21 | Maecenas lacinia placerat augue quis posuere. Cras eu augue quam, eu malesuada sem. Proin facilisis iaculis lectus, vel hendrerit nulla tristique quis. Donec risus mauris, vulputate tristique feugiat nec, imperdiet sed sapien. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Aenean vitae aliquam magna. Donec tempor, ipsum non dapibus elementum, est sem hendrerit nulla, scelerisque sollicitudin lacus mauris eu libero. Vivamus turpis justo, ullamcorper sed ullamcorper quis, tempor in elit. Sed nisl erat, laoreet at adipiscing quis, lobortis et est. Duis congue iaculis mollis. Curabitur ligula turpis, malesuada non feugiat vitae, ullamcorper non nibh. Aliquam adipiscing pellentesque leo nec molestie. Donec tempor eleifend libero, at rutrum velit semper a. Sed tincidunt dictum lorem eu egestas.
22 | Sed at iaculis risus. Nulla aliquet vulputate nulla, nec euismod sem porta quis. Aliquam erat volutpat. Sed tincidunt pharetra metus, in facilisis nunc suscipit ut. Nunc placerat vulputate sapien, elementum varius mi viverra eget. Nam hendrerit felis et arcu ultrices vehicula. Phasellus condimentum ornare orci sed placerat. Sed vel rutrum lorem. Fusce id bibendum ipsum.
23 | default
24 |
# Remembers the filler text used for anonymization; DEFAULT_TEXT is used
# when no text (nil) is supplied.
def initialize(text = nil)
  @text = text ? text : DEFAULT_TEXT
end
28 |
# Returns filler text with the same length as field.value.
#
# When the configured text is shorter than the value, the text is repeated
# until it covers the required length, so the result is not silently
# truncated. An empty configured text still yields an empty string.
def anonymize(field)
  length = field.value.length
  return @text[0, length] if @text.length >= length || @text.empty?

  # Smallest repeat count whose total length reaches `length` (ceiling division).
  repeats = (length + @text.length - 1) / @text.length
  (@text * repeats)[0, length]
end
32 |
33 | end
34 |
35 |
36 | end
37 | end
38 | end
--------------------------------------------------------------------------------
/lib/strategy/field/string/random_formatted_string.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 | # Keeping the format same it changes each digit with random digit, character with character preserving the case.
6 | #
7 | # !!!ruby
8 | # anonymize('PhoneNumber').using FieldStrategy::RandomFormattedString.new
9 | # anonymize('Email').using FieldStrategy::RandomFormattedString.new
10 |
# Field strategy that preserves the shape of a string: digits become random
# digits, lower-case ASCII letters become random lower-case letters,
# upper-case ASCII letters become random upper-case letters, and every other
# character is kept unchanged.
class RandomFormattedString

  SMALL_CHARS = "abcdefghjkmnpqrstuvwxyz"
  CAPS_CHARS = "ABCDEFGHJKLMNPQRSTUVWXYZ"

  # Returns a new string with each ASCII digit/letter of field.value
  # replaced by a random character of the same class.
  #
  # Nothing is stored on the strategy instance, so the object carries no
  # per-call state between invocations.
  def anonymize(field)
    field.value.gsub(/[\da-zA-Z]/) { |char| random_like(char) }
  end

  private

  # Picks a random replacement from the same character class as `char`.
  def random_like(char)
    case char
    when /\d/ then DataAnon::Utils::RandomInt.generate(0, 9).to_s
    when /[a-z]/ then SMALL_CHARS[rand(SMALL_CHARS.length)]
    else CAPS_CHARS[rand(CAPS_CHARS.length)]
    end
  end

end
35 |
36 |
37 | end
38 | end
39 | end
--------------------------------------------------------------------------------
/lib/strategy/field/string/random_string.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 | # Generates random string of same length.
6 | #
7 | # !!!ruby
8 | # anonymize('UserName').using FieldStrategy::RandomString.new
9 |
# Field strategy that replaces every whitespace-separated word with a random
# string of the same length.
class RandomString

  # Returns field.value with each run of non-whitespace characters replaced
  # by DataAnon::Utils::RandomString.generate(word_length).
  #
  # Whitespace is left exactly where it was (including leading, trailing and
  # repeated whitespace), so the result always has the same length as the
  # original value.
  def anonymize(field)
    field.value.gsub(/\S+/) { |word| DataAnon::Utils::RandomString.generate(word.length) }
  end
end
26 | end
27 | end
28 | end
--------------------------------------------------------------------------------
/lib/strategy/field/string/random_url.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 | # Generates a randomized URL while maintaining the structure of the original url
6 | #
7 | # !!!ruby
8 | # anonymize('fb_profile').using FieldStrategy::RandomUrl.new
9 |
# Field strategy that randomizes the word parts of a URL while keeping its
# punctuation and any "http://", "https://" or "www." tokens.
class RandomUrl

  # Builds the anonymized URL in two passes:
  # 1. every protocol / "www." token found in the value is removed from the
  #    working copy and appended to the result as-is;
  # 2. the rest is walked left to right, replacing each run of word
  #    characters with random letters of equal length and copying each
  #    non-word character through unchanged.
  def anonymize(field)
    remainder = field.value
    result = ""

    remainder.scan(/https?:\/\/|www\./).each do |prefix|
      remainder = remainder.gsub(prefix, "")
      result += prefix
    end

    cursor = 0
    until cursor >= remainder.length
      # Position of the next non-word character, or end of string if none.
      boundary = remainder.index(/\W/, cursor) || remainder.length
      segment = remainder[cursor...boundary]
      result += "#{DataAnon::Utils::RandomStringCharsOnly.generate(segment.length)}#{remainder[boundary]}"
      cursor = boundary + 1
    end

    result
  end
end
34 | end
35 | end
36 | end
37 |
--------------------------------------------------------------------------------
/lib/strategy/field/string/select_from_database.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 | # Similar to SelectFromList, except that the list of values is collected from a database table using a distinct-column query.
6 | #
7 | # !!!ruby
8 | # # values are collected using `select distinct state from customers` query connecting to specified database in connection_spec
9 | # anonymize('state').using FieldStrategy::SelectFromDatabase.new('customers','state', connection_spec)
10 |
# Field strategy that picks replacement values from the distinct values of
# one column in a source database table.
class SelectFromDatabase < SelectFromFile
  include Utils::Logging

  # Only records where to look; nothing is queried until the first
  # call to #anonymize.
  def initialize(table_name, field_name, connection_spec)
    @table_name, @field_name, @connection_spec = table_name, field_name, connection_spec
  end

  # Loads the candidate values on first use, then defers the actual
  # selection to the parent implementation.
  def anonymize(field)
    @values ||= load_distinct_values
    super
  end

  private

  # Connects to the source database and collects the distinct values of
  # the configured column.
  def load_distinct_values
    DataAnon::Utils::SourceDatabase.establish_connection @connection_spec
    source = Utils::SourceTable.create @table_name, []
    values = source.select(@field_name).distinct.collect { |record| record[@field_name]}
    logger.debug "For field strategy #{@table_name}:#{@field_name} using values #{values} "
    values
  end
end
32 | end
33 | end
34 | end
--------------------------------------------------------------------------------
/lib/strategy/field/string/select_from_file.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 | # Similar to SelectFromList; the only difference is that the list of values is read from a file. A classic use is anonymizing a states field.
6 | #
7 | # !!!ruby
8 | # anonymize('State').using FieldStrategy::SelectFromFile.new('states.txt')
9 | #
10 |
# Field strategy that picks replacement values from a file.
class SelectFromFile

  # Reads the file once and splits its content on whitespace to build the
  # list of candidate values.
  def initialize(file_path)
    @values = File.read(file_path).split
  end

  # For an Array value, returns a random sample of candidates sized by the
  # array's length; otherwise returns a single random candidate.
  def anonymize(field)
    if field.value.kind_of?(Array)
      @values.sample(field.value.length)
    else
      @values.sample
    end
  end

end
23 | end
24 | end
25 | end
--------------------------------------------------------------------------------
/lib/strategy/field/string/select_from_list.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 | # Select randomly one of the values specified.
6 | #
7 | # !!!ruby
8 | # anonymize('State').using FieldStrategy::SelectFromList.new(['New York','Georgia',...])
9 | #
10 | # !!!ruby
11 | # anonymize('NameTitle').using FieldStrategy::SelectFromList.new(['Mr','Mrs','Dr',...])
12 | #
13 |
# Field strategy that picks replacement values from an in-memory list.
class SelectFromList < SelectFromFile

  # Accepts either an Array of candidates or a single candidate, which is
  # wrapped in a one-element Array.
  def initialize(values)
    @values = values.instance_of?(Array) ? values : [values]
  end

end
21 |
22 |
23 | end
24 | end
25 | end
--------------------------------------------------------------------------------
/lib/strategy/field/string/string_template.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 | # Simple string evaluation within [DataAnon::Core::Field](#dataanon-core-field) context. Can be used for email, username anonymization.
6 | # Make sure to put the string in 'single quote' else it will get evaluated inline.
7 | #
8 | # !!!ruby
9 | # anonymize('UserName').using FieldStrategy::StringTemplate.new('user#{row_number}')
10 | #
11 | # !!!ruby
12 | # anonymize('Email').using FieldStrategy::StringTemplate.new('valid.address+#{row_number}@gmail.com')
13 | #
14 | # !!!ruby
15 | # anonymize('Email').using FieldStrategy::StringTemplate.new('useremail#{row_number}@mailinator.com')
16 |
# Field strategy that renders a string template with the field object as
# the evaluation context.
class StringTemplate

  # template is kept verbatim; interpolation happens at anonymize time.
  def initialize(template)
    @template = template
  end

  # Wraps the template in double quotes and evaluates it inside the
  # field's own binding, so `#{...}` expressions can call the field's
  # methods.
  # NOTE: this uses eval, so the template must come from a trusted source.
  def anonymize(field)
    scope = field.instance_eval { binding }
    quoted_template = '"' + @template + '"'
    eval(quoted_template, scope)
  end

end
29 |
30 |
31 | end
32 | end
33 | end
--------------------------------------------------------------------------------
/lib/strategy/field/whitelist.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module Field
4 |
5 |
# Pass-through field strategy: the value is copied without modification.
class Whitelist

  # Returns field.value unchanged.
  def anonymize(field)
    field.value
  end

end
13 |
14 |
15 | end
16 | end
17 | end
--------------------------------------------------------------------------------
/lib/strategy/mongodb/anonymize_field.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module MongoDB
# Anonymizes one field of a MongoDB document, recursing into embedded
# documents and arrays of embedded documents.
class AnonymizeField

  # field                  - the field wrapper (name, value, row_number, ar_record are read)
  # field_strategy         - strategy object, nested strategy Hash, or nil
  # anonymization_strategy - owner that provides anonymize_document and default_strategy
  def initialize(field, field_strategy, anonymization_strategy)
    @field = field
    @field_strategy = field_strategy
    @anonymization_strategy = anonymization_strategy
  end

  # Dispatches on the shape of the value: Hash -> embedded document,
  # Array of documents -> each element, anything else -> plain field.
  def anonymize
    if sub_document?
      return @anonymization_strategy.anonymize_document(@field.value, @field.row_number, @field_strategy)
    end
    return anonymize_sub_documents if sub_documents?

    anonymize_field
  end

  # Anonymizes each element of the array as its own document.
  def anonymize_sub_documents
    @field.value.map do |value|
      @anonymization_strategy.anonymize_document(value, @field.row_number, @field_strategy)
    end
  end

  # Applies the configured strategy, falling back to the owner's default
  # strategy for this field name; raises when the strategy cannot anonymize.
  def anonymize_field
    @field_strategy ||= @anonymization_strategy.default_strategy(@field.name)
    unless @field_strategy.respond_to?(:anonymize)
      raise "Improper fields strategy defined for '#{@field.name}' within document \n #{@field.ar_record}"
    end
    @field_strategy.anonymize(@field)
  end

  # True for an Array value when either the strategy is a Hash or the
  # first element is a Hash.
  def sub_documents?
    return false unless @field.value.kind_of?(Array)
    @field_strategy.kind_of?(Hash) || @field.value[0].kind_of?(Hash)
  end

  # True when the value itself is a Hash.
  def sub_document?
    @field.value.kind_of?(Hash)
  end

end
42 | end
43 | end
44 | end
--------------------------------------------------------------------------------
/lib/strategy/mongodb/blacklist.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
3 | module MongoDB
# MongoDB blacklist strategy: only the listed fields are anonymized and the
# document is updated in the source collection.
class Blacklist < DataAnon::Strategy::MongoDB::Whitelist

  def self.whitelist?
    false
  end

  # Anonymizes the document and writes it back to the source collection,
  # matching on '_id'.
  def process_record(index, document)
    anonymized = anonymize_document(document, index, @fields)
    selector = {'_id' => anonymized['_id']}
    source_collection.find(selector).update_one(anonymized)
  end

  # Mutates and returns `document`: every configured field with a non-nil
  # value is replaced by its anonymized value; other fields are untouched.
  def anonymize_document(document, index, field_strategies = {})
    field_strategies.each do |field_name, field_strategy|
      field_value = document[field_name]
      next if field_value.nil?

      field = DataAnon::Core::Field.new(field_name, field_value, index, document, @name)
      document[field.name] = AnonymizeField.new(field, field_strategy, self).anonymize
    end
    document
  end

end
27 |
28 | end
29 | end
30 | end
31 |
--------------------------------------------------------------------------------
/lib/strategy/mongodb/whitelist.rb:
--------------------------------------------------------------------------------
1 | require 'mongo'
2 |
# Reopens the driver's collection class to expose `find` under the name `all`.
class Mongo::Collection
  alias_method :all, :find
end
6 |
7 | module DataAnon
8 | module Strategy
9 | module MongoDB
# MongoDB whitelist strategy: every field of each source document is passed
# through a field strategy and the result is inserted into the destination
# collection.
class Whitelist < DataAnon::Strategy::Base

  def self.whitelist?
    true
  end

  # Declares strategies for an embedded document / array of documents by
  # evaluating the block on a fresh strategy instance and storing its
  # fields under `field`.
  def collection(field, &block)
    nested = self.class.new @source_database, @destination_database, @name, @user_strategies
    nested.process_fields(&block)
    @fields[field] = nested.fields
  end

  alias :document :collection

  # Opens a client for the given connection settings and returns the
  # collection named after this strategy.
  def mongo_collection(database)
    client_options = database[:options] || {}
    client = Mongo::Client.new(database[:mongodb_uri], client_options)
    client.database.collection(@name)
  end

  # Memoized destination collection.
  def dest_collection
    @dest_collection ||= mongo_collection(@destination_database)
  end

  # Memoized source collection.
  def source_collection
    @source_collection ||= mongo_collection(@source_database)
  end

  alias :source_table :source_collection
  alias :dest_table :dest_collection

  # Inserts the anonymized copy of the document into the destination.
  def process_record(index, document)
    dest_collection.insert_one anonymize_document(document, index, @fields)
  end

  # Builds a new Hash containing the anonymized value of every non-nil
  # field; nil-valued fields are omitted from the result.
  def anonymize_document(document, index, field_strategies = {})
    document.each_with_object({}) do |(field_name, field_value), anonymized|
      field_strategy = field_strategies[field_name] if field_strategies.kind_of?(Hash)
      next if field_value.nil?

      field = DataAnon::Core::Field.new(field_name, field_value, index, document, @name)
      anonymized[field.name] = AnonymizeField.new(field, field_strategy, self).anonymize
    end
  end

end
59 |
60 | end
61 | end
62 | end
63 |
--------------------------------------------------------------------------------
/lib/strategy/strategies.rb:
--------------------------------------------------------------------------------
1 | require 'strategy/base'
2 | require 'strategy/whitelist'
3 | require 'strategy/blacklist'
4 | require 'strategy/field/fields'
5 |
begin
  require 'mongo'
  require 'strategy/mongodb/anonymize_field'
  require 'strategy/mongodb/whitelist'
  require 'strategy/mongodb/blacklist'
rescue LoadError
  # MongoDB support is optional: the mongodb-specific strategies are skipped
  # when the mongo driver cannot be loaded.
  # NOTE(review): a LoadError raised by one of the strategy files themselves
  # is swallowed here as well - confirm that is intended.
end
14 |
--------------------------------------------------------------------------------
/lib/strategy/whitelist.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Strategy
# Relational whitelist strategy: each source record is copied to the
# destination table with every non-key field run through a field strategy.
class Whitelist < DataAnon::Strategy::Base

  def self.whitelist?
    true
  end

  # Builds the destination attributes from the source record (skipping nil
  # values and primary keys), copies the primary key values across
  # unchanged, and saves the new destination record.
  def process_record(index, record)
    anonymized_attributes = {}
    record.attributes.each do |field_name, field_value|
      next if field_value.nil? || is_primary_key?(field_name)

      field = DataAnon::Core::Field.new(field_name, field_value, index, record, @name)
      strategy = @fields[field_name] || default_strategy(field_name)
      anonymized_attributes[field_name] = strategy.anonymize(field)
    end

    dest_record = dest_table.new anonymized_attributes
    @primary_keys.each { |key| dest_record[key] = record[key] }
    dest_record.save!
  end

end
27 | end
28 | end
29 |
--------------------------------------------------------------------------------
/lib/tasks/rake_tasks.rb:
--------------------------------------------------------------------------------
1 | require 'rake'
2 | require 'rake/tasklib'
3 |
4 | module DataAnonymization
# Registers the gem's rake tasks when instantiated.
class RakeTasks
  include Rake::DSL if defined? Rake::DSL

  # Defines the :empty_dest task, which removes the sample destination
  # database file and recreates it from the schema SQL via the sqlite3 CLI.
  def initialize
    desc 'Task to build the clean empty destination database'
    task :empty_dest do
      [
        'rm sample-data/chinook-empty.sqlite',
        'sqlite3 sample-data/chinook-empty.sqlite < sample-data/chinook_schema.sql'
      ].each { |command| system command }
    end
  end

end
18 | end
19 |
20 |
--------------------------------------------------------------------------------
/lib/thor/helpers/mongodb_dsl_generator.rb:
--------------------------------------------------------------------------------
1 | require 'erb'
2 | require 'thor'
3 |
4 | module DataAnon
5 | module ThorHelpers
# Generates a whitelist DSL script for a MongoDB database by sampling the
# first document of every non-system collection.
class MongoDBDSLGenerator

  # Directory containing this file; templates are resolved relative to it.
  def self.source_root
    File.dirname(__FILE__)
  end

  # configuration_hash - connection options (the :database key is read here)
  # whitelist_patterns - Regexps for keys to whitelist; nil selects the defaults
  def initialize(configuration_hash, whitelist_patterns)
    @mongodb_uri = DataAnon::Utils::TemplateHelper.mongo_uri(configuration_hash)
    @whitelist_patterns = whitelist_patterns || [/^_/,/_at$/,/_id$/,/_type$/]
    @configuration_hash = configuration_hash
    @output = []
  end

  # Collects DSL lines for every collection and renders them through the
  # ERB template into 'mongodb_whitelist_generated.rb' in the current directory.
  def generate
    db = Mongo::Client.new(@mongodb_uri, :database => @configuration_hash[:database])
    db.collections.each do |collection|
      next if collection.name.start_with?('system.')

      @output << "\tcollection '#{collection.name}' do"
      process_document(2, collection.find({}).first)
      @output << "\tend\n"
    end

    # File.read closes the template handle; the template lives next to the
    # helpers directory, so this class's own source_root is sufficient.
    template = File.read(self.class.source_root + "/../templates/mongodb_whitelist_template.erb")
    # NOTE(review): positional trim_mode is deprecated in newer ERB releases;
    # confirm the supported Ruby versions before switching to `trim_mode: '-'`.
    erb = ERB.new(template, nil, '-')
    File.open('mongodb_whitelist_generated.rb', 'w') { |f| f.write erb.result(binding) }
  end

  # Appends one DSL line per key of `document` to @output, indented by
  # `depth` tabs. Hash values become nested `document` blocks, arrays whose
  # first element is a Hash become nested `collection` blocks, keys matching
  # a whitelist pattern are whitelisted, and everything else is anonymized.
  def process_document(depth, document)
    return if document.nil?

    indent = "\t" * depth
    document.each do |key, value|
      if value.kind_of?(Hash)
        @output << "#{indent}document '#{key}' do"
        process_document depth + 1, value
        @output << "#{indent}end"
      elsif value.kind_of?(Array) && value[0].kind_of?(Hash)
        @output << "#{indent}collection '#{key}' do"
        process_document depth + 1, value[0]
        @output << "#{indent}end"
      elsif @whitelist_patterns.any? { |pattern| key.match(pattern) }
        @output << "#{indent}whitelist '#{key}'"
      else
        @output << "#{indent}anonymize '#{key}'"
      end
    end
  end

end
64 | end
65 | end
66 |
--------------------------------------------------------------------------------
/lib/thor/helpers/rdbms_dsl_generator.rb:
--------------------------------------------------------------------------------
1 | require 'thor'
2 | require 'active_record'
3 | require 'erb'
4 |
5 | module DataAnon
6 | module ThorHelpers
# Generates a whitelist DSL script for a relational database.
class RDBMSDSLGenerator

  # Directory containing this file; templates are resolved relative to it.
  def self.source_root
    File.dirname(__FILE__)
  end

  # Connects with the given ActiveRecord configuration, lists the tables and
  # renders the whitelist template into 'rdbms_whitelist_generated.rb' in the
  # current directory. Any StandardError is reported on stdout instead of
  # being raised.
  def generate_whitelist_script(configuration_hash)
    @configuration_hash = configuration_hash
    @ar_object = ActiveRecord::Base.establish_connection(@configuration_hash)
    @tables = @ar_object.connection.tables

    # File.read closes the template handle once the content is loaded.
    template = File.read(RDBMSDSLGenerator.source_root + "/../templates/whitelist_template.erb")
    # NOTE(review): positional trim_mode is deprecated in newer ERB releases;
    # confirm the supported Ruby versions before switching to `trim_mode: '-'`.
    erb = ERB.new(template, nil, '-')

    File.open('rdbms_whitelist_generated.rb', 'w') { |f| f.write erb.result(binding) }
  rescue => e
    puts "\e[31mActiverecord was unable to establish a connection to the specified database. Please check the configuration options and try again.\e[0m"
    # Show the underlying error too; the backtrace alone does not say what failed.
    puts "#{e.class}: #{e.message}"
    puts e.backtrace
  end

end
33 | end
34 | end
35 |
36 |
37 |
--------------------------------------------------------------------------------
/lib/thor/templates/mongodb_whitelist_template.erb:
--------------------------------------------------------------------------------
1 | require 'data-anonymization'
2 | require 'mongo'
3 |
4 | DataAnon::Utils::Logging.logger.level = Logger::INFO
5 |
6 | database 'test' do
7 |
8 | strategy DataAnon::Strategy::MongoDB::Whitelist
9 | source_db <%= DataAnon::Utils::TemplateHelper.source_connection_specs_mongo @configuration_hash %>
10 | destination_db <%= DataAnon::Utils::TemplateHelper.destination_connection_specs_mongo %>
11 |
12 | <%= @output.join("\n") %>
13 |
14 | end
15 |
16 |
--------------------------------------------------------------------------------
/lib/thor/templates/whitelist_template.erb:
--------------------------------------------------------------------------------
1 | require 'data-anonymization'
2 |
3 | DataAnon::Utils::Logging.logger.level = Logger::INFO
4 |
5 | database 'Template' do
6 |
7 | strategy DataAnon::Strategy::Whitelist
8 | source_db <%= DataAnon::Utils::TemplateHelper.source_connection_specs_rdbms @configuration_hash %>
9 | destination_db <%= DataAnon::Utils::TemplateHelper.destination_connection_specs_rdbms @configuration_hash %>
10 |
11 | <% @tables.each do |table| %>
12 | table '<%= table %>' do
13 | primary_key '<%= @ar_object.connection.primary_key("#{table}").nil? ? "" : @ar_object.connection.primary_key("#{table}") %>'
14 | <%- @ar_object.connection.indexes("#{table}").each do |index| -%>
15 | whitelist '<%= index.columns.first %>'
16 | <%- end -%>
17 | end
18 | <% end %>
19 |
20 | end
21 |
22 |
--------------------------------------------------------------------------------
/lib/utils/database.rb:
--------------------------------------------------------------------------------
1 | require 'active_record'
2 | require 'composite_primary_keys'
3 | require 'logger'
4 |
5 | module DataAnon
6 | module Utils
7 |
# Abstract ActiveRecord base classes. Each one is a separate class marked
# abstract (no table of its own) that other classes can inherit from.

# Abstract base for temporary connections.
class TempDatabase < ActiveRecord::Base
  self.abstract_class = true
end

# Abstract base used when referential integrity is disabled.
class DisableReferentialIntegrityDatabase < ActiveRecord::Base
  self.abstract_class = true
end

# Abstract base for models bound to the source database.
class SourceDatabase < ActiveRecord::Base
  self.abstract_class = true
end

# Abstract base for models bound to the destination database.
class DestinationDatabase < ActiveRecord::Base
  self.abstract_class = true
end
23 |
# Factory for model classes that are created on demand, one per table,
# as constants under a given database base class.
class BaseTable

  # Returns the model class for `table_name` under `database`, creating and
  # registering it on first use.
  #
  # The constant name is the table name downcased then capitalized. A new
  # class gets its table name, its primary key configuration (composite for
  # several keys, single for one, nil for none) and a disabled inheritance
  # column.
  def self.create_table(database, table_name, primary_keys = [])
    const_name = table_name.to_s.downcase.capitalize
    if database.const_defined?(const_name, false)
      return database.const_get(const_name, false)
    end

    model = Class.new(database) do
      self.table_name = table_name
      case primary_keys.length
      when 0 then self.primary_key = nil
      when 1 then self.primary_key = primary_keys[0]
      else self.primary_keys = primary_keys
      end
      self.inheritance_column = :_type_disabled
    end
    database.const_set(const_name, model)
  end

end
40 |
# Factory for table models attached to SourceDatabase.
class SourceTable < BaseTable

  class << self
    # Returns (creating if needed) the source-side model for `table_name`.
    def create(table_name, primary_key = [])
      create_table(SourceDatabase, table_name, primary_key)
    end
  end

end
48 |
# Factory for table models attached to DestinationDatabase.
class DestinationTable < BaseTable

  class << self
    # Returns (creating if needed) the destination-side model for `table_name`.
    def create(table_name, primary_key = [])
      create_table(DestinationDatabase, table_name, primary_key)
    end
  end

end
56 |
57 | end
58 | end
--------------------------------------------------------------------------------
/lib/utils/geojson_parser.rb:
--------------------------------------------------------------------------------
1 | require 'rgeo/geo_json'
2 |
3 | module DataAnon
4 | module Utils
# Extracts one property from every line of a file, where each line is
# decoded with RGeo::GeoJSON.
class GeojsonParser

  class << self
    # Each helper parses `file_path` and returns the list of values of the
    # named property, one per line of the file.
    def address(file_path)
      new(file_path).parse 'address'
    end

    def zipcode(file_path)
      new(file_path).parse 'postcode'
    end

    def province(file_path)
      new(file_path).parse 'province'
    end

    def city(file_path)
      new(file_path).parse 'city'
    end

    def country(file_path)
      new(file_path).parse 'country'
    end
  end

  # Reads the whole file and keeps its lines (split on "\n").
  def initialize(file_path)
    @places = File.read(file_path).split(/\n/)
  end

  # Decodes every line and returns the `property` entry of each decoded
  # object, in file order.
  def parse(property)
    @places.map do |loc|
      RGeo::GeoJSON.decode(loc, :json_parser => :json)[property]
    end
  end
end
41 | end
42 | end
--------------------------------------------------------------------------------
/lib/utils/logging.rb:
--------------------------------------------------------------------------------
1 | require 'active_record'
2 | require 'logger'
3 |
4 | module DataAnon
5 | module Utils
# Mixin giving including classes access to one shared logger, stored in a
# class variable of this module.
module Logging

  # Returns the shared logger, creating a STDOUT logger (and installing it
  # through #logger=) on first use.
  def logger
    return @@utils_logger if defined?(@@utils_logger) && @@utils_logger

    self.logger = Logger.new(STDOUT)
  end

  # Replaces the shared logger and also assigns it to ActiveRecord::Base.
  def logger=(logger)
    @@utils_logger = logger
    ActiveRecord::Base.logger = logger
    @@utils_logger
  end

end
19 | end
20 | end
21 |
--------------------------------------------------------------------------------
/lib/utils/parallel_progress_bar.rb:
--------------------------------------------------------------------------------
1 | require 'powerbar'
2 |
3 | module DataAnon
4 | module Utils
5 |
# Progress reporter that writes log lines instead of drawing a bar.
class ParallelProgressBar < ProgressBar
  include Utils::Logging

  # Stores the table name and the total; the parent initializer is not called.
  def initialize(table_name, total)
    @total = total
    @table_name = table_name
  end

  protected

  # Logs "<table> [ index/total ] <suffix>", where the suffix marks the
  # first index as STARTED and the last as COMPLETE.
  def show_progress(index)
    suffix =
      if started(index)
        "STARTED"
      elsif complete(index)
        "COMPLETE"
      else
        ""
      end
    logger.info("%-30s [ %7d/%-7d ] %s" % [@table_name, index, @total, suffix])
  end

end
22 |
23 | end
24 | end
--------------------------------------------------------------------------------
/lib/utils/progress_bar.rb:
--------------------------------------------------------------------------------
1 | require 'powerbar'
2 |
3 | module DataAnon
4 | module Utils
5 |
# Console progress bar for a table, drawn with PowerBar unless the
# `show_progress` environment variable is set to "false".
class ProgressBar

  # Stores the table name and total; creates and configures the PowerBar
  # only when progress display is enabled.
  def initialize(table_name, total)
    @total = total
    @table_name = table_name
    return unless show_progress_env

    @power_bar = PowerBar.new
    apply_power_bar_settings
  end

  # Configures the bar template, the colours and the output stream ($stderr).
  def apply_power_bar_settings
    @power_bar.settings.tty.finite.template.main = \
      "${} ${ }\e[0m${/s} \e[33;1m${%} " +
      "\e[36;1m${}\e[31;1m${ ETA: }"
    @power_bar.settings.tty.finite.template.padchar = "\e[0m\u2589"
    @power_bar.settings.tty.finite.template.barchar = "\e[34;1m\u2589"
    @power_bar.settings.tty.finite.template.exit = "\e[?25h\e[0m" # clean up after us
    @power_bar.settings.tty.finite.template.close = "\e[?25h\e[0m\n" # clean up after us
    @power_bar.settings.tty.finite.output = Proc.new { |s| $stderr.print s }
  end

  # Redraws the bar for `index` when that index is due for display.
  def show(index)
    show_progress(index) if show_progress?(index)
  end

  # Closes the bar if one was created.
  def close
    return unless @power_bar

    @power_bar.close
  end

  protected

  # Display is due at the first index, every 1000th index and the last one,
  # and only when progress display is enabled.
  def show_progress?(index)
    return false unless show_progress_env

    started(index) || regular_interval(index) || complete(index)
  end

  # False only when ENV['show_progress'] is exactly "false".
  def show_progress_env
    ENV['show_progress'] != "false"
  end

  # Pauses briefly, then renders the bar with the current counter.
  def show_progress(counter)
    sleep 0.1
    msg = "%-20s [%6d/%-6d]" % [@table_name, counter, @total]
    @power_bar.show({:msg => msg, :done => counter, :total => @total})
  end

  def complete(index)
    index == @total
  end

  def regular_interval(index)
    (index % 1000).zero?
  end

  def started(index)
    index == 1
  end

end
66 |
67 | end
68 | end
--------------------------------------------------------------------------------
/lib/utils/random_float.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Utils
# Random float helper.
class RandomFloat

  # Returns 0 when both bounds are zero; otherwise a random fraction of the
  # span (max - min) added to min.
  def self.generate(min, max)
    if min == 0.0 && max == 0.0
      0
    else
      span = max - min
      Random.new.rand * span + min
    end
  end

end
11 | end
12 | end
--------------------------------------------------------------------------------
/lib/utils/random_int.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Utils
# Random integer helper.
class RandomInt

  # Returns 0 when both bounds are zero; otherwise a random value from the
  # inclusive range min..max.
  def self.generate(min, max)
    if min == 0 && max == 0
      0
    else
      Random.new.rand(min..max)
    end
  end
end
10 | end
11 | end
12 |
--------------------------------------------------------------------------------
/lib/utils/random_string.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Utils
# Random string helper.
class RandomString

  RANDOM_STRING_CHARS = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ0123456789'

  # Builds a string of `length` characters, each picked at random from
  # `chars`. When no length is given, a random length from 5 up to (but not
  # including) 15 is used.
  def self.generate(length = nil, chars = RANDOM_STRING_CHARS)
    length = Random.new.rand(5...15) unless length
    length.times.inject('') { |built, _| built << chars[rand(chars.size)] }
  end
end
14 | end
15 | end
--------------------------------------------------------------------------------
/lib/utils/random_string_chars_only.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Utils
# Random string helper restricted to letters (no digits).
class RandomStringCharsOnly

  CHARS = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'

  class << self
    # Delegates to RandomString.generate using the letters-only alphabet.
    def generate(length = nil)
      RandomString.generate(length, CHARS)
    end
  end
end
12 | end
--------------------------------------------------------------------------------
/lib/utils/resource.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Utils
3 |
# Locates files shipped with the project.
class Resource

  # Path of the project root: two directories above this file, with a
  # trailing slash.
  def self.project_home
    File.join(File.dirname(__FILE__), '../../')
  end

  # Path of `file_name` inside the project's resources directory.
  def self.file(file_name)
    "#{project_home}resources/#{file_name}"
  end
end
15 |
16 | end
17 | end
--------------------------------------------------------------------------------
/lib/utils/template_helper.rb:
--------------------------------------------------------------------------------
1 | module DataAnon
2 | module Utils
# Helpers that render connection settings as Ruby source fragments for the
# generated DSL scripts.
class TemplateHelper

  # Renders the String and Integer entries of config_hash as
  # ":key => 'value'" / ":key => value", joined by ", ".
  # Entries with nil or any other kind of value are left out entirely, so
  # the output never contains empty ", ," segments.
  def self.source_connection_specs_rdbms(config_hash)
    config_hash.keys.map { |key|
      value = config_hash[key]
      case value
      when String then ":#{key} => '#{value}'"
      when Integer then ":#{key} => #{value}"
      end
    }.compact.join(', ')
  end

  # Renders every key of config_hash with an empty string value, as a
  # skeleton for the user to fill in.
  def self.destination_connection_specs_rdbms(config_hash)
    config_hash.keys.collect { |key|
      ":#{key} => ''"
    }.join ', '
  end

  # Renders the mongodb_uri / database pair for the source connection.
  def self.source_connection_specs_mongo(config_hash)
    ":mongodb_uri => '#{self.mongo_uri config_hash}', :database => '#{config_hash[:database]}'"
  end

  # Renders an empty mongodb_uri / database pair for the destination.
  def self.destination_connection_specs_mongo
    ":mongodb_uri => '', :database => ''"
  end

  # Builds "mongodb://[user:password@]host[:port]/database".
  #
  # The user name is read from :username (falling back to :user), matching
  # the key the credentials are actually stored under, and the credentials
  # are placed before the host as the connection string format requires.
  # NOTE(review): credentials are inserted verbatim; values containing
  # reserved characters would need percent-encoding.
  def self.mongo_uri(config_hash)
    host = config_hash[:host]
    port = config_hash[:port]
    address = port.nil? ? "#{host}" : "#{host}:#{port}"
    user = config_hash[:username] || config_hash[:user]
    return "mongodb://#{address}/#{config_hash[:database]}" if user.nil?

    credentials = "#{user}:#{config_hash[:password]}"
    "mongodb://#{credentials}@#{address}/#{config_hash[:database]}"
  end
end
45 | end
46 | end
--------------------------------------------------------------------------------
/lib/version.rb:
--------------------------------------------------------------------------------
1 | module DataAnonymization
2 | VERSION = '0.8.9'
3 | end
4 |
--------------------------------------------------------------------------------
/resources/first_names.txt:
--------------------------------------------------------------------------------
1 | Mozella
2 | Corie
3 | Pamela
4 | Rivka
5 | Zonia
6 | Etta
7 | Shenika
8 | Shakira
9 | Ana
10 | Asa
11 | Kimbery
12 | Manual
13 | Eleanore
14 | Shalon
15 | Catherin
16 | Michaela
17 | Danette
18 | Ha
19 | Daniele
20 | Treena
21 | Phil
22 | Jaunita
23 | Audrea
24 | Milton
25 | Waylon
26 | Vinnie
27 | Letitia
28 | Clelia
29 | Bennie
30 | Kimiko
31 | Oleta
32 | Walter
33 | Raquel
34 | Lizbeth
35 | Rosella
36 | Corrine
37 | Lorine
38 | Ethyl
39 | Rosalina
40 | Freddie
41 | Jamika
42 | Hugo
43 | Patsy
44 | Karla
45 | Chery
46 | Beckie
47 | Melonie
48 | Nelia
49 | Hien
50 | Luke
51 | Leola
52 | Flora
53 | Theodore
54 | Genny
55 | Wilfredo
56 | Robin
57 | Shera
58 | Rudy
59 | Kai
60 | Lelia
61 | Young
62 | Steffanie
63 | Shena
64 | Foster
65 | Bradford
66 | Georgetta
67 | Cherelle
68 | Daphne
69 | Terrence
70 | Obdulia
71 | Deloris
72 | Garrett
73 | Kristian
74 | Ashlea
75 | Maribel
76 | Kyra
77 | Jerrold
78 | Tajuana
79 | Ilda
80 | Faustina
81 | Tayna
82 | Ludivina
83 | Hyun
84 | Edris
85 | Loan
86 | Christena
87 | Reynaldo
88 | Andreas
89 | Danita
90 | Lesli
91 | Chantay
92 | Season
93 | Mellisa
94 | Celinda
95 | Angla
96 | Sharlene
97 | Maritza
98 | Gwyn
99 | Floy
100 | Derrick
101 | Letisha
102 | Gino
103 | Kassie
104 | Alysa
105 | Jacob
106 | Shemika
107 | Ivette
108 | Celestina
109 | Merissa
110 | Kati
111 | Carlotta
112 | Shala
113 | Dewitt
114 | Leo
115 | Alberta
116 | Collen
117 | Hue
118 | Helga
119 | Danna
120 | Vanna
121 | Joy
122 | Sharonda
123 | Reid
124 | Aundrea
125 | Gabrielle
126 | Karyn
127 | Rashida
128 | Benita
129 | Margherita
130 | Yan
131 | Ling
132 | Melda
133 | Gerda
134 | Flor
135 | Ferne
136 | Dorinda
137 | Ella
138 | Cornelius
139 | Malisa
140 | Ned
141 | Odelia
142 | Era
143 | Brittany
144 | Babara
145 | Shelby
146 | Myesha
147 | Sanjuana
148 | Margarite
149 | Roosevelt
150 | Bette
151 | Roger
152 | Lawanda
153 | Alan
154 | Shiloh
155 | Calvin
156 | Bell
157 | Merlin
158 | Louis
159 | Branda
160 | Magen
161 | Augustina
162 | Caroyln
163 | Milan
164 | Ardella
165 | Adrianna
166 | Justine
167 | Rosamond
168 | Arianne
169 | Cortez
170 | Fredrick
171 | Evelyne
172 | Christina
173 | Tania
174 | Loretta
175 | Franchesca
176 | Jules
177 | Fernanda
178 | Kaitlin
179 | Melva
180 | Leeanna
181 | Nathalie
182 | Fermin
183 | Evelia
184 | Lucius
185 | Kit
186 | Barabara
187 | Paz
188 | Han
189 | Shantay
190 | Tyson
191 | Debroah
192 | Angle
193 | Jodie
194 | Bettyann
195 | Ocie
196 | Sidney
197 | Delphia
198 | Monet
199 | Bess
200 | George
201 | Jamaal
202 | Alix
203 | Louise
204 | Aurora
205 | Tommye
206 | Cornelia
207 | Michele
208 | Isa
209 | Long
210 | Cheree
211 | Ofelia
212 | Sonya
213 | Reita
214 | Mayme
215 | Shila
216 | Robyn
217 | Zana
218 | Rolf
219 | Elise
220 | Whitley
221 | Leota
222 | Elizebeth
223 | Emilie
224 | Joellen
225 | Yasmin
226 | Emogene
227 | Rosalinda
228 | Rosalind
229 | Margarette
230 | Omar
231 | Earl
232 | Irma
233 | Tawanna
234 | Ivy
235 | Burton
236 | Patience
237 | Nelda
238 | Emery
239 | Maryrose
240 | Carri
241 | Dominique
242 | Essie
243 | Ria
244 | Corinne
245 | Zulma
246 | Terresa
247 | Everett
248 | Petra
249 | Phyllis
250 | Dyan
251 | Machelle
252 | Willian
253 | Cruz
254 | Adah
255 | Ashlyn
256 | Verena
257 | Briana
258 | Sherill
259 | Adina
260 | Chi
261 | Domenic
262 | Kirby
263 | Gracia
264 | Rickie
265 | Charity
266 | Terina
267 | Rutha
268 | Jeramy
269 | Tempie
270 | Emerald
271 | Candice
272 | Marcelina
273 | Luis
274 | Laurinda
275 | Kenia
276 | Lyman
277 | Merle
278 | Tonja
279 | Valrie
280 | Julietta
281 | Evelyn
282 | Angelique
283 | Shirlene
284 | Amada
285 | Bernice
286 | Kimberlee
287 | Tomasa
288 | Min
289 | Joane
290 | Kathleen
291 | Jessika
292 | Mamie
293 | Tonia
294 | Anjanette
295 | Hassan
296 | Darby
297 | Lyn
298 | Lloyd
299 | Helena
300 | Tashina
301 | Suzanne
302 | Sherley
303 | Katherina
304 | Shaniqua
305 | Dolly
306 | Kara
307 | Gwendolyn
308 | Noah
309 | Chastity
310 | Marlyn
311 | Veronique
312 | Donny
313 | Lahoma
314 | Nathan
315 | Kristen
316 | Adelina
317 | Kaleigh
318 | Myrta
319 | Carmina
320 | Bryan
321 | Shamika
322 | Wilmer
323 | Arletta
324 | Bulah
325 | Carl
326 | Marian
327 | Cyndi
328 | Wen
329 | Melony
330 | Johana
331 | Iraida
332 | Marc
333 | Leighann
334 | Elenora
335 | Golda
336 | Donovan
337 | Annika
338 | Lavinia
339 | Delena
340 | Pattie
341 | Carmelita
342 | Katherin
343 | Deidre
344 | Andria
345 | Silvana
346 | Bianca
347 | Dayna
348 | Milo
349 | Ricky
350 | Caterina
351 | Valery
352 | Gisele
353 | Maren
354 | Tena
355 | Lexie
356 | Tabatha
357 | Myrna
358 | Kizzie
359 | Particia
360 | Gena
361 | Ming
362 | Allen
363 | Lauran
364 | Virgie
365 | Myra
366 | Jim
367 | Arlena
368 | Marvel
369 | Roseanna
370 | Otto
371 | Pearlene
372 | Katrice
373 | Kecia
374 | Monte
375 | Matilde
376 | Tamisha
377 | Bethany
378 | Elbert
379 | Danae
380 | Shane
381 | Hank
382 | Marcia
383 | Neoma
384 | Edwina
385 | Anita
386 | Sharilyn
387 | Emely
388 | Christa
389 | Tressa
390 | Idalia
391 | Franklin
392 | Iesha
393 | Kyong
394 | Janella
395 | Lili
396 | Belen
397 | Arnetta
398 | Christiana
399 | Verda
400 | Suzann
401 | Carola
402 | Katharina
403 | Molly
404 | Josefine
405 | Danika
406 | Shirely
407 | Cinda
408 | Marla
409 | Jonah
410 | Tom
411 | Zenobia
412 | Katheleen
413 | Jacklyn
414 | Beryl
415 | Shan
416 | Jeannie
417 | Saturnina
418 | Ellen
419 | Jarod
420 | Kelsi
421 | Freida
422 | Hal
423 | Merri
424 | Nia
425 | Amee
426 | Illa
427 | Sherrill
428 | Elene
429 | Tomi
430 | Amberly
431 | Forrest
432 | Corinna
433 | Marleen
434 | Clarissa
435 | Dee
436 | Lucy
437 | Carlo
438 | Erma
439 | Curt
440 | Lavada
441 | Shyla
442 | Maia
443 | Margorie
444 | Pricilla
445 | Abby
446 | Catalina
447 | Mui
448 | Ellamae
449 | Everette
450 | Jeremiah
451 | Pamula
452 | Malena
453 | Golden
454 | Elza
455 | Joi
456 | Frankie
457 | Noemi
458 | Josefina
459 | Rubye
460 | Shirly
461 | Mika
462 | Shira
463 | Sharita
464 | Lucile
465 | Anthony
466 | Thomasine
467 | Angelia
468 | Kizzy
469 | Mona
470 | Jaye
471 | Wilda
472 | Aaron
473 | Ervin
474 | Karleen
475 | Danuta
476 | Keneth
477 | Fonda
478 | Walton
479 | Wanetta
480 | Charlott
481 | Elvina
482 | Marin
483 | Jessi
484 | Alfred
485 | Darin
486 | Chloe
487 | Ardelle
488 | Madlyn
489 | Terence
490 | Concetta
491 | Debbi
492 | Teddy
493 | Leopoldo
494 | Charla
495 | Kristin
496 | Emmanuel
497 | Porsche
498 | Kiesha
499 | Ruth
500 | Kasey
--------------------------------------------------------------------------------
/resources/last_names.txt:
--------------------------------------------------------------------------------
1 | Kunst
2 | Higuera
3 | Suire
4 | Cozad
5 | Verner
6 | Paik
7 | Gatton
8 | Leitner
9 | Confer
10 | Kwiecien
11 | Rempel
12 | Mccolley
13 | Bjork
14 | Dudney
15 | Mccartney
16 | Cargo
17 | Gonzalas
18 | Genest
19 | Lembo
20 | Fraga
21 | Frisbie
22 | Hilger
23 | Horrocks
24 | Nale
25 | Buescher
26 | Creegan
27 | Schreiber
28 | Deputy
29 | States
30 | Abee
31 | Ciulla
32 | Macha
33 | Giddens
34 | Traverso
35 | Hassen
36 | Whiteley
37 | Hagen
38 | Kovar
39 | Rothrock
40 | Meyerson
41 | Maddy
42 | Neiman
43 | Villafane
44 | Addison
45 | Herzog
46 | Driggers
47 | Leverett
48 | Schwebach
49 | Seiber
50 | Okelley
51 | Donahue
52 | Crumley
53 | Ridenhour
54 | Witty
55 | Kitzman
56 | Icenhour
57 | Giorgio
58 | Hargett
59 | Wolters
60 | Medrano
61 | Hake
62 | Boggess
63 | Lavallie
64 | Bassham
65 | Powers
66 | Watterson
67 | Reedy
68 | Heidel
69 | Quinney
70 | Stoker
71 | Hatfield
72 | Krall
73 | Ivey
74 | Slaybaugh
75 | Marksberry
76 | Delucia
77 | Vess
78 | Vanhoose
79 | Noe
80 | Dillow
81 | Gabel
82 | Alcaraz
83 | Fannin
84 | Stradley
85 | Bushnell
86 | Mccleskey
87 | Rising
88 | Rudd
89 | Jourdan
90 | Mcarthur
91 | Casados
92 | Karim
93 | Delong
94 | Szabo
95 | Tedeschi
96 | Mcdermott
97 | Leber
98 | Duhart
99 | Heinz
100 | Lefler
101 | Barajas
102 | Tuohy
103 | Dorfman
104 | Bolz
105 | Heitzman
106 | Ingham
107 | Chaplin
108 | Grabert
109 | Sonntag
110 | Gathers
111 | Carasco
112 | Kohut
113 | Pereyra
114 | Mudd
115 | Gonce
116 | Eskridge
117 | Orum
118 | Lyles
119 | Freese
120 | Casselman
121 | Strachan
122 | Jorge
123 | Chasteen
124 | Macky
125 | Vuong
126 | Stoecker
127 | Meehan
128 | Caroll
129 | Eiler
130 | Vansant
131 | Steidl
132 | Devitt
133 | Cromwell
134 | Wilcox
135 | Kring
136 | Llanes
137 | Abadie
138 | Korman
139 | Otts
140 | Lapp
141 | Flemming
142 | Wadkins
143 | Brill
144 | Penna
145 | Eckler
146 | Sok
147 | Mazzotta
148 | Nalley
149 | Mclamb
150 | Wheatley
151 | Musgrave
152 | Pasternak
153 | Rameau
154 | Peters
155 | Hiser
156 | Guynn
157 | Guyette
158 | Gaulke
159 | Cavanaugh
160 | Decaro
161 | Terpstra
162 | Sevigny
163 | Gabaldon
164 | Figgins
165 | Duerr
166 | Burghardt
167 | Ackley
168 | Hofmeister
169 | Gibbs
170 | Bobo
171 | Seifried
172 | Hunger
173 | Fraley
174 | Moffatt
175 | Osbourn
176 | Mcglothlin
177 | Shriner
178 | Smead
179 | Mercure
180 | Whitehead
181 | Salvador
182 | Gravitt
183 | Marko
184 | Droz
185 | Lykes
186 | Reys
187 | Dick
188 | Gilbertson
189 | Flanagan
190 | Kroh
191 | Lafortune
192 | Greear
193 | Moloney
194 | Shockey
195 | Bakos
196 | Esper
197 | Darcy
198 | Hawthorn
199 | Bones
200 | Iacovelli
201 | Hurn
202 | Harries
203 | Lines
204 | Blind
205 | Bainter
206 | Woolbright
207 | Klumpp
208 | Rehberg
209 | Manrique
210 | Hu
211 | Case
212 | Bosworth
213 | Chesnut
214 | Santora
215 | Key
216 | Valerio
217 | Cupps
218 | Mak
219 | Loya
220 | Larabee
221 | Mckie
222 | Clukey
223 | Belton
224 | Roberie
225 | Guinyard
226 | Werts
227 | Chaffin
228 | Tourville
229 | Ridout
230 | Lichtenstein
231 | Penrod
232 | Houk
233 | Facey
234 | Redner
235 | Ritzman
236 | Konkel
237 | Pogue
238 | Rother
239 | Eilers
240 | Tallmadge
241 | Fox
242 | Cotten
243 | Mckown
244 | Kestner
245 | Sienkiewicz
246 | Baier
247 | Tjaden
248 | Steely
249 | Russom
250 | Nunemaker
251 | Motz
252 | Vibbert
253 | Bostrom
254 | Mcclard
255 | Cantara
256 | Almonte
257 | Keating
258 | Gerald
259 | Bloodworth
260 | Rowlette
261 | Carrico
262 | Clem
263 | Croce
264 | Melia
265 | Marlar
266 | Callihan
267 | Conyers
268 | Burgos
269 | Mcadoo
270 | Hoppe
271 | Myers
272 | German
273 | Lush
274 | Storck
275 | Brindley
276 | Marsch
277 | Hendershott
278 | Mickey
279 | Miron
280 | Points
281 | Webber
282 | Yerian
283 | Chaudhry
284 | Nottingham
285 | Nicoll
286 | Stillings
287 | Babst
288 | Wein
289 | Osbourne
290 | Schier
291 | Ragsdale
292 | Ackerley
293 | Dedmon
294 | Lorusso
295 | Brugger
296 | Mun
297 | Hymel
298 | Zehr
299 | Caudillo
300 | Mcmath
301 | Badilla
302 | Menjivar
303 | Towell
304 | Croom
305 | Ritenour
306 | Angles
307 | Robson
308 | Medlin
309 | Waldrep
310 | Searls
311 | Ruley
312 | Lamberton
313 | Francoeur
314 | Damron
315 | Wareham
316 | Vigliotti
317 | Huitt
318 | Febres
319 | Sipe
320 | Grover
321 | Moschella
322 | Kyker
323 | Titcomb
324 | Kreitzer
325 | Rademacher
326 | Boxx
327 | Harger
328 | Pabon
329 | Nunez
330 | Montrose
331 | Banta
332 | Spaeth
333 | Ferrel
334 | Mcmanis
335 | Arceo
336 | Lemmons
337 | Edgemon
338 | Bremer
339 | Busbee
340 | Hight
341 | Honda
342 | Torrez
343 | Hanford
344 | Poteete
345 | Courchesne
346 | Breeding
347 | Kincannon
348 | Pirtle
349 | Bundick
350 | Marquardt
351 | Flannigan
352 | Raynor
353 | Sizemore
354 | Amador
355 | Teeter
356 | Swicegood
357 | Saleem
358 | Rolon
359 | Bomar
360 | Rodi
361 | Wierenga
362 | Bednar
363 | Music
364 | Smoot
365 | Latham
366 | Alex
367 | Luper
368 | Merriman
369 | Ackles
370 | Seawell
371 | Crider
372 | Stennett
373 | Westgate
374 | Sharpton
375 | Ring
376 | Pinkowski
377 | Feucht
378 | Pillai
379 | Ballantyne
380 | Railey
381 | Shiffer
382 | Tsui
383 | Lucero
384 | Wilmer
385 | Weight
386 | Romans
387 | Cora
388 | Hummer
389 | Foskey
390 | Dangerfield
391 | Konrad
392 | Raulerson
393 | Bernard
394 | Kirker
395 | Woodside
396 | Gwaltney
397 | Berber
398 | Mandel
399 | Voyles
400 | Hohman
401 | Vitale
402 | Nichol
403 | Lurie
404 | Landrum
405 | Patten
406 | Jamerson
407 | Radosevich
408 | Hunsicker
409 | Boser
410 | Roan
411 | Cervantez
412 | Holland
413 | Heinemann
414 | Meacham
415 | Boozer
416 | Edmund
417 | Hurlburt
418 | Tow
419 | Thelen
420 | Dora
421 | Mercado
422 | Gatlin
423 | Tye
424 | Hahne
425 | Highsmith
426 | Giampaolo
427 | Gatto
428 | Donaghy
429 | Keef
430 | Ma
431 | Beckett
432 | Urso
433 | Edge
434 | Gebhard
435 | Shadduck
436 | Schade
437 | Crumble
438 | Ellender
439 | Tsang
440 | Bramlett
441 | Rhames
442 | Dunmire
443 | Oney
444 | Southward
445 | Dunnington
446 | Fiorenza
447 | Grable
448 | Reiff
449 | Sieck
450 | Kersh
451 | Mutter
452 | Hiner
453 | Wadsworth
454 | Aceuedo
455 | Chaloux
456 | Sthilaire
457 | Goble
458 | Waldron
459 | Cranford
460 | Tumlinson
461 | Bourgeois
462 | Krenz
463 | Blystone
464 | Grissom
465 | Payan
466 | Mullet
467 | Lorenzana
468 | Buttars
469 | Stoltenberg
470 | Chiarello
471 | Aleman
472 | Schnur
473 | Flack
474 | Lovering
475 | Sak
476 | Havard
477 | Astorga
478 | Thames
479 | Mcclurg
480 | Tenaglia
481 | Crenshaw
482 | Strickland
483 | Hagler
484 | Newcomb
485 | Schenkel
486 | Pilkington
487 | Boze
488 | Gowers
489 | Chism
490 | Urena
491 | Endo
492 | Goldstein
493 | Beason
494 | Hartford
495 | Redden
496 | Regan
497 | Linsley
498 | Mcgrew
499 | Kitchens
500 | Lowder
--------------------------------------------------------------------------------
/sample-data/chinook.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thoughtbot/data-anonymization/78de64c1638723ca6c4e49a4d698aeaec505ba89/sample-data/chinook.sqlite
--------------------------------------------------------------------------------
/sample-data/chinook_schema.sql:
--------------------------------------------------------------------------------
1 |
2 | /*******************************************************************************
3 | Chinook Database - Version 1.3
4 | Script: Chinook_Sqlite.sql
5 | Description: Creates and populates the Chinook database.
6 | DB Server: Sqlite
7 | Author: Luis Rocha
8 | License: http://www.codeplex.com/ChinookDatabase/license
9 | ********************************************************************************/
10 |
11 | /*******************************************************************************
12 | Drop Foreign Keys Constraints
13 | ********************************************************************************/
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 | /*******************************************************************************
38 | Drop Tables
39 | ********************************************************************************/
40 | DROP TABLE IF EXISTS [Album];
41 |
42 | DROP TABLE IF EXISTS [Artist];
43 |
44 | DROP TABLE IF EXISTS [Customer];
45 |
46 | DROP TABLE IF EXISTS [Employee];
47 |
48 | DROP TABLE IF EXISTS [Genre];
49 |
50 | DROP TABLE IF EXISTS [Invoice];
51 |
52 | DROP TABLE IF EXISTS [InvoiceLine];
53 |
54 | DROP TABLE IF EXISTS [MediaType];
55 |
56 | DROP TABLE IF EXISTS [Playlist];
57 |
58 | DROP TABLE IF EXISTS [PlaylistTrack];
59 |
60 | DROP TABLE IF EXISTS [Track];
61 |
62 |
63 | /*******************************************************************************
64 | Create Tables
65 | ********************************************************************************/
66 | CREATE TABLE [Album]
67 | (
68 | [AlbumId] INTEGER NOT NULL,
69 | [Title] NVARCHAR(160) NOT NULL,
70 | [ArtistId] INTEGER NOT NULL,
71 | CONSTRAINT [PK_Album] PRIMARY KEY ([AlbumId]),
72 | FOREIGN KEY ([ArtistId]) REFERENCES [Artist] ([ArtistId])
73 | ON DELETE NO ACTION ON UPDATE NO ACTION
74 | );
75 |
76 | CREATE TABLE [Artist]
77 | (
78 | [ArtistId] INTEGER NOT NULL,
79 | [Name] NVARCHAR(120),
80 | CONSTRAINT [PK_Artist] PRIMARY KEY ([ArtistId])
81 | );
82 |
83 | CREATE TABLE [Customer]
84 | (
85 | [CustomerId] INTEGER NOT NULL,
86 | [FirstName] NVARCHAR(40) NOT NULL,
87 | [LastName] NVARCHAR(20) NOT NULL,
88 | [Company] NVARCHAR(80),
89 | [Address] NVARCHAR(70),
90 | [City] NVARCHAR(40),
91 | [State] NVARCHAR(40),
92 | [Country] NVARCHAR(40),
93 | [PostalCode] NVARCHAR(10),
94 | [Phone] NVARCHAR(24),
95 | [Fax] NVARCHAR(24),
96 | [Email] NVARCHAR(60) NOT NULL,
97 | [SupportRepId] INTEGER,
98 | CONSTRAINT [PK_Customer] PRIMARY KEY ([CustomerId]),
99 | FOREIGN KEY ([SupportRepId]) REFERENCES [Employee] ([EmployeeId])
100 | ON DELETE NO ACTION ON UPDATE NO ACTION
101 | );
102 |
103 | CREATE TABLE [Employee]
104 | (
105 | [EmployeeId] INTEGER NOT NULL,
106 | [LastName] NVARCHAR(20) NOT NULL,
107 | [FirstName] NVARCHAR(20) NOT NULL,
108 | [Title] NVARCHAR(30),
109 | [ReportsTo] INTEGER,
110 | [BirthDate] DATETIME,
111 | [HireDate] DATETIME,
112 | [Address] NVARCHAR(70),
113 | [City] NVARCHAR(40),
114 | [State] NVARCHAR(40),
115 | [Country] NVARCHAR(40),
116 | [PostalCode] NVARCHAR(10),
117 | [Phone] NVARCHAR(24),
118 | [Fax] NVARCHAR(24),
119 | [Email] NVARCHAR(60),
120 | CONSTRAINT [PK_Employee] PRIMARY KEY ([EmployeeId]),
121 | FOREIGN KEY ([ReportsTo]) REFERENCES [Employee] ([EmployeeId])
122 | ON DELETE NO ACTION ON UPDATE NO ACTION
123 | );
124 |
125 | CREATE TABLE [Genre]
126 | (
127 | [GenreId] INTEGER NOT NULL,
128 | [Name] NVARCHAR(120),
129 | CONSTRAINT [PK_Genre] PRIMARY KEY ([GenreId])
130 | );
131 |
132 | CREATE TABLE [Invoice]
133 | (
134 | [InvoiceId] INTEGER NOT NULL,
135 | [CustomerId] INTEGER NOT NULL,
136 | [InvoiceDate] DATETIME NOT NULL,
137 | [BillingAddress] NVARCHAR(70),
138 | [BillingCity] NVARCHAR(40),
139 | [BillingState] NVARCHAR(40),
140 | [BillingCountry] NVARCHAR(40),
141 | [BillingPostalCode] NVARCHAR(10),
142 | [Total] NUMERIC(10,2) NOT NULL,
143 | CONSTRAINT [PK_Invoice] PRIMARY KEY ([InvoiceId]),
144 | FOREIGN KEY ([CustomerId]) REFERENCES [Customer] ([CustomerId])
145 | ON DELETE NO ACTION ON UPDATE NO ACTION
146 | );
147 |
148 | CREATE TABLE [InvoiceLine]
149 | (
150 | [InvoiceLineId] INTEGER NOT NULL,
151 | [InvoiceId] INTEGER NOT NULL,
152 | [TrackId] INTEGER NOT NULL,
153 | [UnitPrice] NUMERIC(10,2) NOT NULL,
154 | [Quantity] INTEGER NOT NULL,
155 | CONSTRAINT [PK_InvoiceLine] PRIMARY KEY ([InvoiceLineId]),
156 | FOREIGN KEY ([InvoiceId]) REFERENCES [Invoice] ([InvoiceId])
157 | ON DELETE NO ACTION ON UPDATE NO ACTION,
158 | FOREIGN KEY ([TrackId]) REFERENCES [Track] ([TrackId])
159 | ON DELETE NO ACTION ON UPDATE NO ACTION
160 | );
161 |
162 | CREATE TABLE [MediaType]
163 | (
164 | [MediaTypeId] INTEGER NOT NULL,
165 | [Name] NVARCHAR(120),
166 | CONSTRAINT [PK_MediaType] PRIMARY KEY ([MediaTypeId])
167 | );
168 |
169 | CREATE TABLE [Playlist]
170 | (
171 | [PlaylistId] INTEGER NOT NULL,
172 | [Name] NVARCHAR(120),
173 | CONSTRAINT [PK_Playlist] PRIMARY KEY ([PlaylistId])
174 | );
175 |
176 | CREATE TABLE [PlaylistTrack]
177 | (
178 | [PlaylistId] INTEGER NOT NULL,
179 | [TrackId] INTEGER NOT NULL,
180 | CONSTRAINT [PK_PlaylistTrack] PRIMARY KEY ([PlaylistId], [TrackId]),
181 | FOREIGN KEY ([PlaylistId]) REFERENCES [Playlist] ([PlaylistId])
182 | ON DELETE NO ACTION ON UPDATE NO ACTION,
183 | FOREIGN KEY ([TrackId]) REFERENCES [Track] ([TrackId])
184 | ON DELETE NO ACTION ON UPDATE NO ACTION
185 | );
186 |
187 | CREATE TABLE [Track]
188 | (
189 | [TrackId] INTEGER NOT NULL,
190 | [Name] NVARCHAR(200) NOT NULL,
191 | [AlbumId] INTEGER,
192 | [MediaTypeId] INTEGER NOT NULL,
193 | [GenreId] INTEGER,
194 | [Composer] NVARCHAR(220),
195 | [Milliseconds] INTEGER NOT NULL,
196 | [Bytes] INTEGER,
197 | [UnitPrice] NUMERIC(10,2) NOT NULL,
198 | CONSTRAINT [PK_Track] PRIMARY KEY ([TrackId]),
199 | FOREIGN KEY ([AlbumId]) REFERENCES [Album] ([AlbumId])
200 | ON DELETE NO ACTION ON UPDATE NO ACTION,
201 | FOREIGN KEY ([GenreId]) REFERENCES [Genre] ([GenreId])
202 | ON DELETE NO ACTION ON UPDATE NO ACTION,
203 | FOREIGN KEY ([MediaTypeId]) REFERENCES [MediaType] ([MediaTypeId])
204 | ON DELETE NO ACTION ON UPDATE NO ACTION
205 | );
206 |
207 |
208 | /*******************************************************************************
209 | Create Primary Key Unique Indexes
210 | ********************************************************************************/
211 | CREATE UNIQUE INDEX [IPK_Album] ON [Album]([AlbumId]);
212 |
213 | CREATE UNIQUE INDEX [IPK_Artist] ON [Artist]([ArtistId]);
214 |
215 | CREATE UNIQUE INDEX [IPK_Customer] ON [Customer]([CustomerId]);
216 |
217 | CREATE UNIQUE INDEX [IPK_Employee] ON [Employee]([EmployeeId]);
218 |
219 | CREATE UNIQUE INDEX [IPK_Genre] ON [Genre]([GenreId]);
220 |
221 | CREATE UNIQUE INDEX [IPK_Invoice] ON [Invoice]([InvoiceId]);
222 |
223 | CREATE UNIQUE INDEX [IPK_InvoiceLine] ON [InvoiceLine]([InvoiceLineId]);
224 |
225 | CREATE UNIQUE INDEX [IPK_MediaType] ON [MediaType]([MediaTypeId]);
226 |
227 | CREATE UNIQUE INDEX [IPK_Playlist] ON [Playlist]([PlaylistId]);
228 |
229 | CREATE UNIQUE INDEX [IPK_PlaylistTrack] ON [PlaylistTrack]([PlaylistId], [TrackId]);
230 |
231 | CREATE UNIQUE INDEX [IPK_Track] ON [Track]([TrackId]);
232 |
233 |
234 | /*******************************************************************************
235 | Create Foreign Key Indexes
236 | ********************************************************************************/
237 | CREATE INDEX [IFK_AlbumArtistId] ON [Album] ([ArtistId]);
238 |
239 | CREATE INDEX [IFK_CustomerSupportRepId] ON [Customer] ([SupportRepId]);
240 |
241 | CREATE INDEX [IFK_EmployeeReportsTo] ON [Employee] ([ReportsTo]);
242 |
243 | CREATE INDEX [IFK_InvoiceCustomerId] ON [Invoice] ([CustomerId]);
244 |
245 | CREATE INDEX [IFK_InvoiceLineInvoiceId] ON [InvoiceLine] ([InvoiceId]);
246 |
247 | CREATE INDEX [IFK_InvoiceLineTrackId] ON [InvoiceLine] ([TrackId]);
248 |
249 | CREATE INDEX [IFK_PlaylistTrackTrackId] ON [PlaylistTrack] ([TrackId]);
250 |
251 | CREATE INDEX [IFK_TrackAlbumId] ON [Track] ([AlbumId]);
252 |
253 | CREATE INDEX [IFK_TrackGenreId] ON [Track] ([GenreId]);
254 |
255 | CREATE INDEX [IFK_TrackMediaTypeId] ON [Track] ([MediaTypeId]);
256 |
257 |
258 |
259 |
260 |
261 |
--------------------------------------------------------------------------------
/sample-data/mongo/plans.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name":"Free",
4 | "features":[
5 | {
6 | "max_storage": 21474836480,
7 | "type":"AmazonS3",
8 | "users": { "max" : 1, "additional" : false }
9 | },
10 | {
11 | "max_storage": 21474836480,
12 | "type":"DropBox",
13 | "users": { "max" : 1, "additional" : false }
14 | }
15 | ],
16 | "term":"month",
17 | "public_sharing": false,
18 | "photo_sharing": true,
19 | "created_at":{ "$date":1346740765000 }
20 | },
21 | {
22 | "name":"Team",
23 | "plan_aliases":["Business","Paid"],
24 | "features":[
25 | {
26 | "max_storage": 53687091200,
27 | "type":"AmazonS3",
28 | "users": { "max" : 5, "additional" : true }
29 | },
30 | {
31 | "max_storage": 53687091200,
32 | "type":"DropBox",
33 | "users": { "max" : 5, "additional" : true }
34 | }
35 | ],
36 | "term":"month",
37 | "public_sharing": true,
38 | "photo_sharing": true,
39 | "created_at":{ "$date":1346740765000 }
40 | },
41 | {
42 | "name":"Team",
43 | "plan_aliases":[],
44 | "features":[],
45 | "term":"month",
46 | "public_sharing": true,
47 | "photo_sharing": true,
48 | "created_at":{ "$date":1346740765000 }
49 | }
50 | ]
51 |
--------------------------------------------------------------------------------
/sample-data/mongo/users.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "user_id": "sunitparekh",
4 | "date_of_birth": { "$date":1346740765000 },
5 | "email":"parekh.sunit@gmail.com",
6 | "password":"TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@",
7 | "failed_attempts":0,
8 | "first_name":"Sunit",
9 | "last_name":"Parekh",
10 | "password_reset_answer":"manza",
11 | "password_reset_question":"My new car modal?",
12 | "nick_names" : ["sUnit","Mr S", "Parekh"],
13 | "updated_at":{ "$date":1346740767000 }
14 | },
15 | {
16 | "user_id": "satyamag",
17 | "date_of_birth":{ "$date":1346740765000 },
18 | "email":"satyamag@gmail.com",
19 | "password":"$2a$10$2YTfqIK8Pd8GlbMDFZCvGOcJYLkQs7Hlpal4YF99iSh9yhnWPggZG",
20 | "failed_attempts":1,
21 | "first_name":"Satyam",
22 | "last_name":"Agarwal",
23 | "password_reset_answer":"iphone",
24 | "password_reset_question":"My phone?",
25 | "updated_at":{ "$date":1346740767000 }
26 | },
27 | {
28 | "user_id": "anandagrawal",
29 | "date_of_birth":{ "$date":1346740765000 },
30 | "email":"anandagrawal84@gmail.com",
31 | "password":"Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum",
32 | "failed_attempts":0,
33 | "first_name":"Anand",
34 | "last_name":"Agrawal",
35 | "password_reset_answer":"android",
36 | "password_reset_question":"My phone?",
37 | "updated_at":{ "$date":1346740767000 }
38 | }
39 | ]
40 |
--------------------------------------------------------------------------------
/spec/acceptance/mongodb_blacklist_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 | require 'mongo'
3 |
# End-to-end check of the MongoDB blacklist strategy: seeds a 'users'
# collection, anonymizes it in place and verifies that only the blacklisted
# fields changed. Needs a MongoDB server reachable at mongodb://localhost.
describe 'End 2 End MongoDB Blacklist Acceptance Test' do

  # Start every example from a freshly dropped 'test' database holding two users.
  before(:each) do
    Mongo::Client.new('mongodb://localhost/test').database().drop()
    users = [
        {
            '_id' => 1,
            'USER_ID' => 'sunitparekh',
            'date_of_birth' => Time.new(2012, 7, 14, 13, 1, 0),
            'email' => 'parekh-sunit@mailinator.com',
            'password' => 'TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@',
            'failed_attempts' => 0,
            'first_name' => 'Sunit',
            'last_name' => 'Parekh',
            'password_reset_answer' => 'manza',
            'password_reset_question' => 'My new car modal?',
            'updated_at' => Time.new(2012, 8, 15, 13, 1, 0),
            'alternate_emails' => ['abc@test.com', 'abc2@test.com']

        },
        {
            '_id' => 2,
            'USER_ID' => 'anandagrawal',
            'date_of_birth' => Time.new(2011, 8, 11, 13, 1, 0),
            'email' => 'anand-agrawal@mailinator.com',
            'password' => 'Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum',
            'failed_attempts' => 0,
            'first_name' => 'Anand',
            'last_name' => 'Agrawal',
            'password_reset_answer' => 'android',
            'password_reset_question' => 'My phone?',
            'updated_at' => Time.new(2012, 2, 11, 13, 1, 0),
            'alternate_emails' => ['abc@test.com', 'abc2@test.com']
        }
    ]
    users_coll = Mongo::Client.new('mongodb://localhost/test').database().collection('users')
    users.each { |p| users_coll.insert_one p }
  end

  it 'should anonymize users collection' do

    database 'test' do
      strategy DataAnon::Strategy::MongoDB::Blacklist
      source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'

      collection 'users' do
        anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
        anonymize('USER_ID').using FieldStrategy::StringTemplate.new('user-#{row_number}')
        anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
        anonymize('password') { |field| 'password'}
        anonymize('first_name').using FieldStrategy::RandomFirstName.new
        anonymize('last_name').using FieldStrategy::RandomLastName.new
        anonymize('alternate_emails').using FieldStrategy::AnonymizeArray.new(FieldStrategy::RandomMailinatorEmail.new)
      end

    end

    users_coll = Mongo::Client.new('mongodb://localhost/test').database().collection('users')
    users_coll.find.count.to_int.should == 2
    user = users_coll.find({'_id' => 1}).to_a[0]

    user['_id'].should == 1
    user['USER_ID'].should == 'user-1'
    user['date_of_birth'].to_i.should_not == Time.new(2012, 7, 14, 13, 1, 0).to_i
    user['email'].should_not == 'parekh-sunit@mailinator.com'
    user['password'].should == 'password'
    # Fields without an anonymize rule must come through untouched.
    user['failed_attempts'].should == 0
    # Compare by value: `be` tests object identity, which a fresh string
    # literal never satisfies, so `should_not be '...'` could never fail.
    user['first_name'].should_not == 'Sunit'
    user['last_name'].should_not == 'Parekh'
    user['password_reset_answer'].should == 'manza'
    user['password_reset_question'].should == 'My new car modal?'
    user['updated_at'].to_i.should == Time.new(2012, 8, 15, 13, 1, 0).to_i
    user['alternate_emails'].length.should == 2
    user['alternate_emails'][0].should_not == 'abc@test.com'
    user['alternate_emails'][1].should_not == 'abc2@test.com'


  end
end
--------------------------------------------------------------------------------
/spec/acceptance/mongodb_whitelist_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 | require 'mongo'
3 |
# Acceptance test for the MongoDB whitelist strategy: copies the 'plans'
# collection from the 'test' database into 'dest' and checks which fields were
# kept verbatim and which were replaced. Talks to mongodb://localhost.
describe 'End 2 End MongoDB Whitelist Acceptance Test' do

  # Drop both databases, then seed the source with a "Free" and a "Team" plan.
  before(:each) do
    ['mongodb://localhost/test', 'mongodb://localhost/dest'].each do |uri|
      Mongo::Client.new(uri).database.drop
    end

    # Builds the AmazonS3/DropBox feature pair that both plans carry.
    features_for = lambda do |max_storage, max_users, additional|
      %w(AmazonS3 DropBox).map do |type|
        {
            'max_storage' => max_storage,
            'type' => type,
            'users' => {'max' => max_users, 'additional' => additional}
        }
      end
    end

    free_plan = {
        '_id' => 1,
        'name' => 'Free',
        'nick_names' => ['Name1', 'Name2'],
        'features' => features_for.call(21474836480, 1, false),
        'term' => 'month',
        'public_sharing' => false,
        'photo_sharing' => true,
        'created_at' => Time.new(2012, 6, 21, 13, 30, 0)
    }
    team_plan = {
        '_id' => 2,
        'name' => 'Team',
        'plan_aliases' => ['Business', 'Paid'],
        'features' => features_for.call(53687091200, 5, true),
        'term' => 'month',
        'public_sharing' => true,
        'photo_sharing' => true,
        'created_at' => Time.new(2012, 8, 11, 13, 1, 0)
    }

    source_plans = Mongo::Client.new('mongodb://localhost/test').database.collection('plans')
    [free_plan, team_plan].each { |plan| source_plans.insert_one plan }
  end

  it 'should anonymize plans collection' do

    database 'dest' do
      strategy DataAnon::Strategy::MongoDB::Whitelist
      source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
      destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest'

      collection 'plans' do
        whitelist '_id', 'name', 'term', 'created_at'
        anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(%w(Free Team Business Paid))
        anonymize 'public_sharing', 'photo_sharing'

        collection 'features' do
          anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240, 21474836480, 53687091200])
          whitelist 'type'

          document 'users' do
            anonymize 'max', 'additional'
          end
        end
      end

    end

    booleans = [true, false]
    storage_choices = [10737418240, 21474836480, 53687091200]
    alias_choices = ['Free', 'Team', 'Business', 'Paid']

    dest_plans = Mongo::Client.new('mongodb://localhost/dest').database.collection('plans')
    dest_plans.find.count.to_int.should be 2

    # Plan 1: whitelisted fields are copied as is, the rest is replaced.
    free_plan = dest_plans.find({ '_id' => 1}).to_a.first
    free_plan['_id'].should == 1
    free_plan['name'].should == 'Free'
    ['Name1', 'Name2'].each_with_index do |original, index|
      free_plan['nick_names'][index].should_not == original
    end
    free_plan['term'].should == 'month'
    free_plan['created_at'].should == Time.new(2012, 6, 21, 13, 30, 0)
    free_plan['plan_aliases'].should be_nil
    ['public_sharing', 'photo_sharing'].each do |flag|
      booleans.should include(free_plan[flag])
    end
    free_plan['features'].length.should == 2

    first_feature = free_plan['features'].first
    storage_choices.should include(first_feature['max_storage'])
    first_feature['type'].should == 'AmazonS3'
    first_feature['users']['max'].should be_kind_of(Integer)
    booleans.should include(first_feature['users']['additional'])

    # Plan 2: every alias is replaced by an entry of the configured list.
    team_plan = dest_plans.find({ '_id' => 2}).to_a.first
    team_plan['plan_aliases'].length.should == 2
    [0, 1].each do |index|
      alias_choices.should include(team_plan['plan_aliases'][index])
    end
  end
end
--------------------------------------------------------------------------------
/spec/acceptance/rdbms_blacklist_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe 'End 2 End RDBMS Blacklist Acceptance Test using SQLite database' do
4 | connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
5 |
6 | before(:each) do
7 | CustomerSample.clean
8 | CustomerSample.create_schema connection_spec
9 | CustomerSample.insert_record connection_spec, CustomerSample::SAMPLE_DATA[0]
10 | end
11 |
12 | it 'should anonymize customer table record ' do
13 |
14 | database 'Customer' do
15 | strategy DataAnon::Strategy::Blacklist
16 | source_db connection_spec
17 |
18 | table 'customers' do
19 | primary_key 'cust_id'
20 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
21 | anonymize 'terms_n_condition', 'age'
22 | end
23 | end
24 |
25 | DataAnon::Utils::SourceDatabase.establish_connection connection_spec
26 | source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
27 | new_rec = source.find(CustomerSample::SAMPLE_DATA[0][:cust_id])
28 | new_rec['email'].should == 'test+1@gmail.com'
29 |
30 | end
31 |
32 | it 'should skip anonymization of the record if condition in skip is true' do
33 | database 'Customer' do
34 | strategy DataAnon::Strategy::Blacklist
35 | source_db connection_spec
36 |
37 | table 'customers' do
38 | skip { |index, record| record['age'] > 18 }
39 |
40 | primary_key 'cust_id'
41 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
42 | anonymize 'terms_n_condition', 'age'
43 | end
44 | end
45 |
46 | DataAnon::Utils::SourceDatabase.establish_connection connection_spec
47 | source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
48 | new_rec = source.find(CustomerSample::SAMPLE_DATA[0][:cust_id])
49 | new_rec['email'].should_not == 'test+1@gmail.com'
50 |
51 | end
52 |
53 | it 'should continue with anonymization of the record if condition in skip is true' do
54 | database 'Customer' do
55 | strategy DataAnon::Strategy::Blacklist
56 | source_db connection_spec
57 |
58 | table 'customers' do
59 | continue { |index, record| record['age'] > 18 }
60 |
61 | primary_key 'cust_id'
62 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
63 | anonymize 'terms_n_condition', 'age'
64 | end
65 | end
66 |
67 | DataAnon::Utils::SourceDatabase.establish_connection connection_spec
68 | source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
69 | new_rec = source.find(CustomerSample::SAMPLE_DATA[0][:cust_id])
70 | new_rec['email'].should == 'test+1@gmail.com'
71 |
72 |
73 |
74 | end
75 | end
76 |
--------------------------------------------------------------------------------
/spec/acceptance/rdbms_whitelist_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe 'End 2 End RDBMS Whitelist Acceptance Test using SQLite database' do
4 |
5 | source_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
6 | dest_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer-dest.sqlite'}
7 |
8 | before(:each) do
9 | CustomerSample.clean
10 | CustomerSample.create_schema source_connection_spec
11 | CustomerSample.insert_records source_connection_spec, CustomerSample::SAMPLE_DATA
12 |
13 | CustomerSample.create_schema dest_connection_spec
14 | end
15 |
16 | it 'should anonymize customer table record ' do
17 |
18 | database 'Customer' do
19 | strategy DataAnon::Strategy::Whitelist
20 | source_db source_connection_spec
21 | destination_db dest_connection_spec
22 |
23 | table 'customers' do
24 | whitelist 'cust_id', 'address', 'zipcode', 'blog_url'
25 | anonymize('first_name').using FieldStrategy::RandomFirstName.new
26 | anonymize('last_name').using FieldStrategy::RandomLastName.new
27 | anonymize('state').using FieldStrategy::SelectFromList.new(['Gujrat','Karnataka'])
28 | anonymize('phone').using FieldStrategy::RandomPhoneNumber.new
29 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
30 | anonymize 'terms_n_condition', 'age', 'longitude'
31 | anonymize('latitude').using FieldStrategy::RandomFloatDelta.new(2.0)
32 | whitelist 'created_at','updated_at'
33 | end
34 | end
35 |
36 | DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
37 | dest_table = DataAnon::Utils::DestinationTable.create 'customers'
38 | dest_table.count.should == 2
39 | new_rec = dest_table.where('cust_id' => CustomerSample::SAMPLE_DATA[0][:cust_id]).first
40 | new_rec.first_name.should_not be('Sunit')
41 | new_rec.last_name.should_not be('Parekh')
42 | new_rec.birth_date.should_not be(Date.new(1977,7,8))
43 | new_rec.address.should == 'F 501 Shanti Nagar'
44 | %w(Gujrat Karnataka).should include(new_rec.state)
45 | new_rec.zipcode.should == '411048'
46 | new_rec.phone.should_not be '9923700662'
47 | new_rec.email.should == 'test+1@gmail.com'
48 | [true,false].should include(new_rec.terms_n_condition)
49 | new_rec.age.should be_between(0,100)
50 | new_rec.latitude.should be_between( 38.689060, 42.689060)
51 | new_rec.longitude.should be_between( -84.044636, -64.044636)
52 | new_rec.created_at.should == Time.new(2010,10,10)
53 | new_rec.updated_at.should == Time.new(2010,5,5)
54 | end
55 |
56 | describe 'batch_size' do
57 | it 'processes all records in batches' do
58 | database 'Customer' do
59 | strategy DataAnon::Strategy::Whitelist
60 | source_db source_connection_spec
61 | destination_db dest_connection_spec
62 |
63 | table 'customers' do
64 | batch_size 1
65 | whitelist 'first_name'
66 | end
67 | end
68 |
69 | DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
70 | dest_table = DataAnon::Utils::DestinationTable.create 'customers'
71 | dest_table.count.should == 2
72 | first_rec = dest_table.first
73 | first_rec.first_name.should eq('Sunit')
74 | second_rec = dest_table.second
75 | second_rec.first_name.should eq('Rohit')
76 | end
77 | end
78 |
79 | describe 'limiting' do
80 | it 'returns only last record' do
81 | database 'Customer' do
82 | strategy DataAnon::Strategy::Whitelist
83 | source_db source_connection_spec
84 | destination_db dest_connection_spec
85 |
86 | table 'customers' do
87 | limit 1
88 | whitelist 'cust_id', 'first_name', 'created_at','updated_at'
89 | end
90 | end
91 |
92 | DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
93 | dest_table = DataAnon::Utils::DestinationTable.create 'customers'
94 | dest_table.count.should == 1
95 | new_rec = dest_table.first
96 | new_rec.first_name.should eq('Rohit')
97 | end
98 | end
99 | end
100 |
--------------------------------------------------------------------------------
/spec/acceptance/rdbms_whitelist_with_primary_key_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe 'End 2 End RDBMS Whitelist Acceptance Test using SQLite database' do
4 |
5 | source_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
6 | dest_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer-dest.sqlite'}
7 |
8 | before(:each) do
9 | CustomerSample.clean
10 | CustomerSample.create_schema source_connection_spec
11 | CustomerSample.insert_record source_connection_spec, CustomerSample::SAMPLE_DATA[0]
12 |
13 | CustomerSample.create_schema dest_connection_spec
14 | end
15 |
16 | it 'should anonymize customer table record ' do
17 |
18 | database 'Customer' do
19 | strategy DataAnon::Strategy::Whitelist
20 | source_db source_connection_spec
21 | destination_db dest_connection_spec
22 |
23 | table 'customers' do
24 | primary_key 'cust_id'
25 | batch_size 1
26 |
27 | whitelist 'cust_id', 'address', 'zipcode', 'blog_url'
28 | anonymize('first_name').using FieldStrategy::RandomFirstName.new
29 | anonymize('last_name').using FieldStrategy::RandomLastName.new
30 | anonymize('state').using FieldStrategy::SelectFromList.new(['Gujrat','Karnataka'])
31 | anonymize('phone').using FieldStrategy::RandomPhoneNumber.new
32 | anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
33 | anonymize 'terms_n_condition', 'age', 'longitude'
34 | anonymize('latitude').using FieldStrategy::RandomFloatDelta.new(2.0)
35 | end
36 | end
37 |
38 | DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
39 | dest_table = DataAnon::Utils::DestinationTable.create 'customers'
40 | new_rec = dest_table.where('cust_id' => CustomerSample::SAMPLE_DATA[0][:cust_id]).first
41 | new_rec.first_name.should_not be('Sunit')
42 | new_rec.last_name.should_not be('Parekh')
43 | new_rec.birth_date.should_not be(Date.new(1977,7,8))
44 | new_rec.address.should == 'F 501 Shanti Nagar'
45 | ['Gujrat','Karnataka'].should include(new_rec.state)
46 | new_rec.zipcode.should == '411048'
47 | new_rec.phone.should_not be '9923700662'
48 | new_rec.email.should == 'test+1@gmail.com'
49 | [true,false].should include(new_rec.terms_n_condition)
50 | new_rec.age.should be_between(0,100)
51 | new_rec.latitude.should be_between( 38.689060, 42.689060)
52 | new_rec.longitude.should be_between( -84.044636, -64.044636)
53 |
54 | end
55 | end
56 |
--------------------------------------------------------------------------------
/spec/core/fields_missing_strategy_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe DataAnon::Core::FieldsMissingStrategy do
4 |
5 | FMS = DataAnon::Core::FieldsMissingStrategy
6 |
7 | it 'should be able to add field for new table that doesnot exist' do
8 | users = FMS.new('users')
9 | users.missing('confirm_email')
10 | users.fields_missing_strategy.should == ['confirm_email']
11 | end
12 |
13 | it 'should be able to take care for same field appearing multiple time' do
14 | users = FMS.new('users')
15 | users.missing('confirm_email')
16 | users.missing('confirm_email')
17 | users.fields_missing_strategy.should == ['confirm_email']
18 | end
19 |
20 | it 'should be able to add multiple fields for table' do
21 | users = FMS.new('users')
22 | users.missing('confirm_email')
23 | users.missing('password_reset')
24 | users.fields_missing_strategy.should == %w(confirm_email password_reset)
25 | end
26 | end
--------------------------------------------------------------------------------
/spec/resource/sample.geojson:
--------------------------------------------------------------------------------
1 | {"geometry": {"type": "Point", "coordinates": [-134.412039, 58.30057]}, "type": "Feature", "id": "SG_5xejzYOfDRcyoVHXqvO2hB_58.300570_-134.412039@1293731153", "properties": {"province": "AK", "city": "Juneau", "name": "Purchasing Dept", "tags": ["state"], "country": "US", "classifiers": [{"category": "Government", "type": "Public Place", "subcategory": "Office"}], "phone": "+1 907 465 2250", "href": "http://api.simplegeo.com/1.0/features/SG_5xejzYOfDRcyoVHXqvO2hB_58.300570_-134.412039@1293731153.json", "address": "333 Willoughby Ave", "owner": "simplegeo", "postcode": "99801"}}
2 |
--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
1 | require 'rspec'
2 | require "pry"
3 | require 'coveralls'
4 |
5 | Coveralls.wear!
6 |
7 | require 'data-anonymization'
8 |
9 | ENV['show_progress'] = 'false'
10 |
11 | Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
12 |
13 | DataAnon::Utils::Logging.logger.level = Logger::WARN
14 | Mongo::Logger.logger.level = Logger::WARN
15 |
16 | RSpec.configure do |config|
17 | config.expect_with :rspec do |c|
18 | c.syntax = [:should, :expect]
19 | end
20 |
21 | config.mock_with :rspec do |c|
22 | c.syntax = [:should, :expect]
23 | end
24 |
25 | config.before(:suite) do
26 | end
27 |
28 | config.before(:each) do
29 | end
30 |
31 | config.after(:suite) do
32 | end
33 | end
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/spec/strategy/field/contact/random_address_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe DataAnon::Strategy::Field::RandomAddress do
4 |
5 | RandomAddress = DataAnon::Strategy::Field::RandomAddress
6 | let(:field) {DataAnon::Core::Field.new('address','1 Infinite Loop',1,nil)}
7 |
8 | describe 'anonymized address should be different from original address' do
9 | let(:anonymized_address) {RandomAddress.region_US.anonymize(field)}
10 | it {anonymized_address.should_not eq('1 Infinite Loop')}
11 | end
12 | end
--------------------------------------------------------------------------------
/spec/strategy/field/contact/random_city_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe DataAnon::Strategy::Field::RandomCity do
4 |
5 | RandomCity = DataAnon::Strategy::Field::RandomCity
6 | let(:field) { DataAnon::Core::Field.new('city', 'Atlanta', 1, nil) }
7 |
8 | describe 'anonymized city should be different from original city' do
9 | let(:anonymized_city) { RandomCity.region_US.anonymize(field) }
10 |
11 | it { anonymized_city.should_not be_nil }
12 | it { anonymized_city.should_not eq("Atlanta") }
13 | end
14 | end
--------------------------------------------------------------------------------
/spec/strategy/field/contact/random_phone_number_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomPhoneNumber do
4 |
5 | RandomPhoneNumber = FieldStrategy::RandomPhoneNumber
6 | let(:field) { DataAnon::Core::Field.new('phone_number', "+0 (123) 456-7890", 1, nil) }
7 |
8 | describe 'anonymized phone number preserving the format' do
9 | let(:anonymized_number) { RandomPhoneNumber.new().anonymize(field) }
10 |
11 | it { anonymized_number.should_not equal field.value }
12 | it { anonymized_number.should match /\+\d\ \(\d{3}\)\ \d{3}-\d{4}$/ }
13 | end
14 |
15 |
16 | end
--------------------------------------------------------------------------------
/spec/strategy/field/contact/random_province_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe DataAnon::Strategy::Field::RandomProvince do
4 |
5 | RandomProvince = DataAnon::Strategy::Field::RandomProvince
6 | let(:field) { DataAnon::Core::Field.new('province', 'Atlanta', 1, nil) }
7 |
8 | describe 'anonymized province should be different from original province' do
9 | let(:anonymized_province) { RandomProvince.region_US.anonymize(field) }
10 |
11 | it { anonymized_province.should_not be_nil }
12 | it { anonymized_province.should_not eq("Atlanta") }
13 | end
14 | end
--------------------------------------------------------------------------------
/spec/strategy/field/contact/random_zipcode_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe DataAnon::Strategy::Field::RandomZipcode do
4 |
5 | RandomZipcode = DataAnon::Strategy::Field::RandomZipcode
6 | let(:field) { DataAnon::Core::Field.new('zipcode', '12345', 1, nil) }
7 |
8 | describe 'anonymized zipcode should be different from original zipcode' do
9 | let(:anonymized_zipcode) { RandomZipcode.region_US.anonymize(field) }
10 |
11 | it { anonymized_zipcode.should_not be_nil }
12 | it { anonymized_zipcode.should_not eq("12345") }
13 | end
14 | end
--------------------------------------------------------------------------------
/spec/strategy/field/datetime/anonymize_date_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::AnonymizeDate do
4 |
5 | AnonymizeDate = FieldStrategy::AnonymizeDate
6 | let(:field) { DataAnon::Core::Field.new('date', Date.new(2011,7,5), 1, nil) }
7 |
8 | describe 'providing true only for month should randomize only the month field' do
9 |
10 | let(:anonymized_time) { AnonymizeDate.only_month.anonymize(field) }
11 |
12 | it { anonymized_time.should be_kind_of Date}
13 | it { anonymized_time.year.should be 2011 }
14 | it { anonymized_time.month.should be_between(1,12)}
15 | it { anonymized_time.day.should be 5 }
16 | end
17 |
18 | describe 'providing true only for date should randomize only the date field' do
19 |
20 | let(:anonymized_time) { AnonymizeDate.only_day.anonymize(field) }
21 |
22 | it { anonymized_time.year.should be 2011 }
23 | it { anonymized_time.month.should be 7}
24 | it { anonymized_time.day.should be_between(1,31) }
25 | end
26 |
27 | end
--------------------------------------------------------------------------------
/spec/strategy/field/datetime/anonymize_datetime_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::AnonymizeDateTime do
4 |
5 | AnonymizeDateTime = FieldStrategy::AnonymizeDateTime
6 | let(:field) { DataAnon::Core::Field.new('date', DateTime.new(2000,1,1,12,12,12), 1, nil) }
7 |
8 | describe 'providing true only for month should randomize only the month field' do
9 |
10 | let(:anonymized_time) { AnonymizeDateTime.only_month.anonymize(field) }
11 |
12 | it { anonymized_time.should be_kind_of DateTime}
13 | it { anonymized_time.year.should be 2000 }
14 | it { anonymized_time.month.should be_between(1,12)}
15 | it { anonymized_time.day.should be 1 }
16 | it { anonymized_time.hour.should be 12}
17 | it { anonymized_time.min.should be 12}
18 | it { anonymized_time.sec.should be 12}
19 | end
20 |
21 | describe 'providing true only for date should randomize only the date field' do
22 |
23 | let(:anonymized_time) { AnonymizeDateTime.only_day.anonymize(field) }
24 |
25 | it { anonymized_time.year.should be 2000 }
26 | it { anonymized_time.month.should be 1}
27 | it { anonymized_time.day.should be_between(1,31) }
28 | it { anonymized_time.hour.should be 12}
29 | it { anonymized_time.min.should be 12}
30 | it { anonymized_time.sec.should be 12}
31 | end
32 |
33 | describe 'providing true only for hour should randomize only the hour field' do
34 |
35 | let(:anonymized_time) { AnonymizeDateTime.only_hour.anonymize(field) }
36 |
37 | it { anonymized_time.year.should be 2000 }
38 | it { anonymized_time.month.should be 1}
39 | it { anonymized_time.day.should be 1 }
40 | it { anonymized_time.hour.should be_between(1,24)}
41 | it { anonymized_time.min.should be 12}
42 | it { anonymized_time.sec.should be 12}
43 | end
44 |
45 | describe 'providing true only for minute should randomize only the minute field' do
46 |
47 | let(:anonymized_time) { AnonymizeDateTime.only_minute.anonymize(field) }
48 |
49 | it { anonymized_time.year.should be 2000 }
50 | it { anonymized_time.month.should be 1}
51 | it { anonymized_time.day.should be 1 }
52 | it { anonymized_time.hour.should be 12}
53 | it { anonymized_time.min.should be_between(1,60)}
54 | it { anonymized_time.sec.should be 12}
55 | end
56 |
57 | end
--------------------------------------------------------------------------------
/spec/strategy/field/datetime/anonymize_time_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::AnonymizeTime do
4 |
5 | AnonymizeTime = FieldStrategy::AnonymizeTime
6 | let(:field) { DataAnon::Core::Field.new('date', Time.new(2000,1,1,12,12,12), 1, nil) }
7 |
8 | describe 'providing true only for month should randomize only the month field' do
9 |
10 | let(:anonymized_time) { AnonymizeTime.only_month.anonymize(field) }
11 |
12 | it { anonymized_time.should be_kind_of Time}
13 | it { anonymized_time.year.should be 2000 }
14 | it { anonymized_time.month.should be_between(1,12)}
15 | it { anonymized_time.day.should be 1 }
16 | it { anonymized_time.hour.should be 12}
17 | it { anonymized_time.min.should be 12}
18 | it { anonymized_time.sec.should be 12}
19 | end
20 |
21 | describe 'providing true only for date should randomize only the date field' do
22 |
23 | let(:anonymized_time) { AnonymizeTime.only_day.anonymize(field) }
24 |
25 | it { anonymized_time.year.should be 2000 }
26 | it { anonymized_time.month.should be 1}
27 | it { anonymized_time.day.should be_between(1,31) }
28 | it { anonymized_time.hour.should be 12}
29 | it { anonymized_time.min.should be 12}
30 | it { anonymized_time.sec.should be 12}
31 | end
32 |
33 | describe 'providing true only for hour should randomize only the hour field' do
34 |
35 | let(:anonymized_time) { AnonymizeTime.only_hour.anonymize(field) }
36 |
37 | it { anonymized_time.year.should be 2000 }
38 | it { anonymized_time.month.should be 1}
39 | it { anonymized_time.day.should be 1 }
40 | it { anonymized_time.hour.should be_between(1,24)}
41 | it { anonymized_time.min.should be 12}
42 | it { anonymized_time.sec.should be 12}
43 | end
44 |
45 | describe 'providing true only for minute should randomize only the minute field' do
46 |
47 | let(:anonymized_time) { AnonymizeTime.only_minute.anonymize(field) }
48 |
49 | it { anonymized_time.year.should be 2000 }
50 | it { anonymized_time.month.should be 1}
51 | it { anonymized_time.day.should be 1 }
52 | it { anonymized_time.hour.should be 12}
53 | it { anonymized_time.min.should be_between(1,60)}
54 | it { anonymized_time.sec.should be 12}
55 | end
56 |
57 | end
--------------------------------------------------------------------------------
/spec/strategy/field/datetime/date_delta_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::DateDelta do
4 |
5 | DateDelta = FieldStrategy::DateDelta
6 | let(:field) { DataAnon::Core::Field.new('date', Date.new(2011,4,7), 1, nil) }
7 |
8 | describe 'date should not remain the same' do
9 |
10 | let(:anonymized_value) { DateDelta.new().anonymize(field) }
11 | let(:date_difference) {anonymized_value - field.value}
12 |
13 | it { anonymized_value.should be_kind_of Date}
14 | it { date_difference.should be_between(-10.days, 10.days) }
15 | end
16 |
17 | describe 'date should not change when provided with 0 delta for both date and time' do
18 |
19 | let(:anonymized_date) { DateDelta.new(0).anonymize(field) }
20 |
21 | it {anonymized_date.should == Date.new(2011,4,7) }
22 |
23 | end
24 |
25 | describe 'date should be anonymized within provided delta' do
26 |
27 | let(:anonymized_value) { DateDelta.new(5).anonymize(field) }
28 | let(:date_difference) {anonymized_value - field.value}
29 |
30 | it { date_difference.should be_between(-5.days, 5.days) }
31 |
32 | end
33 |
34 |
35 | end
--------------------------------------------------------------------------------
/spec/strategy/field/datetime/date_time_delta_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::DateTimeDelta do
4 |
5 | DateTimeDelta = FieldStrategy::DateTimeDelta
6 | let(:field) { DataAnon::Core::Field.new('date', DateTime.new(2000,1,1), 1, nil) }
7 |
8 | describe 'datetime should not remain the same' do
9 |
10 | let(:anonymized_value) { DateTimeDelta.new().anonymize(field) }
11 | let(:date_difference) {anonymized_value.to_i - field.value.to_i}
12 |
13 | it { anonymized_value.should be_kind_of DateTime}
14 | it {date_difference.should_not be 0 }
15 | end
16 |
17 | describe 'datetime should not change when provided with 0 delta for both date and time' do
18 |
19 | let(:anonymized_value) { DateTimeDelta.new(0,0).anonymize(field) }
20 | let(:date_difference) {anonymized_value.to_i - field.value.to_i}
21 |
22 | it {date_difference.should be 0 }
23 |
24 | end
25 |
26 | describe 'date should be anonymized within provided delta' do
27 |
28 | let(:anonymized_value) { DateTimeDelta.new(5,0).anonymize(field) }
29 | let(:date_difference) {anonymized_value.to_i - field.value.to_i}
30 |
31 | it { date_difference.should be_between(-5.days, 5.days) }
32 |
33 | end
34 |
35 | describe 'time should be anonymized within provided delta' do
36 |
37 | let(:anonymized_value) { DateTimeDelta.new(0,10).anonymize(field) }
38 | let(:date_difference) {anonymized_value.to_i - field.value.to_i}
39 |
40 | it { date_difference.should be_between(-10.minutes, 10.minutes)}
41 | end
42 |
43 |
44 | end
--------------------------------------------------------------------------------
/spec/strategy/field/datetime/time_delta_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::TimeDelta do
4 |
5 | TimeDelta = FieldStrategy::TimeDelta
6 | let(:field) { DataAnon::Core::Field.new('date', Time.new(2012,10,10,13,20,10), 1, nil) }
7 |
8 | describe 'time should not remain the same' do
9 |
10 | let(:anonymized_value) { TimeDelta.new().anonymize(field) }
11 | let(:date_difference) {anonymized_value.to_i - field.value.to_i}
12 |
13 | it { anonymized_value.should be_kind_of Time}
14 | it {date_difference.should_not be 0 }
15 | end
16 |
17 | describe 'time should not change when provided with 0 delta for both date and time' do
18 |
19 | let(:anonymized_value) { TimeDelta.new(0,0).anonymize(field) }
20 | let(:date_difference) {anonymized_value.to_i - field.value.to_i}
21 |
22 | it {date_difference.should be 0 }
23 |
24 | end
25 |
26 | describe 'date should be anonymized within provided delta' do
27 |
28 | let(:anonymized_value) { TimeDelta.new(5,0).anonymize(field) }
29 | let(:date_difference) {anonymized_value.to_i - field.value.to_i}
30 |
31 | it { date_difference.should be_between(-5.days, 5.days) }
32 |
33 | end
34 |
35 | describe 'time should be anonymized within provided delta' do
36 |
37 | let(:anonymized_value) { TimeDelta.new(0,10).anonymize(field) }
38 | let(:date_difference) {anonymized_value.to_i - field.value.to_i}
39 |
40 | it { date_difference.should be_between(-10.minutes, 10.minutes)}
41 | end
42 |
43 |
44 | end
--------------------------------------------------------------------------------
/spec/strategy/field/default_anon_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe FieldStrategy::DefaultAnon do
4 |
5 | DefaultAnon = FieldStrategy::DefaultAnon
6 |
7 | describe 'anonymized boolean true value' do
8 | let(:field) {DataAnon::Core::Field.new('boolean_field',true,1,nil)}
9 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)}
10 |
11 | it { [TrueClass,FalseClass].should include(anonymized_value.class) }
12 | end
13 |
14 | describe 'anonymized boolean false value' do
15 | let(:field) {DataAnon::Core::Field.new('boolean_field',false,1,nil)}
16 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)}
17 |
18 | it { [TrueClass,FalseClass].should include(anonymized_value.class) }
19 | end
20 |
21 | describe 'anonymized float value' do
22 | let(:field) {DataAnon::Core::Field.new('float_field',2.0,1,nil)}
23 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)}
24 |
25 | it { anonymized_value.should be_kind_of Float }
26 | end
27 |
28 | describe 'anonymized int value' do
29 | let(:field) {DataAnon::Core::Field.new('int_field',2,1,nil)}
30 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)}
31 |
32 | it { anonymized_value.should be_kind_of Integer }
33 | end
34 |
35 | describe 'anonymized bignum value' do
36 | let(:field) {DataAnon::Core::Field.new('int_field',2348723489723847382947,1,nil)}
37 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)}
38 |
39 | it { anonymized_value.should be_kind_of Integer }
40 | end
41 |
42 | describe 'anonymized string value' do
43 | let(:field) {DataAnon::Core::Field.new('string_field','String',1,nil)}
44 | let(:anonymized_value) {DefaultAnon.new.anonymize(field)}
45 |
46 | it { anonymized_value.should be_kind_of String }
47 | end
48 |
49 | end
--------------------------------------------------------------------------------
/spec/strategy/field/email/gmail_template_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::GmailTemplate do
4 |
5 | GmailTemplate = FieldStrategy::GmailTemplate
6 | let(:field) { DataAnon::Core::Field.new('email', 'user@company.com', 456, nil) }
7 |
8 | describe 'generated email using default username' do
9 | let(:anonymized_email) { GmailTemplate.new.anonymize(field) }
10 | it { anonymized_email.should eq('someusername+456@gmail.com') }
11 | end
12 |
13 | describe 'generated email using given username' do
14 | let(:anonymized_email) { GmailTemplate.new("fake").anonymize(field) }
15 | it { anonymized_email.should eq('fake+456@gmail.com') }
16 | end
17 | end
--------------------------------------------------------------------------------
/spec/strategy/field/email/random_email_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomEmail do
4 |
5 | RandomEmail = FieldStrategy::RandomEmail
6 | let(:field) {DataAnon::Core::Field.new('email','real@email.com',1,nil)}
7 |
8 |
9 | describe 'anonymized email must be different from original email' do
10 |
11 | let(:anonymized_value) {RandomEmail.new.anonymize(field)}
12 |
13 | it {anonymized_value.should_not equal field.value}
14 | it {anonymized_value.should match '^\S+@\S+\.\S+$'}
15 |
16 | end
17 |
18 | end
--------------------------------------------------------------------------------
/spec/strategy/field/email/random_mailinator_email_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomMailinatorEmail do
4 |
5 | RandomMailinatorEmail = FieldStrategy::RandomMailinatorEmail
6 | let(:field) {DataAnon::Core::Field.new('email','user@company.com',1,nil)}
7 |
8 | describe 'anonymized email should not be the same as original email' do
9 | let(:anonymized_email) {RandomMailinatorEmail.new.anonymize(field)}
10 |
11 | it {anonymized_email.should_not equal field.value}
12 | it {anonymized_email.should match '^\S+@\mailinator\.com$'}
13 | end
14 | end
--------------------------------------------------------------------------------
/spec/strategy/field/name/random_first_name_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomFirstName do
4 |
5 | RandomFirstName = FieldStrategy::RandomFirstName
6 | let(:field) {DataAnon::Core::Field.new('firstname','fakeFirstName',1,nil)}
7 |
8 | describe 'anonymized name must not be the same as provided name' do
9 | let(:anonymized_value) {RandomFirstName.new().anonymize(field)}
10 |
11 | it {anonymized_value.should_not equal field.value}
12 | end
13 |
14 | end
--------------------------------------------------------------------------------
/spec/strategy/field/name/random_full_name_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomFullName do
4 |
5 | RandomFullName = FieldStrategy::RandomFullName
6 |
7 | describe 'anonymized name with just single name' do
8 | let(:field) {DataAnon::Core::Field.new('name','Fake',1,nil)}
9 | let(:anonymized_value) {RandomFullName.new().anonymize(field)}
10 |
11 | it {anonymized_value.should_not equal field.value}
12 | end
13 |
14 | describe 'anonymized name should be the same as original' do
15 | let(:field) {DataAnon::Core::Field.new('name','Fake User',1,nil)}
16 | let(:anonymized_value) {RandomFullName.new().anonymize(field)}
17 |
18 | it {anonymized_value.should_not equal field.value}
19 | end
20 |
21 | describe 'anonymized name should have same number of words as original' do
22 | let(:field) {DataAnon::Core::Field.new('name','Fake User Longer Name Test',1,nil)}
23 | let(:anonymized_value) {RandomFullName.new().anonymize(field)}
24 |
25 | it {anonymized_value.split(' ').size.should equal 5}
26 | end
27 |
28 | end
--------------------------------------------------------------------------------
/spec/strategy/field/name/random_last_name_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomLastName do
4 |
5 | RandomLastName = FieldStrategy::RandomLastName
6 | let(:field) {DataAnon::Core::Field.new('lastname','fakeLastName',1,nil)}
7 |
8 | describe 'anonymized name must not be the same as provided name' do
9 | let(:anonymized_value) {RandomLastName.new().anonymize(field)}
10 |
11 | it {anonymized_value.should_not equal field.value}
12 | end
13 |
14 | end
--------------------------------------------------------------------------------
/spec/strategy/field/name/random_user_name_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomUserName do
4 |
5 | RandomUserName = FieldStrategy::RandomUserName
6 | let(:field) {DataAnon::Core::Field.new('username','fakeUserName',1,nil)}
7 |
8 | describe 'anonymized user name should not be the same as original user name' do
9 | let(:anonymized_username) {RandomUserName.new.anonymize(field)}
10 |
11 | it {anonymized_username.should_not equal field.value}
12 | it {anonymized_username.length.should be_between(5,10)}
13 | it {anonymized_username.should match '^[a-zA-Z0-9]*$'}
14 |
15 | end
16 |
17 | describe 'different length for username' do
18 | let(:anonymized_username) {RandomUserName.new(15,20).anonymize(field)}
19 |
20 | it {anonymized_username.length.should be_between(15,20)}
21 | end
22 |
23 | end
--------------------------------------------------------------------------------
/spec/strategy/field/number/random_big_decimal_delta_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 | require 'bigdecimal'
3 |
4 | describe FieldStrategy::RandomBigDecimalDelta do
5 |
6 | RandomBigDecimalDelta = FieldStrategy::RandomBigDecimalDelta
7 | let(:field) {DataAnon::Core::Field.new('decimal_field',BigDecimal("53422342378687687342893.23324"),1,nil)}
8 |
9 | describe 'anonymized big decimal should not be the same as original value' do
10 | let(:anonymized_value) {RandomBigDecimalDelta.new.anonymize(field)}
11 |
12 | it {anonymized_value.should_not equal field.value}
13 | end
14 |
15 | describe 'anonymized value returned should be big decimal' do
16 | let(:anonymized_value) {RandomBigDecimalDelta.new.anonymize(field)}
17 |
18 | it { anonymized_value.should be_kind_of BigDecimal }
19 | end
20 | end
--------------------------------------------------------------------------------
/spec/strategy/field/number/random_float_delta_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomFloatDelta do
4 |
5 | RandomFloatDelta = FieldStrategy::RandomFloatDelta
6 | let(:field) {DataAnon::Core::Field.new('float_field',5.5,1,nil)}
7 |
8 | describe 'anonymized float should not be the same as original value' do
9 | let(:anonymized_value) {RandomFloatDelta.new(5).anonymize(field)}
10 |
11 | it {anonymized_value.should_not equal field.value}
12 | end
13 |
14 | describe 'anonymized value returned should be a float' do
15 | let(:anonymized_value) {RandomFloatDelta.new(5).anonymize(field)}
16 |
17 | it { anonymized_value.should be_kind_of Float }
18 | end
19 | end
--------------------------------------------------------------------------------
/spec/strategy/field/number/random_float_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomFloat do
4 |
5 | RandomFloat = FieldStrategy::RandomFloat
6 | let(:field) { DataAnon::Core::Field.new('points', 2.5, 1, nil) }
7 |
8 | describe 'verify age range between 18 and 70' do
9 |
10 | let(:anonymized_int) { RandomFloat.new(2.0, 8.0).anonymize(field) }
11 |
12 | it { anonymized_int.should >= 2.0 }
13 | it { anonymized_int.should <= 8.0 }
14 |
15 | end
16 |
17 | describe 'default range between 0 and 100' do
18 |
19 | let(:anonymized_int) { RandomFloat.new.anonymize(field) }
20 |
21 | it { anonymized_int.should >= 0.0 }
22 | it { anonymized_int.should <= 100.0 }
23 |
24 | end
25 |
26 |
27 |
28 | end
--------------------------------------------------------------------------------
/spec/strategy/field/number/random_integer_delta_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomIntegerDelta do
4 |
5 | RandomIntegerDelta = FieldStrategy::RandomIntegerDelta
6 | let(:field) {DataAnon::Core::Field.new('integer_field',100,1,nil)}
7 |
8 | describe "anonymized value returned should be an integer" do
9 | let(:anonymized_integer) {RandomIntegerDelta.new(10).anonymize(field)}
10 |
11 | it { anonymized_integer.should be_kind_of Integer }
12 |
13 | end
14 |
15 | describe "anonymized integer should be within delta from original integer" do
16 | let(:anonymized_integer) {RandomIntegerDelta.new(10).anonymize(field)}
17 |
18 | it{anonymized_integer.should be_between(90,110)}
19 | end
20 |
21 | end
--------------------------------------------------------------------------------
/spec/strategy/field/number/random_integer_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomInteger do
4 |
5 | RandomInteger = FieldStrategy::RandomInteger
6 | let(:field) { DataAnon::Core::Field.new('age', 25, 1, nil) }
7 |
8 | describe 'verify age range between 18 and 70' do
9 |
10 | let(:anonymized_int) { RandomInteger.new(18, 70).anonymize(field) }
11 |
12 | it { anonymized_int.should >= 18 }
13 | it { anonymized_int.should <= 70 }
14 |
15 | end
16 |
17 | describe 'default range between 0 and 100' do
18 |
19 | let(:anonymized_int) { RandomInteger.new.anonymize(field) }
20 |
21 | it { anonymized_int.should >= 0 }
22 | it { anonymized_int.should <= 100 }
23 |
24 | end
25 |
26 |
27 |
28 | end
--------------------------------------------------------------------------------
/spec/strategy/field/random_boolean_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe FieldStrategy::RandomBoolean do
4 |
5 | RandomBoolean = FieldStrategy::RandomBoolean
6 | let(:field) {DataAnon::Core::Field.new('boolean_field',true,1,nil)}
7 |
8 | describe 'anonymized value should be a boolean' do
9 | let(:anonymized_boolean) {RandomBoolean.new.anonymize(field)}
10 |
11 | it {
12 | is_boolean = anonymized_boolean.is_a?(TrueClass) || anonymized_boolean.is_a?(FalseClass)
13 | is_boolean.should be true
14 | }
15 | end
16 | end
--------------------------------------------------------------------------------
/spec/strategy/field/string/formatted_string_numbers_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::FormattedStringNumber do
4 |
5 | FormattedStringNumber = FieldStrategy::FormattedStringNumber
6 | let(:field) {DataAnon::Core::Field.new('credit_card_number',"1111-2222-3333-4444",1,nil)}
7 |
8 | describe 'anonymized credit card number preserving the format' do
9 | let(:anonymized_number) {FormattedStringNumber.new.anonymize(field)}
10 |
11 | it {anonymized_number.should_not equal field.value}
12 | it { anonymized_number.should match /^\d{4}-\d{4}-\d{4}-\d{4}$/}
13 | end
14 |
15 | end
--------------------------------------------------------------------------------
/spec/strategy/field/string/lorem_ipsum_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::LoremIpsum do
4 |
5 | LoremIpsum = FieldStrategy::LoremIpsum
6 | let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
7 |
8 | describe 'should return same length value using default text' do
9 |
10 | let(:anonymized_value) { LoremIpsum.new.anonymize(field) }
11 |
12 | it { anonymized_value.length.should_not be('New Delhi') }
13 | it { anonymized_value.length.should == 'New Delhi'.length }
14 |
15 | end
16 |
17 | describe 'should return same length value using set text' do
18 |
19 | let(:anonymized_value) { LoremIpsum.new("Sunit Parekh").anonymize(field) }
20 |
21 | it { anonymized_value.length.should_not be('New Delhi') }
22 | it { anonymized_value.should == 'Sunit Par' }
23 |
24 | end
25 |
26 |
27 | end
--------------------------------------------------------------------------------
/spec/strategy/field/string/random_formatted_string_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomFormattedString do
4 |
5 | RandomFormattedString = FieldStrategy::RandomFormattedString
6 |
7 | describe 'anonymized credit card number preserving the format' do
8 | let(:field) {DataAnon::Core::Field.new('credit_card_number',"1111-2222-3333-4444",1,nil)}
9 | let(:anonymized_value) {RandomFormattedString.new.anonymize(field)}
10 |
11 | it {anonymized_value.should_not equal field.value}
12 | it { anonymized_value.should match /^\d{4}-\d{4}-\d{4}-\d{4}$/}
13 | end
14 |
15 | describe 'anonymized email preserving the format' do
16 | let(:field) {DataAnon::Core::Field.new('email',"parekh1.sunit@gmail.com",1,nil)}
17 | let(:anonymized_value) {RandomFormattedString.new.anonymize(field)}
18 |
19 | it {anonymized_value.should_not equal field.value}
20 | it { anonymized_value.should match /^[a-z]{6}\d\.[a-z]{5}@[a-z]{5}\.[a-z]{3}$/}
21 | end
22 |
23 | describe 'anonymized string preserving the string case & format' do
24 | let(:field) {DataAnon::Core::Field.new('email',"parekh1.SUNIT@gmail.com",1,nil)}
25 | let(:anonymized_value) {RandomFormattedString.new.anonymize(field)}
26 |
27 | it {anonymized_value.should_not equal field.value}
28 | it { anonymized_value.should match /^[a-z]{6}\d\.[A-Z]{5}@[a-z]{5}\.[a-z]{3}$/}
29 | end
30 |
31 | describe 'anonymized phone# preserving the format' do
32 | let(:field) {DataAnon::Core::Field.new('home_phone',"(020)3423-8013",1,nil)}
33 | let(:anonymized_value) {RandomFormattedString.new.anonymize(field)}
34 |
35 | it {anonymized_value.should_not equal field.value}
36 | it { anonymized_value.should match /^\(\d{3}\)\d{4}-\d{4}$/}
37 | end
38 |
39 | end
--------------------------------------------------------------------------------
/spec/strategy/field/string/random_string_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomString do
4 |
5 | RandomString = FieldStrategy::RandomString
6 |
7 | describe 'anonymized string must not be the same as original string' do
8 | let(:field) {DataAnon::Core::Field.new('string_field','fakeString',1,nil)}
9 | let(:anonymized_string) {RandomString.new.anonymize(field)}
10 |
11 | it {anonymized_string.should_not equal field.value}
12 | it {anonymized_string.length.should equal field.value.length}
13 | end
14 |
15 | describe 'anonymized name should have same number of words as original' do
16 |
17 | let(:field) {DataAnon::Core::Field.new('string_field','Fake Longer String Test',1,nil)}
18 | let(:anonymized_string) {RandomString.new.anonymize(field)}
19 |
20 | it {anonymized_string.split(' ').size.should equal field.value.split(' ').size}
21 |
22 | end
23 | end
--------------------------------------------------------------------------------
/spec/strategy/field/string/random_url_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::RandomUrl do
4 |
5 | RandomUrl = FieldStrategy::RandomUrl
6 |
7 | describe 'anonymized url must not be the same as original url' do
8 | let(:url) { 'http://example.org' }
9 |
10 | let(:field) {DataAnon::Core::Field.new('string_field',url,1,nil)}
11 | let(:anonymized_url) {RandomUrl.new.anonymize(field)}
12 |
13 | it {anonymized_url.should_not equal field.value}
14 | it {anonymized_url.should match /http:\/\/[\S]+/}
15 |
16 | context 'with https url' do
17 | let(:url) { 'https://example.org' }
18 |
19 | it {anonymized_url.should_not equal field.value}
20 | it {anonymized_url.should match /https:\/\/[\S]+/}
21 | end
22 | end
23 | end
24 |
--------------------------------------------------------------------------------
/spec/strategy/field/string/select_from_database_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::SelectFromDatabase do
4 |
5 | SelectFromDatabase = FieldStrategy::SelectFromDatabase
6 | let(:field) { DataAnon::Core::Field.new('name', 'Abcd', 1, nil) }
7 | let(:source) { {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'} }
8 |
9 | describe 'more than one values in predefined list' do
10 |
11 | let(:anonymized_value) { SelectFromDatabase.new('MediaType','Name', source).anonymize(field) }
12 |
13 | it { anonymized_value.should_not be('Abcd') }
14 | it { anonymized_value.should_not be_empty }
15 |
16 | end
17 |
18 | end
--------------------------------------------------------------------------------
/spec/strategy/field/string/select_from_file_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::SelectFromFile do
4 |
5 | SelectFromFile = FieldStrategy::SelectFromFile
6 |
7 | describe 'anonymized name must not be the same as provided name' do
8 | let(:field) {DataAnon::Core::Field.new('firstname','fakeFirstName',1,nil)}
9 | let(:anonymized_value) {SelectFromFile.new(DataAnon::Utils::Resource.file('first_names.txt')).anonymize(field)}
10 |
11 | it {anonymized_value.should_not equal field.value}
12 | end
13 |
14 | describe 'anonymized multiple values' do
15 | let(:field) {DataAnon::Core::Field.new('firstname',['value1','value2'],1,nil)}
16 | let(:anonymized_values) {SelectFromFile.new(DataAnon::Utils::Resource.file('first_names.txt')).anonymize(field)}
17 |
18 | it {anonymized_values.length.should equal 2}
19 | end
20 |
21 | end
--------------------------------------------------------------------------------
/spec/strategy/field/string/select_from_list_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::SelectFromList do
4 |
5 | SelectFromList = FieldStrategy::SelectFromList
6 | let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
7 |
8 | describe 'more than one values in predefined list' do
9 |
10 | let(:states) { ['Maharashtra','Gujrat','Karnataka'] }
11 | let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
12 |
13 | it { states.should include(anonymized_value) }
14 |
15 | end
16 |
17 | describe 'only one value in list' do
18 |
19 | let(:states) { ['Maharashtra'] }
20 | let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
21 |
22 | it { anonymized_value.should == 'Maharashtra' }
23 |
24 | end
25 |
26 | describe 'string value' do
27 |
28 | let(:states) { 'Maharashtra' }
29 | let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
30 |
31 | it { anonymized_value.should == 'Maharashtra' }
32 |
33 | end
34 |
35 |
36 | end
--------------------------------------------------------------------------------
/spec/strategy/field/string/string_template_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe FieldStrategy::StringTemplate do
4 |
5 | StringTemplate = FieldStrategy::StringTemplate
6 | let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 3456, nil) }
7 |
8 | describe 'should return same string value as StringTemplate' do
9 | let(:anonymized_value) { StringTemplate.new('Sunit #{row_number} Parekh').anonymize(field) }
10 | it { anonymized_value.should == 'Sunit 3456 Parekh' }
11 | end
12 |
13 |
14 |
15 | end
--------------------------------------------------------------------------------
/spec/strategy/field/whitelist_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe FieldStrategy::Whitelist do
4 |
5 | Whitelist = FieldStrategy::Whitelist
6 |
7 | describe 'should return same string value as whitelist' do
8 | let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
9 | let(:anonymized_value) { Whitelist.new.anonymize(field) }
10 | it { anonymized_value.should == 'New Delhi' }
11 | end
12 |
13 | describe 'should return same date value as whitelist' do
14 | let(:dob) { Time.now }
15 | let(:field) { DataAnon::Core::Field.new('DateOfBirth', dob, 1, nil) }
16 | let(:anonymized_value) { Whitelist.new.anonymize(field) }
17 | it { anonymized_value.should == dob }
18 | end
19 |
20 |
21 | end
--------------------------------------------------------------------------------
/spec/strategy/mongodb/anonymize_field_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe DataAnon::Strategy::MongoDB::AnonymizeField do
4 |
5 |
6 | it 'should do callback recursive in case of sub document' do
7 | sub_document = {'key' => 'value'}
8 | field_strategy = {'key' => FieldStrategy::LoremIpsum.new}
9 | anonymization_strategy = double('AnonymizationStrategy')
10 | anonymization_strategy.should_receive(:anonymize_document).with(sub_document,1,field_strategy).and_return({'key' => 'anonymized_value'})
11 | field = DataAnon::Core::Field.new('sub_document_field', sub_document,1,nil)
12 | anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field, field_strategy,anonymization_strategy)
13 | anonymized_value = anonymize_field.anonymize
14 | anonymized_value['key'].should == 'anonymized_value'
15 | end
16 |
17 | it 'should do callback recursive multiple time in case of array of sub document' do
18 | sub_documents = [{'key' => 'value1'},{'key' => 'value2'}]
19 | field_strategy = {'key' => FieldStrategy::LoremIpsum.new}
20 | anonymization_strategy = double('AnonymizationStrategy')
21 | anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value1'},1,field_strategy).and_return({'key' => 'anonymized_value1'})
22 | anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value2'},1,field_strategy).and_return({'key' => 'anonymized_value2'})
23 | field = DataAnon::Core::Field.new('sub_document_field', sub_documents,1,nil)
24 | anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field, field_strategy,anonymization_strategy)
25 | anonymized_value = anonymize_field.anonymize
26 | anonymized_value.length.should == 2
27 | anonymized_value[0]['key'].should == 'anonymized_value1'
28 | anonymized_value[1]['key'].should == 'anonymized_value2'
29 | end
30 |
31 | it 'should anonymize array field data type' do
32 | anonymization_strategy = double('AnonymizationStrategy')
33 | anonymization_strategy.should_not_receive(:anonymize_document)
34 | field = DataAnon::Core::Field.new('tags',['tag1','tag2'],1,nil)
35 | anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field,FieldStrategy::SelectFromList.new(['tag4','tag5','tag6','tag7','tag8']),anonymization_strategy)
36 | anonymized_value = anonymize_field.anonymize
37 | anonymized_value.length == 2
38 | ['tag4','tag5','tag6','tag7','tag8'].should include(anonymized_value[0])
39 | ['tag4','tag5','tag6','tag7','tag8'].should include(anonymized_value[1])
40 | end
41 |
42 | it 'should anonymize field and return anonymized value using passed strategy' do
43 | anonymization_strategy = double('AnonymizationStrategy')
44 | anonymization_strategy.should_not_receive(:anonymize_document)
45 | field = DataAnon::Core::Field.new('boolean_field',false,1,nil)
46 | anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field,FieldStrategy::RandomBoolean.new,anonymization_strategy)
47 | anonymized_value = anonymize_field.anonymize
48 | [true, false].should include(anonymized_value)
49 | end
50 |
51 |
52 | end
--------------------------------------------------------------------------------
/spec/support/customer_sample.rb:
--------------------------------------------------------------------------------
1 | class CustomerSample
2 |
3 | class CreateCustomer < ActiveRecord::Migration[7.0]
4 | def up
5 | create_table :customers, :id => false, :force => true do |t|
6 | t.integer :cust_id, :primary => true
7 | t.string :first_name
8 | t.string :last_name
9 | t.date :birth_date
10 | t.string :address
11 | t.string :state
12 | t.string :zipcode
13 | t.string :phone
14 | t.string :email
15 | t.string :blog_url
16 | t.boolean :terms_n_condition
17 | t.integer :age
18 | t.float :latitude
19 | t.float :longitude
20 |
21 | t.timestamps null: false
22 | end
23 | end
24 | end
25 |
26 | def self.clean
27 | system "rm -f tmp/*.sqlite"
28 | system "mkdir -p tmp"
29 | end
30 |
31 | def self.create_schema connection_spec
32 | ActiveRecord::Migration.verbose = false
33 | ActiveRecord::Base.establish_connection connection_spec
34 | CreateCustomer.migrate :up
35 | end
36 |
37 | SAMPLE_DATA = [
38 | {
39 | :cust_id => 100, :first_name => "Sunit", :last_name => "Parekh",
40 | :birth_date => Date.new(1977,7,8), :address => "F 501 Shanti Nagar",
41 | :state => "Maharastra", :zipcode => "411048", :phone => "9923700662",
42 | :email => "parekh.sunit@gmail.com", :terms_n_condition => true,
43 | :age => 34, :longitude => -74.044636, :latitude => +40.689060,
44 | :created_at => Time.new(2010,10,10), :updated_at => Time.new(2010,5,5)
45 | },
46 | {
47 | :cust_id => 101, :first_name => "Rohit", :last_name => "Parekh",
48 | :birth_date => Date.new(1977,7,8), :address => "F 501 Shanti Nagar",
49 | :state => "Maharastra", :zipcode => "411048", :phone => "9923700662",
50 | :email => "parekh.sunit@gmail.com", :terms_n_condition => true,
51 | :age => 34, :longitude => -74.044636, :latitude => +40.689060,
52 | :created_at => Time.now, :updated_at => Time.now
53 | }
54 | ]
55 |
56 | def self.insert_record connection_spec, data_hash = SAMPLE_DATA[0]
57 | DataAnon::Utils::TempDatabase.establish_connection connection_spec
58 | source = DataAnon::Utils::BaseTable.create_table DataAnon::Utils::TempDatabase, 'customers', 'cust_id'
59 | cust = source.new data_hash
60 | cust.cust_id = data_hash[:cust_id]
61 | cust.save!
62 | end
63 |
64 | def self.insert_records connection_spec, data_hash = SAMPLE_DATA
65 | DataAnon::Utils::TempDatabase.establish_connection connection_spec
66 | source = DataAnon::Utils::BaseTable.create_table DataAnon::Utils::TempDatabase, 'customers', 'cust_id'
67 | data_hash.each do |data_row|
68 | cust = source.new data_row
69 | cust.cust_id = data_row[:cust_id]
70 | cust.save!
71 | end
72 | end
73 | end
74 |
--------------------------------------------------------------------------------
/spec/utils/database_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe 'Utils' do
4 |
5 | before(:each) do
6 | source = {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'}
7 | DataAnon::Utils::SourceDatabase.establish_connection source
8 |
9 | destination = {:adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'}
10 | DataAnon::Utils::DestinationDatabase.establish_connection destination
11 | end
12 |
13 | it 'should test the connection to source database' do
14 | album = DataAnon::Utils::SourceTable.create 'Album', ['AlbumId']
15 | album.count.should > 0
16 | album.all.length > 0
17 | end
18 |
19 | it 'ignores inherited constants when creating a table with matching name' do
20 | conditionals = DataAnon::Utils::SourceTable.create 'Conditionals'
21 | conditionals.count.should == 0
22 | conditionals.all.length == 0
23 | end
24 |
25 | end
--------------------------------------------------------------------------------
/spec/utils/geojson_parser_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe "Geo Json Parser" do
4 |
5 | SAMPLE_DATA_FILE_PATH = DataAnon::Utils::Resource.project_home+'spec/resource/sample.geojson'
6 |
7 | describe "parser should return list of addresses when address method is called" do
8 | let(:result_list) {DataAnon::Utils::GeojsonParser.address(SAMPLE_DATA_FILE_PATH)}
9 |
10 | it {result_list.length.should be 1}
11 | it {result_list[0].should eq("333 Willoughby Ave")}
12 | end
13 |
14 | describe "parser should return list of zip codes when zipcode method is called" do
15 | let(:result_list) {DataAnon::Utils::GeojsonParser.zipcode(SAMPLE_DATA_FILE_PATH)}
16 |
17 | it {result_list.length.should be 1}
18 | it {result_list[0].should eq("99801")}
19 |
20 | end
21 |
22 | describe "parser should return list of province when province method is called" do
23 | let(:result_list) {DataAnon::Utils::GeojsonParser.province(SAMPLE_DATA_FILE_PATH)}
24 |
25 | it {result_list.length.should be 1}
26 | it {result_list[0].should eq("AK")}
27 |
28 | end
29 |
30 | describe "parser should return list of cities when city method is called" do
31 | let(:result_list) {DataAnon::Utils::GeojsonParser.city(SAMPLE_DATA_FILE_PATH)}
32 |
33 | it {result_list.length.should be 1}
34 | it {result_list[0].should eq("Juneau")}
35 |
36 | end
37 |
38 | end
--------------------------------------------------------------------------------
/spec/utils/random_float_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe "Number Utils" do
4 |
5 | describe 'should return same length value using default text' do
6 |
7 | let(:random_float) { DataAnon::Utils::RandomFloat.generate(5,10) }
8 |
9 | it { random_float.should be_between(5,10) }
10 | it { random_float.should be_a_kind_of Float }
11 | end
12 | end
--------------------------------------------------------------------------------
/spec/utils/random_int_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe "Number Utils" do
4 |
5 | it "should generate random int between provided range" do
6 | random_int = DataAnon::Utils::RandomInt.generate(5,10)
7 | random_int.should be_between(5,10)
8 | end
9 | end
--------------------------------------------------------------------------------
/spec/utils/random_string_char_only_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe "String Utils" do
4 |
5 | it "should generate random string of given length" do
6 | DataAnon::Utils::RandomStringCharsOnly.generate(10).length.should equal 10
7 | end
8 |
9 | it "should generate random string only with characters" do
10 | DataAnon::Utils::RandomStringCharsOnly.generate(10).should match /^[a-zA-Z]{10}$/
11 | end
12 | end
--------------------------------------------------------------------------------
/spec/utils/random_string_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe "String Utils" do
4 |
5 | it "should generate random string of given length" do
6 | DataAnon::Utils::RandomString.generate(10).length.should equal 10
7 | end
8 | end
--------------------------------------------------------------------------------
/spec/utils/template_helper_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | describe 'Template Helper' do
4 |
5 | it 'should return a correctly formatted string based on input connection hash for source' do
6 | connection_hash = {adapter: 'test_adapter', port: 5000}
7 | DataAnon::Utils::TemplateHelper.source_connection_specs_rdbms(connection_hash).should eq(":adapter => 'test_adapter', :port => 5000")
8 | end
9 |
10 | it 'should return a correctly formatted string based on input connection hash for destination' do
11 | connection_hash = {adapter: 'test_adapter', port: 5000}
12 | DataAnon::Utils::TemplateHelper.destination_connection_specs_rdbms(connection_hash).should eq(":adapter => '', :port => ''")
13 | end
14 | end
--------------------------------------------------------------------------------