├── .gitignore ├── LICENSE ├── README.md ├── anonymize.py ├── anonymize.yml ├── developer_mozilla_org.yml └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | anon.sql 2 | 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Dave Dash 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Mysql Anonymous 2 | 3 | Contributors can benefit from having real data when they are 4 | developing. This script can do a few things (see `anonymize.yml`): 5 | 6 | * Truncate any tables (logs, and other cruft which may have sensitive data) 7 | * Nullify fields (emails, passwords, etc) 8 | * Fill in random/arbitrary data: 9 | * Random integers 10 | * Random IP addresses 11 | * Email addresses 12 | * Usernames 13 | * Delete rows based on simple rules: e.g. 14 | ``DELETE FROM mytable WHERE private = "Yes"``: 15 | 16 | database: 17 | tables: 18 | mytable: 19 | delete: 20 | private: Yes 21 | 22 | ### Usage 23 | 24 | python anonymize.py > anon.sql 25 | cat anon.sql | mysql 26 | -------------------------------------------------------------------------------- /anonymize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # This assumes an id on each field. 3 | import logging 4 | import hashlib 5 | import random 6 | 7 | 8 | log = logging.getLogger('anonymize') 9 | common_hash_secret = "%016x" % (random.getrandbits(128)) 10 | 11 | 12 | def get_truncates(config): 13 | database = config.get('database', {}) 14 | truncates = database.get('truncate', []) 15 | sql = [] 16 | for truncate in truncates: 17 | sql.append('TRUNCATE `%s`' % truncate) 18 | return sql 19 | 20 | 21 | def get_deletes(config): 22 | database = config.get('database', {}) 23 | tables = database.get('tables', []) 24 | sql = [] 25 | for table, data in tables.iteritems(): 26 | if 'delete' in data: 27 | fields = [] 28 | for f, v in data['delete'].iteritems(): 29 | fields.append('`%s` = "%s"' % (f, v)) 30 | statement = 'DELETE FROM `%s` WHERE ' % table + ' AND '.join(fields) 31 | sql.append(statement) 32 | return sql 33 | 34 | listify = lambda x: x if isinstance(x, list) else [x] 35 | 36 | def get_updates(config): 37 | global common_hash_secret 38 | 39 | database = config.get('database', {}) 40 | tables = database.get('tables', []) 41 | sql = [] 42 | for table, data in tables.iteritems(): 43 | updates = [] 44 | for operation, details in data.iteritems(): 45 | if operation == 'nullify': 46 | for field in listify(details): 47 | updates.append("`%s` = NULL" % field) 48 | elif operation == 'random_int': 49 | for field in listify(details): 50 | updates.append("`%s` = ROUND(RAND()*1000000)" % field) 51 | elif operation == 'random_ip': 52 | for field in listify(details): 53 | updates.append("`%s` = INET_NTOA(RAND()*1000000000)" % field) 54 | elif operation == 'random_email': 55 | for field in listify(details): 56 | updates.append("`%s` = CONCAT(id, '@mozilla.com')" 57 | % field) 58 | elif operation == 'random_username': 59 | for field in listify(details): 60 | updates.append("`%s` = CONCAT('_user_', id)" % field) 61 | elif operation == 'hash_value': 62 | for field in listify(details): 63 | updates.append("`%(field)s` = MD5(CONCAT(@common_hash_secret, `%(field)s`))" 64 | % dict(field=field)) 65 | elif operation == 'hash_email': 66 | for field in listify(details): 67 | updates.append("`%(field)s` = CONCAT(MD5(CONCAT(@common_hash_secret, `%(field)s`)), '@mozilla.com')" 68 | % dict(field=field)) 69 | elif operation == 'delete': 70 | continue 71 | else: 72 | log.warning('Unknown operation.') 73 | if updates: 74 | sql.append('UPDATE `%s` SET %s' % (table, ', '.join(updates))) 75 | return sql 76 | 77 | 78 | def anonymize(config): 79 | database = config.get('database', {}) 80 | 81 | if 'name' in database: 82 | print "USE `%s`;" % database['name'] 83 | 84 | print "SET FOREIGN_KEY_CHECKS=0;" 85 | 86 | sql = [] 87 | sql.extend(get_truncates(config)) 88 | sql.extend(get_deletes(config)) 89 | sql.extend(get_updates(config)) 90 | for stmt in sql: 91 | print stmt + ';' 92 | 93 | print "SET FOREIGN_KEY_CHECKS=1;" 94 | print 95 | 96 | if __name__ == '__main__': 97 | 98 | import yaml 99 | import sys 100 | 101 | if len(sys.argv) > 1: 102 | files = sys.argv[1:] 103 | else: 104 | files = [ 'anonymize.yml' ] 105 | 106 | for f in files: 107 | print "--" 108 | print "-- %s" %f 109 | print "--" 110 | print "SET @common_hash_secret=rand();" 111 | print "" 112 | cfg = yaml.load(open(f)) 113 | if 'databases' not in cfg: 114 | anonymize(cfg) 115 | else: 116 | databases = cfg.get('databases') 117 | for name, sub_cfg in databases.items(): 118 | print "USE `%s`;" % name 119 | anonymize({'database': sub_cfg}) 120 | -------------------------------------------------------------------------------- /anonymize.yml: -------------------------------------------------------------------------------- 1 | # This is a sample anonymize.yml file that's used for the Firefox Add-ons 2 | # database. 3 | 4 | database: 5 | truncate: 6 | - addonlogs 7 | - api_auth_tokens 8 | - approvals 9 | - auth_group 10 | - auth_group_permissions 11 | - auth_message 12 | - auth_permission 13 | - auth_user_groups 14 | - auth_user_user_permissions 15 | - auth_user 16 | - cache 17 | - cake_sessions 18 | - collections_tokens 19 | - django_admin_log 20 | - django_session 21 | - django_site 22 | - download_counts 23 | - eventlog 24 | - facebook_data 25 | - facebook_detected 26 | - facebook_favorites 27 | - facebook_sessions 28 | - facebook_users 29 | - favorites 30 | - global_stats 31 | - hubrsskeys 32 | - log_activity 33 | - log_activity_addon 34 | - log_activity_user 35 | - piston_consumer 36 | - piston_nonce 37 | - piston_token 38 | - reviewratings 39 | - reviews_moderation_flags 40 | - sphinx_index_feed_tmp 41 | - stats_addons_collections_counts 42 | - stats_collections 43 | - stats_collections_counts 44 | - stats_collections_share_counts 45 | - stats_collections_share_counts_totals 46 | - stats_contributions 47 | - stats_share_counts 48 | - stats_share_counts_totals 49 | - subscription_events 50 | - update_counts 51 | - users_versioncomments 52 | - versioncomments 53 | tables: 54 | addons: 55 | nullify: [nominationmessage, paypal_id, charity_id] 56 | random_int: 57 | - average_daily_downloads 58 | - average_daily_users 59 | - total_contributions 60 | addons_users: 61 | delete: 62 | listed: 0 63 | blacklisted_guids: 64 | nullify: comments 65 | collections: 66 | delete: 67 | listed: 0 68 | random_int: [downloads, weekly_subscribers, monthly_subscribers] 69 | compatibility_reports: 70 | random_ip: client_ip 71 | nullify: [client_os, comments] 72 | config: 73 | delete: 74 | value: "emailchange_secret" 75 | reviews: 76 | random_ip: ip_address 77 | users: 78 | random_email: email 79 | nullify: 80 | - firstname 81 | - lastname 82 | - password 83 | - confirmationcode 84 | - resetcode 85 | - resetcode_expires 86 | - notes 87 | - last_login_ip 88 | - last_login_ip 89 | - last_login_attempt 90 | - last_login_attempt_ip 91 | - failed_login_attempts 92 | random_username: [username, nickname] 93 | versions: 94 | nullify: approvalnotes 95 | -------------------------------------------------------------------------------- /developer_mozilla_org.yml: -------------------------------------------------------------------------------- 1 | # Anonymization rules for MDN wiki_mdc_deki 2 | 3 | databases: 4 | 5 | lmo_wiki_mdc_deki: 6 | 7 | truncate: 8 | - objectcache 9 | - querycache 10 | - requestlog 11 | - requeststats 12 | 13 | tables: 14 | attachments_backup: 15 | hash_value: 16 | - at_user_text 17 | - at_removed_by_text 18 | logins: 19 | random_ip: 20 | - login_ip_address 21 | users: 22 | hash_value: 23 | - user_name 24 | - user_real_name 25 | hash_email: 26 | - user_email 27 | nullify: 28 | - user_password 29 | - user_newpassword 30 | - user_token 31 | - user_external_name 32 | 33 | lmo_developer_mozilla_org_django: 34 | 35 | truncate: 36 | - auth_message 37 | - django_admin_log 38 | - django_session 39 | - threadedcomments_freethreadedcomment 40 | - threadedcomments_testmodel 41 | 42 | tables: 43 | actioncounters_actioncounterunique: 44 | random_ip: 45 | - ip 46 | nullify: 47 | - user_agent 48 | - session_key 49 | auth_user: 50 | hash_value: 51 | - username 52 | random_email: 53 | - email 54 | random_int: 55 | - first_name 56 | - last_name 57 | nullify: 58 | - password 59 | contentflagging_contentflag: 60 | random_ip: 61 | - ip 62 | nullify: 63 | - user_agent 64 | - session_key 65 | user_profiles: 66 | random_int: 67 | - location 68 | - homepage 69 | 70 | lmo_developer_mozilla_org_phpbb: 71 | 72 | truncate: 73 | - phpbb_sessions 74 | - phpbb_sessions_keys 75 | 76 | tables: 77 | phpbb_banlist: 78 | nullify: 79 | - ban_ip 80 | phpbb_log: 81 | random_ip: 82 | - log_ip 83 | phpbb_moderator_cache: 84 | hash_value: 85 | - username 86 | phpbb_posts: 87 | nullify: 88 | - post_username 89 | random_ip: 90 | - poster_ip 91 | phpbb_privmsgs: 92 | random_ip: 93 | - author_ip 94 | phpbb_profile_fields_data: 95 | hash_value: 96 | - pf_irc_nickname 97 | phpbb_topics: 98 | hash_value: 99 | - topic_first_poster_name 100 | - topic_last_poster_name 101 | phpbb_users: 102 | hash_value: 103 | - username 104 | - username_clean 105 | hash_email: 106 | - user_email 107 | nullify: 108 | - user_ip 109 | - user_password 110 | - user_passchg 111 | - user_email_hash 112 | - user_last_confirm_key 113 | - user_lastpage 114 | - user_form_salt 115 | 116 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | --------------------------------------------------------------------------------