├── .gitignore ├── README.md ├── config.ini.sample ├── dbutils.py ├── requirements.txt └── wordpress-plugin-audit.py /.gitignore: -------------------------------------------------------------------------------- 1 | config.ini 2 | output.csv 3 | __pycache__ 4 | plugins 5 | releases 6 | create_release.sh -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Wordpress Automated CVE Hunting 2 | 3 | Scripts to download every Wordpress plugin (updated in the last 2 years) and run Semgrep over the lot of it while storing output in a database. 4 | 5 | Full write-up: https://projectblack.io/blog/cve-hunting-at-scale/ 6 | 7 | Want to skip straight to looking at the dataset? 8 | 9 | Download the latest mysqldump here: https://github.com/prjblk/wordpress-audit-automation/releases 10 | 11 | ## Getting Started 12 | 13 | ### Prerequisites 14 | 15 | * Ubuntu or other nix system 16 | * At least 30GB of disk space 17 | * MySQL database server 18 | * Python 19 | * Patience 20 | 21 | ## Steps 22 | 1. Setup a MySQL database server 23 | 2. Clone this repo 24 | ``` 25 | git clone https://github.com/prjblk/wordpress-audit-automation 26 | ``` 27 | 3. Configure the config file with database credentials/details 28 | ``` 29 | cp config.ini.sample config.ini 30 | nano config.ini 31 | ``` 32 | 4. Install Python dependencies + Semgrep 33 | ``` 34 | pip install -r requirements.txt 35 | ``` 36 | 6. You may have to login again to ensure Semgrep is available via path 37 | 7. Setup the database schema manually (skip this step if providing privileged database credentials to the script) 38 | * Create a database and run the SQL in create_plugin_data_table and create_plugin_results_table in dbutils.py 39 | 8. Run the script with the --download --audit and --create-schema options 40 | * You might want to run this in a tmux/screen session as it takes ages (15 hours?) 
41 | * By default all the rules in p/php are run against the plugins (minus the PRO rules unless you're logged in). https://semgrep.dev/p/php 42 | * Would highly suggest looking at some of the other rules available as well 43 | 9. Triage output 44 | 10. ??? 45 | 11. CVEs 46 | 47 | ### Example Usage 48 | 49 | ``` 50 | $ python3 wordpress-plugin-audit.py -h 51 | usage: wordpress-plugin-audit.py [-h] [--download] [--download-dir DOWNLOAD_DIR] [--audit] [--config CONFIG] [--create-schema] [--clear-results] [--verbose] 52 | 53 | Downloads or audits all Wordpress plugins. 54 | 55 | options: 56 | -h, --help show this help message and exit 57 | --download Download and extract plugins, if plugin directory already exists, it will delete it and redownload 58 | --download-dir DOWNLOAD_DIR 59 | The directory to save/audit downloaded plugins (default: current directory) 60 | --audit Audits downloaded plugins sequentially 61 | --config CONFIG Semgrep config/rules to run - https://semgrep.dev/docs/running-rules#running-semgrep-registry-rules-locally (default: p/php) 62 | --create-schema Create the database and schema if this flag is set 63 | --clear-results Clear audit table and then run, useful if run as a cron job and we only care about the latest release 64 | --verbose Print detailed messages 65 | 66 | $ python3 wordpress-plugin-audit.py --download --audit --create-schema 67 | Downloading plugins: 100%|███████████████████████████████████| 2/2 [00:49<00:00, 24.65s/it] 68 | Auditing plugins: 10%|█████ | 2/20 [00:05<00:47, 2.62s/it] 69 | ``` 70 | #### Useful SQL Queries 71 | 72 | You can focus on a specific vulnerability class by querying for output relating to a specific rule. 
import configparser
from datetime import datetime

import mysql.connector


def connect_to_db(create_schema=False):
    """Open a MySQL connection using the ``[database]`` section of config.ini.

    Args:
        create_schema: When true, create the configured database (if it does
            not exist yet) and both tables before returning.

    Returns:
        A ``(connection, cursor)`` tuple with the configured database selected.

    Raises:
        SystemExit: If config.ini has no ``[database]`` section, or if the
            database does not exist and ``create_schema`` is false.
        mysql.connector.Error: For any other connection or server error.
    """
    config = configparser.ConfigParser()
    # ConfigParser.read() returns the list of files it managed to parse, so an
    # empty result means config.ini is missing or unreadable.
    if not config.read("config.ini") or "database" not in config:
        raise SystemExit(
            "Could not read a [database] section from config.ini. "
            "Copy config.ini.sample to config.ini and fill in the details."
        )

    db_config = config["database"]
    database = db_config["database"]

    # Connect to the server first (without selecting a database) so that the
    # database itself can be created when requested.
    db_conn = mysql.connector.connect(
        host=db_config["host"], user=db_config["user"], password=db_config["password"]
    )
    cursor = db_conn.cursor()
    try:
        if create_schema:
            # Identifiers cannot be bound as query parameters; backtick-quote
            # the name (doubling embedded backticks) instead of pasting it raw.
            quoted = "`{}`".format(database.replace("`", "``"))
            cursor.execute(f"CREATE DATABASE IF NOT EXISTS {quoted}")
            db_conn.database = database
            create_plugin_data_table(cursor)
            create_plugin_results_table(cursor)
        else:
            db_conn.database = database
    except mysql.connector.Error as e:
        # Do not leak the half-initialised connection on failure.
        cursor.close()
        db_conn.close()
        if e.errno == 1049:  # ER_BAD_DB_ERROR: unknown database
            raise SystemExit(
                "Database {} does not exist. Please run with the '--create-schema' flag to create the database.".format(
                    database
                )
            ) from e
        # Anything else (permissions, syntax, ...) must not be swallowed:
        # returning here would hand back a connection with no database selected.
        raise

    return db_conn, cursor
_ER_NO_SUCH_TABLE = 1146  # MySQL server error code for a missing table


def delete_results_table(cursor):
    """Drop the PluginResults table and recreate it empty."""
    cursor.execute("DROP TABLE IF EXISTS PluginResults")
    create_plugin_results_table(cursor)


def create_plugin_data_table(cursor):
    """Create the PluginData table (one row per plugin slug) if it is missing."""
    cursor.execute(
        """
        CREATE TABLE IF NOT EXISTS PluginData (
            slug VARCHAR(255) PRIMARY KEY,
            version VARCHAR(255),
            active_installs INT,
            downloaded INT,
            last_updated DATETIME,
            added_date DATE,
            download_link TEXT
        )
        """
    )


def create_plugin_results_table(cursor):
    """Create the PluginResults table (one row per Semgrep finding) if missing."""
    cursor.execute(
        """
        CREATE TABLE IF NOT EXISTS PluginResults (
            id INT AUTO_INCREMENT PRIMARY KEY,
            slug VARCHAR(255),
            file_path VARCHAR(255),
            check_id VARCHAR(255),
            start_line INT,
            end_line INT,
            vuln_lines TEXT,
            FOREIGN KEY (slug) REFERENCES PluginData(slug)
        )
        """
    )


def _format_plugin_date(value, in_format, out_format):
    """Reformat a date string, returning None when it is missing or malformed."""
    if not value:
        return None
    try:
        return datetime.strptime(value, in_format).strftime(out_format)
    except (TypeError, ValueError):
        # One plugin with an odd date must not abort a multi-hour run;
        # store NULL for that column instead.
        return None


def _execute_or_exit(cursor, sql, data):
    """Run a statement, exiting with a hint when the schema is missing."""
    try:
        cursor.execute(sql, data)
    except mysql.connector.errors.ProgrammingError as e:
        if getattr(e, "errno", None) == _ER_NO_SUCH_TABLE:
            raise SystemExit(
                "Table does not exist. Please run with the '--create-schema' flag to create the table."
            ) from e
        # Any other programming error means rows are not being stored;
        # surface it instead of silently dropping the data.
        raise


def insert_plugin_into_db(cursor, plugin):
    """Insert or update one plugin's metadata row in PluginData.

    Args:
        cursor: Database cursor exposing ``execute(sql, params)``.
        plugin: Plugin dict; ``slug`` is required. ``version``,
            ``active_installs``, ``downloaded``, ``last_updated``, ``added``
            and ``download_link`` are optional.

    Raises:
        SystemExit: If the PluginData table does not exist.
    """
    sql = """
    INSERT INTO PluginData (slug, version, active_installs, downloaded, last_updated, added_date, download_link)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
        version = VALUES(version),
        active_installs = VALUES(active_installs),
        downloaded = VALUES(downloaded),
        last_updated = VALUES(last_updated),
        added_date = VALUES(added_date),
        download_link = VALUES(download_link)
    """

    # Timestamps look like "2024-03-05 1:07pm GMT"; normalise them to the
    # forms that the DATETIME / DATE columns accept.
    last_updated = _format_plugin_date(
        plugin.get("last_updated"), "%Y-%m-%d %I:%M%p %Z", "%Y-%m-%d %H:%M:%S"
    )
    added_date = _format_plugin_date(plugin.get("added"), "%Y-%m-%d", "%Y-%m-%d")

    data = (
        plugin["slug"],
        plugin.get("version", "N/A"),
        # "or 0" also covers a key that is present but None.
        int(plugin.get("active_installs") or 0),
        int(plugin.get("downloaded") or 0),
        last_updated,
        added_date,
        plugin.get("download_link", "N/A"),
    )

    _execute_or_exit(cursor, sql, data)


def insert_result_into_db(cursor, slug, result):
    """Store one Semgrep finding for plugin *slug* in PluginResults.

    Args:
        cursor: Database cursor exposing ``execute(sql, params)``.
        slug: Plugin slug the finding belongs to.
        result: One finding dict; reads ``path``, ``check_id``,
            ``start.line``, ``end.line`` and ``extra.lines``.

    Raises:
        SystemExit: If the PluginResults table does not exist.
    """
    sql = (
        "INSERT INTO PluginResults (slug, file_path, check_id, start_line, end_line, vuln_lines) "
        "VALUES (%s, %s, %s, %s, %s, %s)"
    )
    data = (
        slug,
        result["path"],
        result["check_id"],
        result["start"]["line"],
        result["end"]["line"],
        result["extra"]["lines"],
    )
    _execute_or_exit(cursor, sql, data)
import argparse
import json
import os
import shutil
import subprocess
import zipfile
from datetime import datetime
from io import BytesIO

import requests
from tqdm import tqdm

from dbutils import (
    connect_to_db,
    delete_results_table,
    insert_plugin_into_db,
    insert_result_into_db,
)


# Let's only retrieve 10 plugins per page so people feel like the status bar is actually moving
def get_plugins(page=1, per_page=10):
    """Fetch one page of plugin metadata from the WordPress.org plugin API.

    Args:
        page: 1-based page number to request.
        per_page: Number of plugins per page.

    Returns:
        The decoded JSON response, or None if the request failed, returned a
        non-200 status, or the body was not valid JSON.
    """
    url = f"https://api.wordpress.org/plugins/info/1.2/?action=query_plugins&request[page]={page}&request[per_page]={per_page}"
    try:
        # Without a timeout a stalled connection would hang the run forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        print(f"Failed to retrieve page {page}: {e}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve page {page}: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as e:
        print(f"Failed to retrieve page {page}: {e}")
        return None


def write_plugins_to_csv_db_and_download(db_conn, cursor, download_dir, verbose=False):
    """Record every plugin's metadata in the database and download its files.

    Walks all pages of the plugin API. Each plugin is upserted into
    PluginData and then passed to ``download_and_extract_plugin``. The
    transaction is committed after every page so that completed work
    survives an interruption and a download-only run is actually persisted.

    Args:
        db_conn: Open database connection (used to commit).
        cursor: Cursor belonging to ``db_conn``.
        download_dir: Directory under which ``plugins/`` is created.
        verbose: Print per-plugin progress messages.
    """
    # The first page tells us how many pages there are in total.
    data = get_plugins(page=1)

    if not data or "info" not in data:
        print("Failed to retrieve the plugin information.")
        return

    total_pages = data["info"]["pages"]

    # Ensure the directory for plugins exists
    os.makedirs(os.path.join(download_dir, "plugins"), exist_ok=True)

    for page in tqdm(range(1, total_pages + 1), desc="Downloading plugins"):
        # Page 1 is already in hand from the lookup above; don't request it twice.
        if page > 1:
            data = get_plugins(page=page)

        if not data or "plugins" not in data:
            print(f"Stopping at page {page}: no plugin data returned.")
            break

        for plugin in data["plugins"]:
            insert_plugin_into_db(cursor, plugin)

            if verbose:
                print(f"Inserted data for plugin {plugin['slug']}.")
            # Download and extract the plugin
            download_and_extract_plugin(plugin, download_dir, verbose)

        # Persist this page's rows; nothing else commits on the download path.
        db_conn.commit()
def download_and_extract_plugin(plugin, download_dir, verbose):
    """Download a plugin's zip and extract it under ``<download_dir>/plugins``.

    Plugins whose ``last_updated`` year is more than two years behind the
    current year are skipped, as are plugins whose date is missing or
    malformed. An existing plugin folder is deleted before the download.

    Args:
        plugin: Plugin dict; reads ``slug``, ``download_link`` and
            ``last_updated``.
        download_dir: Base directory containing the ``plugins`` folder.
        verbose: Print progress messages.
    """
    slug = plugin["slug"]
    download_link = plugin.get("download_link")
    last_updated = plugin.get("last_updated")

    # Only download plugins that are actively maintained.
    try:
        # Date format is 'YYYY-MM-DD HH:MMpm GMT'. A missing value (None)
        # raises TypeError rather than ValueError, so catch both.
        last_updated_datetime = datetime.strptime(last_updated, "%Y-%m-%d %I:%M%p %Z")
    except (TypeError, ValueError):
        print(f"Invalid date format for plugin {slug}: {last_updated}")
        return
    if last_updated_datetime.year < (datetime.now().year - 2):
        return

    plugins_root = os.path.join(download_dir, "plugins")
    plugin_path = os.path.join(plugins_root, slug)

    # Clear the directory if it exists
    if os.path.exists(plugin_path):
        if verbose:
            print(f"Plugin folder already exists, deleting folder: {plugin_path}")
        shutil.rmtree(plugin_path)

    try:
        if verbose:
            print(f"Downloading and extracting plugin: {slug}")
        response = requests.get(download_link, timeout=60)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        with zipfile.ZipFile(BytesIO(response.content)) as z:
            z.extractall(plugins_root)
    except requests.RequestException as e:
        print(f"Failed to download {slug}: {e}")
    except zipfile.BadZipFile:
        print(f"Failed to unzip {slug}: Not a zip file or corrupt zip file")


def run_semgrep_and_store_results(db_conn, cursor, download_dir, config, verbose=False):
    """Run Semgrep over every downloaded plugin and store the findings.

    Each plugin folder is scanned separately. A plugin whose scan fails or
    whose output cannot be read is reported and skipped. Findings are
    committed once per plugin.

    Args:
        db_conn: Open database connection (used to commit).
        cursor: Cursor belonging to ``db_conn``.
        download_dir: Base directory containing the ``plugins`` folder.
        config: Semgrep config/ruleset passed to ``--config``.
        verbose: Print per-plugin progress messages.
    """
    plugins_root = os.path.join(download_dir, "plugins")

    for plugin in tqdm(os.listdir(plugins_root), desc="Auditing plugins"):
        plugin_path = os.path.join(plugins_root, plugin)
        if not os.path.isdir(plugin_path):
            # Stray files in plugins/ are not plugins.
            continue
        output_file = os.path.join(plugin_path, "semgrep_output.json")

        command = [
            "semgrep",
            "--config",
            str(config),
            "--json",
            "--no-git-ignore",
            "--output",
            output_file,
            "--quiet",  # Suppress non-essential output
            plugin_path,
        ]

        try:
            subprocess.run(command, check=True)
            if verbose:
                print(f"Semgrep analysis completed for {plugin}.")
            # Only read the output after a successful run; otherwise the file
            # is either absent or a stale leftover from an earlier audit.
            with open(output_file, "r", encoding="utf-8") as file:
                data = json.load(file)
        except subprocess.CalledProcessError as e:
            print(f"Semgrep failed for {plugin}: {e}")
            continue
        except json.JSONDecodeError as e:
            print(f"Failed to decode JSON for {plugin}: {e}")
            continue
        except OSError as e:
            # Covers a missing semgrep binary and an unreadable output file.
            print(f"Unexpected error for {plugin}: {e}")
            continue

        for item in data.get("results", []):
            insert_result_into_db(cursor, plugin, item)
        db_conn.commit()
if __name__ == "__main__":
    # Command-line entry point: parse options, then download and/or audit.
    parser = argparse.ArgumentParser(
        description="Downloads or audits all Wordpress plugins."
    )
    parser.add_argument(
        "--download",
        action="store_true",
        help="Download and extract plugins, if plugin directory already exists, it will delete it and redownload",
    )
    parser.add_argument(
        "--download-dir",
        type=str,
        default=".",
        help="The directory to save/audit downloaded plugins (default: current directory)",
    )
    parser.add_argument(
        "--audit",
        action="store_true",
        help="Audits downloaded plugins sequentially",
    )
    parser.add_argument(
        "--config",
        type=str,
        default="p/php",
        help="Semgrep config/rules to run - https://semgrep.dev/docs/running-rules#running-semgrep-registry-rules-locally (default: p/php)",
    )
    parser.add_argument(
        "--create-schema",
        action="store_true",
        help="Create the database and schema if this flag is set",
    )
    parser.add_argument(
        "--clear-results",
        action="store_true",
        help="Clear audit table and then run, useful if run as a cron job and we only care about the latest release",
    )
    parser.add_argument(
        "--verbose", action="store_true", help="Print detailed messages"
    )

    args = parser.parse_args()

    if not args.download and not args.audit:
        print("Please set either the --download or --audit option.\n")
        parser.print_help()

    else:
        # Connect (creating the schema first when requested).
        db_conn, cursor = connect_to_db(args.create_schema)
        try:
            if args.clear_results:
                delete_results_table(cursor)

            # Record plugin metadata in the database and download the plugins.
            if args.download:
                write_plugins_to_csv_db_and_download(
                    db_conn, cursor, args.download_dir, args.verbose
                )
            if args.audit:
                run_semgrep_and_store_results(
                    db_conn, cursor, args.download_dir, args.config, args.verbose
                )
        finally:
            # Release the connection even if a step above raised or the
            # long-running job was interrupted.
            cursor.close()
            db_conn.close()
--------------------------------------------------------------------------------