├── AUTHORS ├── COPYRIGHT ├── README.md ├── audit-log-replay.rb └── audit.sql /AUTHORS: -------------------------------------------------------------------------------- 1 | Craig Ringer 2 | github: 3 | 4 | with contributions including: 5 | 6 | github: 3nids 7 | github: ADTC 8 | github: asenyshyn 9 | github: jhm713 10 | github: paulovieira 11 | github: nineinchnick 12 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | The following is "The PostgreSQL License", effectively equivalent to the BSD 2 | license. 3 | 4 | I, Craig Ringer, cede any copyright on this work to the PostgreSQL Global 5 | Development Group. 6 | 7 | ------ 8 | 9 | PostgreSQL Audit Trigger Example 10 | Copyright (c) 2013, PostgreSQL Global Development Group 11 | 12 | Permission to use, copy, modify, and distribute this software and its 13 | documentation for any purpose, without fee, and without a written agreement 14 | is hereby granted, provided that the above copyright notice and this 15 | paragraph and the following two paragraphs appear in all copies. 16 | 17 | IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR 18 | DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING 19 | LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS 20 | DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE 21 | POSSIBILITY OF SUCH DAMAGE. 22 | 23 | THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 24 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 25 | AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 26 | ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO 27 | PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A simple, customisable table audit system for PostgreSQL implemented using triggers. 2 | 3 | See: 4 | 5 | http://wiki.postgresql.org/wiki/Audit_trigger_91plus 6 | 7 | This fork supports JSONB, daily tables, and real-time updates using NOTIFY/LISTEN. It is suitable for production use. 8 | -------------------------------------------------------------------------------- /audit-log-replay.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'optparse' 4 | require 'pg' 5 | require 'json' 6 | require 'ap' 7 | 8 | options = {} 9 | OptionParser.new do |opts| 10 | opts.banner = "Usage: audit-log-replay.rb [options]" 11 | 12 | opts.on('-eEVENT_ID', '--start-event-id=EVENT_ID', '[Re]-start from event id') do |event_id| 13 | options[:event_id] = event_id 14 | end 15 | opts.on('-aAUDIT_TABLE', '--audit-table=AUDIT_TABLE', 'Audit table to read from') do |audit_table| 16 | options[:audit_table] = audit_table 17 | end 18 | opts.on('-cCONNECTION', '--connection=CONNECTION', 'Connection string for the source event database') do |connection| 19 | options[:connection] = connection 20 | end 21 | end.parse! 22 | 23 | $connection = PG.connect(options[:connection]) 24 | $connection.('LISTEN audit_replication') 25 | at_exit do 26 | $connection.exec('UNLISTEN *') 27 | end 28 | 29 | def handle_event(row) 30 | row_data = JSON.parse(row['row_data'] || '{}') 31 | changed_fields = JSON.parse(row['changed_fields'] || '{}') 32 | 33 | case row['action'] 34 | when 'I' 35 | handle_insert(row['table_name'], row_data) 36 | when 'U' 37 | handle_update(row['table_name'], row_data, changed_fields) 38 | when 'D' 39 | handle_delete(row['table_name'], row_data) 40 | end 41 | end 42 | 43 | def escape_pg_array(value) 44 | new_value = value.map do |sub_value| 45 | if sub_value.is_a?(String) 46 | replaced = sub_value.gsub("\\", "\\\\\\\\").gsub('"', "\\\"") 47 | "\"#{replaced}\"" 48 | else 49 | escape(sub_value) 50 | end 51 | end 52 | "$_audit_replication_${#{new_value.join(',')}}$_audit_replication_$" 53 | end 54 | 55 | def escape(value) 56 | if value.nil? 57 | 'NULL' 58 | elsif value.is_a?(Integer) 59 | value 60 | elsif value.is_a?(Float) 61 | value 62 | elsif value.is_a?(Array) 63 | if value.all? { |sub_value| sub_value.is_a?(Integer) || sub_value.is_a?(String) || sub_value.is_a?(TrueClass) || sub_value.is_a?(FalseClass) } 64 | escape_pg_array(value) 65 | else 66 | escape(JSON.dump(value)) 67 | end 68 | elsif value.is_a?(TrueClass) 69 | "true" 70 | elsif value.is_a?(FalseClass) 71 | "false" 72 | elsif value.is_a?(Hash) 73 | escape(JSON.dump(value)) 74 | else 75 | "'#{$connection.escape_string(value)}'" 76 | end 77 | end 78 | 79 | def handle_insert(table_name, row_data) 80 | table = $connection.escape_identifier(table_name) 81 | fields = row_data.keys.map { |k| $connection.escape_identifier(k) }.join(', ') 82 | values = row_data.values.map { |v| escape(v) }.join(', ') 83 | 84 | insert_query = "INSERT INTO #{table} (#{fields}) VALUES (#{values});" 85 | puts insert_query 86 | end 87 | 88 | def handle_update(table_name, row_data, changed_fields) 89 | table = $connection.escape_identifier(table_name) 90 | id = escape(row_data['id']) 91 | pairs = changed_fields.map do |k, v| 92 | [$connection.escape_identifier(k), escape(v)].join(' = ') 93 | end.join(', ') 94 | 95 | update_query = "UPDATE #{table} SET #{pairs} WHERE id = #{id};" 96 | puts update_query 97 | end 98 | 99 | def handle_delete(table_name, row_data) 100 | table = $connection.escape_identifier(table_name) 101 | id = escape(row_data['id']) 102 | 103 | delete_query = "DELETE FROM #{table} WHERE id = #{id};" 104 | puts delete_query 105 | end 106 | 107 | query = <<-SQL 108 | SELECT 109 | event_id, 110 | action_tstamp_clk, 111 | table_name, 112 | action, 113 | row_data, 114 | changed_fields 115 | FROM #{options[:audit_table]} 116 | WHERE event_id > $1 117 | AND action IN ('I', 'U', 'D') 118 | ORDER BY event_id ASC 119 | SQL 120 | params = [options[:event_id]] 121 | 122 | $connection.send_query(query, params) 123 | $connection.set_single_row_mode 124 | i = 0 125 | $connection.get_result.stream_each do |row| 126 | handle_event(row) 127 | if i % 10000 == 0 128 | STDERR.printf( 129 | "%d events synced, current timestamp: %s, current event id: %s\n", 130 | i, 131 | row['action_tstamp_clk'], 132 | row['event_id'] 133 | ) 134 | end 135 | i += 1 136 | end 137 | 138 | loop do 139 | $connection.wait_for_notify do |channel, pid, payload| 140 | if channel == 'audit_replication' 141 | data = JSON.parse(payload) 142 | query = <<-SQL 143 | SELECT 144 | event_id, 145 | action_tstamp_clk, 146 | table_name, 147 | action, 148 | row_data, 149 | changed_fields 150 | FROM #{data['audit_table']} 151 | WHERE event_id = $1 152 | SQL 153 | params = [data['event_id']] 154 | row = $connection.exec(query, params).to_a.first 155 | if row 156 | handle_event(row) 157 | STDERR.printf("INFO: sync event for payload: %s\n", payload) 158 | else 159 | STDERR.printf("WARNING: could not find event for payload: %s\n", payload) 160 | end 161 | end 162 | end 163 | end 164 | -------------------------------------------------------------------------------- /audit.sql: -------------------------------------------------------------------------------- 1 | -- An audit history is important on most tables. Provide an audit trigger that logs to 2 | -- a dedicated audit table for the major relations. 3 | -- 4 | -- This file should be generic and not depend on application roles or structures, 5 | -- as it's being listed here: 6 | -- 7 | -- https://wiki.postgresql.org/wiki/Audit_trigger_91plus 8 | -- 9 | -- This trigger was originally based on 10 | -- http://wiki.postgresql.org/wiki/Audit_trigger 11 | -- but has been completely rewritten. 12 | -- 13 | -- Should really be converted into a relocatable EXTENSION, with control and upgrade files. 14 | 15 | CREATE OR REPLACE FUNCTION JSONB_SUBTRACT(v1 JSONB, v2 JSONB) 16 | RETURNS JSONB AS $$ 17 | DECLARE 18 | result JSONB; 19 | v RECORD; 20 | BEGIN 21 | result = v1; 22 | FOR v IN SELECT * FROM JSONB_EACH(v2) LOOP 23 | IF result->v.key IS NOT DISTINCT FROM v.value 24 | THEN result = result - v.key; 25 | END IF; 26 | END LOOP; 27 | RETURN result; 28 | END; 29 | $$ LANGUAGE PLPGSQL; 30 | 31 | CREATE SCHEMA IF NOT EXISTS audit; 32 | COMMENT ON SCHEMA audit IS 'Out-of-table audit/history logging tables and trigger functions'; 33 | 34 | -- 35 | -- Audited data. Lots of information is available, it's just a matter of how much 36 | -- you really want to record. See: 37 | -- 38 | -- http://www.postgresql.org/docs/9.1/static/functions-info.html 39 | -- 40 | -- Remember, every column you add takes up more audit table space and slows audit 41 | -- inserts. 42 | -- 43 | -- Every index you add has a big impact too, so avoid adding indexes to the 44 | -- audit table unless you REALLY need them 45 | -- 46 | -- It is sometimes worth copying the audit table, or a coarse subset of it that 47 | -- you're interested in, into a temporary table where you CREATE any useful 48 | -- indexes and do your analysis. 49 | -- 50 | DROP TABLE IF EXISTS audit.logged_actions; 51 | CREATE TABLE audit.logged_actions ( 52 | event_id BIGINT, 53 | schema_name TEXT NOT NULL, 54 | table_name TEXT NOT NULL, 55 | relid OID NOT NULL, 56 | session_user_name text, 57 | action_tstamp_tx TIMESTAMP WITH TIME ZONE NOT NULL, 58 | action_tstamp_stm TIMESTAMP WITH TIME ZONE NOT NULL, 59 | action_tstamp_clk TIMESTAMP WITH TIME ZONE NOT NULL, 60 | transaction_id BIGINT, 61 | application_name TEXT, 62 | client_addr INET, 63 | client_port INTEGER, 64 | client_query TEXT, 65 | action TEXT NOT NULL, 66 | row_data JSONB, 67 | changed_fields JSONB, 68 | statement_only BOOLEAN NOT NULL 69 | ) PARTITION BY RANGE (action_tstamp_clk); 70 | 71 | ALTER TABLE audit.logged_actions ADD CONSTRAINT cx__logged_actions__action CHECK (action IN ('I', 'D', 'U', 'T')); 72 | CREATE INDEX ix__logged_actions__event_id ON audit.logged_actions (event_id); 73 | CREATE SEQUENCE IF NOT EXISTS audit.logged_actions_event_id_seq; 74 | 75 | COMMENT ON TABLE audit.logged_actions IS 'History of auditable actions on audited tables, from audit.if_modified_func()'; 76 | COMMENT ON COLUMN audit.logged_actions.event_id IS 'Unique identifier for each auditable event'; 77 | COMMENT ON COLUMN audit.logged_actions.schema_name IS 'Database schema audited table for this event is in'; 78 | COMMENT ON COLUMN audit.logged_actions.table_name IS 'Non-schema-qualified table name of table event occured in'; 79 | COMMENT ON COLUMN audit.logged_actions.relid IS 'Table OID. Changes with drop/create. Get with ''tablename''::regclass'; 80 | COMMENT ON COLUMN audit.logged_actions.session_user_name IS 'Login / session user whose statement caused the audited event'; 81 | COMMENT ON COLUMN audit.logged_actions.action_tstamp_tx IS 'Transaction start timestamp for tx in which audited event occurred'; 82 | COMMENT ON COLUMN audit.logged_actions.action_tstamp_stm IS 'Statement start timestamp for tx in which audited event occurred'; 83 | COMMENT ON COLUMN audit.logged_actions.action_tstamp_clk IS 'Wall clock time at which audited event''s trigger call occurred'; 84 | COMMENT ON COLUMN audit.logged_actions.transaction_id IS 'Identifier of transaction that made the change. May wrap, but unique paired with action_tstamp_tx.'; 85 | COMMENT ON COLUMN audit.logged_actions.client_addr IS 'IP address of client that issued query. Null for unix domain socket.'; 86 | COMMENT ON COLUMN audit.logged_actions.client_port IS 'Remote peer IP port address of client that issued query. Undefined for unix socket.'; 87 | COMMENT ON COLUMN audit.logged_actions.client_query IS 'Top-level query that caused this auditable event. May be more than one statement.'; 88 | COMMENT ON COLUMN audit.logged_actions.application_name IS 'Application name set when this audit event occurred. Can be changed in-session by client.'; 89 | COMMENT ON COLUMN audit.logged_actions.action IS 'Action type; I = insert, D = delete, U = update, T = truncate'; 90 | COMMENT ON COLUMN audit.logged_actions.row_data IS 'Record value. Null for statement-level trigger. For INSERT this is the new tuple. For DELETE and UPDATE it is the old tuple.'; 91 | COMMENT ON COLUMN audit.logged_actions.changed_fields IS 'New values of fields changed by UPDATE. Null except for row-level UPDATE events.'; 92 | COMMENT ON COLUMN audit.logged_actions.statement_only IS '''t'' if audit event is from an FOR EACH STATEMENT trigger, ''f'' for FOR EACH ROW'; 93 | CREATE OR REPLACE FUNCTION audit.if_modified_func() RETURNS TRIGGER AS $body$ 94 | DECLARE 95 | audit_table_name VARCHAR; 96 | audit_row audit.logged_actions; 97 | include_values BOOLEAN; 98 | log_diffs BOOLEAN; 99 | j_old JSONB; 100 | j_new JSONB; 101 | excluded_cols TEXT[] = ARRAY[]::TEXT[]; 102 | inserted_event_id BIGINT; 103 | BEGIN 104 | IF TG_WHEN <> 'AFTER' THEN 105 | RAISE EXCEPTION 'audit.if_modified_func() may only run as an AFTER trigger'; 106 | END IF; 107 | 108 | audit_row = ROW( 109 | NEXTVAL('audit.logged_actions_event_id_seq'), -- event_id 110 | TG_TABLE_SCHEMA::TEXT, -- schema_name 111 | TG_TABLE_NAME::TEXT, -- table_name 112 | TG_RELID, -- relation OID for much quicker searches 113 | session_user::TEXT, -- session_user_name 114 | CURRENT_TIMESTAMP, -- action_tstamp_tx 115 | STATEMENT_TIMESTAMP(), -- action_tstamp_stm 116 | CLOCK_TIMESTAMP(), -- action_tstamp_clk 117 | TXID_CURRENT(), -- transaction ID 118 | CURRENT_SETTING('application_name'), -- client application 119 | INET_CLIENT_ADDR(), -- client_addr 120 | INET_CLIENT_PORT(), -- client_port 121 | CURRENT_QUERY(), -- top-level query or queries (if multistatement) from client 122 | SUBSTRING(TG_OP, 1, 1), -- action 123 | NULL, -- row_data 124 | NULL, -- changed_fields 125 | 'f' -- statement_only 126 | ); 127 | 128 | IF NOT TG_ARGV[0]::BOOLEAN IS DISTINCT FROM 'f'::BOOLEAN THEN 129 | audit_row.client_query = NULL; 130 | END IF; 131 | 132 | IF TG_ARGV[1] IS NOT NULL THEN 133 | excluded_cols = TG_ARGV[1]::TEXT[]; 134 | END IF; 135 | 136 | IF (TG_OP = 'UPDATE' AND TG_LEVEL = 'ROW') THEN 137 | audit_row.row_data = TO_JSONB(OLD) - excluded_cols; 138 | audit_row.changed_fields = JSONB_SUBTRACT(TO_JSONB(NEW), audit_row.row_data) - excluded_cols; 139 | IF audit_row.changed_fields = '{}'::JSONB THEN 140 | -- All changed fields are ignored. Skip this update. 141 | RETURN NULL; 142 | END IF; 143 | ELSIF (TG_OP = 'DELETE' AND TG_LEVEL = 'ROW') THEN 144 | audit_row.row_data = TO_JSONB(OLD) - excluded_cols; 145 | ELSIF (TG_OP = 'INSERT' AND TG_LEVEL = 'ROW') THEN 146 | audit_row.row_data = TO_JSONB(NEW) - excluded_cols; 147 | ELSIF (TG_LEVEL = 'STATEMENT' AND TG_OP IN ('INSERT','UPDATE','DELETE','TRUNCATE')) THEN 148 | audit_row.statement_only = 't'; 149 | ELSE 150 | RAISE EXCEPTION '[audit.if_modified_func] - Trigger func added as trigger for unhandled case: %, %',TG_OP, TG_LEVEL; 151 | RETURN NULL; 152 | END IF; 153 | 154 | audit_table_name = 'audit.logged_actions_' || TO_CHAR(audit_row.action_tstamp_clk, 'YYYY_MM_DD'); 155 | 156 | IF TO_REGCLASS(audit_table_name) IS NULL THEN 157 | EXECUTE FORMAT( 158 | 'CREATE TABLE IF NOT EXISTS %s PARTITION OF audit.logged_actions FOR VALUES FROM (''%s'') TO (''%s'')', 159 | audit_table_name, 160 | TO_CHAR(audit_row.action_tstamp_clk, 'YYYY-MM-DD'), 161 | TO_CHAR(audit_row.action_tstamp_clk + INTERVAL '1 DAY', 'YYYY-MM-DD') 162 | ); 163 | EXECUTE FORMAT( 164 | 'ALTER TABLE %s SET (AUTOVACUUM_ENABLED = FALSE, TOAST.AUTOVACUUM_ENABLED = FALSE)', 165 | audit_table_name 166 | ); 167 | END IF; 168 | INSERT INTO audit.logged_actions VALUES (audit_row.*) RETURNING event_id INTO inserted_event_id; 169 | 170 | RETURN NULL; 171 | END; 172 | $body$ 173 | LANGUAGE plpgsql 174 | SECURITY DEFINER 175 | SET search_path = pg_catalog, public; 176 | 177 | COMMENT ON FUNCTION audit.if_modified_func() IS $body$ 178 | Track changes to a table at the statement and/or row level. 179 | 180 | Optional parameters to trigger in CREATE TRIGGER call: 181 | 182 | param 0: boolean, whether to log the query text. Default 't'. 183 | 184 | param 1: text[], columns to ignore in updates. Default []. 185 | 186 | Updates to ignored cols are omitted from changed_fields. 187 | 188 | Updates with only ignored cols changed are not inserted 189 | into the audit log. 190 | 191 | Almost all the processing work is still done for updates 192 | that ignored. If you need to save the load, you need to use 193 | WHEN clause on the trigger instead. 194 | 195 | No warning or error is issued if ignored_cols contains columns 196 | that do not exist in the target table. This lets you specify 197 | a standard set of ignored columns. 198 | 199 | There is no parameter to disable logging of values. Add this trigger as 200 | a 'FOR EACH STATEMENT' rather than 'FOR EACH ROW' trigger if you do not 201 | want to log row values. 202 | 203 | Note that the user name logged is the login role for the session. The audit trigger 204 | cannot obtain the active role because it is reset by the SECURITY DEFINER invocation 205 | of the audit trigger its self. 206 | $body$; 207 | 208 | 209 | 210 | CREATE OR REPLACE FUNCTION audit.audit_table(target_table REGCLASS, audit_rows BOOLEAN, audit_query_text BOOLEAN, ignored_cols TEXT[]) RETURNS VOID AS $body$ 211 | DECLARE 212 | stm_targets TEXT = 'INSERT OR UPDATE OR DELETE OR TRUNCATE'; 213 | _q_txt TEXT; 214 | _ignored_cols_snip TEXT = ''; 215 | BEGIN 216 | EXECUTE 'DROP TRIGGER IF EXISTS audit_trigger_row ON ' || target_table; 217 | EXECUTE 'DROP TRIGGER IF EXISTS audit_trigger_stm ON ' || target_table; 218 | 219 | IF audit_rows THEN 220 | IF ARRAY_LENGTH(ignored_cols,1) > 0 THEN 221 | _ignored_cols_snip = ', ' || QUOTE_LITERAL(ignored_cols); 222 | END IF; 223 | _q_txt = 'CREATE TRIGGER audit_trigger_row AFTER INSERT OR UPDATE OR DELETE ON ' || 224 | target_table || 225 | ' FOR EACH ROW EXECUTE PROCEDURE audit.if_modified_func(' || 226 | QUOTE_LITERAL(audit_query_text) || _ignored_cols_snip || ');'; 227 | RAISE NOTICE '%', _q_txt; 228 | EXECUTE _q_txt; 229 | stm_targets = 'TRUNCATE'; 230 | ELSE 231 | END IF; 232 | 233 | _q_txt = 'CREATE TRIGGER audit_trigger_stm AFTER ' || stm_targets || ' ON ' || 234 | target_table || 235 | ' FOR EACH STATEMENT EXECUTE PROCEDURE audit.if_modified_func('|| 236 | QUOTE_LITERAL(audit_query_text) || ');'; 237 | RAISE NOTICE '%',_q_txt; 238 | EXECUTE _q_txt; 239 | END; 240 | $body$ 241 | language 'plpgsql'; 242 | 243 | COMMENT ON FUNCTION audit.audit_table(REGCLASS, BOOLEAN, BOOLEAN, TEXT[]) IS $body$ 244 | Add auditing support to a table. 245 | 246 | Arguments: 247 | target_table: Table name, schema qualified if not on search_path 248 | audit_rows: Record each row change, or only audit at a statement level 249 | audit_query_text: Record the text of the client query that triggered the audit event? 250 | ignored_cols: Columns to exclude from update diffs, ignore updates that change only ignored cols. 251 | $body$; 252 | 253 | -- Pg doesn't allow variadic calls with 0 params, so provide a wrapper 254 | CREATE OR REPLACE FUNCTION audit.audit_table(target_table REGCLASS, audit_rows BOOLEAN, audit_query_text BOOLEAN) RETURNS VOID AS $body$ 255 | SELECT audit.audit_table($1, $2, $3, ARRAY[]::TEXT[]); 256 | $body$ LANGUAGE SQL; 257 | 258 | -- And provide a convenience call wrapper for the simplest case 259 | -- of row-level logging with no excluded cols and query logging enabled. 260 | -- 261 | CREATE OR REPLACE FUNCTION audit.audit_table(target_table REGCLASS) RETURNS VOID AS $body$ 262 | SELECT audit.audit_table($1, BOOLEAN 't', BOOLEAN 't'); 263 | $body$ LANGUAGE 'sql'; 264 | 265 | COMMENT ON FUNCTION audit.audit_table(REGCLASS) IS $body$ 266 | Add auditing support to the given table. Row-level changes will be logged with full client query text. No cols are ignored. 267 | $body$; 268 | --------------------------------------------------------------------------------