#!/bin/sh
#
# pgdiff - compare what data changed in your Postgres database between two
# points in time.
#
# It works by just dumping out the tables into files in /var/tmp/pgdiff, so if
# you have a really large database, then you might have a bad time.
#
# Usage
#
#   Start/end a capture (use the same key a second time):
#     pgdiff postgresql://user:password@host:port/db_name -k some_key
#
#   Delete existing captures:
#     # all
#     pgdiff postgresql://user:password@host:port/db_name -d
#     # some_key only
#     pgdiff postgresql://user:password@host:port/db_name -d -k some_key
#
#   Limit capture to certain tables only (no need to use -t on the second time):
#     pgdiff postgresql://user:password@host:port/db_name -k some_key -t "table_a,table_b,other table"
#
#   Show help info:
#     pgdiff -h
#
# External commands used: pg_isready, psql, git, cmp, tail.
#
# Exit status: 0 on success (including -h and -d), 1 on a usage error or when
# the database cannot be reached.

# Captures are stored as <capture_root>/<db_name>/<key>/{a,b}/<table>.
readonly capture_root="/var/tmp/pgdiff"

# Print the usage text to stdout.
help() {
  cat <<EOF

Compare what data changed in your Postgres database between two points in time.

USAGE: $0 <connection string> [-k key] [-t tables] [-d] [-c auto|always|never] [-h]

connection string should be in the format 'postgresql://user:password@host:port/db_name'

-k is used to specify a key, so as to classify data captures. Eg. The first time that a key is specified, a new capture is initialized. Next time the key is used, $0 will produce a diff of how the data changed from the initial capture.

-t can be used to specify a comma-separated list of table names (eg. -t "table1,table2"). If present, then only the specified tables will be considered. This flag is a no-op when the same key has been used more than once, so you only need to use it the first time with a key.

-d is used to delete all keys, which is wise to do often, since the script works by making copies of entire databases. This can be used in conjunction with -k to clear a specific data capture.

-c can specify either 'always' or 'never'. If 'always', then color codes are used regardless of whether STDOUT is going to a terminal or a pipe. When 'never' is specified, color will never be used in the output. When -c is not used, the default behavior is to include color codes only if STDOUT is a terminal/tty.
EOF
}

# Print a unified diff (10 lines of context) between two capture files.
#   $1 - color mode: auto | always | never
#   $2 - path of the first capture
#   $3 - path of the second capture
diffcmd() {
  # git's own stdout is always the pipe into tail, so it can never detect the
  # terminal itself: decide here, against the script's stdout, and always pass
  # an explicit flag so the user's color.ui setting cannot override the mode.
  diff_color_flag="--no-color"
  case "$1" in
    always)
      diff_color_flag="--color=always"
      ;;
    auto)
      if [ -t 1 ]; then
        diff_color_flag="--color=always"
      fi
      ;;
  esac

  # tail drops the first five lines git prints ahead of the changed rows.
  git --no-pager diff -U10 "$diff_color_flag" --no-index -- "$2" "$3" \
    | tail -n +6
}

# Run the SQL given on stdin against $connstring and print bare rows.
# Extra psql options may be passed as arguments.
# SQL is fed on stdin rather than with -c because psql only interpolates
# :'var' / :"var" variables in script input, not in -c command strings.
psql_script() {
  psql --no-psqlrc --quiet --tuples-only --pset=pager=off \
    -v ON_ERROR_STOP=1 --dbname="$connstring" "$@"
}

# Print the names of the user tables, one per line, restricted to the
# comma-separated list in $tablefilter when that is non-empty.
list_db_tables() {
  psql_script --no-align -v filter="$tablefilter" <<'SQL'
SELECT tablename
FROM pg_tables
WHERE schemaname NOT IN ('information_schema', 'pg_catalog')
  AND (:'filter' = '' OR tablename::text = ANY(string_to_array(:'filter', ',')));
SQL
}

# Print the names of the tables already captured in directory $1, one per line.
list_captured_tables() {
  for capture_file in "$1"/*; do
    # an empty directory leaves the glob pattern unexpanded; skip it
    [ -f "$capture_file" ] || continue
    printf '%s\n' "${capture_file##*/}"
  done
}

# Print the primary key columns of table $1 as a comma-separated list of
# quoted identifiers, or nothing when the table has no primary key.
primary_key_columns() {
  # ORDER BY a.attnum keeps the column order identical between both captures.
  psql_script --no-align -v tbl="$1" <<'SQL'
SELECT STRING_AGG(quote_ident(a.attname), ',' ORDER BY a.attnum)
FROM pg_index i
JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
WHERE i.indrelid = quote_ident(:'tbl')::regclass AND i.indisprimary;
SQL
}

# Write every row of table $1, ordered by the column list $2, to stdout in
# psql's expanded format with NULLs rendered as "[null]".
dump_table() {
  psql_script --expanded --pset='null=[null]' -v tbl="$1" -v pk="$2" <<'SQL'
SELECT * FROM :"tbl" ORDER BY :pk;
SQL
}

# Help must work without a database, so handle it before the connection check.
case "${1:-}" in
  -h|--help)
    help
    exit 0
    ;;
esac

if [ "$#" -eq 0 ] \
  || ! pg_isready -d "$1" > /dev/null \
  || ! psql --no-psqlrc --dbname="$1" -c "select 1" > /dev/null; then
  echo "Connection string is not set, or unable to connect to database (eg. 'postgresql://user:password@host:port/db_name')"
  help
  exit 1
fi

connstring="$1"
db=${connstring##*/}
coloropt="auto"
id=""
tablefilter=""
shouldclear=""

shift

while getopts "k:t:dc:h" flag; do
  case "$flag" in
    k) id=$OPTARG ;;
    t) tablefilter=$OPTARG ;;
    d) shouldclear=true ;;
    c)
      case "$OPTARG" in
        auto|always|never)
          coloropt=$OPTARG
          ;;
        *)
          printf '%s\n\n' "-c expects one of auto|always|never"
          help
          exit 1
          ;;
      esac
      ;;
    h)
      help
      exit 0
      ;;
    *)
      # unknown option or missing argument: getopts has already complained
      help
      exit 1
      ;;
  esac
done

if [ -n "$shouldclear" ]; then
  if [ -n "$id" ]; then
    rm -rf -- "$capture_root/$db/$id"
    echo "[key=$id] Capture cleared"
  else
    rm -rf -- "$capture_root"
    echo "All captures cleared"
  fi

  exit 0
fi

firstpath="$capture_root/$db/$id/a"
secondpath="$capture_root/$db/$id/b"

if [ -d "$firstpath" ]; then
  # second run with this key: compare exactly the tables captured the first time
  tables=$(list_captured_tables "$firstpath")
  echo "[key=$id] Comparing data..."
else
  echo "[key=$id] Creating new data capture"
  tables=$(list_db_tables)
fi

compared=""
changes=""

# Read the list line by line so table names containing spaces stay intact.
# The here-document keeps the loop in the current shell, so the flags set
# inside it are still visible after "done".
while IFS= read -r table; do
  [ -n "$table" ] || continue

  primarykey=$(primary_key_columns "$table")

  if [ -z "$primarykey" ]; then
    echo "[key=$id] [table=$table] Skipping table, no PK found"
    continue
  fi

  if [ -f "$firstpath/$table" ]; then
    # key exists so this script must have been run with the given key already
    mkdir -p "$secondpath"
    outputpath="$secondpath/$table"
    dodiff=true
  else
    # nothing has been captured for this table under the given key yet
    mkdir -p "$firstpath"
    outputpath="$firstpath/$table"
    dodiff=""
  fi

  if ! dump_table "$table" "$primarykey" > "$outputpath"; then
    # do not keep a partial dump around: it would show up as bogus changes
    rm -f -- "$outputpath"
    printf '[key=%s] [table=%s] Skipping table, unable to read data\n' \
      "$id" "$table" >&2
    continue
  fi

  if [ -n "$dodiff" ]; then
    compared=true

    if ! cmp -s "$firstpath/$table" "$secondpath/$table"; then
      changes=true
      printf "[key=%s] [table=%s] Changes detected\n\n" "$id" "$table"

      diffcmd "$coloropt" "$firstpath/$table" "$secondpath/$table"
    fi
  fi
done <<EOF
$tables
EOF

if [ -n "$compared" ] && [ -z "$changes" ]; then
  printf "[key=%s] No changes detected\n\n" "$id"
fi