├── .gitignore
├── pg_csdvrx.gif
├── pg_csdvrx.ttyrecord
├── pg_filedump.debian10
├── decode.h
├── Makefile.contrib
├── Makefile
├── README.md
├── README.pg_filedump
├── stringinfo.c
├── pg_csdvrx.sh
├── pg_filedump.h
├── pg_lzcompress.c
├── LICENSE
├── decode.c
└── pg_filedump.c


/.gitignore:
--------------------------------------------------------------------------------
1 | /*.o
2 | /pg_filedump
3 | 


--------------------------------------------------------------------------------
/pg_csdvrx.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csdvrx/pg_csdvrx/HEAD/pg_csdvrx.gif


--------------------------------------------------------------------------------
/pg_csdvrx.ttyrecord:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csdvrx/pg_csdvrx/HEAD/pg_csdvrx.ttyrecord


--------------------------------------------------------------------------------
/pg_filedump.debian10:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csdvrx/pg_csdvrx/HEAD/pg_filedump.debian10


--------------------------------------------------------------------------------
/decode.h:
--------------------------------------------------------------------------------
 1 | #ifndef _PG_FILEDUMP_DECODE_H_
 2 | #define _PG_FILEDUMP_DECODE_H_
 3 | 
 4 | int
 5 | ParseAttributeTypesString(const char *str);
 6 | 
 7 | void
 8 | FormatDecode(const char *tupleData, unsigned int tupleSize);
 9 | 
10 | void
11 | ToastChunkDecode(const char* tuple_data,
12 | 		unsigned int tuple_size,
13 | 		Oid toast_oid,
14 | 		uint32 *chunk_id,
15 | 		char *chunk_data,
16 | 		unsigned int *chunk_data_size);
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/Makefile.contrib:
--------------------------------------------------------------------------------
 1 | # Build pg_filedump either inside a PostgreSQL source tree (unpacked as
 2 | # contrib/pg_filedump) or out of tree through PGXS (make USE_PGXS=1).
 3 | PROGRAM = pg_filedump
 4 | OBJS	= decode.o pg_filedump.o pg_lzcompress.o stringinfo.o
 5 | 
 6 | DOCS = README.pg_filedump
 7 | 
 8 | # The conditional bodies are deliberately not tab-indented: a line that
 9 | # starts with a tab is taken as recipe text whenever a rule is in effect.
10 | ifdef USE_PGXS
11 | PG_CONFIG = pg_config
12 | PGXS := $(shell $(PG_CONFIG) --pgxs)
13 | include $(PGXS)
14 | else
15 | subdir = contrib/pg_filedump
16 | top_builddir = ../..
17 | include $(top_builddir)/src/Makefile.global
18 | include $(top_srcdir)/contrib/contrib-global.mk
19 | endif
20 | 
21 | # avoid linking against all libs that the server links against (xml, selinux, ...)
22 | LIBS = $(libpq_pgport)
23 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # View README.pg_filedump first
 2 | 
 3 | # note this must match version macros in pg_filedump.h
 4 | FD_VERSION=11.0
 5 | 
 6 | PROGRAM = pg_filedump
 7 | OBJS = pg_filedump.o decode.o stringinfo.o pg_lzcompress.o
 8 | 
 9 | PG_CONFIG = pg_config
10 | PGXS := $(shell $(PG_CONFIG) --pgxs)
11 | include $(PGXS)
12 | 
13 | # avoid linking against all libs that the server links against (xml, selinux, ...)
14 | LIBS = $(libpq_pgport)
15 | 
16 | DISTFILES= README.pg_filedump Makefile Makefile.contrib \
17 | 	pg_filedump.h pg_filedump.c decode.h decode.c stringinfo.c pg_lzcompress.c
18 | 
19 | # Name of the staging directory and stem of the release tarball.
20 | DISTDIR = pg_filedump-$(FD_VERSION)
21 | 
22 | # "dist" is a command, not a file: without .PHONY a stray file or directory
23 | # named "dist" would make the target look up to date and skip the recipe.
24 | .PHONY: dist
25 | 
26 | # Stage DISTFILES in $(DISTDIR), pack them into $(DISTDIR).tar.gz, then
27 | # remove the staging directory again.
28 | dist:
29 | 	rm -rf $(DISTDIR) $(DISTDIR).tar.gz
30 | 	mkdir $(DISTDIR)
31 | 	cp -p $(DISTFILES) $(DISTDIR)
32 | 	tar cfz $(DISTDIR).tar.gz $(DISTDIR)
33 | 	rm -rf $(DISTDIR)
34 | 
35 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | > pg_csdvrx - Postgres Generic Clever Scanning Data Verify/Recovery Xpress
 2 | 
 3 | ## LICENSE
 4 | 
 5 | Copyright (c) by CS DVRX, 2019 - data consultant in NYC, tweet me for help!
 6 | 
 7 | This program is free software; you can redistribute it and/or modify
 8 | it under the terms of the GNU General Public License as published by
 9 | the Free Software Foundation; either version 3 of the License, or
10 | (at your option) any later version.
11 | 
12 | ## DEMO
13 | 
14 | ![gif](https://raw.githubusercontent.com/csdvrx/pg_csdvrx/master/pg_csdvrx.gif)
15 | 
16 | ## README
17 | 
18 | ```{text}
19 | This started after wasting hours trying to follow advice for pg_filedump:
20 | https://habr.com/en/company/postgrespro/blog/319770/
21 | https://habr.com/en/company/postgrespro/blog/323644/
22 | https://pgday.ru/presentation/146/59649714ee40a.pdf
23 | 
24 | With pg_csdvrx, you will quickly get everything that can be recovered.
25 | However, restoring from TSV is hard, so pg_csdvrx is best kept for cases
26 | without any alternatives, such as after massive hardware RAID problems,
27 | for example when most of your data ends up in /lost+found as files
28 | named by inode!
29 | 
30 | Therefore, before attempting this heroic recovery, try the nice way first:
31 | 
32 | 0. Immediately backup somewhere else absolutely everything retrieved,
33 | 
34 | 1. Copy the etc/ files like postgresql.conf into the data directory,
35 | 
36 | 2. Tweak as needed and launch postgres in single-user mode with:
37 | /usr/lib/postgresql/11/bin/postgres --single -O -D . dbname
38 | 
39 | 3. By tweaking, I mean adjust to the problem you have:
40 |  - system indexes: '-c ignore_system_indexes=true'
41 |  - wal: /usr/lib/postgresql/11/bin/pg_resetwal -f .
42 |  - some pages: try to zero, vacuum then reindex:
43 | SET zero_damaged_pages = on; VACUUM FULL;
44 | 
45 | 4. Below is for when it was not enough:
46 | ./pg_csdvrx.sh /var/lib/postgresql/11/main
47 | ```
48 | 


--------------------------------------------------------------------------------
/README.pg_filedump:
--------------------------------------------------------------------------------
  1 | pg_filedump - Display formatted contents of a PostgreSQL heap, index,
  2 |               or control file.
  3 | 
  4 | Copyright (c) 2002-2010 Red Hat, Inc.
  5 | Copyright (c) 2011-2018, PostgreSQL Global Development Group
  6 | 
  7 | This program is free software; you can redistribute it and/or modify
  8 | it under the terms of the GNU General Public License as published by
  9 | the Free Software Foundation; either version 2 of the License, or
 10 | (at your option) any later version.
 11 | 
 12 | Original Author: Patrick Macdonald <patrickm@redhat.com>
 13 | 
 14 | 
 15 | ------------------------------------------------------------------------
 16 | Overview:
 17 | 
 18 | pg_filedump is a utility to format PostgreSQL heap/index/control files
 19 | into a human-readable form.  You can format/dump the files several ways,
 20 | as listed in the Invocation section, as well as dumping straight binary.
 21 | 
 22 | The type of file (heap/index) can usually be determined automatically
 23 | by the content of the blocks within the file.  However, to format a
 24 | pg_control file you must use the -c option.
 25 | 
 26 | The default is to format the entire file using the block size listed in
 27 | block 0 and display block relative addresses.  These defaults can be
 28 | modified using run-time options.
 29 | 
 30 | Some options may seem strange but they're there for a reason.  For
 31 | example, block size.  It's there because if the header of block 0 is
 32 | corrupt, you need a method of forcing a block size.
 33 | 
 34 | 
 35 | ------------------------------------------------------------------------
 36 | Compile/Installation:
 37 | 
 38 | To compile pg_filedump, you will need to have a properly configured
 39 | PostgreSQL source tree or complete install tree (with include files)
 40 | of the appropriate PostgreSQL major version.
 41 | 
 42 | There are two makefiles included in this package.  Makefile is a standalone
 43 | makefile for pg_filedump.  Makefile.contrib can be used if this package
 44 | was untarred in the contrib directory of a PostgreSQL build tree.
 45 | 
 46 |   make
 47 |   make install
 48 | 
 49 | It is also possible to use Makefile.contrib without being in the contrib
 50 | directory:
 51 | 
 52 |   make -f Makefile.contrib USE_PGXS=1
 53 | 
 54 | Both methods require that the pg_config program be in your PATH, but should
 55 | not require any manual adjustments of the Makefile.
 56 | 
 57 | 
 58 | ------------------------------------------------------------------------
 59 | Invocation:
 60 | 
 61 | pg_filedump [-abcdfhikxy] [-R startblock [endblock]] [-D attrlist] [-S blocksize] [-s segsize] [-n segnumber] file
 62 | 
 63 | Defaults are: relative addressing, range of the entire file, block size
 64 |               as listed on block 0 in the file
 65 | 
 66 | The following options are valid for heap and index files:
 67 |   -a  Display absolute addresses when formatting (Block header
 68 |       information is always block relative)
 69 |   -b  Display binary block images within a range (Option will turn
 70 |       off all formatting options)
 71 |   -d  Display formatted block content dump (Option will turn off
 72 |       all other formatting options)
 73 |   -D  Decode tuples using given comma separated list of types.
 74 |       List of supported types:
 75 |         * bigint
 76 |         * bigserial
 77 |         * bool
 78 |         * char
 79 |         * charN     -- char(n)
 80 |         * date
 81 |         * float
 82 |         * float4
 83 |         * float8
 84 |         * int
 85 |         * json
 86 |         * macaddr
 87 |         * name
 88 |         * oid
 89 |         * real
 90 |         * serial
 91 |         * smallint
 92 |         * smallserial
 93 |         * text
 94 |         * time
 95 |         * timestamp
 96 |         * timetz
 97 |         * uuid
 98 |         * varchar
 99 |         * varcharN -- varchar(n)
100 |         * xid
101 |         * xml
102 |         * ~        -- ignores all attributes left in a tuple
103 |   -f  Display formatted block content dump along with interpretation
104 |   -h  Display this information
105 |   -i  Display interpreted item details
106 |   -k  Verify block checksums
107 |   -R  Display specific block ranges within the file (Blocks are
108 |       indexed from 0)
109 |         [startblock]: block to start at
110 |         [endblock]: block to end at
111 |       A startblock without an endblock will format the single block
112 |   -s  Force segment size to [segsize]
113 |   -n  Force segment number to [segnumber]
114 |   -S  Force block size to [blocksize]
115 |   -x  Force interpreted formatting of block items as index items
116 |   -y  Force interpreted formatting of block items as heap items
117 | 
118 | The following options are valid for control files:
119 |   -c  Interpret the file listed as a control file
120 |   -f  Display formatted content dump along with interpretation
121 |   -S  Force block size to [blocksize]
122 | 
123 | In most cases it's recommended to use the -i and -f options to get
124 | the most useful dump output.
125 | 


--------------------------------------------------------------------------------
/stringinfo.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Code mostly borrowed from PostgreSQL's stringinfo.c
  3 |  * palloc replaced to malloc, etc.
  4 |  */
  5 | 
  6 | #include "postgres.h"
  7 | #include <lib/stringinfo.h>
  8 | #include <string.h>
  9 | #include <assert.h>
 10 | 
 11 | /* 64 KB - as long as pg_filedump doesn't support TOAST, it doesn't need more */
 12 | #define MaxAllocSize	((Size) (64*1024))
 13 | 
 14 | /*-------------------------
 15 |  * StringInfoData holds information about an extensible string.
 16 |  *	  data	  is the current buffer for the string.
 17 |  *	  len	  is the current string length.  There is guaranteed to be
 18 |  *			  a terminating '\0' at data[len], although this is not very
 19 |  *			  useful when the string holds binary data rather than text.
 20 |  *	  maxlen  is the allocated size in bytes of 'data', i.e. the maximum
 21 |  *			  string size (including the terminating '\0' char) that we can
 22 |  *			  currently store in 'data' without having to reallocate
 23 |  *			  more space.  We must always have maxlen > len.
 24 |  *	  cursor  is initialized to zero by makeStringInfo or initStringInfo,
 25 |  *			  but is not otherwise touched by the stringinfo.c routines.
 26 |  *			  Some routines use it to scan through a StringInfo.
 27 |  *-------------------------
 28 |  */
 29 | 
 30 | /*
 31 |  * initStringInfo
 32 |  *
 33 |  * Initialize a StringInfoData struct (with previously undefined contents)
 34 |  * to describe an empty string.  If the initial buffer cannot be allocated,
 35 |  * an error is printed and the process exits with status 1.
 36 |  */
 37 | void
 38 | initStringInfo(StringInfo str)
 39 | {
 40 | 	int			size = 1024;	/* initial default buffer size */
 41 | 
 42 | 	str->data = (char *) malloc(size);
 43 | 	if (str->data == NULL)
 44 | 	{
 45 | 		/* resetStringInfo() below would write through the NULL pointer */
 46 | 		printf("Error: malloc() failed!\n");
 47 | 		exit(1);
 48 | 	}
 49 | 	str->maxlen = size;
 50 | 	resetStringInfo(str);
 51 | }
 52 | 
 53 | /*
 54 |  * resetStringInfo
 55 |  *
 56 |  * Reset the StringInfo: the data buffer remains valid, but its
 57 |  * previous content, if any, is cleared.
 58 |  */
 59 | void
 60 | resetStringInfo(StringInfo str)
 61 | {
 62 | 	str->data[0] = '\0';
 63 | 	str->len = 0;
 64 | 	str->cursor = 0;
 65 | }
 66 | 
 67 | /*
 68 |  * appendStringInfoString
 69 |  *
 70 |  * Append a null-terminated string to str.
 71 |  */
 72 | void
 73 | appendStringInfoString(StringInfo str, const char *s)
 74 | {
 75 | 	appendBinaryStringInfo(str, s, strlen(s));
 76 | }
 77 | 
 78 | /*
 79 |  * appendBinaryStringInfo
 80 |  *
 81 |  * Append arbitrary binary data to a StringInfo, allocating more space
 82 |  * if necessary.
 83 |  */
 84 | void
 85 | appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
 86 | {
 87 | 	assert(str != NULL);
 88 | 
 89 | 	/* Make more room if needed */
 90 | 	enlargeStringInfo(str, datalen);
 91 | 
 92 | 	/* OK, append the data */
 93 | 	memcpy(str->data + str->len, data, datalen);
 94 | 	str->len += datalen;
 95 | 
 96 | 	/*
 97 | 	 * Keep a trailing null in place, even though it's probably useless for
 98 | 	 * binary data.  (Some callers are dealing with text but call this because
 99 | 	 * their input isn't null-terminated.)
100 | 	 */
101 | 	str->data[str->len] = '\0';
102 | }
103 | 
104 | /*
105 |  * enlargeStringInfo
106 |  *
107 |  * Make sure there is enough space for 'needed' more bytes
108 |  * ('needed' does not include the terminating null).
109 |  *
110 |  * External callers usually need not concern themselves with this, since
111 |  * all stringinfo.c routines do it automatically.  However, if a caller
112 |  * knows that a StringInfo will eventually become X bytes large, it
113 |  * can save some malloc overhead by enlarging the buffer before starting
114 |  * to store data in it.
115 |  *
116 |  * This function does not return on failure: a negative 'needed', a request
117 |  * that would reach MaxAllocSize, or a failed realloc() prints an "Error:"
118 |  * line and ends the process with exit(1).
119 |  */
120 | void
121 | enlargeStringInfo(StringInfo str, int needed)
122 | {
123 | 	Size		newlen;
124 | 	Size		limit;
125 | 	char	   *old_data;
126 | 
127 | 	limit = MaxAllocSize;
128 | 
129 | 	/*
130 | 	 * Guard against out-of-range "needed" values.  Without this, we can get
131 | 	 * an overflow or infinite loop in the following.
132 | 	 */
133 | 	if (needed < 0)				/* should not happen */
134 | 	{
135 | 		printf("Error: invalid string enlargement request size: %d\n", needed);
136 | 		exit(1);
137 | 	}
138 | 
139 | 	if (((Size) needed) >= (limit - (Size) str->len))
140 | 	{
141 | 		printf("Error: cannot enlarge string buffer containing %d bytes by %d more bytes.\n",
142 | 			   str->len, needed);
143 | 		exit(1);
144 | 	}
145 | 
146 | 	needed += str->len + 1;		/* total space required now */
147 | 
148 | 	/* Because of the above test, we now have needed <= limit */
149 | 
150 | 	if (needed <= str->maxlen)
151 | 		return;					/* got enough space already */
152 | 
153 | 	/*
154 | 	 * We don't want to allocate just a little more space with each append;
155 | 	 * for efficiency, double the buffer size each time it overflows.
156 | 	 * Actually, we might need to more than double it if 'needed' is big...
157 | 	 */
158 | 	newlen = 2 * (Size) str->maxlen;
159 | 	while ((Size) needed > newlen)	/* needed is known to be positive here */
160 | 		newlen = 2 * newlen;
161 | 
162 | 	/*
163 | 	 * Clamp to the limit in case we went past it.  Note we are assuming here
164 | 	 * that limit <= INT_MAX/2, else the above loop could overflow.  We will
165 | 	 * still have newlen >= needed.
166 | 	 */
167 | 	if (newlen > limit)
168 | 		newlen = limit;
169 | 
170 | 	old_data = str->data;
171 | 	str->data = (char *) realloc(str->data, (Size) newlen);
172 | 	if (str->data == NULL)
173 | 	{
174 | 		free(old_data);
175 | 		printf("Error: realloc() failed!\n");
176 | 		exit(1);
177 | 	}
178 | 
179 | 	str->maxlen = newlen;
180 | }
181 | 


--------------------------------------------------------------------------------
/pg_csdvrx.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # 
 3 | # pg_csdvrx - Postgres Generic Clever Scanning Data Verify/Recovery Xpress
 4 | # 
 5 | # Copyright (c) by CS DVRX, 2019 - data consultant in NYC, tweet me for help!
 6 | # 
 7 | # This program is free software; you can redistribute it and/or modify
 8 | # it under the terms of the GNU General Public License as published by
 9 | # the Free Software Foundation; either version 3 of the License, or
10 | # (at your option) any later version.
11 | # 
12 | # This started after wasting hours trying to follow advice for pg_filedump:
13 | # https://habr.com/en/company/postgrespro/blog/319770/
14 | # https://habr.com/en/company/postgrespro/blog/323644/
15 | # https://pgday.ru/presentation/146/59649714ee40a.pdf
16 | #
17 | # With pg_csdvrx, you will quickly get everything that can be recovered.
18 | # However, restoring from TSV is hard, so pg_csdvrx is best kept for cases
19 | # without any alternatives, such as after massive hardware RAID problems,
20 | # for example when most of your data ends up in /lost+found as files
21 | # named by inode!
22 | #
23 | # Therefore, before attempting this heroic recovery, try the nice way first:
24 | #
25 | # 0. Immediately backup somewhere else absolutely everything retrieved,
26 | #
27 | # 1. Copy the etc/ files like postgresql.conf into the data directory,
28 | #
29 | # 2. Tweak as needed and launch postgres in single-user mode with:
30 | # /usr/lib/postgresql/11/bin/postgres --single -O -D . dbname
31 | #
32 | # 3. By tweaking, I mean adjust to the problem you have:
33 | #  - system indexes: '-c ignore_system_indexes=true'
34 | #  - wal: /usr/lib/postgresql/11/bin/pg_resetwal -f .
35 | #  - some pages: try to zero, vacuum then reindex:
36 | # SET zero_damaged_pages = on; VACUUM FULL;
37 | #
38 | # 4. Below is for when it was not enough:
39 | # ./pg_csdvrx.sh /var/lib/postgresql/11/main
40 | 
41 | MAGIC_PG_CLASS=1259
42 | MAGIC_PG_ATTRIBUTE=1249
43 | MAGIC_PG_TYPE=1247
44 | PGPATH=$1
45 | # Database directory under base/: 16384 unless PG_CSDVRX_DBOID is set to another oid
46 | DBOID=${PG_CSDVRX_DBOID:-16384}
47 | 
48 | [[ $# -lt 1 ]] && printf '%s usage:\n\tpgdata_path (namespace) (relfilenode)\n\nExample:\n\t%s /var/lib/postgresql/11/mydatabase\n' "$0" "$0" && exit 255
49 | # "COPY: " rows of pg_class: field 2 is used as table name, 3 as namespace, 8 as relfilenode
50 | pg_class_rows() { pg_filedump -D name,oid,oid,oid,oid,oid,oid,~ "$PGPATH/base/$DBOID/$MAGIC_PG_CLASS" | grep "COPY: "; }
51 | 
52 | # Step 1: find the namespace using pg_class magic and remove _* and pg_* system tables
53 | # The exit is unconditional: with no namespace there is nothing for step 2 to match
54 | NAMESPACE=$2
55 | [[ $# -lt 2 ]] && { printf "Step 1/4: Please chose the database namespace containing the tables you want:\n"; pg_class_rows | awk '{ print $3 "\t" $2 }' | grep -v $'\t_' | grep -v pg_; exit 1; }
56 | 
57 | # Step 2: find the relfilenode, using the same pg_class magic
58 | # WONTFIX: redundant for simple 1/1 matches, but checking prevents copy/paste mistakes
59 | # The namespace must equal field 3 exactly, so it cannot hit digits inside another column
60 | [[ $# -lt 3 ]] && { printf "Step 2/4: Please confirm the relfilenode matching the table you want:\n"; pg_class_rows | awk -v ns="$NAMESPACE" '$3 == ns { print $2 "\t" $8 }' | grep -v $'\t_' | grep -v pg_ | sort | uniq; exit 2; }
61 | 
62 | # Step 3.A: find the oids (schema) in the relfilenode, using pg_attribute magic
63 | RELFILENODE=$3
64 | [[ $# -lt 4 ]] && printf "Step 3/4: Please make sure the schema matches:\n\t\n"
65 | # Compared to Sasha tutorial, the pg_attribute.attnum smallint field is used to match
66 | # the order on disk: index created will have changed the natural order, so sort here
67 | # an extra advantage is removing the system columns with negative values:
68 | # ctid xmin cmin xmax cmax tableoid
69 | # Field 2 must equal the relfilenode exactly; rows with type oid 0 are skipped so that
70 | # NAMES and TYPESNUM stay aligned (single-character column names are kept)
71 | OIDS=$(pg_filedump -D oid,name,oid,int,smallint,smallint,~ "$PGPATH/base/$DBOID/$MAGIC_PG_ATTRIBUTE" | grep "COPY:" | awk -v rel="$RELFILENODE" '$2 == rel && $7 > 0 && $4 != 0' | sort -k7 -n | awk '{ print $3, $4 }')
72 | NAMES=$(awk '{ print $1 }' <<< "$OIDS")
73 | TYPESNUM=$(awk '{ print $2 }' <<< "$OIDS")
74 | 
75 | # Step 3.B: obtain the table name to protect the user from copy/paste mistakes
76 | TABLENAME=$(pg_class_rows | awk -v rel="$RELFILENODE" '$8 == rel { print $2 }' | sort | uniq)
77 | 
78 | # Step 3.C: find the types using the pg_type magic (dumped once, then searched per oid)
79 | PG_TYPE_DUMP=$(pg_filedump -i -D name,~ "$PGPATH/base/$DBOID/$MAGIC_PG_TYPE")
80 | TYPES=$(for t in $TYPESNUM; do grep -A5 -E "OID: ${t}\$" <<< "$PG_TYPE_DUMP" | grep "COPY:" | awk '{ print $2 }'; done)
81 | 
82 | # Step 4: Display it all, with recodes until aliases are added to pg_filedump
83 | TYPESALIASED=$(echo $TYPES | sed -e 's/int2/smallint/g' -e 's/int4/int/g' -e 's/int8/bigint/g' -e 's/timestamptz/timestamp/g')
84 | echo " $TABLENAME ("
85 | # Show the field name to easily find which tuple pg_filedump complains about
86 | TYPES_ARR=($TYPES); j=0; for i in $NAMES; do ((j++)); echo "   $i ${TYPES_ARR[j-1]},  -- field #$j on disk"; done
87 | echo " )"
88 | 
89 | printf "\nStep 4/4: If the table matches all the fields (in any order), recover the data with:\n\t"
90 | echo -n "pg_filedump -o -D '"
91 | echo -n $TYPESALIASED |sed -e 's/ /,/g'
92 | # WONTFIX: give the exact arguments without ,~ to single out genuine incomplete lines
93 | # otherwise pg_filedump complains on each line, before still giving the full data!
94 | # "unable to decode a tuple, no more bytes left. Partial data: " (full data)!!
95 | echo "' $PGPATH/base/$DBOID/$RELFILENODE | grep COPY: |sed -e 's/^.COPY://g' > recovered-$TABLENAME.tsv"
96 | printf "\nIf the decoding fail, pg_filedump can tell on which field number.\nYou can also replace 'COPY' by 'Partial.data:' to get at least some data.\n"
97 | # FIXME? could also give instructions on how to use find to get from lost+found the
98 | # 1024*1024*1024=1G files containing each 1024^3/8192= 131072 records
99 | 


--------------------------------------------------------------------------------
/pg_filedump.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * pg_filedump.h - PostgreSQL file dump utility for dumping and
  3 |  *				   formatting heap (data), index and control files.
  4 |  *
  5 |  * Copyright (c) 2002-2010 Red Hat, Inc.
  6 |  * Copyright (c) 2011-2018, PostgreSQL Global Development Group
  7 |  *
  8 |  * This program is free software; you can redistribute it and/or modify
  9 |  * it under the terms of the GNU General Public License as published by
 10 |  * the Free Software Foundation; either version 2 of the License, or
 11 |  * (at your option) any later version.
 12 |  *
 13 |  * This program is distributed in the hope that it will be useful,
 14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  * GNU General Public License for more details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License
 19 |  * along with this program; if not, write to the Free Software
 20 |  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 21 |  *
 22 |  * Original Author: Patrick Macdonald <patrickm@redhat.com>
 23 |  */
 24 | 
 25 | #define FD_VERSION	"11.0"		/* version ID of pg_filedump */
 26 | #define FD_PG_VERSION	"PostgreSQL 11.x"		/* PG version it works with */
 27 | 
 28 | #include "postgres.h"
 29 | 
 30 | #include <time.h>
 31 | #include <ctype.h>
 32 | 
 33 | #include "access/gin_private.h"
 34 | #include "access/gist.h"
 35 | #include "access/hash.h"
 36 | #include "access/htup.h"
 37 | #include "access/htup_details.h"
 38 | #include "access/itup.h"
 39 | #include "access/nbtree.h"
 40 | #include "access/spgist_private.h"
 41 | #include "catalog/pg_control.h"
 42 | #include "storage/bufpage.h"
 43 | 
 44 | /*	Options for Block formatting operations */
 45 | extern unsigned int blockOptions;
 46 | 
 47 | typedef enum blockSwitches
 48 | {
 49 | 	BLOCK_ABSOLUTE = 0x00000001,		/* -a: Absolute(vs Relative) addressing */
 50 | 	BLOCK_BINARY = 0x00000002,			/* -b: Binary dump of block */
 51 | 	BLOCK_FORMAT = 0x00000004,			/* -f: Formatted dump of blocks / control file */
 52 | 	BLOCK_FORCED = 0x00000008,			/* -S: Block size forced */
 53 | 	BLOCK_NO_INTR = 0x00000010,			/* -d: Dump straight blocks */
 54 | 	BLOCK_RANGE = 0x00000020,			/* -R: Specific block range to dump */
 55 | 	BLOCK_CHECKSUMS = 0x00000040,		/* -k: verify block checksums */
 56 | 	BLOCK_DECODE = 0x00000080,			/* -D: Try to decode tuples */
 57 | 	BLOCK_DECODE_TOAST = 0x00000100,	/* -t: Try to decode TOAST values */
 58 | 	BLOCK_IGNORE_OLD = 0x00000200		/* -o: presumably skip (ignore) old values, per the flag name -- TODO confirm in pg_filedump.c */
 59 | } blockSwitches;
 60 | 
 61 | /* Segment-related options */
 62 | extern unsigned int segmentOptions;
 63 | 
 64 | typedef enum segmentSwitches
 65 | {
 66 | 	SEGMENT_SIZE_FORCED = 0x00000001,	/* -s: Segment size forced */
 67 | 	SEGMENT_NUMBER_FORCED = 0x00000002, /* -n: Segment number forced */
 68 | }			segmentSwitches;
 69 | 
 70 | /* -R[start]:Block range start */
 71 | extern int	blockStart;
 72 | 
 73 | /* -R[end]:Block range end */
 74 | extern int	blockEnd;
 75 | 
 76 | /* Options for Item formatting operations */
 77 | extern unsigned int itemOptions;
 78 | 
 79 | typedef enum itemSwitches
 80 | {
 81 | 	ITEM_DETAIL = 0x00000001,	/* -i: Display interpreted items */
 82 | 	ITEM_HEAP = 0x00000002,		/* -y: Blocks contain HeapTuple items */
 83 | 	ITEM_INDEX = 0x00000004,	/* -x: Blocks contain IndexTuple items */
 84 | 	ITEM_SPG_INNER = 0x00000008,	/* Blocks contain SpGistInnerTuple items */
 85 | 	ITEM_SPG_LEAF = 0x00000010	/* Blocks contain SpGistLeafTuple items */
 86 | } itemSwitches;
 87 | 
 88 | /* Options for Control File formatting operations */
 89 | extern unsigned int controlOptions;
 90 | 
 91 | typedef enum controlSwitches
 92 | {
 93 | 	CONTROL_DUMP = 0x00000001,	/* -c: Dump control file */
 94 | 	CONTROL_FORMAT = BLOCK_FORMAT,	/* -f: Formatted dump of control file */
 95 | 	CONTROL_FORCED = BLOCK_FORCED	/* -S: Block size forced */
 96 | } controlSwitches;
 97 | 
 98 | /* Possible value types for the Special Section */
 99 | typedef enum specialSectionTypes
100 | {
101 | 	SPEC_SECT_NONE,				/* No special section on block */
102 | 	SPEC_SECT_SEQUENCE,			/* Sequence info in special section */
103 | 	SPEC_SECT_INDEX_BTREE,		/* BTree index info in special section */
104 | 	SPEC_SECT_INDEX_HASH,		/* Hash index info in special section */
105 | 	SPEC_SECT_INDEX_GIST,		/* GIST index info in special section */
106 | 	SPEC_SECT_INDEX_GIN,		/* GIN index info in special section */
107 | 	SPEC_SECT_INDEX_SPGIST,		/* SP - GIST index info in special section */
108 | 	SPEC_SECT_ERROR_UNKNOWN,	/* Unknown error */
109 | 	SPEC_SECT_ERROR_BOUNDARY	/* Boundary error */
110 | }			specialSectionTypes;
111 | 
112 | extern unsigned int specialType;
113 | 
114 | /* Possible return codes from option validation routine.
115 |  * pg_filedump doesn't do much with them now but maybe in
116 |  * the future... */
117 | typedef enum optionReturnCodes
118 | {
119 | 	OPT_RC_VALID,				/* All options are valid */
120 | 	OPT_RC_INVALID,				/* Improper option string */
121 | 	OPT_RC_FILE,				/* File problems */
122 | 	OPT_RC_DUPLICATE,			/* Duplicate option encountered */
123 | 	OPT_RC_COPYRIGHT			/* Copyright should be displayed */
124 | }			optionReturnCodes;
125 | 
126 | /* Check for a duplicate option, else set the option flag for later consumption.
127 |  * Assigns `rc` and `duplicateSwitch`, which must be in scope at the call site. */
128 | #define SET_OPTION(_x,_y,_z) if ((_x) & (_y))			\
129 | 							   {						\
130 | 								 rc = OPT_RC_DUPLICATE; \
131 | 								 duplicateSwitch = (_z);	\
132 | 							   }						\
133 | 							 else						\
134 | 							   (_x) |= (_y);
135 | 
136 | #define SEQUENCE_MAGIC 0x1717	/* PostgreSQL defined magic number */
137 | #define EOF_ENCOUNTERED (-1)	/* Indicator for partial read */
138 | #define BYTES_PER_LINE 16		/* Format the binary 16 bytes per line */
139 | 
140 | extern char *fileName;
141 | 
142 | /*
143 |  * Function Prototypes
144 |  */
145 | unsigned int GetBlockSize(FILE *fp);
146 | int DumpFileContents(unsigned int blockOptions, unsigned int controlOptions,
147 | 					 FILE *fp, unsigned int blockSize, int blockStart,
148 | 					int blockEnd, bool isToast, Oid toastOid,
149 | 					unsigned int toastExternalSize, char *toastValue);
150 | 


--------------------------------------------------------------------------------
/pg_lzcompress.c:
--------------------------------------------------------------------------------
  1 | /* ----------
  2 |  * pg_lzcompress.c -
  3 |  *
  4 |  *		This is an implementation of LZ compression for PostgreSQL.
  5 |  *		It uses a simple history table and generates 2-3 byte tags
  6 |  *		capable of backward copy information for 3-273 bytes with
  7 |  *		a max offset of 4095.
  8 |  *
  9 |  *		Entry routines:
 10 |  *
 11 |  *			int32
 12 |  *			pglz_compress(const char *source, int32 slen, char *dest,
 13 |  *						  const PGLZ_Strategy *strategy);
 14 |  *
 15 |  *				source is the input data to be compressed.
 16 |  *
 17 |  *				slen is the length of the input data.
 18 |  *
 19 |  *				dest is the output area for the compressed result.
 20 |  *					It must be at least as big as PGLZ_MAX_OUTPUT(slen).
 21 |  *
 22 |  *				strategy is a pointer to some information controlling
 23 |  *					the compression algorithm. If NULL, the compiled
 24 |  *					in default strategy is used.
 25 |  *
 26 |  *				The return value is the number of bytes written in the
 27 |  *				buffer dest, or -1 if compression fails; in the latter
 28 |  *				case the contents of dest are undefined.
 29 |  *
 30 |  *			int32
 31 |  *			pglz_decompress(const char *source, int32 slen, char *dest,
 32 |  *							int32 rawsize)
 33 |  *
 34 |  *				source is the compressed input.
 35 |  *
 36 |  *				slen is the length of the compressed input.
 37 |  *
 38 |  *				dest is the area where the uncompressed data will be
 39 |  *					written to. It is the callers responsibility to
 40 |  *					provide enough space.
 41 |  *
 42 |  *					The data is written to buff exactly as it was handed
 43 |  *					to pglz_compress(). No terminating zero byte is added.
 44 |  *
 45 |  *				rawsize is the length of the uncompressed data.
 46 |  *
 47 |  *				The return value is the number of bytes written in the
 48 |  *				buffer dest, or -1 if decompression fails.
 49 |  *
 50 |  *		The decompression algorithm and internal data format:
 51 |  *
 52 |  *			It is made with the compressed data itself.
 53 |  *
 54 |  *			The data representation is easiest explained by describing
 55 |  *			the process of decompression.
 56 |  *
 57 |  *			If compressed_size == rawsize, then the data
 58 |  *			is stored uncompressed as plain bytes. Thus, the decompressor
 59 |  *			simply copies rawsize bytes to the destination.
 60 |  *
 61 |  *			Otherwise the first byte tells what to do the next 8 times.
 62 |  *			We call this the control byte.
 63 |  *
 64 |  *			An unset bit in the control byte means, that one uncompressed
 65 |  *			byte follows, which is copied from input to output.
 66 |  *
 67 |  *			A set bit in the control byte means, that a tag of 2-3 bytes
 68 |  *			follows. A tag contains information to copy some bytes, that
 69 |  *			are already in the output buffer, to the current location in
 70 |  *			the output. Let's call the three tag bytes T1, T2 and T3. The
 71 |  *			position of the data to copy is coded as an offset from the
 72 |  *			actual output position.
 73 |  *
 74 |  *			The offset is in the upper nibble of T1 and in T2.
 75 |  *			The length is in the lower nibble of T1.
 76 |  *
 77 |  *			So the 16 bits of a 2 byte tag are coded as
 78 |  *
 79 |  *				7---T1--0  7---T2--0
 80 |  *				OOOO LLLL  OOOO OOOO
 81 |  *
 82 |  *			This limits the offset to 1-4095 (12 bits) and the length
 83 |  *			to 3-18 (4 bits) because 3 is always added to it. To emit
 84 |  *			a tag of 2 bytes with a length of 2 only saves one control
 85 |  *			bit. But we lose one byte in the possible length of a tag.
 86 |  *
 87 |  *			In the actual implementation, the 2 byte tag's length is
 88 |  *			limited to 3-17, because the value 0xF in the length nibble
 89 |  *			has special meaning. It means, that the next following
 90 |  *			byte (T3) has to be added to the length value of 18. That
 91 |  *			makes total limits of 1-4095 for offset and 3-273 for length.
 92 |  *
 93 |  *			Now that we have successfully decoded a tag, we simply copy
 94 |  *			the output that occurred <offset> bytes back to the current
 95 |  *			output location in the specified <length>. Thus, a
 96 |  *			sequence of 200 spaces (think about bpchar fields) could be
 97 |  *			coded in 4 bytes. One literal space and a three byte tag to
 98 |  *			copy 199 bytes with a -1 offset. Whow - that's a compression
 99 |  *			rate of 98%! Well, the implementation needs to save the
100 |  *			original data size too, so we need another 4 bytes for it
101 |  *			and end up with a total compression rate of 96%, what's still
102 |  *			worth a Whow.
103 |  *
104 |  *		The compression algorithm
105 |  *
106 |  *			The following uses numbers used in the default strategy.
107 |  *
108 |  *			The compressor works best for attributes of a size between
109 |  *			1K and 1M. For smaller items there's not that much chance of
110 |  *			redundancy in the character sequence (except for large areas
111 |  *			of identical bytes like trailing spaces) and for bigger ones
112 |  *			our 4K maximum look-back distance is too small.
113 |  *
114 |  *			The compressor creates a table for lists of positions.
115 |  *			For each input position (except the last 3), a hash key is
116 |  *			built from the 4 next input bytes and the position remembered
117 |  *			in the appropriate list. Thus, the table points to linked
118 |  *			lists of strings that are likely to match in at least their
119 |  *			first 4 characters. This is done on the fly while the input
120 |  *			is compressed into the output area.  Table entries are only
121 |  *			kept for the last 4096 input positions, since we cannot use
122 |  *			back-pointers larger than that anyway.  The size of the hash
123 |  *			table is chosen based on the size of the input - a larger table
124 |  *			has a larger startup cost, as it needs to be initialized to
125 |  *			zero, but reduces the number of hash collisions on long inputs.
126 |  *
127 |  *			For each byte in the input, its hash key (built from this
128 |  *			byte and the next 3) is used to find the appropriate list
129 |  *			in the table. The lists remember the positions of all bytes
130 |  *			that had the same hash key in the past in increasing backward
131 |  *			offset order. Now for all entries in the used lists, the
132 |  *			match length is computed by comparing the characters from the
133 |  *			entries position with the characters from the actual input
134 |  *			position.
135 |  *
136 |  *			The compressor starts with a so called "good_match" of 128.
137 |  *			It is a "prefer speed against compression ratio" optimizer.
138 |  *			So if the first entry looked at already has 128 or more
139 |  *			matching characters, the lookup stops and that position is
140 |  *			used for the next tag in the output.
141 |  *
142 |  *			For each subsequent entry in the history list, the "good_match"
143 |  *			is lowered by 10%. So the compressor will be happier with
144 |  *			short matches the farther it has to go back in the history.
145 |  *			Another "speed against ratio" preference characteristic of
146 |  *			the algorithm.
147 |  *
148 |  *			Thus there are 3 stop conditions for the lookup of matches:
149 |  *
150 |  *				- a match >= good_match is found
151 |  *				- there are no more history entries to look at
152 |  *				- the next history entry is already too far back
153 |  *				  to be coded into a tag.
154 |  *
155 |  *			Finally the match algorithm checks that at least a match
156 |  *			of 3 or more bytes has been found, because that is the smallest
157 |  *			amount of copy information to code into a tag. If so, a tag
158 |  *			is emitted and all the input bytes covered by it are just
159 |  *			scanned for the history adds; otherwise a literal character
160 |  *			is emitted and only its history entry is added.
161 |  *
162 |  *		Acknowledgements:
163 |  *
164 |  *			Many thanks to Adisak Pochanayon, whose article about SLZ
165 |  *			inspired me to write the PostgreSQL compression this way.
166 |  *
167 |  *			Jan Wieck
168 |  *
169 |  * Copyright (c) 1999-2018, PostgreSQL Global Development Group
170 |  *
171 |  * src/common/pg_lzcompress.c
172 |  * ----------
173 |  */
174 | #ifndef FRONTEND
175 | #include "postgres.h"
176 | #else
177 | #include "postgres_fe.h"
178 | #endif
179 | 
180 | #include <limits.h>
181 | 
182 | #include "common/pg_lzcompress.h"
183 | 
184 | 
185 | /* ----------
186 |  * Local definitions (sizes of the compressor's history structures)
187 |  * ----------
188 |  */
189 | #define PGLZ_MAX_HISTORY_LISTS	8192	/* must be power of 2 */
190 | #define PGLZ_HISTORY_SIZE		4096	/* usable slots in hist_entries[] */
191 | #define PGLZ_MAX_MATCH			273	/* longest match length a tag can encode */
192 | 
193 | 
194 | /* ----------
195 |  * PGLZ_HistEntry -
196 |  *
197 |  *		Node of a linked list used for the backward history lookup
198 |  *
199 |  * All the entries sharing a hash key are linked in a doubly linked list.
200 |  * This makes it easy to remove an entry when it's time to recycle it
201 |  * (because it's more than 4K positions old).
202 |  * ----------
203 |  */
204 | typedef struct PGLZ_HistEntry
205 | {
206 | 	struct PGLZ_HistEntry *next;	/* next entry in my hash key's list */
207 | 	struct PGLZ_HistEntry *prev;	/* previous entry; NULL for a list head */
208 | 	int			hindex;			/* my current hash key */
209 | 	const char *pos;			/* my input position */
210 | } PGLZ_HistEntry;
211 | 
212 | 
213 | /* ----------
214 |  * The provided standard strategies
215 |  * ----------
216 |  */
217 | static const PGLZ_Strategy strategy_default_data = {
218 | 	/* Inputs shorter than 32 bytes are left uncompressed */
219 | 	.min_input_size = 32,
220 | 	/* There is no upper bound on the input size we will attempt */
221 | 	.max_input_size = INT_MAX,
222 | 	/* The result must be at least 25% smaller than the input */
223 | 	.min_comp_rate = 25,
224 | 	.first_success_by = 1024,	/* Give up if 1KB went by without a match */
225 | 	/* A match of 128 bytes ends the history lookup right away */
226 | 	.match_size_good = 128,
227 | 	/* Each further history entry lowers that threshold by 10% */
228 | 	.match_size_drop = 10
229 | };
230 | const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data;
231 | 
232 | 
233 | static const PGLZ_Strategy strategy_always_data = {
234 | 	.min_input_size = 0,		/* Even the smallest input is compressed */
235 | 	.max_input_size = INT_MAX,
236 | 	.min_comp_rate = 0,			/* Saving a single byte is good enough */
237 | 	.first_success_by = INT_MAX,	/* Never give up early */
238 | 	/* A match of 128 bytes ends the history lookup right away */
239 | 	.match_size_good = 128,
240 | 	.match_size_drop = 6		/* Search harder for a good match */
241 | };
242 | const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
243 | 
244 | 
245 | /* ----------
246 |  * Statically allocated work arrays for history
247 |  * ----------
248 |  */
249 | static int16 hist_start[PGLZ_MAX_HISTORY_LISTS];	/* per hash key: index of list head, 0 if empty */
250 | static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1];	/* slot 0 is the 'invalid' sentinel */
251 | 
252 | /*
253 |  * Element 0 in hist_entries is unused, and means 'invalid'. Likewise,
254 |  * INVALID_ENTRY_PTR in a next pointer ends a list (a list head's prev is NULL).
255 |  */
256 | #define INVALID_ENTRY			0
257 | #define INVALID_ENTRY_PTR		(&hist_entries[INVALID_ENTRY])
258 | 
259 | /* ----------
260 |  * pglz_hist_idx -
261 |  *
262 |  *		Computes the history table slot for the lookup by the next 4
263 |  *		characters in the input (only the first one if fewer remain).
264 |  *
265 |  * NB: because we use the next 4 characters, we are not guaranteed to
266 |  * find 3-character matches; they very possibly will be in the wrong
267 |  * hash list.  This seems an acceptable tradeoff for spreading out the
268 |  * hash keys more.  The result is masked with _mask (table size minus 1).
269 |  * ----------
270 |  */
271 | #define pglz_hist_idx(_s,_e, _mask) (										\
272 | 			((((_e) - (_s)) < 4) ? (int) (_s)[0] :							\
273 | 			 (((_s)[0] << 6) ^ ((_s)[1] << 4) ^								\
274 | 			  ((_s)[2] << 2) ^ (_s)[3])) & (_mask)				\
275 | 		)
276 | 
277 | 
278 | /* ----------
279 |  * pglz_hist_add -
280 |  *
281 |  *		Adds a new entry for input position _s at the head of its hash list.
282 |  *
283 |  * If _recycle is true, then we are recycling a previously used entry,
284 |  * and must first delink it from its old hashcode's linked list.
285 |  *
286 |  * NOTE: beware of multiple evaluations of macro's arguments, and note that
287 |  * _hn and _recycle are modified in the macro (_hn wraps back to 1, not 0).
288 |  * ----------
289 |  */
290 | #define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask)	\
291 | do {									\
292 | 			int __hindex = pglz_hist_idx((_s),(_e), (_mask));				\
293 | 			int16 *__myhsp = &(_hs)[__hindex];								\
294 | 			PGLZ_HistEntry *__myhe = &(_he)[_hn];							\
295 | 			if (_recycle) {													\
296 | 				if (__myhe->prev == NULL)									\
297 | 					(_hs)[__myhe->hindex] = __myhe->next - (_he);			\
298 | 				else														\
299 | 					__myhe->prev->next = __myhe->next;						\
300 | 				if (__myhe->next != NULL)									\
301 | 					__myhe->next->prev = __myhe->prev;						\
302 | 			}																\
303 | 			__myhe->next = &(_he)[*__myhsp];								\
304 | 			__myhe->prev = NULL;											\
305 | 			__myhe->hindex = __hindex;										\
306 | 			__myhe->pos  = (_s);											\
307 | 			/* If there was an existing entry in this hash slot, link */	\
308 | 			/* this new entry to it. However, the 0th entry in the */		\
309 | 			/* entries table is unused, so we can freely scribble on it. */ \
310 | 			/* So don't bother checking if the slot was used - we'll */		\
311 | 			/* scribble on the unused entry if it was not, but that's */	\
312 | 			/* harmless. Avoiding the branch in this critical path */		\
313 | 			/* speeds this up a little bit. */								\
314 | 			/* if (*__myhsp != INVALID_ENTRY) */							\
315 | 				(_he)[(*__myhsp)].prev = __myhe;							\
316 | 			*__myhsp = _hn;													\
317 | 			if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) {							\
318 | 				(_hn) = 1;													\
319 | 				(_recycle) = true;											\
320 | 			}																\
321 | } while (0)
322 | 
323 | 
324 | /* ----------
325 |  * pglz_out_ctrl -
326 |  *
327 |  *		Stores the finished control byte and starts a new one if needed.
328 |  * ----------
329 |  */
330 | #define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf) \
331 | do { \
332 | 	if ((__ctrl & 0xff) == 0)	/* no control bit left to use */				\
333 | 	{																		\
334 | 		*(__ctrlp) = __ctrlb;												\
335 | 		__ctrlp = (__buf)++;												\
336 | 		__ctrlb = 0;														\
337 | 		__ctrl = 1;															\
338 | 	}																		\
339 | } while (0)
340 | 
341 | 
342 | /* ----------
343 |  * pglz_out_literal -
344 |  *
345 |  *		Outputs a literal byte to the destination buffer, leaving its
346 |  *		control bit unset and advancing to the next control bit.
347 |  * ----------
348 |  */
349 | #define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \
350 | do { \
351 | 	pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);								\
352 | 	*(_buf)++ = (unsigned char)(_byte);										\
353 | 	_ctrl <<= 1;															\
354 | } while (0)
355 | 
356 | 
357 | /* ----------
358 |  * pglz_out_tag -
359 |  *
360 |  *		Outputs a backward reference tag of 2-3 bytes (depending on the
361 |  *		length; one more byte if a new control byte has to be started)
362 |  *		to the destination buffer and sets the appropriate control bit.
363 |  * ----------
364 |  */
365 | #define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \
366 | do { \
367 | 	pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);								\
368 | 	_ctrlb |= _ctrl;														\
369 | 	_ctrl <<= 1;															\
370 | 	if (_len > 17)															\
371 | 	{																		\
372 | 		(_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f);		\
373 | 		(_buf)[1] = (unsigned char)(((_off) & 0xff));						\
374 | 		(_buf)[2] = (unsigned char)((_len) - 18);							\
375 | 		(_buf) += 3;														\
376 | 	} else {																\
377 | 		(_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \
378 | 		(_buf)[1] = (unsigned char)((_off) & 0xff);							\
379 | 		(_buf) += 2;														\
380 | 	}																		\
381 | } while (0)
382 | 
383 | 
384 | /* ----------
385 |  * pglz_find_match -
386 |  *
387 |  *		Walks the history list selected by the hash of the current input
388 |  *		position, looking for the longest earlier occurrence of the bytes
389 |  *		that start at input.  good_match is the match length at which the
390 |  *		search stops early; it is lowered by good_drop percent for every
391 |  *		further entry visited.
392 |  *
393 |  *		Returns 1 and stores the match in *lenp and *offp if a match of at
394 |  *		least 3 bytes was found; otherwise returns 0 and stores nothing.
395 |  * ----------
396 |  */
397 | static inline int
398 | pglz_find_match(int16 *hstart, const char *input, const char *end,
399 | 				int *lenp, int *offp, int good_match, int good_drop, int mask)
400 | {
401 | 	PGLZ_HistEntry *cand;
402 | 	int32		best_len = 0;
403 | 	int32		best_off = 0;
404 | 
405 | 	/*
406 | 	 * Start at the head of the history list for this hash key and walk it
407 | 	 * until a good enough match is found or the list is exhausted.
408 | 	 */
409 | 	cand = &hist_entries[hstart[pglz_hist_idx(input, end, mask)]];
410 | 	for (;;)
411 | 	{
412 | 		const char *cur = input;
413 | 		const char *old;
414 | 		int32		dist;
415 | 		int32		run = 0;
416 | 
417 | 		/* End of list: nothing more to examine. */
418 | 		if (cand == INVALID_ENTRY_PTR)
419 | 			break;
420 | 
421 | 		/*
422 | 		 * Stop if the offset does not fit into our tag anymore.
423 | 		 */
424 | 		old = cand->pos;
425 | 		dist = cur - old;
426 | 		if (dist >= 0x0fff)
427 | 			break;
428 | 
429 | 		/*
430 | 		 * Measure this candidate.  It can only beat the best match so far
431 | 		 * if it agrees with the input over at least best_len bytes, so once
432 | 		 * best_len has reached 16 a single memcmp() over that prefix is used
433 | 		 * to reject losers cheaply; survivors resume the byte-by-byte scan
434 | 		 * right after the prefix.  With a shorter best match the scan simply
435 | 		 * starts at the first byte.  A candidate failing the memcmp() keeps
436 | 		 * run == 0 and is therefore never selected.
437 | 		 */
438 | 		if (best_len < 16 || memcmp(cur, old, best_len) == 0)
439 | 		{
440 | 			if (best_len >= 16)
441 | 			{
442 | 				run = best_len;
443 | 				cur += best_len;
444 | 				old += best_len;
445 | 			}
446 | 			/* Extend the match up to end of input or PGLZ_MAX_MATCH. */
447 | 			while (cur < end && *cur == *old && run < PGLZ_MAX_MATCH)
448 | 			{
449 | 				run++;
450 | 				cur++;
451 | 				old++;
452 | 			}
453 | 		}
454 | 
455 | 		/*
456 | 		 * Remember this match as the best (if it is)
457 | 		 */
458 | 		if (run > best_len)
459 | 		{
460 | 			best_len = run;
461 | 			best_off = dist;
462 | 		}
463 | 
464 | 		/*
465 | 		 * Move on to the next history entry; if there is none we
466 | 		 * are done.
467 | 		 */
468 | 		cand = cand->next;
469 | 		if (cand == INVALID_ENTRY_PTR)
470 | 			break;
471 | 
472 | 		/*
473 | 		 * Be happy with lesser good matches the more entries we visited:
474 | 		 * quit once the best match reaches the current threshold, otherwise
475 | 		 * lower the threshold by good_drop percent for the next round.
476 | 		 */
477 | 		if (best_len >= good_match)
478 | 			break;
479 | 		good_match -= (good_match * good_drop) / 100;
480 | 	}
481 | 
482 | 	/*
483 | 	 * A tag costs at least two bytes, so only matches of three or more
484 | 	 * bytes are reported to the caller.
485 | 	 */
486 | 	if (best_len <= 2)
487 | 		return 0;
488 | 
489 | 	*lenp = best_len;
490 | 	*offp = best_off;
491 | 	return 1;
492 | }
493 | 
494 | 
495 | /* ----------
496 |  * pglz_compress -
497 |  *
498 |  *		Compresses slen bytes of source into dest using strategy (default if
499 |  *		NULL). Returns the number of bytes written to dest, or -1 on failure.
500 |  * ----------
501 |  */
502 | int32
503 | pglz_compress(const char *source, int32 slen, char *dest,
504 | 			  const PGLZ_Strategy *strategy)
505 | {
506 | 	unsigned char *bp = (unsigned char *) dest;	/* current output position */
507 | 	unsigned char *bstart = bp;	/* start of output, for size checks */
508 | 	int			hist_next = 1;	/* next hist_entries[] slot; 0 is reserved */
509 | 	bool		hist_recycle = false;	/* set once the slots wrap around */
510 | 	const char *dp = source;	/* current input position */
511 | 	const char *dend = source + slen;	/* one past the last input byte */
512 | 	unsigned char ctrl_dummy = 0;	/* absorbs the first pglz_out_ctrl store */
513 | 	unsigned char *ctrlp = &ctrl_dummy;	/* where the current control byte goes */
514 | 	unsigned char ctrlb = 0;	/* control byte being assembled */
515 | 	unsigned char ctrl = 0;	/* bit for the next item; 0 forces a new byte */
516 | 	bool		found_match = false;
517 | 	int32		match_len;
518 | 	int32		match_off;
519 | 	int32		good_match;
520 | 	int32		good_drop;
521 | 	int32		result_size;
522 | 	int32		result_max;
523 | 	int32		need_rate;
524 | 	int			hashsz;
525 | 	int			mask;
526 | 
527 | 	/*
528 | 	 * Our fallback strategy is the default.
529 | 	 */
530 | 	if (strategy == NULL)
531 | 		strategy = PGLZ_strategy_default;
532 | 
533 | 	/*
534 | 	 * If the strategy forbids compression (at all or if source chunk size out
535 | 	 * of range), fail.
536 | 	 */
537 | 	if (strategy->match_size_good <= 0 ||
538 | 		slen < strategy->min_input_size ||
539 | 		slen > strategy->max_input_size)
540 | 		return -1;
541 | 
542 | 	/*
543 | 	 * Limit the match parameters to the supported range.
544 | 	 */
545 | 	good_match = strategy->match_size_good;
546 | 	if (good_match > PGLZ_MAX_MATCH)
547 | 		good_match = PGLZ_MAX_MATCH;
548 | 	else if (good_match < 17)
549 | 		good_match = 17;
550 | 
551 | 	good_drop = strategy->match_size_drop;
552 | 	if (good_drop < 0)
553 | 		good_drop = 0;
554 | 	else if (good_drop > 100)
555 | 		good_drop = 100;
556 | 
557 | 	need_rate = strategy->min_comp_rate;
558 | 	if (need_rate < 0)
559 | 		need_rate = 0;
560 | 	else if (need_rate > 99)
561 | 		need_rate = 99;
562 | 
563 | 	/*
564 | 	 * Compute the maximum result size allowed by the strategy, namely the
565 | 	 * input size minus the minimum wanted compression rate.  This had better
566 | 	 * be <= slen, else we might overrun the provided output buffer.
567 | 	 */
568 | 	if (slen > (INT_MAX / 100))
569 | 	{
570 | 		/* Approximate to avoid overflow */
571 | 		result_max = (slen / 100) * (100 - need_rate);
572 | 	}
573 | 	else
574 | 		result_max = (slen * (100 - need_rate)) / 100;
575 | 
576 | 	/*
577 | 	 * Experiments suggest that these hash sizes work pretty well. A large
578 | 	 * hash table minimizes collision, but has a higher startup cost. For a
579 | 	 * small input, the startup cost dominates. The table size must be a power
580 | 	 * of two.
581 | 	 */
582 | 	if (slen < 128)
583 | 		hashsz = 512;
584 | 	else if (slen < 256)
585 | 		hashsz = 1024;
586 | 	else if (slen < 512)
587 | 		hashsz = 2048;
588 | 	else if (slen < 1024)
589 | 		hashsz = 4096;
590 | 	else
591 | 		hashsz = 8192;
592 | 	mask = hashsz - 1;
593 | 
594 | 	/*
595 | 	 * Initialize the history lists to empty.  We do not need to zero the
596 | 	 * hist_entries[] array; its entries are initialized as they are used.
597 | 	 */
598 | 	memset(hist_start, 0, hashsz * sizeof(int16));
599 | 
600 | 	/*
601 | 	 * Compress the source directly into the output buffer.
602 | 	 */
603 | 	while (dp < dend)
604 | 	{
605 | 		/*
606 | 		 * If we already exceeded the maximum result size, fail.
607 | 		 *
608 | 		 * We check once per loop; since the loop body could emit as many as 4
609 | 		 * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
610 | 		 * allow 4 slop bytes.
611 | 		 */
612 | 		if (bp - bstart >= result_max)
613 | 			return -1;
614 | 
615 | 		/*
616 | 		 * If we've emitted more than first_success_by bytes without finding
617 | 		 * anything compressible at all, fail.  This lets us fall out
618 | 		 * reasonably quickly when looking at incompressible input (such as
619 | 		 * pre-compressed data).
620 | 		 */
621 | 		if (!found_match && bp - bstart >= strategy->first_success_by)
622 | 			return -1;
623 | 
624 | 		/*
625 | 		 * Try to find a match in the history
626 | 		 */
627 | 		if (pglz_find_match(hist_start, dp, dend, &match_len,
628 | 							&match_off, good_match, good_drop, mask))
629 | 		{
630 | 			/*
631 | 			 * Create the tag and add history entries for all matched
632 | 			 * characters.
633 | 			 */
634 | 			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
635 | 			while (match_len--)
636 | 			{
637 | 				pglz_hist_add(hist_start, hist_entries,
638 | 							  hist_next, hist_recycle,
639 | 							  dp, dend, mask);
640 | 				dp++;			/* Do not do this ++ in the line above! */
641 | 				/* The macro would do it four times - Jan.  */
642 | 			}
643 | 			found_match = true;
644 | 		}
645 | 		else
646 | 		{
647 | 			/*
648 | 			 * No match found. Copy one literal byte.
649 | 			 */
650 | 			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
651 | 			pglz_hist_add(hist_start, hist_entries,
652 | 						  hist_next, hist_recycle,
653 | 						  dp, dend, mask);
654 | 			dp++;				/* Do not do this ++ in the line above! */
655 | 			/* The macro would do it four times - Jan.  */
656 | 		}
657 | 	}
658 | 
659 | 	/*
660 | 	 * Write out the last control byte and check that we haven't overrun the
661 | 	 * output size allowed by the strategy.
662 | 	 */
663 | 	*ctrlp = ctrlb;
664 | 	result_size = bp - bstart;
665 | 	if (result_size >= result_max)
666 | 		return -1;
667 | 
668 | 	/* success */
669 | 	return result_size;
670 | }
671 | 
672 | 
673 | /* ----------
674 |  * pglz_decompress -
675 |  *
676 |  *		Decompresses source into dest. Returns the number of bytes
677 |  *		decompressed in the destination buffer, or -1 if decompression
678 |  *		fails.
679 |  *
680 |  *		source/slen describe the compressed input, dest must have room for
681 |  *		rawsize bytes, and rawsize is the exact size of the original data.
682 |  *
683 |  *		The input is treated as untrusted: a tag that is truncated, that
684 |  *		has a zero offset, that points before the start of dest, or that
685 |  *		would write past dest + rawsize makes the function return -1
686 |  *		without reading or writing outside the two buffers.
687 |  * ----------
688 |  */
689 | int32
690 | pglz_decompress(const char *source, int32 slen, char *dest,
691 | 				int32 rawsize)
692 | {
693 | 	const unsigned char *sp;
694 | 	const unsigned char *srcend;
695 | 	unsigned char *dp;
696 | 	unsigned char *destend;
697 | 
698 | 	sp = (const unsigned char *) source;
699 | 	srcend = ((const unsigned char *) source) + slen;
700 | 	dp = (unsigned char *) dest;
701 | 	destend = dp + rawsize;
702 | 
703 | 	while (sp < srcend && dp < destend)
704 | 	{
705 | 		/*
706 | 		 * Read one control byte and process the next 8 items (or as many as
707 | 		 * remain in the compressed input).
708 | 		 */
709 | 		unsigned char ctrl = *sp++;
710 | 		int			ctrlc;
711 | 
712 | 		for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc++)
713 | 		{
714 | 			if (ctrl & 1)
715 | 			{
716 | 				/*
717 | 				 * A set control bit means TAG. Its first byte holds the
718 | 				 * match length minus 3 (low nibble) and the upper 4 bits of
719 | 				 * the offset (high nibble); the second byte holds the lower
720 | 				 * 8 bits of the offset. If the length is coded as 18,
721 | 				 * another extension tag byte tells how much longer the match
722 | 				 * really was (0-255).
723 | 				 */
724 | 				int32		len;
725 | 				int32		off;
726 | 
727 | 				/*
728 | 				 * Corrupt or truncated input may end in the middle of a
729 | 				 * tag; never read past the end of the source buffer.
730 | 				 */
731 | 				if (srcend - sp < 2)
732 | 					return -1;
733 | 
734 | 				len = (sp[0] & 0x0f) + 3;
735 | 				off = ((sp[0] & 0xf0) << 4) | sp[1];
736 | 				sp += 2;
737 | 				if (len == 18)
738 | 				{
739 | 					if (sp >= srcend)
740 | 						return -1;
741 | 					len += *sp++;
742 | 				}
743 | 
744 | 				/*
745 | 				 * The offset must point at output that already exists: it
746 | 				 * can be neither zero nor larger than the number of bytes
747 | 				 * written so far, else the copy below would read output not
748 | 				 * yet written or memory in front of dest.
749 | 				 */
750 | 				if (off == 0 || off > dp - (unsigned char *) dest)
751 | 					return -1;
752 | 
753 | 				/*
754 | 				 * Check for output buffer overrun, to ensure we don't clobber
755 | 				 * memory in case of corrupt input.
756 | 				 */
757 | 				if (len > destend - dp)
758 | 					return -1;
759 | 
760 | 				/*
761 | 				 * Now we copy the bytes specified by the tag from OUTPUT to
762 | 				 * OUTPUT. It is dangerous and platform dependent to use
763 | 				 * memcpy() here, because the copied areas could overlap
764 | 				 * extremely!
765 | 				 */
766 | 				while (len--)
767 | 				{
768 | 					*dp = dp[-off];
769 | 					dp++;
770 | 				}
771 | 			}
772 | 			else
773 | 			{
774 | 				/*
775 | 				 * An unset control bit means LITERAL BYTE. So we just copy
776 | 				 * one from INPUT to OUTPUT.
777 | 				 */
778 | 				if (dp >= destend)	/* check for buffer overrun */
779 | 					break;		/* do not clobber memory */
780 | 
781 | 				*dp++ = *sp++;
782 | 			}
783 | 
784 | 			/*
785 | 			 * Advance the control bit
786 | 			 */
787 | 			ctrl >>= 1;
788 | 		}
789 | 	}
790 | 
791 | 	/*
792 | 	 * Check we decompressed the right amount.
793 | 	 */
794 | 	if (dp != destend || sp != srcend)
795 | 		return -1;
796 | 
797 | 	/*
798 | 	 * That's it.
799 | 	 */
800 | 	return rawsize;
801 | }
802 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/decode.c:
--------------------------------------------------------------------------------
   1 | #include "postgres.h"
   2 | #include "decode.h"
   3 | #include "utils/inet.h"
   4 | #include "pg_filedump.h"
   5 | #include <lib/stringinfo.h>
   6 | #include <access/htup_details.h>
   7 | #include <access/tupmacs.h>
   8 | #include <access/tuptoaster.h>
   9 | #include <datatype/timestamp.h>
  10 | #include <common/pg_lzcompress.h>
  11 | #include <string.h>
  12 | #include <ctype.h>
  13 | #include <stdio.h>
  14 | #include <assert.h>
  15 | 
/*
 * Longest attribute types string accepted by ParseAttributeTypesString(),
 * not counting the trailing zero.
 */
#define ATTRTYPES_STR_MAX_LEN (1024-1)

/* Forward declaration: decode_string() calls this for TOAST pointers. */
static int
ReadStringFromToast(const char *buffer,
		unsigned int buff_size,
		unsigned int* out_size);
  22 | 
  23 | /*
  24 |  * Utilities for manipulation of header information for compressed
  25 |  * toast entries.
  26 |  */
  27 | #define TOAST_COMPRESS_RAWSIZE(ptr) (*(uint32 *) ptr)
  28 | #define TOAST_COMPRESS_RAWDATA(ptr) (ptr + sizeof(uint32))
  29 | #define TOAST_COMPRESS_HEADER_SIZE (sizeof(uint32))
  30 | 
/*
 * Signature shared by all attribute decoders.
 *
 * Arguments:
 *   buffer	   - start of the not yet decoded part of the tuple data
 *   buff_size  - number of bytes available at `buffer`
 *   out_size   - set by the decoder to the number of bytes it consumed
 *
 * Return value is:
 *   == 0	   - attribute was decoded and appended to the current COPY line
 *	< 0	   - attribute could not be decoded
 */
typedef int (*decode_callback_t) (const char *buffer, unsigned int buff_size,
								  unsigned int *out_size);

/* Forward declarations of the decoders referenced by callback_table[] */
static int
decode_smallint(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_int(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_bigint(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_time(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_timetz(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_date(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_timestamp(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_float4(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_float8(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_bool(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_uuid(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_macaddr(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_string(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_char(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_name(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_inet(const char *buffer, unsigned int buff_size, unsigned int *out_size);

static int
decode_ignore(const char *buffer, unsigned int buff_size, unsigned int *out_size);
  84 | 
/*
 * Decoders selected by the attribute types string, in attribute order.
 * `ncallbacks` is the number of used entries; AddTypeCallback() appends to
 * this array and FormatDecode() walks it once per tuple.
 */
static int	ncallbacks = 0;
static decode_callback_t callbacks[ATTRTYPES_STR_MAX_LEN / 2] =
{
	NULL
};
  90 | 
  91 | typedef struct
  92 | {
  93 | 	char	   *name;
  94 | 	decode_callback_t callback;
  95 | }			ParseCallbackTableItem;
  96 | 
  97 | static ParseCallbackTableItem callback_table[] =
  98 | {
  99 | 	{
 100 | 		"smallserial", &decode_smallint
 101 | 	},
 102 | 	{
 103 | 		"smallint", &decode_smallint
 104 | 	},
 105 | 	{
 106 | 		"int", &decode_int
 107 | 	},
 108 | 	{
 109 | 		"oid", &decode_int
 110 | 	},
 111 | 	{
 112 | 		"xid", &decode_int
 113 | 	},
 114 | 	{
 115 | 		"serial", &decode_int
 116 | 	},
 117 | 	{
 118 | 		"bigint", &decode_bigint
 119 | 	},
 120 | 	{
 121 | 		"bigserial", &decode_bigint
 122 | 	},
 123 | 	{
 124 | 		"time", &decode_time
 125 | 	},
 126 | 	{
 127 | 		"timetz", &decode_timetz
 128 | 	},
 129 | 	{
 130 | 		"date", &decode_date
 131 | 	},
 132 | 	{
 133 | 		"timestamp", &decode_timestamp
 134 | 	},
 135 | 	{
 136 | 		"real", &decode_float4
 137 | 	},
 138 | 	{
 139 | 		"float4", &decode_float4
 140 | 	},
 141 | 	{
 142 | 		"float8", &decode_float8
 143 | 	},
 144 | 	{
 145 | 		"float", &decode_float8
 146 | 	},
 147 | 	{
 148 | 		"bool", &decode_bool
 149 | 	},
 150 | 	{
 151 | 		"uuid", &decode_uuid
 152 | 	},
 153 | 	{
 154 | 		"macaddr", &decode_macaddr
 155 | 	},
 156 | 	{
 157 | 		"name", &decode_name
 158 | 	},
 159 | 	{
 160 | 		"inet", &decode_inet
 161 | 	},
 162 | 	{
 163 | 		"char", &decode_char
 164 | 	},
 165 | 	{
 166 | 		"~", &decode_ignore
 167 | 	},
 168 | 
 169 | 	/* internally all string types are stored the same way */
 170 | 	{
 171 | 		"charn", &decode_string
 172 | 	},
 173 | 	{
 174 | 		"varchar", &decode_string
 175 | 	},
 176 | 	{
 177 | 		"varcharn", &decode_string
 178 | 	},
 179 | 	{
 180 | 		"text", &decode_string
 181 | 	},
 182 | 	{
 183 | 		"json", &decode_string
 184 | 	},
 185 | 	{
 186 | 		"xml", &decode_string
 187 | 	},
 188 | 	{
 189 | 		NULL, NULL
 190 | 	},
 191 | };
 192 | 
/*
 * Accumulates the COPY line of the tuple currently being decoded.
 * It is initialized lazily by CopyAppend(); copyStringInitDone records
 * whether that has already happened.
 */
static StringInfoData copyString;
static bool copyStringInitDone = false;

/*
 * Temporary buffer for storing decompressed data.
 *
 * 64K should be enough in most cases. If it is not, the user can manually
 * change this limit. Unfortunately there is no way to know how much memory
 * the user is willing to allocate.
 */
static char decompress_tmp_buff[64 * 1024];
 204 | 
 205 | /* Used by some PostgreSQL macro definitions */
 206 | void
 207 | ExceptionalCondition(const char *conditionName,
 208 | 					 const char *errorType,
 209 | 					 const char *fileName,
 210 | 					 int lineNumber)
 211 | {
 212 | 	printf("Exceptional condition: name = %s, type = %s, fname = %s, line = %d\n",
 213 | 		   conditionName ? conditionName : "(NULL)",
 214 | 		   errorType ? errorType : "(NULL)",
 215 | 		   fileName ? fileName : "(NULL)",
 216 | 		   lineNumber);
 217 | 	exit(1);
 218 | }
 219 | 
 220 | /* Append given string to current COPY line */
 221 | static void
 222 | CopyAppend(const char *str)
 223 | {
 224 | 	if (!copyStringInitDone)
 225 | 	{
 226 | 		initStringInfo(&copyString);
 227 | 		copyStringInitDone = true;
 228 | 	}
 229 | 
 230 | 	/* Caller probably wanted just to init copyString */
 231 | 	if (str == NULL)
 232 | 		return;
 233 | 
 234 | 	if (copyString.data[0] != '\0')
 235 | 		appendStringInfoString(&copyString, "\t");
 236 | 
 237 | 	appendStringInfoString(&copyString, str);
 238 | }
 239 | 
 240 | /*
 241 |  * Append given string to current COPY line and encode special symbols
 242 |  * like \r, \n, \t and \\.
 243 |  */
 244 | static void
 245 | CopyAppendEncode(const char *str, int orig_len)
 246 | {
 247 | 	/*
 248 | 	 * Should be enough in most cases. If it's not user can manually change
 249 | 	 * this limit. Unfortunately there is no way to know how much memory user
 250 | 	 * is willing to allocate.
 251 | 	 */
 252 | 	static char tmp_buff[64 * 1024];
 253 | 
 254 | 	/* Reserve one byte for a trailing zero. */
 255 | 	const int	max_offset = sizeof(tmp_buff) - 2;
 256 | 	int			curr_offset = 0;
 257 | 	int			len = orig_len;
 258 | 
 259 | 	while (len > 0)
 260 | 	{
 261 | 		/*
 262 | 		 * Make sure there is enough free space for at least one special
 263 | 		 * symbol and a trailing zero.
 264 | 		 */
 265 | 		if (curr_offset > max_offset - 2)
 266 | 		{
 267 | 			printf("ERROR: Unable to properly encode a string since it's too "
 268 | 				   "large (%d bytes). Try to increase tmp_buff size in CopyAppendEncode "
 269 | 				   "procedure.\n", orig_len);
 270 | 			exit(1);
 271 | 		}
 272 | 
 273 | 		/*
 274 | 		 * Since we are working with potentially corrupted data we can
 275 | 		 * encounter \0 as well.
 276 | 		 */
 277 | 		if (*str == '\0')
 278 | 		{
 279 | 			tmp_buff[curr_offset] = '\\';
 280 | 			tmp_buff[curr_offset + 1] = '0';
 281 | 			curr_offset += 2;
 282 | 		}
 283 | 		else if (*str == '\r')
 284 | 		{
 285 | 			tmp_buff[curr_offset] = '\\';
 286 | 			tmp_buff[curr_offset + 1] = 'r';
 287 | 			curr_offset += 2;
 288 | 		}
 289 | 		else if (*str == '\n')
 290 | 		{
 291 | 			tmp_buff[curr_offset] = '\\';
 292 | 			tmp_buff[curr_offset + 1] = 'n';
 293 | 			curr_offset += 2;
 294 | 		}
 295 | 		else if (*str == '\t')
 296 | 		{
 297 | 			tmp_buff[curr_offset] = '\\';
 298 | 			tmp_buff[curr_offset + 1] = 'r';
 299 | 			curr_offset += 2;
 300 | 		}
 301 | 		else if (*str == '\\')
 302 | 		{
 303 | 			tmp_buff[curr_offset] = '\\';
 304 | 			tmp_buff[curr_offset + 1] = '\\';
 305 | 			curr_offset += 2;
 306 | 		}
 307 | 		else
 308 | 		{
 309 | 			/* It's a regular symbol. */
 310 | 			tmp_buff[curr_offset] = *str;
 311 | 			curr_offset++;
 312 | 		}
 313 | 
 314 | 		str++;
 315 | 		len--;
 316 | 	}
 317 | 
 318 | 	tmp_buff[curr_offset] = '\0';
 319 | 	CopyAppend(tmp_buff);
 320 | }
 321 | 
/*
 * CopyAppend version with format string support.
 *
 * The text is formatted with snprintf() into a 512-byte local buffer, so a
 * longer result is truncated before it is handed to CopyAppend().
 */
#define CopyAppendFmt(fmt, ...) do { \
	  char __copy_format_buff[512]; \
	  snprintf(__copy_format_buff, sizeof(__copy_format_buff), fmt, ##__VA_ARGS__); \
	  CopyAppend(__copy_format_buff); \
  } while(0)
 328 | 
 329 | /* Discard accumulated COPY line */
 330 | static void
 331 | CopyClear(void)
 332 | {
 333 | 	/* Make sure init is done */
 334 | 	CopyAppend(NULL);
 335 | 
 336 | 	resetStringInfo(&copyString);
 337 | }
 338 | 
 339 | /* Output and then clear accumulated COPY line */
 340 | static void
 341 | CopyFlush(void)
 342 | {
 343 | 	/* Make sure init is done */
 344 | 	CopyAppend(NULL);
 345 | 
 346 | 	printf("COPY: %s\n", copyString.data);
 347 | 	CopyClear();
 348 | }
 349 | 
 350 | /*
 351 |  * Add a callback to `callbacks` table for given type name
 352 |  *
 353 |  * Arguments:
 354 |  *   type	   - name of a single type, always lowercase
 355 |  *
 356 |  * Return value is:
 357 |  *   == 0	   - no error
 358 |  *	< 0	   - invalid type name
 359 |  */
 360 | static int
 361 | AddTypeCallback(const char *type)
 362 | {
 363 | 	int			idx = 0;
 364 | 
 365 | 	if (*type == '\0')			/* ignore empty strings */
 366 | 		return 0;
 367 | 
 368 | 	while (callback_table[idx].name != NULL)
 369 | 	{
 370 | 		if (strcmp(callback_table[idx].name, type) == 0)
 371 | 		{
 372 | 			callbacks[ncallbacks] = callback_table[idx].callback;
 373 | 			ncallbacks++;
 374 | 			return 0;
 375 | 		}
 376 | 		idx++;
 377 | 	}
 378 | 
 379 | 	printf("Error: type <%s> doesn't exist or is not currently supported\n", type);
 380 | 	printf("Full list of known types: ");
 381 | 	idx = 0;
 382 | 	while (callback_table[idx].name != NULL)
 383 | 	{
 384 | 		printf("%s ", callback_table[idx].name);
 385 | 		idx++;
 386 | 	}
 387 | 	printf("\n");
 388 | 	return -1;
 389 | }
 390 | 
 391 | /*
 392 |  * Decode attribute types string like "int,timestamp,bool,uuid"
 393 |  *
 394 |  * Arguments:
 395 |  *   str		- types string
 396 |  * Return value is:
 397 |  *   == 0	   - if string is valid
 398 |  *	< 0	   - if string is invalid
 399 |  */
 400 | int
 401 | ParseAttributeTypesString(const char *str)
 402 | {
 403 | 	char	   *curr_type,
 404 | 			   *next_type;
 405 | 	char		attrtypes[ATTRTYPES_STR_MAX_LEN + 1];
 406 | 	int			i,
 407 | 				len = strlen(str);
 408 | 
 409 | 	if (len > ATTRTYPES_STR_MAX_LEN)
 410 | 	{
 411 | 		printf("Error: attribute types string is longer then %u characters!\n",
 412 | 			   ATTRTYPES_STR_MAX_LEN);
 413 | 		return -1;
 414 | 	}
 415 | 
 416 | 	strcpy(attrtypes, str);
 417 | 	for (i = 0; i < len; i++)
 418 | 		attrtypes[i] = tolower(attrtypes[i]);
 419 | 
 420 | 	curr_type = attrtypes;
 421 | 	while (curr_type)
 422 | 	{
 423 | 		next_type = strstr(curr_type, ",");
 424 | 		if (next_type)
 425 | 		{
 426 | 			*next_type = '\0';
 427 | 			next_type++;
 428 | 		}
 429 | 
 430 | 		if (AddTypeCallback(curr_type) < 0)
 431 | 			return -1;
 432 | 
 433 | 		curr_type = next_type;
 434 | 	}
 435 | 
 436 | 	return 0;
 437 | }
 438 | 
/*
 * Convert Julian day number (JDN) to a date.
 * Copy-pasted from src/backend/utils/adt/datetime.c
 *
 * Arguments:
 *   jd		   - Julian day number
 *   year	   - output: year; callers in this file treat values <= 0 as BC
 *   month	   - output: month, in the range 1 .. MONTHS_PER_YEAR
 *   day	   - output: day of the month
 *
 * The computation is done in unsigned arithmetic and the statements depend
 * on each other's intermediate values, so their order must be kept.
 */
static void
j2date(int jd, int *year, int *month, int *day)
{
	unsigned int julian;
	unsigned int quad;
	unsigned int extra;
	int			y;

	julian = jd;
	julian += 32044;
	quad = julian / 146097;
	extra = (julian - quad * 146097) * 4 + 3;
	julian += 60 + quad * 3 + extra / 146097;
	quad = julian / 1461;
	julian -= quad * 1461;
	y = julian * 4 / 1461;
	julian = ((y != 0) ? ((julian + 305) % 365) : ((julian + 306) % 366))
		+ 123;
	y += quad * 4;
	*year = y - 4800;
	quad = julian * 2141 / 65536;
	*day = julian - 7834 * quad / 256;
	*month = (quad + 10) % MONTHS_PER_YEAR + 1;
}
 467 | 
 468 | /* Decode a smallint type */
 469 | static int
 470 | decode_smallint(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 471 | {
 472 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int16), (uintptr_t) buffer);
 473 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 474 | 
 475 | 	if (buff_size < delta)
 476 | 		return -1;
 477 | 
 478 | 	buff_size -= delta;
 479 | 	buffer = new_buffer;
 480 | 
 481 | 	if (buff_size < sizeof(int16))
 482 | 		return -2;
 483 | 
 484 | 	CopyAppendFmt("%d", (int) (*(int16 *) buffer));
 485 | 	*out_size = sizeof(int16) + delta;
 486 | 	return 0;
 487 | }
 488 | 
 489 | 
 490 | /* Decode an int type */
 491 | static int
 492 | decode_int(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 493 | {
 494 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int32), (uintptr_t) buffer);
 495 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 496 | 
 497 | 	if (buff_size < delta)
 498 | 		return -1;
 499 | 
 500 | 	buff_size -= delta;
 501 | 	buffer = new_buffer;
 502 | 
 503 | 	if (buff_size < sizeof(int32))
 504 | 		return -2;
 505 | 
 506 | 	CopyAppendFmt("%d", *(int32 *) buffer);
 507 | 	*out_size = sizeof(int32) + delta;
 508 | 	return 0;
 509 | }
 510 | 
 511 | /* Decode a bigint type */
 512 | static int
 513 | decode_bigint(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 514 | {
 515 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int64), (uintptr_t) buffer);
 516 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 517 | 
 518 | 	if (buff_size < delta)
 519 | 		return -1;
 520 | 
 521 | 	buff_size -= delta;
 522 | 	buffer = new_buffer;
 523 | 
 524 | 	if (buff_size < sizeof(int64))
 525 | 		return -2;
 526 | 
 527 | 	CopyAppendFmt("%ld", *(int64 *) buffer);
 528 | 	*out_size = sizeof(int64) + delta;
 529 | 	return 0;
 530 | }
 531 | 
 532 | /* Decode a time type */
 533 | static int
 534 | decode_time(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 535 | {
 536 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int64), (uintptr_t) buffer);
 537 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 538 | 	int64		timestamp,
 539 | 				timestamp_sec;
 540 | 
 541 | 	if (buff_size < delta)
 542 | 		return -1;
 543 | 
 544 | 	buff_size -= delta;
 545 | 	buffer = new_buffer;
 546 | 
 547 | 	if (buff_size < sizeof(int64))
 548 | 		return -2;
 549 | 
 550 | 	timestamp = *(int64 *) buffer;
 551 | 	timestamp_sec = timestamp / 1000000;
 552 | 	*out_size = sizeof(int64) + delta;
 553 | 
 554 | 	CopyAppendFmt("%02ld:%02ld:%02ld.%06ld",
 555 | 				  timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60,
 556 | 				  timestamp % 1000000);
 557 | 
 558 | 	return 0;
 559 | }
 560 | 
 561 | /* Decode a timetz type */
 562 | static int
 563 | decode_timetz(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 564 | {
 565 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int64), (uintptr_t) buffer);
 566 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 567 | 	int64		timestamp,
 568 | 				timestamp_sec;
 569 | 	int32		tz_sec,
 570 | 				tz_min;
 571 | 
 572 | 	if (buff_size < delta)
 573 | 		return -1;
 574 | 
 575 | 	buff_size -= delta;
 576 | 	buffer = new_buffer;
 577 | 
 578 | 	if (buff_size < (sizeof(int64) + sizeof(int32)))
 579 | 		return -2;
 580 | 
 581 | 	timestamp = *(int64 *) buffer;
 582 | 	tz_sec = *(int32 *) (buffer + sizeof(int64));
 583 | 	timestamp_sec = timestamp / 1000000;
 584 | 	tz_min = -(tz_sec / 60);
 585 | 	*out_size = sizeof(int64) + sizeof(int32) + delta;
 586 | 
 587 | 	CopyAppendFmt("%02ld:%02ld:%02ld.%06ld%c%02d:%02d",
 588 | 				  timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60,
 589 | 				  timestamp % 1000000, (tz_min > 0 ? '+' : '-'), abs(tz_min / 60), abs(tz_min % 60));
 590 | 
 591 | 	return 0;
 592 | }
 593 | 
 594 | /* Decode a date type */
 595 | static int
 596 | decode_date(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 597 | {
 598 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int32), (uintptr_t) buffer);
 599 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 600 | 	int32		jd,
 601 | 				year,
 602 | 				month,
 603 | 				day;
 604 | 
 605 | 	if (buff_size < delta)
 606 | 		return -1;
 607 | 
 608 | 	buff_size -= delta;
 609 | 	buffer = new_buffer;
 610 | 
 611 | 	if (buff_size < sizeof(int32))
 612 | 		return -2;
 613 | 
 614 | 	*out_size = sizeof(int32) + delta;
 615 | 
 616 | 	jd = *(int32 *) buffer + POSTGRES_EPOCH_JDATE;
 617 | 	j2date(jd, &year, &month, &day);
 618 | 
 619 | 	CopyAppendFmt("%04d-%02d-%02d%s", (year <= 0) ? -year + 1 : year, month, day, (year <= 0) ? " BC" : "");
 620 | 
 621 | 	return 0;
 622 | }
 623 | 
 624 | /* Decode a timestamp type */
 625 | static int
 626 | decode_timestamp(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 627 | {
 628 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int64), (uintptr_t) buffer);
 629 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 630 | 	int64		timestamp,
 631 | 				timestamp_sec;
 632 | 	int32		jd,
 633 | 				year,
 634 | 				month,
 635 | 				day;
 636 | 
 637 | 	if (buff_size < delta)
 638 | 		return -1;
 639 | 
 640 | 	buff_size -= delta;
 641 | 	buffer = new_buffer;
 642 | 
 643 | 	if (buff_size < sizeof(int64))
 644 | 		return -2;
 645 | 
 646 | 	*out_size = sizeof(int64) + delta;
 647 | 	timestamp = *(int64 *) buffer;
 648 | 
 649 | 	jd = timestamp / USECS_PER_DAY;
 650 | 	if (jd != 0)
 651 | 		timestamp -= jd * USECS_PER_DAY;
 652 | 
 653 | 	if (timestamp < INT64CONST(0))
 654 | 	{
 655 | 		timestamp += USECS_PER_DAY;
 656 | 		jd -= 1;
 657 | 	}
 658 | 
 659 | 	/* add offset to go from J2000 back to standard Julian date */
 660 | 	jd += POSTGRES_EPOCH_JDATE;
 661 | 
 662 | 	j2date(jd, &year, &month, &day);
 663 | 	timestamp_sec = timestamp / 1000000;
 664 | 
 665 | 	CopyAppendFmt("%04d-%02d-%02d %02ld:%02ld:%02ld.%06ld%s",
 666 | 				  (year <= 0) ? -year + 1 : year, month, day,
 667 | 				  timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60,
 668 | 				  timestamp % 1000000,
 669 | 				  (year <= 0) ? " BC" : "");
 670 | 
 671 | 	return 0;
 672 | }
 673 | 
 674 | /* Decode a float4 type */
 675 | static int
 676 | decode_float4(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 677 | {
 678 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(float), (uintptr_t) buffer);
 679 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 680 | 
 681 | 	if (buff_size < delta)
 682 | 		return -1;
 683 | 
 684 | 	buff_size -= delta;
 685 | 	buffer = new_buffer;
 686 | 
 687 | 	if (buff_size < sizeof(float))
 688 | 		return -2;
 689 | 
 690 | 	CopyAppendFmt("%.12f", *(float *) buffer);
 691 | 	*out_size = sizeof(float) + delta;
 692 | 	return 0;
 693 | }
 694 | 
 695 | /* Decode a float8 type */
 696 | static int
 697 | decode_float8(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 698 | {
 699 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(double), (uintptr_t) buffer);
 700 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 701 | 
 702 | 	if (buff_size < delta)
 703 | 		return -1;
 704 | 
 705 | 	buff_size -= delta;
 706 | 	buffer = new_buffer;
 707 | 
 708 | 	if (buff_size < sizeof(double))
 709 | 		return -2;
 710 | 
 711 | 	CopyAppendFmt("%.12lf", *(double *) buffer);
 712 | 	*out_size = sizeof(double) + delta;
 713 | 	return 0;
 714 | }
 715 | 
 716 | /* Decode an uuid type */
 717 | static int
 718 | decode_uuid(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 719 | {
 720 | 	unsigned char uuid[16];
 721 | 
 722 | 	if (buff_size < sizeof(uuid))
 723 | 		return -1;
 724 | 
 725 | 	memcpy(uuid, buffer, sizeof(uuid));
 726 | 	CopyAppendFmt("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
 727 | 				  uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7],
 728 | 				  uuid[8], uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]
 729 | 		);
 730 | 	*out_size = sizeof(uuid);
 731 | 	return 0;
 732 | }
 733 | 
 734 | /* Decode a macaddr type */
 735 | static int
 736 | decode_macaddr(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 737 | {
 738 | 	unsigned char macaddr[6];
 739 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(int32), (uintptr_t) buffer);
 740 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 741 | 
 742 | 	if (buff_size < delta)
 743 | 		return -1;
 744 | 
 745 | 	buff_size -= delta;
 746 | 	buffer = new_buffer;
 747 | 
 748 | 	if (buff_size < sizeof(macaddr))
 749 | 		return -2;
 750 | 
 751 | 	memcpy(macaddr, buffer, sizeof(macaddr));
 752 | 	CopyAppendFmt("%02x:%02x:%02x:%02x:%02x:%02x",
 753 | 				  macaddr[0], macaddr[1], macaddr[2], macaddr[3], macaddr[4], macaddr[5]
 754 | 		);
 755 | 	*out_size = sizeof(macaddr) + delta;
 756 | 	return 0;
 757 | }
 758 | 
 759 | /* Decode a bool type */
 760 | static int
 761 | decode_bool(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 762 | {
 763 | 	if (buff_size < sizeof(bool))
 764 | 		return -1;
 765 | 
 766 | 	CopyAppend(*(bool *) buffer ? "t" : "f");
 767 | 	*out_size = sizeof(bool);
 768 | 	return 0;
 769 | }
 770 | 
 771 | /* Decode an inet address */
 772 | static int
 773 | decode_inet(const char *buffer, unsigned int buff_size, unsigned int *out_size) {
 774 | 
 775 |  typedef struct
 776 |  {
 777 |      unsigned char inetlength;
 778 |      unsigned char family;       /* PGSQL_AF_INET or PGSQL_AF_INET6 */
 779 |      unsigned char bits;         /* number of bits in netmask */
 780 |      unsigned char ipaddr[16];   /* up to 128 bits of address */
 781 |  } inet_struct_reordered;
 782 | 
 783 |     inet_struct_reordered *addr = (void*) buffer;
 784 | 
 785 |     const char *new_buffer = (const char *) TYPEALIGN(sizeof(int32), (uintptr_t) buffer);
 786 |     unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 787 | 
 788 |     if (buff_size < delta)
 789 |         return -1;
 790 | 
 791 |     buff_size -= delta;
 792 |     buffer = new_buffer;
 793 | 
 794 |     if (buff_size < sizeof(unsigned char))
 795 |         return -2;
 796 | 
 797 |     switch (addr->family) {
 798 | 	// ipv4=PGSQL_AF_INET=20 cf src/include/utils/inet.h PGSQL_AF_INET=AF_INET + 0
 799 |         case PGSQL_AF_INET:
 800 |             if (buff_size < sizeof(unsigned char)*(4))
 801 |                 return -2;
 802 | 	    // hide the netmask
 803 |             CopyAppendFmt("%u.%u.%u.%u", addr->ipaddr[0], addr->ipaddr[1], addr->ipaddr[2], addr->ipaddr[3]);
 804 |             // CopyAppendFmt("%u.%u.%u.%u/%u", addr->ipaddr[0], addr->ipaddr[1], addr->ipaddr[2], addr->ipaddr[3], addr->bits);
 805 | 	    break;
 806 | 	case PGSQL_AF_INET6:
 807 | 	    // hide the netmask
 808 |             if (buff_size < sizeof(unsigned char)*(16))
 809 |                 return -2;
 810 |             CopyAppendFmt("%u%u:%u%u:%u%u:%u%u:%u%u:%u%u:%u%u:%u%u", addr->ipaddr[0], addr->ipaddr[1], addr->ipaddr[2], addr->ipaddr[3], addr->ipaddr[4], addr->ipaddr[5], addr->ipaddr[6], addr->ipaddr[7], addr->ipaddr[8], addr->ipaddr[9], addr->ipaddr[10], addr->ipaddr[11], addr->ipaddr[12], addr->ipaddr[13], addr->ipaddr[14], addr->ipaddr[15]);
 811 |             // CopyAppendFmt("%u%u:%u%u:%u%u:%u%u:%u%u:%u%u:%u%u:%u%u/%u", addr->ipaddr[0], addr->ipaddr[1], addr->ipaddr[2], addr->ipaddr[3], addr->ipaddr[4], addr->ipaddr[5], addr->ipaddr[6], addr->ipaddr[7], addr->ipaddr[8], addr->ipaddr[9], addr->ipaddr[10], addr->ipaddr[11], addr->ipaddr[12], addr->ipaddr[13], addr->ipaddr[14], addr->ipaddr[15], addr->bits);
 812 | 	    break;
 813 | 	default:
 814 | 	    // Unknown
 815 | 	    return(-1);
 816 | 	}
 817 | 
 818 |         *out_size = addr->inetlength / sizeof(int16);
 819 |         // printf ("out_size=>>%u<</n", *out_size);
 820 | 
 821 | 	return 0;
 822 | }
 823 | 
 824 | /* Decode a name type (used mostly in catalog tables) */
 825 | static int
 826 | decode_name(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 827 | {
 828 | 	const char *new_buffer = (const char *) TYPEALIGN(sizeof(uint32), (uintptr_t) buffer);
 829 | 	unsigned int delta = (unsigned int) ((uintptr_t) new_buffer - (uintptr_t) buffer);
 830 | 
 831 | 	if (buff_size < delta)
 832 | 		return -1;
 833 | 
 834 | 	buff_size -= delta;
 835 | 	buffer = new_buffer;
 836 | 
 837 | 	if (buff_size < NAMEDATALEN)
 838 | 		return -2;
 839 | 
 840 | 	CopyAppendEncode(buffer, strnlen(buffer, NAMEDATALEN));
 841 | 	*out_size = NAMEDATALEN + delta;
 842 | 	return 0;
 843 | }
 844 | 
 845 | /* Decode a char type */
 846 | static int
 847 | decode_char(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 848 | {
 849 | 	if (buff_size < sizeof(char))
 850 | 		return -2;
 851 | 
 852 | 	CopyAppendEncode(buffer, 1);
 853 | 	*out_size = 1;
 854 | 	return 0;
 855 | }
 856 | 
 857 | /* Ignore all data left */
 858 | static int
 859 | decode_ignore(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 860 | {
 861 | 	*out_size = buff_size;
 862 | 	return 0;
 863 | }
 864 | 
 865 | /* Decode char(N), varchar(N), text, json or xml types */
 866 | static int
 867 | decode_string(const char *buffer, unsigned int buff_size, unsigned int *out_size)
 868 | {
 869 | 	int			padding = 0;
 870 | 
 871 | 	/* Skip padding bytes. */
 872 | 	while (*buffer == 0x00)
 873 | 	{
 874 | 		if (buff_size == 0)
 875 | 			return -1;
 876 | 
 877 | 		buff_size--;
 878 | 		buffer++;
 879 | 		padding++;
 880 | 	}
 881 | 
 882 | 	if (VARATT_IS_1B_E(buffer))
 883 | 	{
 884 | 		/*
 885 | 		 * 00000001 1-byte length word, unaligned, TOAST pointer
 886 | 		 */
 887 | 		uint32		len = VARSIZE_EXTERNAL(buffer);
 888 | 		int			result = 0;
 889 | 
 890 | 		if (len > buff_size)
 891 | 			return -1;
 892 | 
 893 | 		if (blockOptions & BLOCK_DECODE_TOAST)
 894 | 		{
 895 | 			result = ReadStringFromToast(buffer, buff_size, out_size);
 896 | 		}
 897 | 		else
 898 | 		{
 899 | 			CopyAppend("(TOASTED)");
 900 | 		}
 901 | 
 902 | 		*out_size = padding + len;
 903 | 		return result;
 904 | 	}
 905 | 
 906 | 	if (VARATT_IS_1B(buffer))
 907 | 	{
 908 | 		/*
 909 | 		 * xxxxxxx1 1-byte length word, unaligned, uncompressed data (up to
 910 | 		 * 126b) xxxxxxx is 1 + string length
 911 | 		 */
 912 | 		uint8		len = VARSIZE_1B(buffer);
 913 | 
 914 | 		if (len > buff_size)
 915 | 			return -1;
 916 | 
 917 | 		CopyAppendEncode(buffer + 1, len - 1);
 918 | 		*out_size = padding + len;
 919 | 		return 0;
 920 | 	}
 921 | 
 922 | 	if (VARATT_IS_4B_U(buffer) && buff_size >= 4)
 923 | 	{
 924 | 		/*
 925 | 		 * xxxxxx00 4-byte length word, aligned, uncompressed data (up to 1G)
 926 | 		 */
 927 | 		uint32		len = VARSIZE_4B(buffer);
 928 | 
 929 | 		if (len > buff_size)
 930 | 			return -1;
 931 | 
 932 | 		CopyAppendEncode(buffer + 4, len - 4);
 933 | 		*out_size = padding + len;
 934 | 		return 0;
 935 | 	}
 936 | 
 937 | 	if (VARATT_IS_4B_C(buffer) && buff_size >= 8)
 938 | 	{
 939 | 		/*
 940 | 		 * xxxxxx10 4-byte length word, aligned, *compressed* data (up to 1G)
 941 | 		 */
 942 | 		int			decompress_ret;
 943 | 		uint32		len = VARSIZE_4B(buffer);
 944 | 		uint32		decompressed_len = VARRAWSIZE_4B_C(buffer);
 945 | 
 946 | 		if (len > buff_size)
 947 | 			return -1;
 948 | 
 949 | 		if (decompressed_len > sizeof(decompress_tmp_buff))
 950 | 		{
 951 | 			printf("WARNING: Unable to decompress a string since it's too "
 952 | 				   "large (%d bytes after decompressing). Consider increasing "
 953 | 				   "decompress_tmp_buff size.\n", decompressed_len);
 954 | 
 955 | 			CopyAppend("(COMPRESSED)");
 956 | 			*out_size = padding + len;
 957 | 			return 0;
 958 | 		}
 959 | 
 960 | 		decompress_ret = pglz_decompress(VARDATA_4B_C(buffer), len - 2 * sizeof(uint32),
 961 | 										 decompress_tmp_buff, decompressed_len);
 962 | 		if ((decompress_ret != decompressed_len) || (decompress_ret < 0))
 963 | 		{
 964 | 			printf("WARNING: Unable to decompress a string. Data is corrupted.\n");
 965 | 			CopyAppend("(COMPRESSED)");
 966 | 			*out_size = padding + len;
 967 | 			return 0;
 968 | 		}
 969 | 
 970 | 		CopyAppendEncode(decompress_tmp_buff, decompressed_len);
 971 | 		*out_size = padding + len;
 972 | 		return 0;
 973 | 	}
 974 | 
 975 | 	return -9;
 976 | }
 977 | 
 978 | /*
 979 |  * Try to decode a tuple using a types string provided previously.
 980 |  *
 981 |  * Arguments:
 982 |  *   tupleData   - pointer to the tuple data
 983 |  *   tupleSize   - tuple size in bytes
 984 |  */
 985 | void
 986 | FormatDecode(const char *tupleData, unsigned int tupleSize)
 987 | {
 988 | 	HeapTupleHeader header = (HeapTupleHeader) tupleData;
 989 | 	const char *data = tupleData + header->t_hoff;
 990 | 	unsigned int size = tupleSize - header->t_hoff;
 991 | 	int			curr_attr;
 992 | 
 993 | 	CopyClear();
 994 | 
 995 | 	for (curr_attr = 0; curr_attr < ncallbacks; curr_attr++)
 996 | 	{
 997 | 		int			ret;
 998 | 		unsigned int processed_size = 0;
 999 | 
1000 | 		if ((header->t_infomask & HEAP_HASNULL) && att_isnull(curr_attr, header->t_bits))
1001 | 		{
1002 | 			CopyAppend("\\N");
1003 | 			continue;
1004 | 		}
1005 | 
1006 | 		if (size <= 0)
1007 | 		{
1008 | 			printf("Error: unable to decode a tuple, no more bytes left. Partial data: %s\n",
1009 | 				   copyString.data);
1010 | 			return;
1011 | 		}
1012 | 
1013 | 		ret = callbacks[curr_attr] (data, size, &processed_size);
1014 | 		if (ret < 0)
1015 | 		{
1016 | 			printf("Error: unable to decode a tuple, callback #%d returned %d. Partial data: %s\n",
1017 | 				   curr_attr + 1, ret, copyString.data);
1018 | 			return;
1019 | 		}
1020 | 
1021 | 		size -= processed_size;
1022 | 		data += processed_size;
1023 | 	}
1024 | 
1025 | 	if (size != 0)
1026 | 	{
1027 | 		printf("Error: unable to decode a tuple, %d bytes left, 0 expected. Partial data: %s\n",
1028 | 			   size, copyString.data);
1029 | 		return;
1030 | 	}
1031 | 
1032 | 	CopyFlush();
1033 | }
1034 | 
1035 | /*
1036 |  * Decompress a pglz-compressed TOAST value and append the result to the
1037 |  * COPY output.
1038 |  *
1039 |  * Arguments:
1040 |  *   data              - compressed value, starting with its compression
1041 |  *                       header (read through the TOAST_COMPRESS_* macros)
1042 |  *   decompressed_size - despite its name, the size in bytes of the compressed
1043 |  *                       input including the header; it is what gets handed
1044 |  *                       to pglz_decompress() as the source length
1045 |  *
1046 |  * Returns the pglz_decompress() result (the number of decompressed bytes on
1047 |  * success), or -1 when the output buffer cannot be allocated. A warning is
1048 |  * printed and nothing is appended when the result does not match the raw
1049 |  * size recorded in the header.
1050 |  */
1051 | static int DumpCompressedString(const char *data, int32 decompressed_size)
1052 | {
1053 | 	int		decompress_ret;
1054 | 	size_t	raw_size = (size_t) TOAST_COMPRESS_RAWSIZE(data);
1055 | 	/* malloc(0) may legitimately return NULL, so always ask for a byte */
1056 | 	char   *decompress_tmp_buff = malloc(raw_size > 0 ? raw_size : 1);
1057 | 
1058 | 	if (decompress_tmp_buff == NULL)
1059 | 	{
1060 | 		printf("WARNING: Unable to decompress a string. Out of memory.\n");
1061 | 		return -1;
1062 | 	}
1063 | 
1064 | 	decompress_ret = pglz_decompress(TOAST_COMPRESS_RAWDATA(data),
1065 | 			decompressed_size - TOAST_COMPRESS_HEADER_SIZE,
1066 | 			decompress_tmp_buff, TOAST_COMPRESS_RAWSIZE(data));
1067 | 	if ((decompress_ret != TOAST_COMPRESS_RAWSIZE(data)) ||
1068 | 			(decompress_ret < 0))
1069 | 	{
1070 | 		printf("WARNING: Unable to decompress a string. Data is corrupted.\n");
1071 | 		/* Report the raw size we compared against, not the input size */
1072 | 		printf("Returned %d while expected %d.\n", decompress_ret,
1073 | 				(int) TOAST_COMPRESS_RAWSIZE(data));
1074 | 	}
1075 | 	else
1076 | 	{
1077 | 		/* decompress_ret was just verified to be the full raw length */
1078 | 		CopyAppendEncode(decompress_tmp_buff, decompress_ret);
1079 | 	}
1080 | 
1081 | 	free(decompress_tmp_buff);
1082 | 
1083 | 	return decompress_ret;
1084 | }
1059 | 
1060 | /*
1061 |  * Fetch an externally stored (TOASTed) value and append it to the COPY
1062 |  * output.
1063 |  *
1064 |  * When buffer holds an on-disk TOAST pointer, the TOAST relation file is
1065 |  * looked up in the directory of the file being dumped (global fileName) under
1066 |  * the name va_toastrelid, its chunks are collected by DumpFileContents() and
1067 |  * the value is appended, decompressing it first when the pointer says it is
1068 |  * compressed. Any other pointer kind only appends a placeholder string.
1069 |  *
1070 |  * Arguments:
1071 |  *   buffer    - pointer to the TOAST pointer datum
1072 |  *   buff_size - not used by this function
1073 |  *   out_size  - not used by this function; it is never written
1074 |  *
1075 |  * Returns 0 on success (for compressed values, whatever
1076 |  * DumpCompressedString() returns), -1 when the TOAST relation cannot be
1077 |  * opened or memory is exhausted, and otherwise the non-zero result of
1078 |  * DumpFileContents().
1079 |  */
1080 | static int
1081 | ReadStringFromToast(const char *buffer,
1082 | 		unsigned int buff_size,
1083 | 		unsigned int* out_size)
1084 | {
1085 | 	int		result = 0;
1086 | 
1087 | 	/* If toasted value is on disk, we'll try to restore it. */
1088 | 	if (VARATT_IS_EXTERNAL_ONDISK(buffer))
1089 | 	{
1090 | 		varatt_external toast_ptr;
1091 | 		char	   *toast_data = NULL;
1092 | 		/* Number of chunks the TOAST data is divided into */
1093 | 		int32		num_chunks;
1094 | 		/* Actual size of external TOASTed value */
1095 | 		int32		toast_ext_size;
1096 | 		/* Path to directory with TOAST relation file */
1097 | 		char	   *toast_relation_path;
1098 | 		/* Filename of TOAST relation file */
1099 | 		char		toast_relation_filename[MAXPGPATH];
1100 | 		FILE	   *toast_rel_fp = NULL;
1101 | 		unsigned int block_options = 0;
1102 | 		unsigned int control_options = 0;
1103 | 
1104 | 		VARATT_EXTERNAL_GET_POINTER(toast_ptr, buffer);
1105 | 		/* Oids are unsigned; %d would print large ones as negative */
1106 | 		printf("  TOAST value. Raw size: %8d, external size: %8d, "
1107 | 				"value id: %6u, toast relation id: %6u\n",
1108 | 				toast_ptr.va_rawsize,
1109 | 				toast_ptr.va_extsize,
1110 | 				toast_ptr.va_valueid,
1111 | 				toast_ptr.va_toastrelid);
1112 | 
1113 | 		/* Extract TOASTed value */
1114 | 		toast_ext_size = toast_ptr.va_extsize;
1115 | 		num_chunks = (toast_ext_size - 1) / TOAST_MAX_CHUNK_SIZE + 1;
1116 | 		printf("  Number of chunks: %d\n", num_chunks);
1117 | 
1118 | 		/* Open TOAST relation file */
1119 | 		toast_relation_path = strdup(fileName);
1120 | 		if (toast_relation_path == NULL)
1121 | 		{
1122 | 			printf("Cannot allocate memory for TOAST relation path\n");
1123 | 			return -1;
1124 | 		}
1125 | 		get_parent_directory(toast_relation_path);
1126 | 		/*
1127 | 		 * get_parent_directory() leaves an empty string for a bare file
1128 | 		 * name; fall back to "." so we do not look in the root directory.
1129 | 		 */
1130 | 		snprintf(toast_relation_filename, sizeof(toast_relation_filename),
1131 | 				"%s/%u",
1132 | 				*toast_relation_path ? toast_relation_path : ".",
1133 | 				toast_ptr.va_toastrelid);
1134 | 		printf("  Read TOAST relation %s\n", toast_relation_filename);
1135 | 		toast_rel_fp = fopen(toast_relation_filename, "rb");
1136 | 		if (!toast_rel_fp) {
1137 | 			printf("Cannot open TOAST relation %s\n",
1138 | 					toast_relation_filename);
1139 | 			result = -1;
1140 | 		}
1141 | 
1142 | 		if (result == 0)
1143 | 		{
1144 | 			unsigned int toast_relation_block_size = GetBlockSize(toast_rel_fp);
1145 | 			fseek(toast_rel_fp, 0, SEEK_SET);
1146 | 			toast_data = malloc(toast_ptr.va_rawsize);
1147 | 
1148 | 			if (toast_data == NULL)
1149 | 			{
1150 | 				printf("Cannot allocate memory for TOAST value\n");
1151 | 				result = -1;
1152 | 			}
1153 | 			else
1154 | 			{
1155 | 				result = DumpFileContents(block_options,
1156 | 						control_options,
1157 | 						toast_rel_fp,
1158 | 						toast_relation_block_size,
1159 | 						-1, /* no start block */
1160 | 						-1, /* no end block */
1161 | 						true, /* is toast relation */
1162 | 						toast_ptr.va_valueid,
1163 | 						toast_ptr.va_extsize,
1164 | 						toast_data);
1165 | 
1166 | 				if (result == 0)
1167 | 				{
1168 | 					if (VARATT_EXTERNAL_IS_COMPRESSED(toast_ptr))
1169 | 						result = DumpCompressedString(toast_data, toast_ext_size);
1170 | 					else
1171 | 						CopyAppendEncode(toast_data, toast_ext_size);
1172 | 				}
1173 | 				else
1174 | 				{
1175 | 					printf("Error in TOAST file.\n");
1176 | 				}
1177 | 			}
1178 | 
1179 | 			free(toast_data);
1180 | 		}
1181 | 
1182 | 		/* fclose(NULL) is undefined; only close what was opened */
1183 | 		if (toast_rel_fp)
1184 | 			fclose(toast_rel_fp);
1185 | 		free(toast_relation_path);
1186 | 	}
1187 | 	/* If tag is indirect or expanded, it was stored in memory. */
1188 | 	else
1189 | 	{
1190 | 		CopyAppend("(TOASTED IN MEMORY)");
1191 | 	}
1192 | 
1193 | 	return result;
1194 | }
1153 | 
1154 | /*
1155 |  * Read one Oid from buffer after advancing to the next sizeof(Oid)-aligned
1156 |  * address, and hand it back through result.
1157 |  *
1158 |  * processed_size receives the alignment padding plus sizeof(Oid).
1159 |  * Returns 0 on success, -1 when the padding alone exceeds buff_size and
1160 |  * -2 when fewer than four bytes remain after the padding.
1161 |  */
1162 | static int
1163 | DecodeOidBinary(const char *buffer,
1164 | 		unsigned int buff_size,
1165 | 		unsigned int *processed_size,
1166 | 		Oid *result)
1167 | {
1168 | 	const uintptr_t	start = (uintptr_t) buffer;
1169 | 	const uintptr_t	aligned = (uintptr_t) TYPEALIGN(sizeof(Oid), start);
1170 | 	const unsigned int padding = (unsigned int) (aligned - start);
1171 | 
1172 | 	/* Not even the alignment padding fits */
1173 | 	if (padding > buff_size)
1174 | 		return -1;
1175 | 
1176 | 	/* The value itself does not fit behind the padding */
1177 | 	if ((buff_size - padding) < sizeof(int32))
1178 | 		return -2;
1179 | 
1180 | 	*result = *(Oid *) (const char *) aligned;
1181 | 	*processed_size = sizeof(Oid) + padding;
1182 | 
1183 | 	return 0;
1184 | }
1180 | 
1181 | /*
1182 |  * Decode char(N), varchar(N), text, json or xml types and pass data out.
1183 |  *
1184 |  * Only values with a plain four-byte varlena header are handled; the payload
1185 |  * is copied to out_data, which the caller must have sized for it.
1186 |  *
1187 |  * Arguments:
1188 |  *   buffer         - start of the varlena value
1189 |  *   buff_size      - number of bytes available at buffer
1190 |  *   processed_size - [out] total bytes consumed, header included
1191 |  *   out_data       - [out] receives the payload bytes
1192 |  *   out_length     - [out] number of payload bytes copied
1193 |  *
1194 |  * Returns 0 on success and a negative value on failure, in which case none
1195 |  * of the outputs is written:
1196 |  *   -1  the value is extended and cannot be read here
1197 |  *   -2  not enough bytes left for a varlena header
1198 |  *   -3  the size in the header is corrupt or larger than buff_size
1199 |  */
1200 | static int
1201 | DecodeBytesBinary(const char *buffer,
1202 | 		unsigned int buff_size,
1203 | 		unsigned int *processed_size,
1204 | 		char *out_data,
1205 | 		unsigned int *out_length)
1206 | {
1207 | 	unsigned int	total_size;
1208 | 
1209 | 	/* The first header byte is needed to classify the value at all */
1210 | 	if (buff_size == 0)
1211 | 		return -2;
1212 | 
1213 | 	if (VARATT_IS_EXTENDED(buffer))
1214 | 	{
1215 | 		printf("Error: unable read TOAST value.\n");
1216 | 		/* Report the failure instead of leaving the outputs unset */
1217 | 		return -1;
1218 | 	}
1219 | 
1220 | 	if (buff_size < (unsigned int) VARHDRSZ)
1221 | 		return -2;
1222 | 
1223 | 	/* Do not trust the on-disk length before copying with it */
1224 | 	total_size = VARSIZE(buffer);
1225 | 	if (total_size < (unsigned int) VARHDRSZ || total_size > buff_size)
1226 | 		return -3;
1227 | 
1228 | 	*out_length = total_size - VARHDRSZ;
1229 | 	*processed_size = total_size;
1230 | 	memcpy(out_data, VARDATA(buffer), *out_length);
1231 | 
1232 | 	return 0;
1233 | }
1203 | 
1204 | /*
1205 |  * Decode a TOAST chunk as a tuple (Oid toast_id, Oid chunk_id, text data).
1206 |  * If decoded OID is equal toast_oid, copy data into chunk_data.
1207 |  *
1208 |  * Parameters:
1209 |  *     tuple_data - data of the tuple
1210 |  *     tuple_size - length of the tuple
1211 |  *     toast_oid - [in] oid of the TOAST value we are looking for
1212 |  *     chunk_id - [out] number of the TOAST chunk stored in the tuple
1213 |  *     chunk_data - [out] extracted chunk data
1214 |  *     chunk_data_size - [out] number of bytes extracted from the chunk
1215 |  *
1216 |  * Both outputs are reset to 0 first, so a chunk_data_size of 0 on return
1217 |  * means that nothing was extracted (other value, or a decoding error that
1218 |  * has been printed).
1219 |  */
1220 | void
1221 | ToastChunkDecode(const char *tuple_data,
1222 | 		unsigned int tuple_size,
1223 | 		Oid toast_oid,
1224 | 		uint32 *chunk_id,
1225 | 		char *chunk_data,
1226 | 		unsigned int *chunk_data_size)
1227 | {
1228 | 	HeapTupleHeader		header = (HeapTupleHeader)tuple_data;
1229 | 	const char	   *data;
1230 | 	unsigned int	size;
1231 | 	unsigned int	processed_size = 0;
1232 | 	Oid				read_toast_oid;
1233 | 	int				ret;
1234 | 
1235 | 	*chunk_data_size = 0;
1236 | 	*chunk_id = 0;
1237 | 
1238 | 	/*
1239 | 	 * A corrupted header may claim an offset past the end of the tuple; the
1240 | 	 * unsigned subtraction below would then wrap around to a huge size.
1241 | 	 */
1242 | 	if (header->t_hoff > tuple_size)
1243 | 	{
1244 | 		printf("Error: unable to decode a TOAST chunk tuple, header offset "
1245 | 				"%u exceeds tuple size %u.\n",
1246 | 				(unsigned int) header->t_hoff, tuple_size);
1247 | 		return;
1248 | 	}
1249 | 
1250 | 	data = tuple_data + header->t_hoff;
1251 | 	size = tuple_size - header->t_hoff;
1252 | 
1253 | 	/* decode toast_id */
1254 | 	ret = DecodeOidBinary(data, size, &processed_size, &read_toast_oid);
1255 | 	if (ret < 0)
1256 | 	{
1257 | 		printf("Error: unable to decode a TOAST tuple toast_id, "
1258 | 				"decode function returned %d. Partial data: %s\n",
1259 | 				ret, copyString.data);
1260 | 		return;
1261 | 	}
1262 | 
1263 | 	size -= processed_size;
1264 | 	data += processed_size;
1265 | 	if (size == 0)
1266 | 	{
1267 | 		printf("Error: unable to decode a TOAST chunk tuple, no more bytes "
1268 | 			   "left. Partial data: %s\n", copyString.data);
1269 | 		return;
1270 | 	}
1271 | 
1272 | 	/* It is not what we are looking for */
1273 | 	if (toast_oid != read_toast_oid)
1274 | 		return;
1275 | 
1276 | 	/* decode chunk_id */
1277 | 	ret = DecodeOidBinary(data, size, &processed_size, chunk_id);
1278 | 	if (ret < 0)
1279 | 	{
1280 | 		printf("Error: unable to decode a TOAST tuple chunk_id, decode "
1281 | 				"function returned %d. Partial data: %s\n",
1282 | 				ret, copyString.data);
1283 | 		return;
1284 | 	}
1285 | 
1286 | 	size -= processed_size;
1287 | 	data += processed_size;
1288 | 	if (size == 0)
1289 | 	{
1290 | 		printf("Error: unable to decode a TOAST chunk tuple, no more bytes "
1291 | 				"left. Partial data: %s\n", copyString.data);
1292 | 		return;
1293 | 	}
1294 | 
1295 | 	/* decode data */
1296 | 	ret = DecodeBytesBinary(data, size, &processed_size, chunk_data,
1297 | 			chunk_data_size);
1298 | 	if (ret < 0)
1299 | 	{
1300 | 		printf("Error: unable to decode a TOAST chunk data, decode function "
1301 | 				"returned %d. Partial data: %s\n", ret, copyString.data);
1302 | 		return;
1303 | 	}
1304 | 
1305 | 	size -= processed_size;
1306 | 	if (size != 0)
1307 | 	{
1308 | 		/* size is unsigned, so print it as such */
1309 | 		printf("Error: unable to decode a TOAST chunk tuple, %u bytes left. "
1310 | 				"Partial data: %s\n", size, copyString.data);
1311 | 		return;
1312 | 	}
1313 | }
1294 | 


--------------------------------------------------------------------------------
/pg_filedump.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * pg_filedump.c - PostgreSQL file dump utility for dumping and
   3 |  *				   formatting heap (data), index and control files.
   4 |  *
   5 |  * Copyright (c) 2002-2010 Red Hat, Inc.
   6 |  * Copyright (c) 2011-2019, PostgreSQL Global Development Group
   7 |  *
   8 |  * This program is free software; you can redistribute it and/or modify
   9 |  * it under the terms of the GNU General Public License as published by
  10 |  * the Free Software Foundation; either version 2 of the License, or
  11 |  * (at your option) any later version.
  12 |  *
  13 |  * This program is distributed in the hope that it will be useful,
  14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 |  * GNU General Public License for more details.
  17 |  *
  18 |  * You should have received a copy of the GNU General Public License
  19 |  * along with this program; if not, write to the Free Software
  20 |  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21 |  *
  22 |  * Original Author: Patrick Macdonald <patrickm@redhat.com>
  23 |  */
  24 | 
  25 | #include "pg_filedump.h"
  26 | 
  27 | #include <utils/pg_crc.h>
  28 | 
  29 | /*	checksum_impl.h uses Assert, which doesn't work outside the server */
  30 | #undef Assert
  31 | #define Assert(X)
  32 | 
  33 | #include "storage/checksum.h"
  34 | #include "storage/checksum_impl.h"
  35 | #include "decode.h"
  36 | 
  37 | /*
  38 |  * Global variables for ease of use mostly
  39 |  */
  40 | /*	Bitmask of BLOCK_* flags for block formatting operations */
  41 | unsigned int blockOptions = 0;
  42 | 
  43 | /* Bitmask of SEGMENT_* flags (segment size/number forced by the user) */
  44 | unsigned int segmentOptions = 0;
  45 | 
  46 | /* -R[start]:Block range start (-1 until a range is given) */
  47 | int	blockStart = -1;
  48 | 
  49 | /* -R[end]:Block range end (-1 until a range is given) */
  50 | int	blockEnd = -1;
  51 | 
  52 | /* Bitmask of ITEM_* flags for item formatting operations */
  53 | unsigned int itemOptions = 0;
  54 | 
  55 | /* Bitmask of flags for control file formatting operations */
  56 | unsigned int controlOptions = 0;
  57 | 
  58 | unsigned int specialType = SPEC_SECT_NONE; /* presumably one of the SPEC_SECT_* values for the current block -- confirm */
  59 | 
  60 | static bool verbose = false; /* set to true by the -v switch */
  61 | 
  62 | /* File to dump or format */
  63 | FILE *fp = NULL;
  64 | 
  65 | /* File name for display; points at the command-line argument */
  66 | char *fileName = NULL;
  67 | 
  68 | /* Current block size; can be forced with -S */
  69 | static unsigned int blockSize = 0;
  70 | 
  71 | /* Segment size in bytes; can be forced with -s */
  72 | static unsigned int segmentSize = RELSEG_SIZE * BLCKSZ;
  73 | 
  74 | /* Number of current segment; forced with -n or taken from the file name */
  75 | static unsigned int segmentNumber = 0;
  76 | 
  77 | /* Offset of current block */
  78 | static unsigned int pageOffset = 0;
  79 | 
  80 | /* Number of bytes to format */
  81 | static unsigned int bytesToFormat = 0;
  82 | 
  83 | /* Block version number */
  84 | static unsigned int blockVersion = 0;
  85 | 
  86 | /* Program exit code; set to 1 whenever an error is reported */
  87 | static int	exitCode = 0;
  88 | 
  89 | /*
  90 |  * Function Prototypes (file-local helpers are static)
  91 |  */
  92 | unsigned int GetBlockSize(FILE *fp); /* not static: the TOAST decoding code calls it too */
  93 | 
  94 | static void DisplayOptions(unsigned int validOptions);
  95 | static unsigned int ConsumeOptions(int numOptions, char **options);
  96 | static int	GetOptionValue(char *optionString);
  97 | static void FormatBlock(unsigned int blockOptions,
  98 | 		unsigned int controlOptions,
  99 | 		char *buffer,
 100 | 		BlockNumber currentBlock,
 101 | 		unsigned int blockSize,
 102 | 		bool isToast,
 103 | 		Oid toastOid,
 104 | 		unsigned int toastExternalSize,
 105 | 		char *toastValue,
 106 | 		unsigned int *toastRead);
 107 | static unsigned int GetSpecialSectionType(char *buffer, Page page);
 108 | static bool IsBtreeMetaPage(Page page);
 109 | static void CreateDumpFileHeader(int numOptions, char **options);
 110 | static int	FormatHeader(char *buffer,
 111 | 		Page page,
 112 | 		BlockNumber blkno,
 113 | 		bool isToast);
 114 | static void FormatItemBlock(char *buffer,
 115 | 		Page page,
 116 | 		bool isToast,
 117 | 		Oid toastOid,
 118 | 		unsigned int toastExternalSize,
 119 | 		char *toastValue,
 120 | 		unsigned int *toastRead);
 121 | static void FormatItem(char *buffer,
 122 | 		unsigned int numBytes,
 123 | 		unsigned int startIndex,
 124 | 		unsigned int formatAs);
 125 | static void FormatSpecial(char *buffer);
 126 | static void FormatControl(char *buffer);
 127 | static void FormatBinary(char *buffer,
 128 | 		unsigned int numBytes, unsigned int startIndex);
 129 | static void DumpBinaryBlock(char *buffer);
 130 | 
 131 | 
 132 | /* Send properly formed usage information to the user.  The version and
 133 |  * copyright banner is printed first when validOptions is OPT_RC_COPYRIGHT. */
 134 | static void
 135 | DisplayOptions(unsigned int validOptions)
 136 | {
 137 | 	if (validOptions == OPT_RC_COPYRIGHT)
 138 | 		printf
 139 | 			("\nVersion %s (for %s)"
 140 | 			 "\nCopyright (c) 2002-2010 Red Hat, Inc."
 141 | 		  "\nCopyright (c) 2011-2019, PostgreSQL Global Development Group\n",
 142 | 			 FD_VERSION, FD_PG_VERSION);
 143 | 
 144 | 	/* The synopsis lists every single-letter switch ConsumeOptions accepts */
 145 | 	printf
 146 | 		("\nUsage: pg_filedump [-abcdfhikotvxy] [-R startblock [endblock]] [-D attrlist] [-S blocksize] [-s segsize] [-n segnumber] file\n\n"
 147 | 		 "Display formatted contents of a PostgreSQL heap/index/control file\n"
 148 | 		 "Defaults are: relative addressing, range of the entire file, block\n"
 149 | 		 "               size as listed on block 0 in the file\n\n"
 150 | 		 "The following options are valid for heap and index files:\n"
 151 | 		 "  -a  Display absolute addresses when formatting (Block header\n"
 152 | 		 "      information is always block relative)\n"
 153 | 		 "  -b  Display binary block images within a range (Option will turn\n"
 154 | 		 "      off all formatting options)\n"
 155 | 		 "  -d  Display formatted block content dump (Option will turn off\n"
 156 | 		 "      all other formatting options)\n"
 157 | 		 "  -D  Decode tuples using given comma separated list of types\n"
 158 | 		 "      Supported types:\n"
 159 | 		 "        bigint bigserial bool char charN date float float4 float8 int\n"
 160 | 		 "        json macaddr name oid real serial smallint smallserial text\n"
 161 | 		 "        time timestamp timetz uuid varchar varcharN xid xml\n"
 162 | 		 "      ~ ignores all attributes left in a tuple\n"
 163 | 		 "  -f  Display formatted block content dump along with interpretation\n"
 164 | 		 "  -h  Display this information\n"
 165 | 		 "  -i  Display interpreted item details\n"
 166 | 		 "  -k  Verify block checksums\n"
 167 | 		 "  -o  Do not dump old values.\n"
 168 | 		 "  -R  Display specific block ranges within the file (Blocks are\n"
 169 | 		 "      indexed from 0)\n"
 170 | 		 "        [startblock]: block to start at\n"
 171 | 		 "        [endblock]: block to end at\n"
 172 | 		 "      A startblock without an endblock will format the single block\n"
 173 | 		 "  -s  Force segment size to [segsize]\n"
 174 | 		 "  -t  Dump TOAST files\n"
 175 | 		 "  -v  Output additional information about TOAST relations\n"
 176 | 		 "  -n  Force segment number to [segnumber]\n"
 177 | 		 "  -S  Force block size to [blocksize]\n"
 178 | 		 "  -x  Force interpreted formatting of block items as index items\n"
 179 | 		 "  -y  Force interpreted formatting of block items as heap items\n\n"
 180 | 		 "The following options are valid for control files:\n"
 181 | 		 "  -c  Interpret the file listed as a control file\n"
 182 | 		 "  -f  Display formatted content dump along with interpretation\n"
 183 | 		 "  -S  Force block size to [blocksize]\n"
 184 | 		 "\nReport bugs to <pgsql-bugs@postgresql.org>\n");
 185 | }
 184 | 
 185 | /*
 186 |  * Determine segment number by segment file name. For instance, if file
 187 |  * name is /path/to/xxxx.7 procedure returns 7. Default return value is 0,
 188 |  * which is what a name without a ".<digits>" suffix (including a name made
 189 |  * of digits only, or an empty one) yields.
 190 |  */
 191 | static unsigned int
 192 | GetSegmentNumberFromFileName(const char *fileName)
 193 | {
 194 | 	const char *digits = fileName + strlen(fileName);
 195 | 
 196 | 	/*
 197 | 	 * Walk back over the trailing digits.  isdigit() is only defined for
 198 | 	 * unsigned char values, so bytes above 0x7f must not be passed as char.
 199 | 	 */
 200 | 	while (digits > fileName && isdigit((unsigned char) digits[-1]))
 201 | 		digits--;
 202 | 
 203 | 	/* The digits only count when a '.' sits right in front of them */
 204 | 	if (digits == fileName || digits[-1] != '.')
 205 | 		return 0;
 206 | 
 207 | 	return atoi(digits);
 208 | }
 209 | 
 210 | /*	Iterate through the provided options and set the option flags.
 211 |  *	An error will result in a positive rc and will force a display
 212 |  *	of the usage information.  This routine returns enum
 213 |  *	optionReturnCode values.
 214 |  *
 215 |  *	options[0] is skipped and the last entry must name the file to dump.
 216 |  *	Switches that take a value (-R, -S, -s, -D, -n) consume the entries
 217 |  *	that follow them; any other entry is a cluster of one-letter switches.
 218 |  *	Every error path sets exitCode to 1. */
 219 | static unsigned int
 220 | ConsumeOptions(int numOptions, char **options)
 221 | {
 222 | 	unsigned int rc = OPT_RC_VALID;
 223 | 	unsigned int x;
 224 | 	unsigned int optionStringLength;
 225 | 	char	   *optionString;
 226 | 	char		duplicateSwitch = 0x00;
 227 | 
 228 | 	for (x = 1; x < numOptions; x++)
 229 | 	{
 230 | 		optionString = options[x];
 231 | 		optionStringLength = strlen(optionString);
 232 | 
 233 | 		/* Range is a special case where we have to consume the next 1 or 2
 234 | 		 * parameters to mark the range start and end */
 235 | 		if ((optionStringLength == 2) && (strcmp(optionString, "-R") == 0))
 236 | 		{
 237 | 			int			range = 0;
 238 | 
 239 | 			SET_OPTION(blockOptions, BLOCK_RANGE, 'R');
 240 | 			/* Only accept the range option once */
 241 | 			if (rc == OPT_RC_DUPLICATE)
 242 | 				break;
 243 | 
 244 | 			/* Make sure there are options after the range identifier */
 245 | 			if (x >= (numOptions - 2))
 246 | 			{
 247 | 				rc = OPT_RC_INVALID;
 248 | 				printf("Error: Missing range start identifier.\n");
 249 | 				exitCode = 1;
 250 | 				break;
 251 | 			}
 252 | 
 253 | 			/*
 254 | 			 * Mark that we have the range and advance the option to what
 255 | 			 * should be the range start. Check the value of the next
 256 | 			 * parameter */
 257 | 			optionString = options[++x];
 258 | 			if ((range = GetOptionValue(optionString)) < 0)
 259 | 			{
 260 | 				rc = OPT_RC_INVALID;
 261 | 				printf("Error: Invalid range start identifier <%s>.\n",
 262 | 					   optionString);
 263 | 				exitCode = 1;
 264 | 				break;
 265 | 			}
 266 | 
 267 | 			/* The default is to dump only one block */
 268 | 			blockStart = blockEnd = (unsigned int) range;
 269 | 
 270 | 			/* We have our range start marker, check if there is an end
 271 | 			 * marker on the option line.  Assume that the last option
 272 | 			 * is the file we are dumping, so check if there are options
 273 | 			 * between the range start marker and the file */
 274 | 			if (x <= (numOptions - 3))
 275 | 			{
 276 | 				if ((range = GetOptionValue(options[x + 1])) >= 0)
 277 | 				{
 278 | 					/* End range must be => start range */
 279 | 					if (blockStart <= range)
 280 | 					{
 281 | 						blockEnd = (unsigned int) range;
 282 | 						x++;
 283 | 					}
 284 | 					else
 285 | 					{
 286 | 						rc = OPT_RC_INVALID;
 287 | 						printf("Error: Requested block range start <%d> is "
 288 | 							   "greater than end <%d>.\n", blockStart, range);
 289 | 						exitCode = 1;
 290 | 						break;
 291 | 					}
 292 | 				}
 293 | 			}
 294 | 		}
 295 | 		/* Check for the special case where the user forces a block size
 296 | 		 * instead of having the tool determine it.  This is useful if
 297 | 		 * the header of block 0 is corrupt and gives a garbage block size */
 298 | 		else if ((optionStringLength == 2)
 299 | 				 && (strcmp(optionString, "-S") == 0))
 300 | 		{
 301 | 			int			localBlockSize;
 302 | 
 303 | 			SET_OPTION(blockOptions, BLOCK_FORCED, 'S');
 304 | 			/* Only accept the forced size option once */
 305 | 			if (rc == OPT_RC_DUPLICATE)
 306 | 				break;
 307 | 
 308 | 			/* The token immediately following -S is the block size */
 309 | 			if (x >= (numOptions - 2))
 310 | 			{
 311 | 				rc = OPT_RC_INVALID;
 312 | 				printf("Error: Missing block size identifier.\n");
 313 | 				exitCode = 1;
 314 | 				break;
 315 | 			}
 316 | 
 317 | 			/* Next option encountered must be forced block size */
 318 | 			optionString = options[++x];
 319 | 			if ((localBlockSize = GetOptionValue(optionString)) > 0)
 320 | 				blockSize = (unsigned int) localBlockSize;
 321 | 			else
 322 | 			{
 323 | 				rc = OPT_RC_INVALID;
 324 | 				printf("Error: Invalid block size requested <%s>.\n",
 325 | 					   optionString);
 326 | 				exitCode = 1;
 327 | 				break;
 328 | 			}
 329 | 		}
 330 | 		/* Check for the special case where the user forces a segment size. */
 331 | 		else if ((optionStringLength == 2)
 332 | 				 && (strcmp(optionString, "-s") == 0))
 333 | 		{
 334 | 			int			localSegmentSize;
 335 | 
 336 | 			SET_OPTION(segmentOptions, SEGMENT_SIZE_FORCED, 's');
 337 | 			/* Only accept the forced size option once */
 338 | 			if (rc == OPT_RC_DUPLICATE)
 339 | 				break;
 340 | 
 341 | 			/* The token immediately following -s is the segment size */
 342 | 			if (x >= (numOptions - 2))
 343 | 			{
 344 | 				rc = OPT_RC_INVALID;
 345 | 				printf("Error: Missing segment size identifier.\n");
 346 | 				exitCode = 1;
 347 | 				break;
 348 | 			}
 349 | 
 350 | 			/* Next option encountered must be forced segment size */
 351 | 			optionString = options[++x];
 352 | 			if ((localSegmentSize = GetOptionValue(optionString)) > 0)
 353 | 				segmentSize = (unsigned int) localSegmentSize;
 354 | 			else
 355 | 			{
 356 | 				rc = OPT_RC_INVALID;
 357 | 				printf("Error: Invalid segment size requested <%s>.\n",
 358 | 					   optionString);
 359 | 				exitCode = 1;
 360 | 				break;
 361 | 			}
 362 | 		}
 363 | 		/* Check for the special case where the user forces tuples decoding. */
 364 | 		else if ((optionStringLength == 2)
 365 | 				 && (strcmp(optionString, "-D") == 0))
 366 | 		{
 367 | 			SET_OPTION(blockOptions, BLOCK_DECODE, 'D');
 368 | 			/* Only accept the decode option once */
 369 | 			if (rc == OPT_RC_DUPLICATE)
 370 | 				break;
 371 | 
 372 | 			/* The token immediately following -D is attribute types string */
 373 | 			if (x >= (numOptions - 2))
 374 | 			{
 375 | 				rc = OPT_RC_INVALID;
 376 | 				printf("Error: Missing attribute types string.\n");
 377 | 				exitCode = 1;
 378 | 				break;
 379 | 			}
 380 | 
 381 | 			/* Next option encountered must be attribute types string */
 382 | 			optionString = options[++x];
 383 | 
 384 | 			if (ParseAttributeTypesString(optionString) < 0)
 385 | 			{
 386 | 				rc = OPT_RC_INVALID;
 387 | 				printf("Error: Invalid attribute types string <%s>.\n",
 388 | 					   optionString);
 389 | 				exitCode = 1;
 390 | 				break;
 391 | 			}
 392 | 		}
 393 | 		/* Check for the special case where the user forces a segment number
 394 | 		 * instead of having the tool determine it by file name. */
 395 | 		else if ((optionStringLength == 2)
 396 | 				 && (strcmp(optionString, "-n") == 0))
 397 | 		{
 398 | 			int			localSegmentNumber;
 399 | 
 400 | 			SET_OPTION(segmentOptions, SEGMENT_NUMBER_FORCED, 'n');
 401 | 			/* Only accept the forced segment number option once */
 402 | 			if (rc == OPT_RC_DUPLICATE)
 403 | 				break;
 404 | 
 405 | 			/* The token immediately following -n is the segment number */
 406 | 			if (x >= (numOptions - 2))
 407 | 			{
 408 | 				rc = OPT_RC_INVALID;
 409 | 				printf("Error: Missing segment number identifier.\n");
 410 | 				exitCode = 1;
 411 | 				break;
 412 | 			}
 413 | 
 414 | 			/* Next option encountered must be forced segment number;
 415 | 			 * 0 (the first segment) is a valid number to force */
 416 | 			optionString = options[++x];
 417 | 			if ((localSegmentNumber = GetOptionValue(optionString)) >= 0)
 418 | 				segmentNumber = (unsigned int) localSegmentNumber;
 419 | 			else
 420 | 			{
 421 | 				rc = OPT_RC_INVALID;
 422 | 				printf("Error: Invalid segment number requested <%s>.\n",
 423 | 					   optionString);
 424 | 				exitCode = 1;
 425 | 				break;
 426 | 			}
 427 | 		}
 428 | 		/* The last option MUST be the file name */
 429 | 		else if (x == (numOptions - 1))
 430 | 		{
 431 | 			/* Check to see if this looks like an option string before opening */
 432 | 			if (optionString[0] != '-')
 433 | 			{
 434 | 				fp = fopen(optionString, "rb");
 435 | 				if (fp)
 436 | 				{
 437 | 					fileName = options[x];
 438 | 					if (!(segmentOptions & SEGMENT_NUMBER_FORCED))
 439 | 						segmentNumber = GetSegmentNumberFromFileName(fileName);
 440 | 				}
 441 | 				else
 442 | 				{
 443 | 					rc = OPT_RC_FILE;
 444 | 					printf("Error: Could not open file <%s>.\n", optionString);
 445 | 					exitCode = 1;
 446 | 					break;
 447 | 				}
 448 | 			}
 449 | 			else
 450 | 			{
 451 | 				/* Could be the case where the help flag is used without a
 452 | 				 * filename. Otherwise, the last option isn't a file */
 453 | 				if (strcmp(optionString, "-h") == 0)
 454 | 					rc = OPT_RC_COPYRIGHT;
 455 | 				else
 456 | 				{
 457 | 					rc = OPT_RC_FILE;
 458 | 					printf("Error: Missing file name to dump.\n");
 459 | 					exitCode = 1;
 460 | 				}
 461 | 				break;
 462 | 			}
 463 | 		}
 464 | 		else
 465 | 		{
 466 | 			unsigned int y;
 467 | 
 468 | 			/* Option strings must start with '-' and contain switches */
 469 | 			if (optionString[0] != '-')
 470 | 			{
 471 | 				rc = OPT_RC_INVALID;
 472 | 				printf("Error: Invalid option string <%s>.\n", optionString);
 473 | 				exitCode = 1;
 474 | 				break;
 475 | 			}
 476 | 
 477 | 			/* Iterate through the singular option string, throw out
 478 | 			 * garbage, duplicates and set flags to be used in formatting */
 479 | 			for (y = 1; y < optionStringLength; y++)
 480 | 			{
 481 | 				switch (optionString[y])
 482 | 				{
 483 | 						/* Use absolute addressing */
 484 | 					case 'a':
 485 | 						SET_OPTION(blockOptions, BLOCK_ABSOLUTE, 'a');
 486 | 						break;
 487 | 
 488 | 						/* Dump the binary contents of the page */
 489 | 					case 'b':
 490 | 						SET_OPTION(blockOptions, BLOCK_BINARY, 'b');
 491 | 						break;
 492 | 
 493 | 						/* Dump the listed file as a control file */
 494 | 					case 'c':
 495 | 						SET_OPTION(controlOptions, CONTROL_DUMP, 'c');
 496 | 						break;
 497 | 
 498 | 						/* Do not interpret the data. Format to hex and ascii. */
 499 | 					case 'd':
 500 | 						SET_OPTION(blockOptions, BLOCK_NO_INTR, 'd');
 501 | 						break;
 502 | 
 503 | 						/*
 504 | 						 * Format the contents of the block with
 505 | 						 * interpretation of the headers */
 506 | 					case 'f':
 507 | 						SET_OPTION(blockOptions, BLOCK_FORMAT, 'f');
 508 | 						break;
 509 | 
 510 | 						/* Display the usage screen */
 511 | 					case 'h':
 512 | 						rc = OPT_RC_COPYRIGHT;
 513 | 						break;
 514 | 
 515 | 						/* Format the items in detail */
 516 | 					case 'i':
 517 | 						SET_OPTION(itemOptions, ITEM_DETAIL, 'i');
 518 | 						break;
 519 | 
 520 | 						/* Verify block checksums */
 521 | 					case 'k':
 522 | 						SET_OPTION(blockOptions, BLOCK_CHECKSUMS, 'k');
 523 | 						break;
 524 | 
 525 | 						/* Do not dump old values */
 526 | 					case 'o':
 527 | 						SET_OPTION(blockOptions, BLOCK_IGNORE_OLD, 'o');
 528 | 						break;
 529 | 
 530 | 						/* Dump TOAST files */
 531 | 					case 't':
 532 | 						SET_OPTION(blockOptions, BLOCK_DECODE_TOAST, 't');
 533 | 						break;
 534 | 
 535 | 						/* Output additional information about TOAST relations */
 536 | 					case 'v':
 537 | 						verbose = true;
 538 | 						break;
 539 | 
 540 | 						/* Interpret items as standard index values */
 541 | 					case 'x':
 542 | 						SET_OPTION(itemOptions, ITEM_INDEX, 'x');
 543 | 						if (itemOptions & ITEM_HEAP)
 544 | 						{
 545 | 							rc = OPT_RC_INVALID;
 546 | 							printf("Error: Options <y> and <x> are "
 547 | 								   "mutually exclusive.\n");
 548 | 							exitCode = 1;
 549 | 						}
 550 | 						break;
 551 | 
 552 | 						/* Interpret items as heap values */
 553 | 					case 'y':
 554 | 						SET_OPTION(itemOptions, ITEM_HEAP, 'y');
 555 | 						if (itemOptions & ITEM_INDEX)
 556 | 						{
 557 | 							rc = OPT_RC_INVALID;
 558 | 							printf("Error: Options <x> and <y> are "
 559 | 								   "mutually exclusive.\n");
 560 | 							exitCode = 1;
 561 | 						}
 562 | 						break;
 563 | 
 564 | 					default:
 565 | 						rc = OPT_RC_INVALID;
 566 | 						printf("Error: Unknown option <%c>.\n", optionString[y]);
 567 | 						exitCode = 1;
 568 | 						break;
 569 | 				}
 570 | 
 571 | 				if (rc)
 572 | 					break;
 573 | 			}
 574 | 		}
 575 | 	}
 576 | 
 577 | 	if (rc == OPT_RC_DUPLICATE)
 578 | 	{
 579 | 		printf("Error: Duplicate option listed <%c>.\n", duplicateSwitch);
 580 | 		exitCode = 1;
 581 | 	}
 582 | 
 583 | 	/* If the user requested a control file dump, a pure binary
 584 | 	 * block dump or a non-interpreted formatted dump, mask off
 585 | 	 * all other block level options (with a few exceptions) */
 586 | 	if (rc == OPT_RC_VALID)
 587 | 	{
 588 | 		/* The user has requested a control file dump, only -f and */
 589 | 		/* -S are valid... turn off all other formatting */
 590 | 		if (controlOptions & CONTROL_DUMP)
 591 | 		{
 592 | 			if ((blockOptions & ~(BLOCK_FORMAT | BLOCK_FORCED))
 593 | 				|| (itemOptions))
 594 | 			{
 595 | 				rc = OPT_RC_INVALID;
 596 | 				printf("Error: Invalid options used for Control File dump.\n"
 597 | 					   "       Only options <Sf> may be used with <c>.\n");
 598 | 				exitCode = 1;
 599 | 			}
 600 | 			else
 601 | 			{
 602 | 				controlOptions |=
 603 | 					(blockOptions & (BLOCK_FORMAT | BLOCK_FORCED));
 604 | 				blockOptions = itemOptions = 0;
 605 | 			}
 606 | 		}
 607 | 		/* The user has requested a binary block dump... only -R and -S
 608 | 		 * are honoured */
 609 | 		else if (blockOptions & BLOCK_BINARY)
 610 | 		{
 611 | 			blockOptions &= (BLOCK_BINARY | BLOCK_RANGE | BLOCK_FORCED);
 612 | 			itemOptions = 0;
 613 | 		}
 614 | 		/* The user has requested a non-interpreted dump... only -a, -R
 615 | 		 * and -S are honoured */
 616 | 		else if (blockOptions & BLOCK_NO_INTR)
 617 | 		{
 618 | 			blockOptions &=
 619 | 				(BLOCK_NO_INTR | BLOCK_ABSOLUTE | BLOCK_RANGE | BLOCK_FORCED);
 620 | 			itemOptions = 0;
 621 | 		}
 622 | 	}
 623 | 
 624 | 	return (rc);
 625 | }
 617 | 
 618 | /* Convert an option argument to a non-negative number.
 619 |  *
 620 |  * Returns the value of optionString when it is a non-empty string made up
 621 |  * solely of decimal digits, and -1 otherwise (which covers negative
 622 |  * numbers too, since '-' is not a digit). */
 623 | static int
 624 | GetOptionValue(char *optionString)
 625 | {
 626 | 	const char *cursor = optionString;
 627 | 
 628 | 	/* An empty argument is not a number; atoi() would silently yield 0 */
 629 | 	if (*cursor == '\0')
 630 | 		return -1;
 631 | 
 632 | 	/* Verify the option looks like a number */
 633 | 	for (; *cursor != '\0'; cursor++)
 634 | 	{
 635 | 		/* isdigit() is only defined for unsigned char values and EOF */
 636 | 		if (!isdigit((unsigned char) *cursor))
 637 | 			return -1;
 638 | 	}
 639 | 
 640 | 	/* Convert the string to a number now that it looks good */
 641 | 	return atoi(optionString);
 642 | }
 638 | 
 639 | /* Read the page header off of block 0 to determine the block size
 640 |  * used in this file.  Can be overridden using the -S option. The
 641 |  * returned value is the block size of block 0 on disk, or BLCKSZ
 642 |  * when the header cannot be read or reports a size of zero.  The
 643 |  * stream is rewound to its start before returning, and exitCode is
 644 |  * set to 1 when a full header could not be read. */
 645 | unsigned int
 646 | GetBlockSize(FILE *fp)
 647 | {
 648 | 	unsigned int localSize = 0;
 649 | 	/* fread() returns size_t; keep it unsigned to match the %u below */
 650 | 	size_t		bytesRead;
 651 | 	char		localCache[sizeof(PageHeaderData)];
 652 | 
 653 | 	/* Read the first header off of block 0 to determine the block size */
 654 | 	bytesRead = fread(&localCache, 1, sizeof(PageHeaderData), fp);
 655 | 	rewind(fp);
 656 | 
 657 | 	if (bytesRead == sizeof(PageHeaderData))
 658 | 		localSize = (unsigned int) PageGetPageSize(&localCache);
 659 | 	else
 660 | 	{
 661 | 		printf("Error: Unable to read full page header from block 0.\n"
 662 | 			   "  ===> Read %u bytes\n", (unsigned int) bytesRead);
 663 | 		exitCode = 1;
 664 | 	}
 665 | 
 666 | 	if (localSize == 0)
 667 | 	{
 668 | 		printf("Notice: Block size determined from reading block 0 is zero, using default %d instead.\n", BLCKSZ);
 669 | 		printf("Hint: Use -S <size> to specify the size manually.\n");
 670 | 		localSize = BLCKSZ;
 671 | 	}
 672 | 
 673 | 	return (localSize);
 674 | }
 671 | 
 672 | /* Determine the contents of the special section on the block and
 673 |  * return this enum value */
 674 | static unsigned int
 675 | GetSpecialSectionType(char *buffer, Page page)
 676 | {
 677 | 	unsigned int rc;
 678 | 	unsigned int specialOffset;
 679 | 	unsigned int specialSize;
 680 | 	unsigned int specialValue;
 681 | 	PageHeader	pageHeader = (PageHeader) page;
 682 | 
 683 | 	/* If this is not a partial header, check the validity of the
 684 | 	 * special section offset and contents */
 685 | 	if (bytesToFormat > sizeof(PageHeaderData))
 686 | 	{
 687 | 		specialOffset = (unsigned int) pageHeader->pd_special;
 688 | 
 689 | 		/* Check that the special offset can remain on the block or
 690 | 		 * the partial block */
 691 | 		if ((specialOffset == 0) ||
 692 | 			(specialOffset > blockSize) || (specialOffset > bytesToFormat))
 693 | 			rc = SPEC_SECT_ERROR_BOUNDARY;
 694 | 		else
 695 | 		{
 696 | 			/* we may need to examine last 2 bytes of page to identify index */
 697 | 			uint16	   *ptype = (uint16 *) (buffer + blockSize - sizeof(uint16));
 698 | 
 699 | 			specialSize = blockSize - specialOffset;
 700 | 
 701 | 			/* If there is a special section, use its size to guess its
 702 | 			 * contents, checking the last 2 bytes of the page in cases
 703 | 			 * that are ambiguous.  Note we don't attempt to dereference
 704 | 			 * the pointers without checking bytesToFormat == blockSize. */
 705 | 			if (specialSize == 0)
 706 | 				rc = SPEC_SECT_NONE;
 707 | 			else if (specialSize == MAXALIGN(sizeof(uint32)))
 708 | 			{
 709 | 				/* If MAXALIGN is 8, this could be either a sequence or
 710 | 				 * SP-GiST or GIN. */
 711 | 				if (bytesToFormat == blockSize)
 712 | 				{
 713 | 					specialValue = *((int *) (buffer + specialOffset));
 714 | 					if (specialValue == SEQUENCE_MAGIC)
 715 | 						rc = SPEC_SECT_SEQUENCE;
 716 | 					else if (specialSize == MAXALIGN(sizeof(SpGistPageOpaqueData)) &&
 717 | 							 *ptype == SPGIST_PAGE_ID)
 718 | 						rc = SPEC_SECT_INDEX_SPGIST;
 719 | 					else if (specialSize == MAXALIGN(sizeof(GinPageOpaqueData)))
 720 | 						rc = SPEC_SECT_INDEX_GIN;
 721 | 					else
 722 | 						rc = SPEC_SECT_ERROR_UNKNOWN;
 723 | 				}
 724 | 				else
 725 | 					rc = SPEC_SECT_ERROR_UNKNOWN;
 726 | 			}
 727 | 			/* SP-GiST and GIN have same size special section, so check
 728 | 			 * the page ID bytes first. */
 729 | 			else if (specialSize == MAXALIGN(sizeof(SpGistPageOpaqueData)) &&
 730 | 					 bytesToFormat == blockSize &&
 731 | 					 *ptype == SPGIST_PAGE_ID)
 732 | 				rc = SPEC_SECT_INDEX_SPGIST;
 733 | 			else if (specialSize == MAXALIGN(sizeof(GinPageOpaqueData)))
 734 | 				rc = SPEC_SECT_INDEX_GIN;
 735 | 			else if (specialSize > 2 && bytesToFormat == blockSize)
 736 | 			{
 737 | 				/* As of 8.3, BTree, Hash, and GIST all have the same size
 738 | 				 * special section, but the last two bytes of the section
 739 | 				 * can be checked to determine what's what. */
 740 | 				if (*ptype <= MAX_BT_CYCLE_ID &&
 741 | 					specialSize == MAXALIGN(sizeof(BTPageOpaqueData)))
 742 | 					rc = SPEC_SECT_INDEX_BTREE;
 743 | 				else if (*ptype == HASHO_PAGE_ID &&
 744 | 						 specialSize == MAXALIGN(sizeof(HashPageOpaqueData)))
 745 | 					rc = SPEC_SECT_INDEX_HASH;
 746 | 				else if (*ptype == GIST_PAGE_ID &&
 747 | 						 specialSize == MAXALIGN(sizeof(GISTPageOpaqueData)))
 748 | 					rc = SPEC_SECT_INDEX_GIST;
 749 | 				else
 750 | 					rc = SPEC_SECT_ERROR_UNKNOWN;
 751 | 			}
 752 | 			else
 753 | 				rc = SPEC_SECT_ERROR_UNKNOWN;
 754 | 		}
 755 | 	}
 756 | 	else
 757 | 		rc = SPEC_SECT_ERROR_UNKNOWN;
 758 | 
 759 | 	return (rc);
 760 | }
 761 | 
 762 | /*	Check whether page is a btree meta page */
 763 | static bool
 764 | IsBtreeMetaPage(Page page)
 765 | {
 766 | 	PageHeader	pageHeader = (PageHeader) page;
 767 | 
 768 | 	if ((PageGetSpecialSize(page) == (MAXALIGN(sizeof(BTPageOpaqueData))))
 769 | 		&& (bytesToFormat == blockSize))
 770 | 	{
 771 | 		BTPageOpaque btpo =
 772 | 		(BTPageOpaque) ((char *) page + pageHeader->pd_special);
 773 | 
 774 | 		/* Must check the cycleid to be sure it's really btree. */
 775 | 		if ((btpo->btpo_cycleid <= MAX_BT_CYCLE_ID) &&
 776 | 			(btpo->btpo_flags & BTP_META))
 777 | 			return true;
 778 | 	}
 779 | 	return false;
 780 | }
 781 | 
 782 | /* Display a header for the dump so we know the file name, the options
 783 |  * used and the time the dump was taken */
 784 | static void
 785 | CreateDumpFileHeader(int numOptions, char **options)
 786 | {
 787 | 	unsigned int x;
 788 | 	char		optionBuffer[52] = "\0";
 789 | 	time_t		rightNow = time(NULL);
 790 | 
 791 | 	/* Iterate through the options and cache them.
 792 | 	 * The maximum we can display is 50 option characters + spaces. */
 793 | 	for (x = 1; x < (numOptions - 1); x++)
 794 | 	{
 795 | 		if ((strlen(optionBuffer) + strlen(options[x])) > 50)
 796 | 			break;
 797 | 		strcat(optionBuffer, options[x]);
 798 | 		strcat(optionBuffer, " ");
 799 | 	}
 800 | 
 801 | 	printf
 802 | 		("\n*******************************************************************\n"
 803 | 		 "* PostgreSQL File/Block Formatted Dump Utility - Version %s\n"
 804 | 		 "*\n"
 805 | 		 "* File: %s\n"
 806 | 		 "* Options used: %s\n*\n"
 807 | 		 "* Dump created on: %s"
 808 | 		 "*******************************************************************\n",
 809 | 		 FD_VERSION, fileName, (strlen(optionBuffer)) ? optionBuffer : "None",
 810 | 		 ctime(&rightNow));
 811 | }
 812 | 
 813 | /*	Dump out a formatted block header for the requested block */
 814 | static int
 815 | FormatHeader(char *buffer, Page page, BlockNumber blkno, bool isToast)
 816 | {
 817 | 	int			rc = 0;
 818 | 	unsigned int headerBytes;
 819 | 	PageHeader	pageHeader = (PageHeader) page;
 820 | 	char	   *indent = isToast ? "\t" : "";
 821 | 
 822 | 	if (!isToast || verbose)
 823 | 		printf("%s<Header> -----\n", indent);
 824 | 
 825 | 	/* Only attempt to format the header if the entire header (minus the item
 826 | 	 * array) is available */
 827 | 	if (bytesToFormat < offsetof(PageHeaderData, pd_linp[0]))
 828 | 	{
 829 | 		headerBytes = bytesToFormat;
 830 | 		rc = EOF_ENCOUNTERED;
 831 | 	}
 832 | 	else
 833 | 	{
 834 | 		XLogRecPtr	pageLSN = PageGetLSN(page);
 835 | 		int			maxOffset = PageGetMaxOffsetNumber(page);
 836 | 		char		flagString[100];
 837 | 
 838 | 		headerBytes = offsetof(PageHeaderData, pd_linp[0]);
 839 | 		blockVersion = (unsigned int) PageGetPageLayoutVersion(page);
 840 | 
 841 | 		/* The full header exists but we have to check that the item array
 842 | 		 * is available or how far we can index into it */
 843 | 		if (maxOffset > 0)
 844 | 		{
 845 | 			unsigned int itemsLength = maxOffset * sizeof(ItemIdData);
 846 | 
 847 | 			if (bytesToFormat < (headerBytes + itemsLength))
 848 | 			{
 849 | 				headerBytes = bytesToFormat;
 850 | 				rc = EOF_ENCOUNTERED;
 851 | 			}
 852 | 			else
 853 | 				headerBytes += itemsLength;
 854 | 		}
 855 | 
 856 | 		flagString[0] = '\0';
 857 | 		if (pageHeader->pd_flags & PD_HAS_FREE_LINES)
 858 | 			strcat(flagString, "HAS_FREE_LINES|");
 859 | 		if (pageHeader->pd_flags & PD_PAGE_FULL)
 860 | 			strcat(flagString, "PAGE_FULL|");
 861 | 		if (pageHeader->pd_flags & PD_ALL_VISIBLE)
 862 | 			strcat(flagString, "ALL_VISIBLE|");
 863 | 		if (strlen(flagString))
 864 | 			flagString[strlen(flagString) - 1] = '\0';
 865 | 
 866 | 		/* Interpret the content of the header */
 867 | 		if (!isToast || verbose)
 868 | 		{
 869 | 			printf("%s Block Offset: 0x%08x         Offsets: Lower    %4u (0x%04hx)\n",
 870 | 					indent, pageOffset, pageHeader->pd_lower, pageHeader->pd_lower);
 871 | 			printf("%s Block: Size %4d  Version %4u            Upper    %4u (0x%04hx)\n",
 872 | 					indent, (int) PageGetPageSize(page), blockVersion,
 873 | 					pageHeader->pd_upper, pageHeader->pd_upper);
 874 | 			printf("%s LSN:  logid %6d recoff 0x%08x      Special  %4u (0x%04hx)\n",
 875 | 					indent, (uint32) (pageLSN >> 32), (uint32) pageLSN,
 876 | 					pageHeader->pd_special, pageHeader->pd_special);
 877 | 			printf("%s Items: %4d                      Free Space: %4u\n",
 878 | 					indent, maxOffset, pageHeader->pd_upper - pageHeader->pd_lower);
 879 | 			printf("%s Checksum: 0x%04x  Prune XID: 0x%08x  Flags: 0x%04x (%s)\n",
 880 | 					indent, pageHeader->pd_checksum, pageHeader->pd_prune_xid,
 881 | 					pageHeader->pd_flags, flagString);
 882 | 			printf("%s Length (including item array): %u\n\n",
 883 | 					indent, headerBytes);
 884 | 		}
 885 | 
 886 | 		/* If it's a btree meta page, print the contents of the meta block. */
 887 | 		if (IsBtreeMetaPage(page))
 888 | 		{
 889 | 			BTMetaPageData *btpMeta = BTPageGetMeta(buffer);
 890 | 
 891 | 			if (!isToast || verbose)
 892 | 			{
 893 | 				printf("%s BTree Meta Data:  Magic (0x%08x)   Version (%u)\n",
 894 | 						indent, btpMeta->btm_magic, btpMeta->btm_version);
 895 | 				printf("%s                   Root:     Block (%u)  Level (%u)\n",
 896 | 						indent, btpMeta->btm_root, btpMeta->btm_level);
 897 | 				printf("%s                   FastRoot: Block (%u)  Level (%u)\n\n",
 898 | 					indent, btpMeta->btm_fastroot, btpMeta->btm_fastlevel);
 899 | 			}
 900 | 			headerBytes += sizeof(BTMetaPageData);
 901 | 		}
 902 | 
 903 | 		/* Eye the contents of the header and alert the user to possible 
 904 | 		 * problems. */
 905 | 		if ((maxOffset < 0) ||
 906 | 			(maxOffset > blockSize) ||
 907 | 			(blockVersion != PG_PAGE_LAYOUT_VERSION) || /* only one we support */
 908 | 			(pageHeader->pd_upper > blockSize) ||
 909 | 			(pageHeader->pd_upper > pageHeader->pd_special) ||
 910 | 			(pageHeader->pd_lower <
 911 | 			 (sizeof(PageHeaderData) - sizeof(ItemIdData)))
 912 | 			|| (pageHeader->pd_lower > blockSize)
 913 | 			|| (pageHeader->pd_upper < pageHeader->pd_lower)
 914 | 			|| (pageHeader->pd_special > blockSize))
 915 | 		{
 916 | 			printf(" Error: Invalid header information.\n\n");
 917 | 			exitCode = 1;
 918 | 		}
 919 | 
 920 | 		if (blockOptions & BLOCK_CHECKSUMS)
 921 | 		{
 922 | 			uint32		delta = (segmentSize / blockSize) * segmentNumber;
 923 | 			uint16		calc_checksum = pg_checksum_page(page, delta + blkno);
 924 | 
 925 | 			if (calc_checksum != pageHeader->pd_checksum)
 926 | 			{
 927 | 				printf(" Error: checksum failure: calculated 0x%04x.\n\n",
 928 | 					   calc_checksum);
 929 | 				exitCode = 1;
 930 | 			}
 931 | 		}
 932 | 	}
 933 | 
 934 | 	/* If we have reached the end of file while interpreting the header, let
 935 | 	 * the user know about it */
 936 | 	if (rc == EOF_ENCOUNTERED)
 937 | 	{
 938 | 		if (!isToast || verbose)
 939 | 		{
 940 | 			printf("%s Error: End of block encountered within the header."
 941 | 					" Bytes read: %4u.\n\n", indent, bytesToFormat);
 942 | 		}
 943 | 		exitCode = 1;
 944 | 	}
 945 | 
 946 | 	/* A request to dump the formatted binary of the block (header,
 947 | 	 * items and special section).  It's best to dump even on an error
 948 | 	 * so the user can see the raw image. */
 949 | 	if (blockOptions & BLOCK_FORMAT)
 950 | 		FormatBinary(buffer, headerBytes, 0);
 951 | 
 952 | 	return (rc);
 953 | }
 954 | 
 955 | /*	Dump out formatted items that reside on this block */
 956 | static void
 957 | FormatItemBlock(char *buffer,
 958 | 		Page page,
 959 | 		bool isToast,
 960 | 		Oid toastOid,
 961 | 		unsigned int toastExternalSize,
 962 | 		char *toastValue,
 963 | 		unsigned int *toastRead)
 964 | {
 965 | 	unsigned int x;
 966 | 	unsigned int itemSize;
 967 | 	unsigned int itemOffset;
 968 | 	unsigned int itemFlags;
 969 | 	ItemId		itemId;
 970 | 	int			maxOffset = PageGetMaxOffsetNumber(page);
 971 | 	char	   *indent = isToast ? "\t" : "";
 972 | 
 973 | 	/* If it's a btree meta page, the meta block is where items would normally
 974 | 	 * be; don't print garbage. */
 975 | 	if (IsBtreeMetaPage(page))
 976 | 		return;
 977 | 
 978 | 	if (!isToast || verbose)
 979 | 		printf("%s<Data> ------ \n", indent);
 980 | 
 981 | 	/* Loop through the items on the block.  Check if the block is
 982 | 	 * empty and has a sensible item array listed before running
 983 | 	 * through each item */
 984 | 	if (maxOffset == 0)
 985 | 	{
 986 | 		if (!isToast || verbose)
 987 | 			printf("%s Empty block - no items listed \n\n", indent);
 988 | 	}
 989 | 	else if ((maxOffset < 0) || (maxOffset > blockSize))
 990 | 	{
 991 | 		if (!isToast || verbose)
 992 | 			printf("%s Error: Item index corrupt on block. Offset: <%d>.\n\n",
 993 | 				   indent,
 994 | 				   maxOffset);
 995 | 		exitCode = 1;
 996 | 	}
 997 | 	else
 998 | 	{
 999 | 		int				formatAs;
1000 | 		char			textFlags[16];
1001 | 		uint32			chunkId;
1002 | 		unsigned int	chunkSize = 0;
1003 | 
1004 | 		/* First, honour requests to format items a special way, then
1005 | 		 * use the special section to determine the format style */
1006 | 		if (itemOptions & ITEM_INDEX)
1007 | 			formatAs = ITEM_INDEX;
1008 | 		else if (itemOptions & ITEM_HEAP)
1009 | 			formatAs = ITEM_HEAP;
1010 | 		else
1011 | 			switch (specialType)
1012 | 			{
1013 | 				case SPEC_SECT_INDEX_BTREE:
1014 | 				case SPEC_SECT_INDEX_HASH:
1015 | 				case SPEC_SECT_INDEX_GIST:
1016 | 				case SPEC_SECT_INDEX_GIN:
1017 | 					formatAs = ITEM_INDEX;
1018 | 					break;
1019 | 				case SPEC_SECT_INDEX_SPGIST:
1020 | 					{
1021 | 						SpGistPageOpaque spgpo =
1022 | 						(SpGistPageOpaque) ((char *) page +
1023 | 											((PageHeader) page)->pd_special);
1024 | 
1025 | 						if (spgpo->flags & SPGIST_LEAF)
1026 | 							formatAs = ITEM_SPG_LEAF;
1027 | 						else
1028 | 							formatAs = ITEM_SPG_INNER;
1029 | 					}
1030 | 					break;
1031 | 				default:
1032 | 					formatAs = ITEM_HEAP;
1033 | 					break;
1034 | 			}
1035 | 
1036 | 		for (x = 1; x < (maxOffset + 1); x++)
1037 | 		{
1038 | 			itemId = PageGetItemId(page, x);
1039 | 			itemFlags = (unsigned int) ItemIdGetFlags(itemId);
1040 | 			itemSize = (unsigned int) ItemIdGetLength(itemId);
1041 | 			itemOffset = (unsigned int) ItemIdGetOffset(itemId);
1042 | 
1043 | 			switch (itemFlags)
1044 | 			{
1045 | 				case LP_UNUSED:
1046 | 					strcpy(textFlags, "UNUSED");
1047 | 					break;
1048 | 				case LP_NORMAL:
1049 | 					strcpy(textFlags, "NORMAL");
1050 | 					break;
1051 | 				case LP_REDIRECT:
1052 | 					strcpy(textFlags, "REDIRECT");
1053 | 					break;
1054 | 				case LP_DEAD:
1055 | 					strcpy(textFlags, "DEAD");
1056 | 					break;
1057 | 				default:
1058 | 					/* shouldn't be possible */
1059 | 					sprintf(textFlags, "0x%02x", itemFlags);
1060 | 					break;
1061 | 			}
1062 | 
1063 | 			if (!isToast || verbose)
1064 | 				printf("%s Item %3u -- Length: %4u  Offset: %4u (0x%04x)"
1065 | 					   "  Flags: %s\n",
1066 | 					   indent,
1067 | 					   x,
1068 | 					   itemSize,
1069 | 					   itemOffset,
1070 | 					   itemOffset,
1071 | 					   textFlags);
1072 | 
1073 | 			/* Make sure the item can physically fit on this block before
1074 | 			 * formatting */
1075 | 			if ((itemOffset + itemSize > blockSize) ||
1076 | 				(itemOffset + itemSize > bytesToFormat))
1077 | 			{
1078 | 				if (!isToast || verbose)
1079 | 					printf("%s  Error: Item contents extend beyond block.\n"
1080 | 						   "%s         BlockSize<%d> Bytes Read<%d> Item Start<%d>.\n",
1081 | 						   indent, indent, blockSize, bytesToFormat, itemOffset + itemSize);
1082 | 				exitCode = 1;
1083 | 			}
1084 | 			else
1085 | 			{
1086 | 				HeapTupleHeader tuple_header;
1087 | 				TransactionId xmax;
1088 | 
1089 | 				/* If the user requests that the items be interpreted as
1090 | 				 * heap or index items... */
1091 | 				if (itemOptions & ITEM_DETAIL)
1092 | 					FormatItem(buffer, itemSize, itemOffset, formatAs);
1093 | 
1094 | 				/* Dump the items contents in hex and ascii */
1095 | 				if (blockOptions & BLOCK_FORMAT)
1096 | 					FormatBinary(buffer, itemSize, itemOffset);
1097 | 
1098 | 				/* Check if tuple was deleted */
1099 | 				tuple_header = (HeapTupleHeader) (&buffer[itemOffset]);
1100 | 				xmax = HeapTupleHeaderGetRawXmax(tuple_header);
1101 | 				if ((blockOptions & BLOCK_IGNORE_OLD) && (xmax != 0))
1102 | 				{
1103 | 					if (!isToast || verbose)
1104 | 						printf("%stuple was removed by transaction #%d\n",
1105 | 								indent,
1106 | 								xmax);
1107 | 				}
1108 | 				else if (isToast)
1109 | 				{
1110 | 					ToastChunkDecode(&buffer[itemOffset], itemSize, toastOid,
1111 | 									 &chunkId, toastValue + *toastRead,
1112 | 									 &chunkSize);
1113 | 
1114 | 					if (!isToast || verbose)
1115 | 						printf("%s  Read TOAST chunk. TOAST Oid: %d, chunk id: %d, "
1116 | 							   "chunk data size: %d\n",
1117 | 							   indent, toastOid, chunkId, chunkSize);
1118 | 
1119 | 					*toastRead += chunkSize;
1120 | 
1121 | 					if (*toastRead >= toastExternalSize)
1122 | 						break;
1123 | 				}
1124 | 				else if ((blockOptions & BLOCK_DECODE) && (itemFlags == LP_NORMAL))
1125 | 				{
1126 | 					/* Decode tuple data */
1127 | 					FormatDecode(&buffer[itemOffset], itemSize);
1128 | 				}
1129 | 
1130 | 				if (x == maxOffset)
1131 | 					printf("\n");
1132 | 			}
1133 | 		}
1134 | 	}
1135 | }
1136 | 
1137 | /* Interpret the contents of the item based on whether it has a special
1138 |  * section and/or the user has hinted */
1139 | static void
1140 | FormatItem(char *buffer, unsigned int numBytes, unsigned int startIndex,
1141 | 		   unsigned int formatAs)
1142 | {
1143 | 	static const char *const spgist_tupstates[4] = {
1144 | 		"LIVE",
1145 | 		"REDIRECT",
1146 | 		"DEAD",
1147 | 		"PLACEHOLDER"
1148 | 	};
1149 | 
1150 | 	if (formatAs == ITEM_INDEX)
1151 | 	{
1152 | 		/* It is an IndexTuple item, so dump the index header */
1153 | 		if (numBytes < sizeof(ItemPointerData))
1154 | 		{
1155 | 			if (numBytes)
1156 | 			{
1157 | 				printf("  Error: This item does not look like an index item.\n");
1158 | 				exitCode = 1;
1159 | 			}
1160 | 		}
1161 | 		else
1162 | 		{
1163 | 			IndexTuple	itup = (IndexTuple) (&(buffer[startIndex]));
1164 | 
1165 | 			printf("  Block Id: %u  linp Index: %u  Size: %d\n"
1166 | 				   "  Has Nulls: %u  Has Varwidths: %u\n\n",
1167 | 				   ((uint32) ((itup->t_tid.ip_blkid.bi_hi << 16) |
1168 | 							  (uint16) itup->t_tid.ip_blkid.bi_lo)),
1169 | 				   itup->t_tid.ip_posid,
1170 | 				   (int) IndexTupleSize(itup),
1171 | 				   IndexTupleHasNulls(itup) ? 1 : 0,
1172 | 				   IndexTupleHasVarwidths(itup) ? 1 : 0);
1173 | 
1174 | 			if (numBytes != IndexTupleSize(itup))
1175 | 			{
1176 | 				printf("  Error: Item size difference. Given <%u>, "
1177 | 					   "Internal <%d>.\n", numBytes, (int) IndexTupleSize(itup));
1178 | 				exitCode = 1;
1179 | 			}
1180 | 		}
1181 | 	}
1182 | 	else if (formatAs == ITEM_SPG_INNER)
1183 | 	{
1184 | 		/* It is an SpGistInnerTuple item, so dump the index header */
1185 | 		if (numBytes < SGITHDRSZ)
1186 | 		{
1187 | 			if (numBytes)
1188 | 			{
1189 | 				printf("  Error: This item does not look like an SPGiST item.\n");
1190 | 				exitCode = 1;
1191 | 			}
1192 | 		}
1193 | 		else
1194 | 		{
1195 | 			SpGistInnerTuple itup = (SpGistInnerTuple) (&(buffer[startIndex]));
1196 | 
1197 | 			printf("  State: %s  allTheSame: %d nNodes: %u prefixSize: %u\n\n",
1198 | 				   spgist_tupstates[itup->tupstate],
1199 | 				   itup->allTheSame,
1200 | 				   itup->nNodes,
1201 | 				   itup->prefixSize);
1202 | 
1203 | 			if (numBytes != itup->size)
1204 | 			{
1205 | 				printf("  Error: Item size difference. Given <%u>, "
1206 | 					   "Internal <%d>.\n", numBytes, (int) itup->size);
1207 | 				exitCode = 1;
1208 | 			}
1209 | 			else if (itup->prefixSize == MAXALIGN(itup->prefixSize))
1210 | 			{
1211 | 				int			i;
1212 | 				SpGistNodeTuple node;
1213 | 
1214 | 				/* Dump the prefix contents in hex and ascii */
1215 | 				if ((blockOptions & BLOCK_FORMAT) &&
1216 | 					SGITHDRSZ + itup->prefixSize <= numBytes)
1217 | 					FormatBinary(buffer,
1218 | 							SGITHDRSZ + itup->prefixSize, startIndex);
1219 | 
1220 | 				/* Try to print the nodes, but only while pointer is sane */
1221 | 				SGITITERATE(itup, i, node)
1222 | 				{
1223 | 					int			off = (char *) node - (char *) itup;
1224 | 
1225 | 					if (off + SGNTHDRSZ > numBytes)
1226 | 						break;
1227 | 					printf("  Node %2u:  Downlink: %u/%u  Size: %d  Null: %u\n",
1228 | 						   i,
1229 | 						   ((uint32) ((node->t_tid.ip_blkid.bi_hi << 16) |
1230 | 									  (uint16) node->t_tid.ip_blkid.bi_lo)),
1231 | 						   node->t_tid.ip_posid,
1232 | 						   (int) IndexTupleSize(node),
1233 | 						   IndexTupleHasNulls(node) ? 1 : 0);
1234 | 					/* Dump the node's contents in hex and ascii */
1235 | 					if ((blockOptions & BLOCK_FORMAT) &&
1236 | 						off + IndexTupleSize(node) <= numBytes)
1237 | 						FormatBinary(buffer,
1238 | 								IndexTupleSize(node), startIndex + off);
1239 | 					if (IndexTupleSize(node) != MAXALIGN(IndexTupleSize(node)))
1240 | 						break;
1241 | 				}
1242 | 			}
1243 | 			printf("\n");
1244 | 		}
1245 | 	}
1246 | 	else if (formatAs == ITEM_SPG_LEAF)
1247 | 	{
1248 | 		/* It is an SpGistLeafTuple item, so dump the index header */
1249 | 		if (numBytes < SGLTHDRSZ)
1250 | 		{
1251 | 			if (numBytes)
1252 | 			{
1253 | 				printf("  Error: This item does not look like an SPGiST item.\n");
1254 | 				exitCode = 1;
1255 | 			}
1256 | 		}
1257 | 		else
1258 | 		{
1259 | 			SpGistLeafTuple itup = (SpGistLeafTuple) (&(buffer[startIndex]));
1260 | 
1261 | 			printf("  State: %s  nextOffset: %u  Block Id: %u  linp Index: %u\n\n",
1262 | 				   spgist_tupstates[itup->tupstate],
1263 | 				   itup->nextOffset,
1264 | 				   ((uint32) ((itup->heapPtr.ip_blkid.bi_hi << 16) |
1265 | 							  (uint16) itup->heapPtr.ip_blkid.bi_lo)),
1266 | 				   itup->heapPtr.ip_posid);
1267 | 
1268 | 			if (numBytes != itup->size)
1269 | 			{
1270 | 				printf("  Error: Item size difference. Given <%u>, "
1271 | 					   "Internal <%d>.\n", numBytes, (int) itup->size);
1272 | 				exitCode = 1;
1273 | 			}
1274 | 		}
1275 | 	}
1276 | 	else
1277 | 	{
1278 | 		/* It is a HeapTuple item, so dump the heap header */
1279 | 		int			alignedSize = MAXALIGN(sizeof(HeapTupleHeaderData));
1280 | 
1281 | 		if (numBytes < alignedSize)
1282 | 		{
1283 | 			if (numBytes)
1284 | 			{
1285 | 				printf("  Error: This item does not look like a heap item.\n");
1286 | 				exitCode = 1;
1287 | 			}
1288 | 		}
1289 | 		else
1290 | 		{
1291 | 			char		flagString[256];
1292 | 			unsigned int x;
1293 | 			unsigned int bitmapLength = 0;
1294 | 			unsigned int oidLength = 0;
1295 | 			unsigned int computedLength;
1296 | 			unsigned int infoMask;
1297 | 			unsigned int infoMask2;
1298 | 			int			localNatts;
1299 | 			unsigned int localHoff;
1300 | 			bits8	   *localBits;
1301 | 			unsigned int localBitOffset;
1302 | 
1303 | 			HeapTupleHeader htup = (HeapTupleHeader) (&buffer[startIndex]);
1304 | 
1305 | 			infoMask = htup->t_infomask;
1306 | 			infoMask2 = htup->t_infomask2;
1307 | 			localBits = &(htup->t_bits[0]);
1308 | 			localNatts = HeapTupleHeaderGetNatts(htup);
1309 | 			localHoff = htup->t_hoff;
1310 | 			localBitOffset = offsetof(HeapTupleHeaderData, t_bits);
1311 | 
1312 | 			printf("  XMIN: %u  XMAX: %u  CID|XVAC: %u",
1313 | 				   HeapTupleHeaderGetXmin(htup),
1314 | 				   HeapTupleHeaderGetRawXmax(htup),
1315 | 				   HeapTupleHeaderGetRawCommandId(htup));
1316 | 
1317 | 			if (infoMask & HEAP_HASOID)
1318 | 				printf("  OID: %u",
1319 | 					   HeapTupleHeaderGetOid(htup));
1320 | 
1321 | 			printf("\n"
1322 | 				   "  Block Id: %u  linp Index: %u   Attributes: %d   Size: %d\n",
1323 | 				   ((uint32)
1324 | 					((htup->t_ctid.ip_blkid.bi_hi << 16) | (uint16) htup->
1325 | 					 t_ctid.ip_blkid.bi_lo)), htup->t_ctid.ip_posid,
1326 | 				   localNatts, htup->t_hoff);
1327 | 
1328 | 			/* Place readable versions of the tuple info mask into a buffer.
1329 | 			 * Assume that the string can not expand beyond 256. */
1330 | 			flagString[0] = '\0';
1331 | 			if (infoMask & HEAP_HASNULL)
1332 | 				strcat(flagString, "HASNULL|");
1333 | 			if (infoMask & HEAP_HASVARWIDTH)
1334 | 				strcat(flagString, "HASVARWIDTH|");
1335 | 			if (infoMask & HEAP_HASEXTERNAL)
1336 | 				strcat(flagString, "HASEXTERNAL|");
1337 | 			if (infoMask & HEAP_HASOID)
1338 | 				strcat(flagString, "HASOID|");
1339 | 			if (infoMask & HEAP_XMAX_KEYSHR_LOCK)
1340 | 				strcat(flagString, "XMAX_KEYSHR_LOCK|");
1341 | 			if (infoMask & HEAP_COMBOCID)
1342 | 				strcat(flagString, "COMBOCID|");
1343 | 			if (infoMask & HEAP_XMAX_EXCL_LOCK)
1344 | 				strcat(flagString, "XMAX_EXCL_LOCK|");
1345 | 			if (infoMask & HEAP_XMAX_LOCK_ONLY)
1346 | 				strcat(flagString, "XMAX_LOCK_ONLY|");
1347 | 			if (infoMask & HEAP_XMIN_COMMITTED)
1348 | 				strcat(flagString, "XMIN_COMMITTED|");
1349 | 			if (infoMask & HEAP_XMIN_INVALID)
1350 | 				strcat(flagString, "XMIN_INVALID|");
1351 | 			if (infoMask & HEAP_XMAX_COMMITTED)
1352 | 				strcat(flagString, "XMAX_COMMITTED|");
1353 | 			if (infoMask & HEAP_XMAX_INVALID)
1354 | 				strcat(flagString, "XMAX_INVALID|");
1355 | 			if (infoMask & HEAP_XMAX_IS_MULTI)
1356 | 				strcat(flagString, "XMAX_IS_MULTI|");
1357 | 			if (infoMask & HEAP_UPDATED)
1358 | 				strcat(flagString, "UPDATED|");
1359 | 			if (infoMask & HEAP_MOVED_OFF)
1360 | 				strcat(flagString, "MOVED_OFF|");
1361 | 			if (infoMask & HEAP_MOVED_IN)
1362 | 				strcat(flagString, "MOVED_IN|");
1363 | 
1364 | 			if (infoMask2 & HEAP_KEYS_UPDATED)
1365 | 				strcat(flagString, "KEYS_UPDATED|");
1366 | 			if (infoMask2 & HEAP_HOT_UPDATED)
1367 | 				strcat(flagString, "HOT_UPDATED|");
1368 | 			if (infoMask2 & HEAP_ONLY_TUPLE)
1369 | 				strcat(flagString, "HEAP_ONLY|");
1370 | 
1371 | 			if (strlen(flagString))
1372 | 				flagString[strlen(flagString) - 1] = '\0';
1373 | 
1374 | 			printf("  infomask: 0x%04x (%s) \n", infoMask, flagString);
1375 | 
1376 | 			/* As t_bits is a variable length array, determine the length of
1377 | 			 * the header proper */
1378 | 			if (infoMask & HEAP_HASNULL)
1379 | 				bitmapLength = BITMAPLEN(localNatts);
1380 | 			else
1381 | 				bitmapLength = 0;
1382 | 
1383 | 			if (infoMask & HEAP_HASOID)
1384 | 				oidLength += sizeof(Oid);
1385 | 
1386 | 			computedLength =
1387 | 				MAXALIGN(localBitOffset + bitmapLength + oidLength);
1388 | 
1389 | 			/* Inform the user of a header size mismatch or dump the t_bits
1390 | 			 * array */
1391 | 			if (computedLength != localHoff)
1392 | 			{
1393 | 				printf
1394 | 					("  Error: Computed header length not equal to header size.\n"
1395 | 					 "         Computed <%u>  Header: <%d>\n", computedLength,
1396 | 					 localHoff);
1397 | 
1398 | 				exitCode = 1;
1399 | 			}
1400 | 			else if ((infoMask & HEAP_HASNULL) && bitmapLength)
1401 | 			{
1402 | 				printf("  t_bits: ");
1403 | 				for (x = 0; x < bitmapLength; x++)
1404 | 				{
1405 | 					printf("[%u]: 0x%02x ", x, localBits[x]);
1406 | 					if (((x & 0x03) == 0x03) && (x < bitmapLength - 1))
1407 | 						printf("\n          ");
1408 | 				}
1409 | 				printf("\n");
1410 | 			}
1411 | 			printf("\n");
1412 | 		}
1413 | 	}
1414 | }
1415 | 
1416 | 
1417 | /* On blocks that have special sections, print the contents
1418 |  * according to previously determined special section type */
1419 | static void
1420 | FormatSpecial(char *buffer)
1421 | {
1422 | 	PageHeader	pageHeader = (PageHeader) buffer;
1423 | 	char		flagString[100] = "\0";
1424 | 	unsigned int specialOffset = pageHeader->pd_special;
1425 | 	unsigned int specialSize =
1426 | 	(blockSize >= specialOffset) ? (blockSize - specialOffset) : 0;
1427 | 
1428 | 	printf("<Special Section> -----\n");
1429 | 
1430 | 	switch (specialType)
1431 | 	{
1432 | 		case SPEC_SECT_ERROR_UNKNOWN:
1433 | 		case SPEC_SECT_ERROR_BOUNDARY:
1434 | 			printf(" Error: Invalid special section encountered.\n");
1435 | 			exitCode = 1;
1436 | 			break;
1437 | 
1438 | 		case SPEC_SECT_SEQUENCE:
1439 | 			printf(" Sequence: 0x%08x\n", SEQUENCE_MAGIC);
1440 | 			break;
1441 | 
1442 | 			/* Btree index section */
1443 | 		case SPEC_SECT_INDEX_BTREE:
1444 | 			{
1445 | 				BTPageOpaque btreeSection = (BTPageOpaque) (buffer + specialOffset);
1446 | 
1447 | 				if (btreeSection->btpo_flags & BTP_LEAF)
1448 | 					strcat(flagString, "LEAF|");
1449 | 				if (btreeSection->btpo_flags & BTP_ROOT)
1450 | 					strcat(flagString, "ROOT|");
1451 | 				if (btreeSection->btpo_flags & BTP_DELETED)
1452 | 					strcat(flagString, "DELETED|");
1453 | 				if (btreeSection->btpo_flags & BTP_META)
1454 | 					strcat(flagString, "META|");
1455 | 				if (btreeSection->btpo_flags & BTP_HALF_DEAD)
1456 | 					strcat(flagString, "HALFDEAD|");
1457 | 				if (btreeSection->btpo_flags & BTP_SPLIT_END)
1458 | 					strcat(flagString, "SPLITEND|");
1459 | 				if (btreeSection->btpo_flags & BTP_HAS_GARBAGE)
1460 | 					strcat(flagString, "HASGARBAGE|");
1461 | 				if (btreeSection->btpo_flags & BTP_INCOMPLETE_SPLIT)
1462 | 					strcat(flagString, "INCOMPLETESPLIT|");
1463 | 				if (strlen(flagString))
1464 | 					flagString[strlen(flagString) - 1] = '\0';
1465 | 
1466 | 				printf(" BTree Index Section:\n"
1467 | 					   "  Flags: 0x%04x (%s)\n"
1468 | 					   "  Blocks: Previous (%d)  Next (%d)  %s (%d)  CycleId (%d)\n\n",
1469 | 					   btreeSection->btpo_flags, flagString,
1470 | 					   btreeSection->btpo_prev, btreeSection->btpo_next,
1471 | 					   (btreeSection->
1472 | 						btpo_flags & BTP_DELETED) ? "Next XID" : "Level",
1473 | 					   btreeSection->btpo.level,
1474 | 					   btreeSection->btpo_cycleid);
1475 | 			}
1476 | 			break;
1477 | 
1478 | 			/* Hash index section */
1479 | 		case SPEC_SECT_INDEX_HASH:
1480 | 			{
1481 | 				HashPageOpaque hashSection = (HashPageOpaque) (buffer + specialOffset);
1482 | 
1483 | 				if (hashSection->hasho_flag & LH_UNUSED_PAGE)
1484 | 					strcat(flagString, "UNUSED|");
1485 | 				if (hashSection->hasho_flag & LH_OVERFLOW_PAGE)
1486 | 					strcat(flagString, "OVERFLOW|");
1487 | 				if (hashSection->hasho_flag & LH_BUCKET_PAGE)
1488 | 					strcat(flagString, "BUCKET|");
1489 | 				if (hashSection->hasho_flag & LH_BITMAP_PAGE)
1490 | 					strcat(flagString, "BITMAP|");
1491 | 				if (hashSection->hasho_flag & LH_META_PAGE)
1492 | 					strcat(flagString, "META|");
1493 | 				if (strlen(flagString))
1494 | 					flagString[strlen(flagString) - 1] = '\0';
1495 | 				printf(" Hash Index Section:\n"
1496 | 					   "  Flags: 0x%04x (%s)\n"
1497 | 					   "  Bucket Number: 0x%04x\n"
1498 | 					   "  Blocks: Previous (%d)  Next (%d)\n\n",
1499 | 					   hashSection->hasho_flag, flagString,
1500 | 					   hashSection->hasho_bucket,
1501 | 					   hashSection->hasho_prevblkno, hashSection->hasho_nextblkno);
1502 | 			}
1503 | 			break;
1504 | 
1505 | 			/* GIST index section */
1506 | 		case SPEC_SECT_INDEX_GIST:
1507 | 			{
1508 | 				GISTPageOpaque gistSection = (GISTPageOpaque) (buffer + specialOffset);
1509 | 
1510 | 				if (gistSection->flags & F_LEAF)
1511 | 					strcat(flagString, "LEAF|");
1512 | 				if (gistSection->flags & F_DELETED)
1513 | 					strcat(flagString, "DELETED|");
1514 | 				if (gistSection->flags & F_TUPLES_DELETED)
1515 | 					strcat(flagString, "TUPLES_DELETED|");
1516 | 				if (gistSection->flags & F_FOLLOW_RIGHT)
1517 | 					strcat(flagString, "FOLLOW_RIGHT|");
1518 | 				if (strlen(flagString))
1519 | 					flagString[strlen(flagString) - 1] = '\0';
1520 | 				printf(" GIST Index Section:\n"
1521 | 					   "  NSN: 0x%08x/0x%08x\n"
1522 | 					   "  RightLink: %d\n"
1523 | 					   "  Flags: 0x%08x (%s)\n\n",
1524 | 					   gistSection->nsn.xlogid, gistSection->nsn.xrecoff,
1525 | 					   gistSection->rightlink,
1526 | 					   gistSection->flags, flagString);
1527 | 			}
1528 | 			break;
1529 | 
1530 | 			/* GIN index section */
1531 | 		case SPEC_SECT_INDEX_GIN:
1532 | 			{
1533 | 				GinPageOpaque ginSection = (GinPageOpaque) (buffer + specialOffset);
1534 | 
1535 | 				if (ginSection->flags & GIN_DATA)
1536 | 					strcat(flagString, "DATA|");
1537 | 				if (ginSection->flags & GIN_LEAF)
1538 | 					strcat(flagString, "LEAF|");
1539 | 				if (ginSection->flags & GIN_DELETED)
1540 | 					strcat(flagString, "DELETED|");
1541 | 				if (ginSection->flags & GIN_META)
1542 | 					strcat(flagString, "META|");
1543 | 				if (ginSection->flags & GIN_LIST)
1544 | 					strcat(flagString, "LIST|");
1545 | 				if (ginSection->flags & GIN_LIST_FULLROW)
1546 | 					strcat(flagString, "FULLROW|");
1547 | 				if (ginSection->flags & GIN_INCOMPLETE_SPLIT)
1548 | 					strcat(flagString, "INCOMPLETESPLIT|");
1549 | 				if (ginSection->flags & GIN_COMPRESSED)
1550 | 					strcat(flagString, "COMPRESSED|");
1551 | 				if (strlen(flagString))
1552 | 					flagString[strlen(flagString) - 1] = '\0';
1553 | 				printf(" GIN Index Section:\n"
1554 | 					   "  Flags: 0x%08x (%s)  Maxoff: %d\n"
1555 | 					   "  Blocks: RightLink (%d)\n\n",
1556 | 					   ginSection->flags, flagString,
1557 | 					   ginSection->maxoff,
1558 | 					   ginSection->rightlink);
1559 | 			}
1560 | 			break;
1561 | 
1562 | 			/* SP-GIST index section */
1563 | 		case SPEC_SECT_INDEX_SPGIST:
1564 | 			{
1565 | 				SpGistPageOpaque spgistSection = (SpGistPageOpaque) (buffer + specialOffset);
1566 | 
1567 | 				if (spgistSection->flags & SPGIST_META)
1568 | 					strcat(flagString, "META|");
1569 | 				if (spgistSection->flags & SPGIST_DELETED)
1570 | 					strcat(flagString, "DELETED|");
1571 | 				if (spgistSection->flags & SPGIST_LEAF)
1572 | 					strcat(flagString, "LEAF|");
1573 | 				if (spgistSection->flags & SPGIST_NULLS)
1574 | 					strcat(flagString, "NULLS|");
1575 | 				if (strlen(flagString))
1576 | 					flagString[strlen(flagString) - 1] = '\0';
1577 | 				printf(" SPGIST Index Section:\n"
1578 | 					   "  Flags: 0x%08x (%s)\n"
1579 | 					   "  nRedirection: %d\n"
1580 | 					   "  nPlaceholder: %d\n\n",
1581 | 					   spgistSection->flags, flagString,
1582 | 					   spgistSection->nRedirection,
1583 | 					   spgistSection->nPlaceholder);
1584 | 			}
1585 | 			break;
1586 | 
1587 | 			/* No idea what type of special section this is */
1588 | 		default:
1589 | 			printf(" Unknown special section type. Type: <%u>.\n", specialType);
1590 | 			exitCode = 1;
1591 | 			break;
1592 | 	}
1593 | 
1594 | 	/* Dump the formatted contents of the special section */
1595 | 	if (blockOptions & BLOCK_FORMAT)
1596 | 	{
1597 | 		if (specialType == SPEC_SECT_ERROR_BOUNDARY)
1598 | 		{
1599 | 			printf(" Error: Special section points off page."
1600 | 				   " Unable to dump contents.\n");
1601 | 
1602 | 			exitCode = 1;
1603 | 		}
1604 | 		else
1605 | 			FormatBinary(buffer, specialSize, specialOffset);
1606 | 	}
1607 | }
1608 | 
1609 | /*	For each block, dump out formatted header and content information */
1610 | static void
1611 | FormatBlock(unsigned int blockOptions,
1612 | 		unsigned int controlOptions,
1613 | 		char *buffer,
1614 | 		BlockNumber currentBlock,
1615 | 		unsigned int blockSize,
1616 | 		bool isToast,
1617 | 		Oid toastOid,
1618 | 		unsigned int toastExternalSize,
1619 | 		char *toastValue,
1620 | 		unsigned int *toastRead)
1621 | {
1622 | 	Page		page = (Page) buffer;
1623 | 	char	   *indent = isToast ? "\t" : "";
1624 | 
1625 | 	pageOffset = blockSize * currentBlock;
1626 | 	specialType = GetSpecialSectionType(buffer, page);
1627 | 
1628 | 	if (!isToast || verbose)
1629 | 		printf("\n%sBlock %4u **%s***************************************\n",
1630 | 			   indent,
1631 | 			   currentBlock,
1632 | 			   (bytesToFormat ==
1633 | 				blockSize) ? "***************" : " PARTIAL BLOCK ");
1634 | 
1635 | 	/* Either dump out the entire block in hex+acsii fashion or
1636 | 	 * interpret the data based on block structure */
1637 | 	if (blockOptions & BLOCK_NO_INTR)
1638 | 		FormatBinary(buffer, bytesToFormat, 0);
1639 | 	else
1640 | 	{
1641 | 		int			rc;
1642 | 
1643 | 		/* Every block contains a header, items and possibly a special
1644 | 		 * section.  Beware of partial block reads though */
1645 | 		rc = FormatHeader(buffer, page, currentBlock, isToast);
1646 | 
1647 | 		/* If we didn't encounter a partial read in the header, carry on... */
1648 | 		if (rc != EOF_ENCOUNTERED)
1649 | 		{
1650 | 			FormatItemBlock(buffer,
1651 | 					page,
1652 | 					isToast,
1653 | 					toastOid,
1654 | 					toastExternalSize,
1655 | 					toastValue,
1656 | 					toastRead);
1657 | 
1658 | 			if (specialType != SPEC_SECT_NONE)
1659 | 				FormatSpecial(buffer);
1660 | 		}
1661 | 	}
1662 | }
1663 | 
1664 | /*	Dump out the content of the PG control file */
1665 | static void
1666 | FormatControl(char *buffer)
1667 | {
1668 | 	unsigned int localPgVersion = 0;
1669 | 	unsigned int controlFileSize = 0;
1670 | 	time_t		cd_time;
1671 | 	time_t		cp_time;
1672 | 
1673 | 	printf
1674 | 		("\n<pg_control Contents> *********************************************\n\n");
1675 | 
1676 | 	/* Check the version */
1677 | 	if (bytesToFormat >= offsetof(ControlFileData, catalog_version_no))
1678 | 		localPgVersion = ((ControlFileData *) buffer)->pg_control_version;
1679 | 
1680 | 	if (localPgVersion >= 72)
1681 | 		controlFileSize = sizeof(ControlFileData);
1682 | 	else
1683 | 	{
1684 | 		printf("pg_filedump: pg_control version %u not supported.\n",
1685 | 			   localPgVersion);
1686 | 		return;
1687 | 	}
1688 | 
1689 | 	/* Interpret the control file if it's all there */
1690 | 	if (bytesToFormat >= controlFileSize)
1691 | 	{
1692 | 		ControlFileData *controlData = (ControlFileData *) buffer;
1693 | 		CheckPoint *checkPoint = &(controlData->checkPointCopy);
1694 | 		pg_crc32	crcLocal;
1695 | 		char	   *dbState;
1696 | 
1697 | 		/* Compute a local copy of the CRC to verify the one on disk */
1698 | 		INIT_CRC32C(crcLocal);
1699 | 		COMP_CRC32C(crcLocal, buffer, offsetof(ControlFileData, crc));
1700 | 		FIN_CRC32C(crcLocal);
1701 | 
1702 | 		/* Grab a readable version of the database state */
1703 | 		switch (controlData->state)
1704 | 		{
1705 | 			case DB_STARTUP:
1706 | 				dbState = "STARTUP";
1707 | 				break;
1708 | 			case DB_SHUTDOWNED:
1709 | 				dbState = "SHUTDOWNED";
1710 | 				break;
1711 | 			case DB_SHUTDOWNED_IN_RECOVERY:
1712 | 				dbState = "SHUTDOWNED_IN_RECOVERY";
1713 | 				break;
1714 | 			case DB_SHUTDOWNING:
1715 | 				dbState = "SHUTDOWNING";
1716 | 				break;
1717 | 			case DB_IN_CRASH_RECOVERY:
1718 | 				dbState = "IN CRASH RECOVERY";
1719 | 				break;
1720 | 			case DB_IN_ARCHIVE_RECOVERY:
1721 | 				dbState = "IN ARCHIVE RECOVERY";
1722 | 				break;
1723 | 			case DB_IN_PRODUCTION:
1724 | 				dbState = "IN PRODUCTION";
1725 | 				break;
1726 | 			default:
1727 | 				dbState = "UNKNOWN";
1728 | 				break;
1729 | 		}
1730 | 
1731 | 		/* convert timestamps to system's time_t width */
1732 | 		cd_time = controlData->time;
1733 | 		cp_time = checkPoint->time;
1734 | 
1735 | 		printf("                          CRC: %s\n"
1736 | 			   "           pg_control Version: %u%s\n"
1737 | 			   "              Catalog Version: %u\n"
1738 | 			   "            System Identifier: " UINT64_FORMAT "\n"
1739 | 			   "                        State: %s\n"
1740 | 			   "                Last Mod Time: %s"
1741 | 			   "       Last Checkpoint Record: Log File (%u) Offset (0x%08x)\n"
1742 | #if PG_VERSION_NUM < 110000
1743 | 			   "   Previous Checkpoint Record: Log File (%u) Offset (0x%08x)\n"
1744 | #endif
1745 | 			   "  Last Checkpoint Record Redo: Log File (%u) Offset (0x%08x)\n"
1746 | 			   "             |-    TimeLineID: %u\n"
1747 | 			   "             |-      Next XID: %u/%u\n"
1748 | 			   "             |-      Next OID: %u\n"
1749 | 			   "             |-    Next Multi: %u\n"
1750 | 			   "             |- Next MultiOff: %u\n"
1751 | 			   "             |-          Time: %s"
1752 | 			   "       Minimum Recovery Point: Log File (%u) Offset (0x%08x)\n"
1753 | 			   "       Maximum Data Alignment: %u\n"
1754 | 			   "        Floating-Point Sample: %.7g%s\n"
1755 | 			   "          Database Block Size: %u\n"
1756 | 			   "           Blocks Per Segment: %u\n"
1757 | 			   "              XLOG Block Size: %u\n"
1758 | 			   "            XLOG Segment Size: %u\n"
1759 | 			   "    Maximum Identifier Length: %u\n"
1760 | 			   "           Maximum Index Keys: %u\n"
1761 | 			   "             TOAST Chunk Size: %u\n\n",
1762 | 			   EQ_CRC32C(crcLocal,
1763 | 						 controlData->crc) ? "Correct" : "Not Correct",
1764 | 			   controlData->pg_control_version,
1765 | 			   (controlData->pg_control_version == PG_CONTROL_VERSION ?
1766 | 				"" : " (Not Correct!)"),
1767 | 			   controlData->catalog_version_no,
1768 | 			   controlData->system_identifier,
1769 | 			   dbState,
1770 | 			   ctime(&(cd_time)),
1771 | 			   (uint32) (controlData->checkPoint >> 32), (uint32) controlData->checkPoint,
1772 | #if PG_VERSION_NUM < 110000
1773 | 			   (uint32) (controlData->prevCheckPoint >> 32), (uint32) controlData->prevCheckPoint,
1774 | #endif
1775 | 			   (uint32) (checkPoint->redo >> 32), (uint32) checkPoint->redo,
1776 | 			   checkPoint->ThisTimeLineID,
1777 | 			   checkPoint->nextXidEpoch, checkPoint->nextXid,
1778 | 			   checkPoint->nextOid,
1779 | 			   checkPoint->nextMulti, checkPoint->nextMultiOffset,
1780 | 			   ctime(&cp_time),
1781 | 			   (uint32) (controlData->minRecoveryPoint >> 32), (uint32) controlData->minRecoveryPoint,
1782 | 			   controlData->maxAlign,
1783 | 			   controlData->floatFormat,
1784 | 			   (controlData->floatFormat == FLOATFORMAT_VALUE ?
1785 | 				"" : " (Not Correct!)"),
1786 | 			   controlData->blcksz,
1787 | 			   controlData->relseg_size,
1788 | 			   controlData->xlog_blcksz,
1789 | 			   controlData->xlog_seg_size,
1790 | 			   controlData->nameDataLen,
1791 | 			   controlData->indexMaxKeys,
1792 | 			   controlData->toast_max_chunk_size);
1793 | 	}
1794 | 	else
1795 | 	{
1796 | 		printf(" Error: pg_control file size incorrect.\n"
1797 | 			   "        Size: Correct <%u>  Received <%u>.\n\n",
1798 | 			   controlFileSize, bytesToFormat);
1799 | 
1800 | 		/* If we have an error, force a formatted dump so we can see
1801 | 		 * where things are going wrong */
1802 | 		controlOptions |= CONTROL_FORMAT;
1803 | 
1804 | 		exitCode = 1;
1805 | 	}
1806 | 
1807 | 	/* Dump hex and ascii representation of data */
1808 | 	if (controlOptions & CONTROL_FORMAT)
1809 | 	{
1810 | 		printf("<pg_control Formatted Dump> *****************"
1811 | 			   "**********************\n\n");
1812 | 		FormatBinary(buffer, bytesToFormat, 0);
1813 | 	}
1814 | }
1815 | 
1816 | /* Dump out the contents of the block in hex and ascii.
1817 |  * BYTES_PER_LINE bytes are formatted in each line. */
1818 | static void
1819 | FormatBinary(char *buffer, unsigned int numBytes, unsigned int startIndex)
1820 | {
1821 | 	unsigned int index = 0;
1822 | 	unsigned int stopIndex = 0;
1823 | 	unsigned int x = 0;
1824 | 	unsigned int lastByte = startIndex + numBytes;
1825 | 
1826 | 	if (numBytes)
1827 | 	{
1828 | 		/* Iterate through a printable row detailing the current
1829 | 		 * address, the hex and ascii values */
1830 | 		for (index = startIndex; index < lastByte; index += BYTES_PER_LINE)
1831 | 		{
1832 | 			stopIndex = index + BYTES_PER_LINE;
1833 | 
1834 | 			/* Print out the address */
1835 | 			if (blockOptions & BLOCK_ABSOLUTE)
1836 | 				printf("  %08x: ", (unsigned int) (pageOffset + index));
1837 | 			else
1838 | 				printf("  %04x: ", (unsigned int) index);
1839 | 
1840 | 			/* Print out the hex version of the data */
1841 | 			for (x = index; x < stopIndex; x++)
1842 | 			{
1843 | 				if (x < lastByte)
1844 | 					printf("%02x", 0xff & ((unsigned) buffer[x]));
1845 | 				else
1846 | 					printf("  ");
1847 | 				if ((x & 0x03) == 0x03)
1848 | 					printf(" ");
1849 | 			}
1850 | 			printf(" ");
1851 | 
1852 | 			/* Print out the ascii version of the data */
1853 | 			for (x = index; x < stopIndex; x++)
1854 | 			{
1855 | 				if (x < lastByte)
1856 | 					printf("%c", isprint(buffer[x]) ? buffer[x] : '.');
1857 | 				else
1858 | 					printf(" ");
1859 | 			}
1860 | 			printf("\n");
1861 | 		}
1862 | 		printf("\n");
1863 | 	}
1864 | }
1865 | 
1866 | /* Dump the binary image of the block */
1867 | static void
1868 | DumpBinaryBlock(char *buffer)
1869 | {
1870 | 	unsigned int x;
1871 | 
1872 | 	for (x = 0; x < bytesToFormat; x++)
1873 | 		putchar(buffer[x]);
1874 | }
1875 | 
1876 | /* Control the dumping of the blocks within the file */
1877 | int
1878 | DumpFileContents(unsigned int blockOptions,
1879 | 		unsigned int controlOptions,
1880 | 		FILE *fp,
1881 | 		unsigned int blockSize,
1882 | 		int blockStart,
1883 | 		int blockEnd,
1884 | 		bool isToast,
1885 | 		Oid toastOid,
1886 | 		unsigned int toastExternalSize,
1887 | 		char *toastValue)
1888 | {
1889 | 	unsigned int	initialRead = 1;
1890 | 	unsigned int	contentsToDump = 1;
1891 | 	unsigned int	toastDataRead = 0;
1892 | 	BlockNumber		currentBlock = 0;
1893 | 	int				result = 0;
1894 | 	/* On a positive block size, allocate a local buffer to store
1895 | 	 * the subsequent blocks */
1896 | 	char		   *block = (char *)malloc(blockSize);
1897 | 	if (!block)
1898 | 	{
1899 | 		printf("\nError: Unable to create buffer of size <%d>.\n",
1900 | 			   blockSize);
1901 | 		result = 1;
1902 | 	}
1903 | 
1904 | 	/* If the user requested a block range, seek to the correct position
1905 | 	 * within the file for the start block. */
1906 | 	if (result == 0 && blockOptions & BLOCK_RANGE)
1907 | 	{
1908 | 		unsigned int	position = blockSize * blockStart;
1909 | 
1910 | 		if (fseek(fp, position, SEEK_SET) != 0)
1911 | 		{
1912 | 			printf("Error: Seek error encountered before requested "
1913 | 				   "start block <%d>.\n", blockStart);
1914 | 			contentsToDump = 0;
1915 | 			result = 1;
1916 | 		}
1917 | 		else
1918 | 			currentBlock = blockStart;
1919 | 	}
1920 | 
1921 | 	/* Iterate through the blocks in the file until you reach the end or
1922 | 	 * the requested range end */
1923 | 	while (contentsToDump && result == 0)
1924 | 	{
1925 | 		bytesToFormat = fread(block, 1, blockSize, fp);
1926 | 
1927 | 		if (bytesToFormat == 0)
1928 | 		{
1929 | 			/* fseek() won't pop an error if you seek passed eof. The next
1930 | 			 * subsequent read gets the error. */
1931 | 			if (initialRead)
1932 | 				printf("Error: Premature end of file encountered.\n");
1933 | 			else if (!(blockOptions & BLOCK_BINARY))
1934 | 				printf("\n*** End of File Encountered. Last Block "
1935 | 					   "Read: %d ***\n", currentBlock - 1);
1936 | 
1937 | 			contentsToDump = 0;
1938 | 		}
1939 | 		else
1940 | 		{
1941 | 			if (blockOptions & BLOCK_BINARY)
1942 | 				DumpBinaryBlock(block);
1943 | 			else
1944 | 			{
1945 | 				if (controlOptions & CONTROL_DUMP)
1946 | 				{
1947 | 					FormatControl(block);
1948 | 					contentsToDump = false;
1949 | 				}
1950 | 				else
1951 | 				{
1952 | 					FormatBlock(blockOptions,
1953 | 							controlOptions,
1954 | 							block,
1955 | 							currentBlock,
1956 | 							blockSize,
1957 | 							isToast,
1958 | 							toastOid,
1959 | 							toastExternalSize,
1960 | 							toastValue,
1961 | 							&toastDataRead);
1962 | 				}
1963 | 			}
1964 | 		}
1965 | 
1966 | 		/* Check to see if we are at the end of the requested range. */
1967 | 		if ((blockOptions & BLOCK_RANGE) &&
1968 | 			(currentBlock >= blockEnd) && (contentsToDump))
1969 | 		{
1970 | 			/* Don't print out message if we're doing a binary dump */
1971 | 			if (!(blockOptions & BLOCK_BINARY))
1972 | 				printf("\n*** End of Requested Range Encountered. "
1973 | 					   "Last Block Read: %d ***\n", currentBlock);
1974 | 			contentsToDump = 0;
1975 | 		}
1976 | 		else
1977 | 			currentBlock++;
1978 | 
1979 | 		initialRead = 0;
1980 | 
1981 | 		/* If TOAST data is read */
1982 | 		if (isToast && toastDataRead >= toastExternalSize)
1983 | 			break;
1984 | 	}
1985 | 
1986 | 	free(block);
1987 | 
1988 | 	return result;
1989 | }
1990 | 
1991 | /* Consume the options and iterate through the given file, formatting as
1992 |  * requested. */
1993 | int
1994 | main(int argv, char **argc)
1995 | {
1996 | 	/* If there is a parameter list, validate the options */
1997 | 	unsigned int validOptions;
1998 | 
1999 | 	validOptions = (argv < 2) ? OPT_RC_COPYRIGHT : ConsumeOptions(argv, argc);
2000 | 
2001 | 	/* Display valid options if no parameters are received or invalid options
2002 | 	 * where encountered */
2003 | 	if (validOptions != OPT_RC_VALID)
2004 | 		DisplayOptions(validOptions);
2005 | 	else
2006 | 	{
2007 | 		/* Don't dump the header if we're dumping binary pages */
2008 | 		if (!(blockOptions & BLOCK_BINARY))
2009 | 			CreateDumpFileHeader(argv, argc);
2010 | 
2011 | 		/* If the user has not forced a block size, use the size of the
2012 | 		 * control file data or the information from the block 0 header */
2013 | 		if (controlOptions)
2014 | 		{
2015 | 			if (!(controlOptions & CONTROL_FORCED))
2016 | 				blockSize = sizeof(ControlFileData);
2017 | 		}
2018 | 		else if (!(blockOptions & BLOCK_FORCED))
2019 | 			blockSize = GetBlockSize(fp);
2020 | 
2021 | 		exitCode = DumpFileContents(blockOptions,
2022 | 				controlOptions,
2023 | 				fp,
2024 | 				blockSize,
2025 | 				blockStart,
2026 | 				blockEnd,
2027 | 				false /* is toast realtion */,
2028 | 				0,    /* no toast Oid */
2029 | 				0,    /* no toast external size */
2030 | 				NULL  /* no out toast value */
2031 | 				);
2032 | 	}
2033 | 
2034 | 	if (fp)
2035 | 		fclose(fp);
2036 | 
2037 | 	exit(exitCode);
2038 | }
2039 | 


--------------------------------------------------------------------------------