├── .gitignore
├── LICENSE.md
├── README.md
├── admin
    └── db_creation_date.sql
├── diff
    └── diff-tables.sql
├── global_search
    ├── README.md
    └── global_search.sql
├── hamming_weight
    ├── Makefile
    ├── hamming_weight.c
    └── hamming_weight.sql
├── large_objects
    ├── lo_digest.sql
    └── lo_size.sql
├── pivots
    ├── dynamic-pivot-create.sql
    └── dynamic-pivot-refcursor.sql
├── psql-cli
    ├── psql-coproc-functions.sh
    ├── psql-edit-replace.sh
    ├── psql-quote-meta.bash
    ├── psqlrc-for-edit-replace
    └── tar-to-pg-copy.sh
├── strings
    ├── parse-option.sql
    ├── plperl
    │   ├── env_vars.sql
    │   └── multi_replace_plperl.sql
    └── utf8-truncate.sql
└── tsearch
    └── dict_maxlen
        ├── Makefile
        ├── README.md
        ├── dict_maxlen--1.0.sql
        ├── dict_maxlen.c
        └── dict_maxlen.control


/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore backup files
2 | *~
3 | 
4 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # Copyright and License
 2 | 
 3 | Copyright (c) 2016-2024, Daniel Vérité
 4 | 
 5 | Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies.
 6 | 
 7 | In no event shall Daniel Vérité be liable to any party for direct, indirect, special, incidental, or consequential damages, including lost profits, arising out of the use of this software and its documentation, even if Daniel Vérité has been advised of the possibility of such damage.
 8 | 
 9 | Daniel Vérité specifically disclaims any warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The software provided hereunder is on an "AS IS" basis, and Daniel Vérité has no obligations to provide maintenance, support, updates, enhancements, or modifications.
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PostgreSQL functions
 2 | 
 3 | A repository of custom PostgreSQL functions and extensions.
 4 | 
 5 | ## admin/db\_creation\_date
 6 | A `plperlu` function returning the creation date of a database,
 7 | based on the creation time of the database directory
 8 | under `$PGATA/base`.
 9 | 
10 | ## diff_tables
11 | A simple plpgsql function that takes two table names (through the
12 | `regclass` type), builds a query comparing their contents, runs
13 | it, and returns a set of diff-like results with the rows that differ.
14 | It does not require a primary key on tables to compare.
15 | 
16 | ## dynamic_pivot
17 | Return a CURSOR pointing to pivoted results of a query passed as the
18 | 1st parameter, with sorted headers passed as a query as the 2nd
19 | parameter.  
20 | See https://postgresql.verite.pro/blog/2018/06/19/crosstab-pivot.html for
21 | a lot of context about this function.
22 | 
23 | ## global_search
24 | A `plpgsql` function that finds occurrences of a string or more
25 | generally perform any kind of text-based matching in all or some of the
26 | tables of an entire database.
27 | It returns the table, column name, `ctid` and column's value of the rows
28 | that match.
29 | The search can be limited to an array of tables and/or of
30 | schemas. Progress is optionally reported by emitting `raise info`
31 | messages.
32 | 
33 | ## hamming_weight
34 | C functions that return the number of bits set to `1` in a bytea, int
35 | or bigint value. The `bytea` variant is available as a built-in function
36 | (named `bit_count`) since PostgreSQL 14.
37 | 
38 | ## large_objects
39 | ### lo_size
40 | A plpgsql function that returns the size of a given large object.
41 | 
42 | ### lo_digest
43 | A plperlu function that returns the digest (hash output) of a large
44 | object for any hash supported by perl's Digest module.
45 | 
46 | ## strings/parse_option
47 | A simple function to parse name=value settings.
48 | 
49 | ## strings/plperl/multi_replace
50 | Replace strings by other strings within a larger text, with
51 | Perl s// operator, in a single pass.
52 | Each string in the first array is replaced by the element at the same
53 | index in the second array.
54 | 
55 | ## strings/utf8_truncate
56 | Truncate an UTF-8 string to a given number of bytes, respecting the
57 | constraint that any multibyte sequence at the end must be complete.
58 | 
59 | ## tsearch/dict_maxlen
60 | A text search dictionary to filter out tokens longer than a given length.
61 | 
62 | ## psql-cli
63 | psqlrc declarations, companion scripts, tricks for the psql command-line interpreter.
64 | 


--------------------------------------------------------------------------------
/admin/db_creation_date.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Return the creation date of a database based on its directory.
 3 |  * Requires superuser privileges.
 4 |  * Example:
 5 |  *  SELECT datname, db_creation_date(oid) FROM pg_database;
 6 |  */
 7 | CREATE FUNCTION db_creation_date(oid) RETURNS timestamptz as $$
 8 |   my $oid=shift;
 9 |   my $rv = spi_exec_query("SELECT setting FROM pg_settings WHERE name='data_directory'")
10 |      or elog(ERROR, "cannot read 'data_directory' setting");
11 |   my $datadir = $rv->{rows}[0]->{setting};
12 |   my @info = stat("$datadir/base/$oid");
13 |   if (!@info) {
14 |      elog(ERROR, "cannot stat database directory: $!");
15 |   }
16 |   my $ctime = $info[10];
17 |   $rv = spi_exec_query("SELECT to_timestamp($ctime) as t") or elog(ERROR, "query error");
18 |   return $rv->{rows}[0]->{t};
19 | $$ LANGUAGE plperlu;
20 | 


--------------------------------------------------------------------------------
/diff/diff-tables.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |  Take two table names (through the `regtable` type),
 3 |  build a query comparing their contents, execute it, and
 4 |  return a set of diff-like results with the rows that differ.
 5 | */
 6 | CREATE FUNCTION diff_tables(t1 regclass, t2 regclass) 
 7 |  returns TABLE("+/-" text, line text)
 8 | AS $func$
 9 | BEGIN
10 |   RETURN QUERY EXECUTE format($$
11 |    SELECT '+', d1.*::text FROM (
12 |     SELECT * FROM %s
13 |      EXCEPT
14 |     SELECT * FROM %s) AS d1
15 |    UNION ALL
16 |    SELECT '-', d2.*::text FROM (
17 |     SELECT * FROM %s
18 |      EXCEPT
19 |     SELECT * FROM %s) AS d2
20 |    $$, t2, t1, t1, t2);
21 | END
22 | $func$ language plpgsql;
23 | 


--------------------------------------------------------------------------------
/global_search/README.md:
--------------------------------------------------------------------------------
  1 | ## global_search
  2 | 
  3 | ```
  4 | CREATE FUNCTION global_search(
  5 |   search_term text,
  6 |   comparator regproc default 'pg_catalog.texteq',   -- comparison function
  7 |   tables text[] default null,
  8 |   schemas text[] default null,
  9 |   progress text default null,   -- 'tables', 'hits', 'all'
 10 |   max_width int default -1     -- returned value's max width in chars, or -1 for unlimited
 11 | )
 12 | RETURNS table(schemaname text, tablename text, columnname text,
 13 |               columnvalue text, rowctid tid)
 14 | ```
 15 | 
 16 | `global_search` is a plpgsql function that finds `search_term` in all
 17 | columns of all accessible tables. The list of schemas to scan by
 18 | default are those accessible through the current `search_path`
 19 | (except `pg_catalog` that must be explicitly added if desired).
 20 | The `tables` or `schemas` parameters can be
 21 | used to scan a specific subset of tables and/or schemas.
 22 | 
 23 | The comparison is done with `comparator`, which must be the OID of a
 24 | function taking two text arguments and returning a bool with
 25 | the result of the match.
 26 | It will be called with each column value (cast as `text`) being tested
 27 | as its first argument, and `search_term` as its second argument.
 28 | Typically, you should just pass the name of the function, and the implicit
 29 | casting to `regproc` will take care of translating it into an
 30 | OID. Other than `texteq`, built-in functions that may be passed directly
 31 | are `textregexeq`, `texticregexeq`, `textlike`, `texticlike`
 32 | for regular expressions and the SQL `like` operator (`ic` are the case
 33 | insensitive variants). See also the the `\doS+` command in `psql`
 34 | for more.
 35 | 
 36 | The progress is optionally reported with `raise info` messages, immediately
 37 | available to the caller, contrary to the function result that is available
 38 | only on function completion.
 39 | When `progress` is `tables` or `all`, each table searched into is reported.
 40 | When `progress` is `hits` or `all`, each hit is reported.
 41 | 
 42 | In all cases, the hits are returned by the function in the form of a
 43 | table-like value: `(schemaname text, tablename text, columnname text,
 44 | columnvalue text, rowctid tid)`.
 45 | 
 46 | Beware that `ctid` are transient, since rows can be relocated
 47 | physically. You may use the `REPEATABLE READ` transaction isolation mode to
 48 | ensure that the corresponding version of the row is kept around until you're done
 49 | with the `ctid`.
 50 | 
 51 | The returned column values might be suppressed or truncated by setting
 52 | a maximum width with the `max_width` argument.
 53 | 
 54 | 
 55 | ### Examples:
 56 | 
 57 | 
 58 | ```
 59 | -- Setup
 60 | => CREATE TABLE tst(t text);
 61 | => INSERT INTO tst VALUES('foo'),('bar'),('baz'),('barbaz'),('Foo'),(null);
 62 | ```
 63 | 
 64 | #### Simple equality search:
 65 | ```
 66 | => SELECT * FROM global_search('Foo');
 67 | 
 68 |  schemaname | tablename | columnname | columnvalue | rowctid 
 69 | ------------+-----------+------------+-------------+---------
 70 |  public     | tst       | t          | Foo         | (0,5)
 71 | ```
 72 | 
 73 | 
 74 | #### Regular expression matching
 75 | ```sql
 76 | => SELECT * FROM global_search('^bar', comparator=>'textregexeq');
 77 | 
 78 |  schemaname | tablename | columnname | columnvalue | rowctid 
 79 | ------------+-----------+------------+-------------+---------
 80 |  public     | tst       | t          | bar         | (0,2)
 81 |  public     | tst       | t          | barbaz      | (0,4)
 82 | (2 rows)
 83 | ```
 84 | 
 85 | 
 86 | #### Case insensitive LIKE (equivalent to: column ILIKE search_term)
 87 | ```sql
 88 | => SELECT * FROM global_search('fo%', comparator=>'texticlike');
 89 | 
 90 |  schemaname | tablename | columnname | columnvalue | rowctid 
 91 | ------------+-----------+------------+-------------+---------
 92 |  public     | tst       | t          | foo         | (0,1)
 93 |  public     | tst       | t          | Foo         | (0,5)
 94 | (2 rows)
 95 | ```
 96 | 
 97 | Sometimes there are several functions with the same name. In the
 98 | case of `texticlike`, the `citext` extension overloads this function
 99 | with two variants that take a `citext` parameter instead of `text`.
100 | In that kind of case, you want to use the `regprocedure` cast with a
101 | function name qualified with arguments to disambiguate.
102 | 
103 | For instance:
104 | 
105 | ```sql
106 | SELECT * FROM global_search('fo%', comparator=>'texticlike(text,text)'::regprocedure);
107 | ```
108 | 
109 | #### Find all "incorrect" values with a custom function
110 | The following custom function finds values that do not conform
111 | to the Unicode NFC normalization
112 | Note that even though the second argument is not used in that case,
113 | it needs to be declared nonetheless.
114 | 
115 | ```sql
116 | => CREATE FUNCTION pg_temp.check_normal_form(text,text)
117 |  returns boolean as
118 | 'select $1 is not NFC normalized' -- requires Postgres 13 or newer
119 | language sql;
120 | 
121 | => INSERT INTO tst VALUES (E'El Nin\u0303o');
122 | => SELECT * FROM global_search(null, comparator=>'pg_temp.check_normal_form');
123 | 
124 |  schemaname | tablename | columnname | columnvalue | rowctid 
125 | ------------+-----------+------------+-------------+---------
126 |  public     | tst       | t          | El Niño     | (0,7)
127 | ```
128 | 
129 | 
130 | #### Find references to the OID of a namespace
131 | That sort of query can be useful when exploring the catalogs.
132 | `2200` is the OID of the `public` namespace.
133 | ```sql
134 | => SELECT * FROM global_search(
135 |      (select oid::text from pg_namespace where nspname='public'), 
136 |      schemas=>'{pg_catalog}'
137 |    );
138 | 
139 |  schemaname |   tablename    |  columnname  | columnvalue | rowctid 
140 | ------------+----------------+--------------+-------------+---------
141 |  pg_catalog | pg_proc        | pronamespace | 2200        | (95,5)
142 |  pg_catalog | pg_description | objoid       | 2200        | (28,58)
143 |  pg_catalog | pg_namespace   | oid          | 2200        | (0,8)
144 |  pg_catalog | pg_depend      | refobjid     | 2200        | (13,70)
145 |  pg_catalog | pg_init_privs  | objoid       | 2200        | (2,27)
146 | (5 rows)
147 | ```
148 | 
149 | #### Case and accent insensitive global search
150 | 
151 | Using an ICU collation for advanced non-bitwise equality tests.
152 | 
153 | ```
154 | -- create a collation that ignores accents and case
155 | => CREATE COLLATION nd (
156 |   provider = 'icu',
157 |   locale = '@colStrength=primary',
158 |   deterministic = false
159 | );
160 | 
161 | => CREATE FUNCTION pg_temp.ci_equal(text,text)
162 |  returns boolean as
163 | 'select $1=$2 collate "nd"'
164 | language sql;
165 | 
166 | => SELECT * FROM global_search('foo', comparator=>'pg_temp.ci_equal');
167 | 
168 |  schemaname | tablename | columnname | columnvalue | rowctid 
169 | ------------+-----------+------------+-------------+---------
170 |  public     | tst       | t          | foo         | (0,1)
171 |  public     | tst       | t          | Foo         | (0,5)
172 | (2 rows)
173 | 
174 | ```
175 | 


--------------------------------------------------------------------------------
/global_search/global_search.sql:
--------------------------------------------------------------------------------
 1 | CREATE or replace FUNCTION global_search(
 2 |   search_term text,
 3 |   comparator regproc default 'pg_catalog.texteq',   -- comparison function
 4 |   tables text[] default null,
 5 |   schemas text[] default null,
 6 |   progress text default null,   -- 'tables', 'hits', 'all'
 7 |   max_width int default -1     -- returned value's max width in chars, or -1 for unlimited
 8 | )
 9 | RETURNS table(schemaname text, tablename text, columnname text, columnvalue text, rowctid tid)
10 | AS $$
11 | DECLARE
12 |   query text;
13 |   clauses text[];
14 |   columns text[];
15 |   pos int;
16 |   positions int[];
17 |   col_expr text;
18 | BEGIN
19 |   IF schemas IS NULL THEN
20 |     -- by default, exclude pg_catalog and non-readable schemas
21 |     schemas := current_schemas(false);
22 |   END IF;
23 | 
24 |   FOR schemaname,tablename IN
25 |     -- select tables for which all columns are readable
26 |     SELECT t.table_schema, t.table_name
27 |       FROM information_schema.tables t
28 |       JOIN information_schema.schemata s ON
29 | 	(s.schema_name=t.table_schema)
30 |     WHERE (t.table_name=ANY(tables) OR tables is null)
31 |       AND t.table_schema=ANY(schemas)
32 |       AND t.table_type='BASE TABLE'
33 |       AND EXISTS (SELECT 1 FROM information_schema.table_privileges p
34 | 	WHERE p.table_name=t.table_name
35 | 	  AND p.table_schema=t.table_schema
36 | 	  AND p.privilege_type='SELECT'
37 |       )
38 |   LOOP
39 |     IF (progress in ('tables','all')) THEN
40 |       RAISE INFO '%', format('Searching globally in table: %I.%I',
41 |          schemaname, tablename);
42 |     END IF;
43 | 
44 |     -- Get lists of columns and per-column boolean expressions
45 |     SELECT array_agg(column_name ORDER BY ordinal_position),
46 |            array_agg(format('%s(cast(%I as text), %L)', comparator, column_name, search_term)
47 | 	     ORDER BY ordinal_position)
48 |       FROM information_schema.columns
49 |       WHERE table_name=tablename
50 |         AND table_schema=schemaname
51 |     INTO columns, clauses;
52 | 
53 |     -- Main query to get each matching row and the ordinal positions of matching columns
54 |     query := format('SELECT s.ctid, p from (SELECT ctid,'
55 | 		    'array_positions(array[%s],true) AS p FROM ONLY %I.%I) s'
56 | 		    ' WHERE cardinality(p)>0',
57 |       array_to_string(clauses, ','), schemaname, tablename );
58 | 
59 |     FOR rowctid,positions IN EXECUTE query -- for each matching row
60 |     LOOP
61 |       FOREACH pos IN ARRAY positions -- for each matching field
62 |       LOOP
63 | 	columnname := columns[pos];
64 | 	IF (max_width <> 0) THEN -- fetch value only if needed
65 | 	  IF (max_width > 0) THEN
66 | 	    -- fetch a truncated value
67 | 	    col_expr := format('left(%I,%s)', columnname, max_width);
68 | 	  ELSE
69 | 	    col_expr := format('%I', columnname);
70 | 	  END IF;
71 | 	  EXECUTE format('SELECT %s FROM %I.%I WHERE ctid=''%s''',
72 | 	    col_expr, schemaname, tablename, rowctid) INTO columnvalue;
73 | 	ELSE
74 | 	  columnvalue:=null;
75 | 	END IF;
76 | 	IF (progress in ('hits', 'all')) THEN
77 | 	  RAISE INFO '%', format('Found in %I.%I.%I at ctid %s',
78 | 	     schemaname, tablename, columnname, rowctid);
79 | 	END IF;
80 | 	RETURN NEXT;
81 |       END LOOP;
82 |     END LOOP;
83 |   END LOOP; -- for each table
84 | END;
85 | $$ language plpgsql;
86 | 


--------------------------------------------------------------------------------
/hamming_weight/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for hamming_weight
 2 | # Assumes that pg_config is installed
 3 | # See https://www.postgresql.org/docs/current/static/app-pgconfig.html
 4 | # and https://www.postgresql.org/docs/current/static/extend-pgxs.html
 5 | # on how to extend PostgreSQL with custom functions in general
 6 | 
 7 | ifndef PG_CONFIG
 8 | PG_CONFIG = pg_config
 9 | endif
10 | 
11 | INCLUDEDIRS := -I.
12 | INCLUDEDIRS += -I$(shell $(PG_CONFIG) --includedir-server)
13 | INCLUDEDIRS += -I$(shell $(PG_CONFIG) --includedir)
14 | CFLAGS := $(shell $(PG_CONFIG) --cflags)
15 | CFLAGS_SL := $(shell $(PG_CONFIG) --cflags_sl)
16 | 
17 | # If you are using shared libraries, make sure this location can be
18 | # found at runtime (see /etc/ld.so.conf and ldconfig command).
19 | LIBDIR = -L$(shell $(PG_CONFIG) --libdir)
20 | # This is where the shared object should be installed
21 | LIBINSTALL = $(shell $(PG_CONFIG) --pkglibdir)
22 | 
23 | all: hamming_weight.so
24 | 
25 | hamming_weight.so: hamming_weight.c Makefile
26 | 	gcc $(CFLAGS) $(CFLAGS_SL)  $(INCLUDEDIRS) -o hamming_weight.o -c hamming_weight.c
27 | 	gcc -shared -o hamming_weight.so hamming_weight.o $(LIBDIR) -lpq -lm
28 | 
29 | install: hamming_weight.so
30 | 	cp $? $(LIBINSTALL)
31 | 


--------------------------------------------------------------------------------
/hamming_weight/hamming_weight.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2015-2020 Daniel VERITE
 3 |  * BSD license, see README.md
 4 |  */
 5 | 
 6 | #include "postgres.h"
 7 | #include <string.h>
 8 | #include "fmgr.h"
 9 | 
10 | PG_MODULE_MAGIC;
11 | 
12 | Datum hamming_weight_int4(PG_FUNCTION_ARGS);
13 | Datum hamming_weight_int8(PG_FUNCTION_ARGS);
14 | Datum hamming_weight_bytea(PG_FUNCTION_ARGS);
15 | 
16 | /* number of bits for all 8-bit numbers */
17 | static const int bitcount[256]={
18 | 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
19 | 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
20 | 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
21 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
22 | 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
23 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
24 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
25 | 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
26 | 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
27 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
28 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
29 | 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
30 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
31 | 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
32 | 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
33 | 	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
34 | };
35 | 
36 | /* SQL function: hamming_weight(bytea) returns int4 */
37 | PG_FUNCTION_INFO_V1(hamming_weight_bytea);
38 | 
39 | /* returns the Hamming weight of a bytea value (number of 1's bits) */
40 | Datum
41 | hamming_weight_bytea(PG_FUNCTION_ARGS)
42 | {
43 | 	bytea *v = PG_GETARG_BYTEA_PP(0);
44 | 	int64 count = 0;
45 | 	int len, i;
46 | 	unsigned char* buf;
47 | 
48 | 	len = VARSIZE_ANY_EXHDR(v); /* should be lower than 2^30 */
49 | 	buf = (unsigned char*)VARDATA_ANY(v);
50 | 	for (i=0; i<len; i++) {
51 | 		count += bitcount[buf[i]&0xff];
52 | 	}
53 | 	PG_RETURN_INT64(count);
54 | }
55 | 
56 | /* SQL function: hamming_weight(int4) returns int4 */
57 | PG_FUNCTION_INFO_V1(hamming_weight_int4);
58 | 
59 | /* returns the Hamming weight of an int4 value (number of 1's bits) */
60 | Datum
61 | hamming_weight_int4(PG_FUNCTION_ARGS)
62 | {
63 | 	int32 val = PG_GETARG_INT32(0);
64 | 	int count = bitcount[(val>>24)&0xff] + bitcount[(val>>16)&0xff] + 
65 | 			bitcount[(val>>8)&0xff] + bitcount[val&0xff];
66 | 	PG_RETURN_INT32(count);
67 | }
68 | 
69 | /* SQL function: hamming_weight(int8) returns int4 */
70 | PG_FUNCTION_INFO_V1(hamming_weight_int8);
71 | 
72 | /* returns the Hamming weight of an int4 value (number of 1's bits) */
73 | Datum
74 | hamming_weight_int8(PG_FUNCTION_ARGS)
75 | {
76 | 	int64 val = PG_GETARG_INT64(0);
77 | 	int32 v32 = val & 0xffffffff;
78 | 	int count = bitcount[(v32>>24)&0xff] + bitcount[(v32>>16)&0xff] + 
79 | 			bitcount[(v32>>8)&0xff] + bitcount[v32&0xff];
80 | 	v32 = (val >> 32) & 0xffffffff;
81 | 	count += bitcount[(v32>>24)&0xff] + bitcount[(v32>>16)&0xff] + 
82 | 			bitcount[(v32>>8)&0xff] + bitcount[v32&0xff];
83 | 	PG_RETURN_INT32(count);
84 | }
85 | 


--------------------------------------------------------------------------------
/hamming_weight/hamming_weight.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Compute the Hamming weight (count of bits set to '1')
 3 |   for bytea, int4, int8
 4 | */
 5 | CREATE FUNCTION hamming_weight(bytea) RETURNS bigint
 6 |  AS '$libdir/hamming_weight.so', 'hamming_weight_bytea'
 7 | LANGUAGE C immutable strict;
 8 | 
 9 | CREATE FUNCTION hamming_weight(int) RETURNS int
10 |  AS '$libdir/hamming_weight.so', 'hamming_weight_int4'
11 | LANGUAGE C immutable strict;
12 | 
13 | CREATE FUNCTION hamming_weight(bigint) RETURNS int
14 |  AS '$libdir/hamming_weight.so', 'hamming_weight_int8'
15 | LANGUAGE C immutable strict;
16 | 


--------------------------------------------------------------------------------
/large_objects/lo_digest.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Return the digest (hash output) of a large object for
 3 |    any hash algorithm supported by perl's Digest module.
 4 | 
 5 |    Input parameters:
 6 |    - name of hash as text (see https://perldoc.perl.org/Digest.html)
 7 |    - OID of large object to digest
 8 |    - optional chunk size as int. Contents are digested one chunk at a time.
 9 | */
10 | CREATE FUNCTION lo_digest(text, oid, int default 2048)
11 |  RETURNS bytea AS
12 | $$
13 |   use Digest;
14 |   use strict;
15 | 
16 |   my $ctxt = Digest->new($_[0]);
17 |   my $sz=$_[2];
18 |   elog(ERROR, "Invalid chunk size: $sz") if ($sz<=0);
19 |   my $sth = spi_query("SELECT lo_open($_[1], 262144) as fd");
20 |   my $row = spi_fetchrow($sth);
21 |   spi_cursor_close($sth);
22 | 
23 |   if ($row) {
24 |      my $fd = $row->{fd};
25 |      my $bytes;
26 |      my $plan = spi_prepare("SELECT loread($fd, $sz) as chunk");
27 |      do {
28 |        $sth = spi_query_prepared($plan);
29 |        $row = spi_fetchrow($sth);
30 |        $bytes = decode_bytea($row->{chunk});
31 |        $ctxt->add($bytes);
32 |        spi_cursor_close($sth);
33 |      } while (length($bytes)>0);
34 |      spi_exec_query("select lo_close($fd)");
35 |      spi_freeplan($plan);
36 |   }
37 |   return encode_bytea($ctxt->digest);
38 | $$ LANGUAGE plperlu;
39 | 


--------------------------------------------------------------------------------
/large_objects/lo_size.sql:
--------------------------------------------------------------------------------
 1 | /* Return the size (bigint) of the large object passed as parameter */
 2 | CREATE FUNCTION lo_size(oid) RETURNS bigint
 3 | AS $$
 4 | DECLARE
 5 |  fd integer;
 6 |  sz bigint;
 7 | BEGIN
 8 |  fd := lo_open($1, 262144); -- INV_READ
 9 |  if (fd < 0) then
10 |    raise exception 'Failed to open large object %', $1;
11 |  end if;
12 |  sz := lo_lseek64(fd, 0, 2);
13 |  if (lo_close(fd) <> 0) then
14 |    raise exception 'Failed to close large object %', $1;
15 |  end if;
16 |  return sz;
17 | END;
18 | $$ LANGUAGE plpgsql VOLATILE;
19 | 


--------------------------------------------------------------------------------
/pivots/dynamic-pivot-create.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |  Create a table or view for the pivoted results of a query
 3 |  passed as the 1st parameter, with sorted headers passed as a query
 4 |  as the 2nd parameter.
 5 |  3rd parameter: name of the table or view
 6 |  4th parameter: object type:
 7 |    'v': view, 'tv': temp view, 't': table, 'tt': temp table, 'm': materialized view
 8 | 
 9 |  See https://postgresql.verite.pro/blog/2018/06/19/crosstab-pivot.html
10 |  for a lot of context about this function.
11 | 
12 |  Example usage:
13 |     CREATE TABLE tmp1 (row_id, key, val) AS
14 |     VALUES
15 |     (1, 'a', 1),
16 |     (1, 'b', 2),
17 |     (2, 'a', 3),
18 |     (2, 'b', 4);
19 | 
20 |     SELECT dynamic_pivot_create(
21 |     'select row_id, key, val from tmp1',
22 |     'select distinct key from tmp1',
23 |     'm_tmp1', 'mv');
24 | 
25 |     SELECT * FROM m_tmp1;
26 | */
27 | CREATE OR REPLACE FUNCTION dynamic_pivot_create(central_query text,
28 |                                                 headers_query text,
29 |                                                 obj_name text,
30 |                                                 obj_type text default 'tv')
31 | RETURNS void AS
32 | $$
33 | DECLARE
34 |   left_column text;
35 |   header_column text;
36 |   value_column text;
37 |   h_value text;
38 |   headers_clause text;
39 |   query text;
40 |   j json;
41 |   r record;
42 |   i int:=1;
43 | BEGIN
44 |   -- find the column names of the source query
45 |   EXECUTE 'select row_to_json(_r.*) from (' ||  central_query || ') AS _r' into j;
46 |   FOR r in SELECT * FROM json_each_text(j)
47 |   LOOP
48 |     IF (i=1) THEN left_column := r.key;
49 |       ELSEIF (i=2) THEN header_column := r.key;
50 |       ELSEIF (i=3) THEN value_column := r.key;
51 |     END IF;
52 |     i := i+1;
53 |   END LOOP;
54 | 
55 |   -- build the dynamic transposition query, based on the canonical model
56 |   -- (CASE WHEN...)
57 |   FOR h_value in EXECUTE headers_query
58 |   LOOP
59 |     headers_clause := concat(headers_clause,
60 |      format(chr(10)||',min(case when %I=%L then %I::text end) as %I',
61 |            header_column,
62 | 	   h_value,
63 | 	   value_column,
64 | 	   h_value ));
65 |   END LOOP;
66 | 
67 |   query := format('CREATE %s %I AS SELECT %I %s FROM (select *,row_number() over() as rn from (%s) AS _c) as _d GROUP BY %I order by min(rn)',
68 |            case obj_type
69 | 	     when 't'  then 'TABLE'
70 | 	     when 'tt' then 'TEMP TABLE'
71 | 	     when 'v'  then 'VIEW'
72 | 	     when 'tv' then 'TEMP VIEW'
73 | 	     when 'mv' then 'MATERIALIZED VIEW'
74 | 	     else 'VIEW'
75 |            end,
76 |            obj_name,
77 |            left_column,
78 | 	   headers_clause,
79 | 	   central_query,
80 | 	   left_column);
81 | 
82 |  -- RAISE NOTICE '%', query;
83 |  EXECUTE query;
84 | 
85 | END
86 | $$ LANGUAGE plpgsql;
87 | 


--------------------------------------------------------------------------------
/pivots/dynamic-pivot-refcursor.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |  Return a CURSOR pointing to pivoted results of a query
 3 |  passed as the 1st parameter, with sorted headers passed as a query
 4 |  as the 2nd parameter.
 5 |  The cursor name can be passed as an optional 3rd parameter, or a name
 6 |  will be automatically assigned.
 7 |  See https://postgresql.verite.pro/blog/2018/06/19/crosstab-pivot.html
 8 |  for a lot of context about this function.
 9 | 
10 |   Example usage:
11 |     CREATE TABLE tmp1 (row_id, key, val) AS
12 |     VALUES
13 |     (1, 'a', 1),
14 |     (1, 'b', 2),
15 |     (2, 'a', 3),
16 |     (2, 'b', 4);
17 | 
18 |     BEGIN;
19 | 
20 |     SELECT dynamic_pivot(
21 |     'select row_id, key, val from tmp1',
22 |     'select distinct key from tmp1',
23 |     'tmpcur');
24 | 
25 |      FETCH ALL FROM "tmpcur";
26 | 
27 |      COMMIT;
28 | 
29 | */
30 | CREATE FUNCTION dynamic_pivot(central_query text,
31 |                               headers_query text,
32 |                               INOUT cname refcursor default null)
33 |  RETURNS refcursor AS
34 | $$
35 | DECLARE
36 |   left_column text;
37 |   header_column text;
38 |   value_column text;
39 |   h_value text;
40 |   headers_clause text;
41 |   query text;
42 |   j json;
43 |   r record;
44 |   i int:=1;
45 | BEGIN
46 |   -- find the column names of the source query
47 |   EXECUTE 'select row_to_json(_r.*) from (' ||  central_query || ') AS _r' into j;
48 |   FOR r in SELECT * FROM json_each_text(j)
49 |   LOOP
50 |     IF (i=1) THEN left_column := r.key;
51 |       ELSEIF (i=2) THEN header_column := r.key;
52 |       ELSEIF (i=3) THEN value_column := r.key;
53 |     END IF;
54 |     i := i+1;
55 |   END LOOP;
56 | 
57 |   -- build the dynamic transposition quer, based on the canonical model
58 |   -- (CASE WHEN...)
59 |   FOR h_value in EXECUTE headers_query
60 |   LOOP
61 |     headers_clause := concat(headers_clause,
62 |      format(chr(10)||',min(case when %I=%L then %I::text end) as %I',
63 |            header_column,
64 | 	   h_value,
65 | 	   value_column,
66 | 	   h_value ));
67 |   END LOOP;
68 | 
69 |   query := format('SELECT %I %s FROM (select *,row_number() over() as rn from (%s) AS _c) as _d GROUP BY %I order by min(rn)',
70 |            left_column,
71 | 	   headers_clause,
72 | 	   central_query,
73 | 	   left_column);
74 | 
75 |   -- open the cursor so the caller can FETCH right away.
76 |   -- if cname is not null it will be used as the name of the cursor,
77 |   -- otherwise a name "<unnamed portal unique-number>" will be generated.
78 |   OPEN cname FOR execute query;
79 | END
80 | $$ LANGUAGE plpgsql;
81 | 


--------------------------------------------------------------------------------
/psql-cli/psql-coproc-functions.sh:
--------------------------------------------------------------------------------
 1 | #
 2 | # Bash functions to use psql as a coprocess
 3 | #
 4 | 
 5 | # Pass psql arguments
 6 | function psql_coproc
 7 | {
 8 |     coproc PSQL { psql "$@" ; }
 9 | }
10 | 
11 | function get_uuid
12 | {
13 |     if [ -f /proc/sys/kernel/random/uuid ]; then
14 | 	cat /proc/sys/kernel/random/uuid  # linux-specific
15 |     else
16 | 	uuidgen
17 |     fi
18 | }
19 | 
20 | function end_marker
21 | {
22 |     echo "-- END RESULTS MARK -- $(get_uuid) --"
23 | }
24 | 
25 | function psql_check_alive
26 | {
27 |     if [[ -z "$PSQL_PID" ]]; then exit 1; fi
28 | }
29 | 
30 | # Send one psql command and get back results
31 | function psql_command
32 | {
33 |     end=$(end_marker)
34 | 
35 |     psql_check_alive
36 |     echo "$1"  >&${PSQL[1]}
37 |     echo "\\echo '$end'" >&${PSQL[1]}
38 | 
39 |     psql_check_alive
40 |     while read -r -u ${PSQL[0]} result
41 |     do
42 | 	if [[ $result = $end ]]; then
43 | 	    break
44 | 	fi
45 | 	echo $result
46 |     done
47 | }
48 | 
49 | function psql_quit
50 | {
51 |     echo '\q' >&${PSQL[1]}
52 | }
53 | 
54 | # Takes a list of queries to run in a transaction
55 | # Retry the entire transaction if a transient error
56 | # occurs
57 | function retriable_transaction
58 | {
59 |   while true
60 |   do
61 |     psql_command "BEGIN;"
62 |     for query in "$@"
63 |     do
64 |       results=$(psql_command "$query")
65 |       # check for errors
66 |       sqlstate=$(psql_command '\echo :SQLSTATE')
67 |       case "$sqlstate" in
68 | 	00000)
69 | 	  echo "$results"   # output results of $query
70 | 	  ;;
71 | 	57014 | 40001 | 40P01)
72 | 	  # Rollback and retry on
73 | 	  # query canceled, or serialization failure, or deadlock
74 | 	  # see https://www.postgresql.org/docs/current/errcodes-appendix.html
75 | 	  psql_command "ROLLBACK;"
76 | 	  continue 2;  # restart transaction at first query
77 | 	  ;;
78 | 	*)
79 | 	  # rollback and stop
80 | 	  err=$(psql_command '\echo :LAST_ERROR_MESSAGE');
81 | 	  echo 1>&2 "SQL error: $sqlstate $err";
82 | 	  psql_command "ROLLBACK;"
83 | 	  return
84 | 	  ;;
85 |       esac
86 |     done
87 |     psql_command "COMMIT;"
88 |     break;
89 |   done
90 | }
91 | 


--------------------------------------------------------------------------------
/psql-cli/psql-edit-replace.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # A custom editor for psql that pre-processes the query string
 4 | # to replace "* /* special comment */" with a list of columns.
 5 | # The replacement options are:
 6 | #  * /*expand*/ : this text will be replaced by the list of columns of the query.
 7 | #  * /*except:col1,col2,...*/: this text will be replaced by the list of columns of the query, except those listed.
 8 | #  * /*except-type:bytea,jsonb,...*/ : this text will be replaced by the list of columns of the query, except those of the types listed.
 9 | 
10 | # Example: SELECT * /*expand*/ FROM users JOIN posts USING(user_id) :expand
11 | # (see the macro definition of :expand in psqlrc-for-edit-replace
12 | #  or https://postgresql.verite.pro/blog/2022/02/21/psql-hack-select-except.html
13 | #  for detailed explanations)
14 | 
15 | # The columns are passed in a temporary file pointed to by
16 | # the PSQL_TMP_STRUCT environment variable.
17 | 
18 | # Set up PSQL_EDITOR to point to that script.
19 | 
20 | 
21 | read -r line1 < "$1"
22 | rx='\*\s*/\*(expand|except:|except-type:).*\*/'
23 | if [[ $line1 =~ $rx && -r "$PSQL_TMP_STRUCT" ]]; then
24 |   perl - $1 "$PSQL_TMP_STRUCT" << "EOP"
25 | require 5.014;
26 | use Text::CSV qw(csv);
27 | 
28 | sub expand {
29 |   # filter and format the list of columns
30 |   my ($cols,$filter_type,$filter) = @_;
31 |   # filter_type => undef:none, 0:by name, 1: by type
32 |   my $qi = 1; # quote the columns (for case sensitive names and reserved keywords)
33 |   if (defined $filter_type) {
34 |     my @xcols = split /,/, $filter;	# list of arguments inside the comment
35 |     my %xhcols = map { $_=>1 } @xcols;
36 |     $cols = [ grep { !defined $xhcols{$_->[$filter_type]} } @{$cols} ];
37 |   }
38 |   return join ",\n\t", (map { $qi?('"' . $_->[0]=~ s/"/""/r . '"') : $_->[0]}
39 |   	      	        @{$cols});
40 | }
41 | 
42 | my $cols = csv(in=>$ARGV[1], headers=>"skip", binary=>1);
43 | open(my $fi, "<", $ARGV[0]) or die "cannot open $ARGV[0]: $!";
44 | my $lines = <$fi>;   # 1st line of query
45 | 
46 | my $rx = qr{^(.*)\*\s*/\*expand\*/(.*)$};
47 | if ($lines =~ $rx) {
48 |   # expand to all columns
49 |   $lines = "$1" . expand($cols, undef, undef) . "\n$2";
50 | }
51 | else {
52 |   $rx = qr{^(.*)\*\s*/\*except:(.*)\*/(.*)$};
53 |   if ($lines =~ $rx) {
54 |     # expand to all columns except those listed
55 |     $lines = "$1" . expand($cols, 0, $2) . "\n$3";
56 |   }
57 |   else {
58 |     $rx = qr{^(.*)\*\s*/\*except-type:(.*)\*/(.*)$};
59 |     if ($lines =~ $rx) {
60 |       # expand to all column except for the types listed
61 |       $lines = "$1" . expand($cols, 1, $2) . "\n$3";
62 |     }
63 |   }
64 | }
65 | # copy the rest of the lines
66 | do {
67 |   $lines .= $_;
68 | } while (<$fi>);
69 | close $fi;
70 | # overwrite the file with the new query
71 | open (my $fo, ">", $ARGV[0]) or die "cannot open $ARGV[0] for writing: $!";
72 | print $fo $lines;
73 | close $fo;
74 | EOP
75 | 
76 |   # When the replacement in the query buffer occurred, we could
77 |   # return into psql at this point rather than going into the actual
78 |   # editor.
79 |   # But before version 13, psql won't display the modified
80 |   # query when returning at this point, so it might seem opaque.
81 |   # Let's always call the actual editor, but you may uncomment
82 |   # the line below to skip it.
83 | 
84 |   # rm -f "$PSQL_TMP_STRUCT" ; exit
85 | fi
86 | rm -f "$PSQL_TMP_STRUCT"
87 | ${EDITOR:-vi} $*
88 | 


--------------------------------------------------------------------------------
/psql-cli/psql-quote-meta.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This function returns its arguments quoted to be safely injected
 4 | # into a psql meta-command.
 5 | # Note: it does not transform control characters (CR,FF,tab...)
 6 | function quote_psql_meta
 7 | {
 8 |    local t=${1//\\/\\\\}
 9 |    t=${t//\'/\\\'}
10 |    echo "'$t'"
11 | }
12 | 
13 | 


--------------------------------------------------------------------------------
/psql-cli/psqlrc-for-edit-replace:
--------------------------------------------------------------------------------
 1 | -- psqlrc stanza
 2 | -- for psql version 12 and newer
 3 | 
 4 | -- Declare the :expand macro.
 5 | -- This macro calls \gdesc with the query in the buffer, puts the
 6 | -- result in a csv temporary file, and invoke a custom editor to
 7 | -- transform the query buffer using this result and SQL comments
 8 | -- inside the query
 9 | -- see psql-edit-replace.sh for a custom editor implementation.
10 | 
11 | -- EDIT THIS with your preferred path
12 | \setenv PSQL_EDITOR ~/bin/psql-edit-replace.sh
13 | 
14 | \set expand ' \\set _tmpstruct `mktemp` \\setenv PSQL_TMP_STRUCT :_tmpstruct \\set QUIET on \\pset format csv \\x off \\pset tuples_only off \\o :_tmpstruct \\gdesc \\o \\pset format aligned \\set QUIET off \\e \\unset _tmpstruct'
15 | 


--------------------------------------------------------------------------------
/psql-cli/tar-to-pg-copy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Takes a tar file in standard input containing 'tablename.copy' files
 4 | # in COPY text format.
 5 | # Each file is \copy'ed with psql into 'tablename'
 6 | # The import is done in a single transaction and without
 7 | # extracting any file to disk
 8 | 
 9 | set -e
10 | 
11 | source psql-coproc-functions.sh
12 | 
13 | psql_coproc -AtX -v ON_ERROR_STOP=1 --single-transaction
14 | 
15 | fifo_names="$(mktemp -u)-$(get_uuid)"
16 | mkfifo -m 0600 $fifo_names
17 | 
18 | fifo_contents="$(mktemp -u)-$(get_uuid)"
19 | mkfifo -m 0600 $fifo_contents
20 | 
21 | function cleanup
22 | {
23 |     rm -f $fifo_names $fifo_contents
24 | }
25 | 
26 | trap cleanup EXIT
27 | 
28 | end_files=$(end_marker)
29 | 
30 | cat | (
31 |   tar --to-command "echo \$TAR_FILENAME >>$fifo_names; cat > $fifo_contents" \
32 |      -xjf - ;
33 |   echo "$end_files" >$fifo_names
34 | ) &
35 | 
36 | 
37 | while read -r copyfilename < $fifo_names; do
38 |   if [[ "$copyfilename" = "$end_files" ]]; then
39 |     break
40 |   else
41 |     tablename=${copyfilename%.copy}
42 |     echo "Importing $copyfilename into $tablename"
43 |     psql_command "\\copy $tablename from $fifo_contents"
44 |   fi
45 | done
46 | 
47 | psql_check_alive
48 | echo '\q' >&${PSQL[1]}
49 | 
50 | # Wait for completion of background processes
51 | wait
52 | 


--------------------------------------------------------------------------------
/strings/parse-option.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Parse a GUC as the C function ParseLongOption() from Postgres source
 3 |  * code in src/backend/utils/misc/guc.c
 4 |  * Input: string of the form 'name=value'
 5 |  * Output: name and value as a record.
 6 |  */
 7 | CREATE OR REPLACE FUNCTION
 8 |  parse_option(string text, name OUT text,  value OUT text)
 9 | RETURNS record
10 | as $$
11 | declare
12 |  p int := strpos(string, '=');
13 | begin
14 |   if (p > 0) then
15 |     name := replace(left(string, p-1), '-', '_');
16 |     value := substr(string, p+1);
17 |   else
18 |     name := replace(string, '-', '_');
19 |     value := NULL;
20 |   end if;
21 | end
22 | $$ language plpgsql immutable;
23 | 
24 | /*
25 |  * Consider this less readable version if you want it in the
26 |  *  SQL language (it doesn't seem to be faster in that case).
27 |  */
28 | /*
29 | CREATE OR REPLACE FUNCTION
30 |  parse_option(string text, name OUT text,  value OUT text)
31 | RETURNS record
32 | as $$
33 |  select
34 |     replace(left(string, greatest(strpos($1, '=')-1, 0)), '-', '_'),
35 |       substr(string, strpos($1, '=')+1);
36 | $$ language sql immutable;
37 | */
38 | 


--------------------------------------------------------------------------------
/strings/plperl/env_vars.sql:
--------------------------------------------------------------------------------
 1 | /* plperl functions to get and set environment variables */
 2 | 
 3 | CREATE FUNCTION getenv(varname text)
 4 | RETURNS text
 5 | AS $BODY$
 6 |  my ($var) = @_;
 7 | 
 8 |  $ENV{$var};
 9 | $BODY$ language plperl strict stable;
10 | 
11 | CREATE FUNCTION setenv(varname text, value text)
12 | RETURNS text
13 | AS $BODY$
14 |  my ($var,$value) = @_;
15 | 
16 |  $ENV{$var}=$value;
17 | $BODY$ language plperl strict stable;
18 | 


--------------------------------------------------------------------------------
/strings/plperl/multi_replace_plperl.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Substitute substrings within a larger string, with Perl s// operator,
 3 |   in a single pass. Each element in @orig found in @string (scanned left
 4 |   to right) is replaced by the element at the same index in @repl.
 5 |   When multiple strings in the array match simultaneously, the longest one
 6 |   wins.
 7 | */
 8 | CREATE OR REPLACE FUNCTION multi_replace(string text, orig text[], repl text[])
 9 | RETURNS text
10 | AS $BODY$
11 |   my ($string, $orig, $repl) = @_;
12 |   my %subs;
13 | 
14 |   # Check that the arrays are of the same size, unidimensional,
15 |   # and contain no null values.
16 |   if (@$orig != @$repl) {
17 |      elog(ERROR, "array sizes mismatch");
18 |   }
19 |   if (ref @$orig[0] eq 'ARRAY' || ref @$repl[0] eq 'ARRAY') {
20 |      elog(ERROR, "multi-dimensional arrays are not allowed");
21 |   }
22 |   if (grep { !defined } (@$orig, @$repl)) {
23 |      elog(ERROR, "null elements are not allowed");
24 |   }
25 | 
26 |   # Each element of $orig is a key in %subs to the element at the same
27 |   # index in $repl
28 |   @subs{@$orig} = @$repl;
29 | 
30 |   # Build a regexp of the form (s1|s2|...)
31 |   # with the substrings sorted to match longest first
32 |   my $re = join "|", map quotemeta,
33 |      sort { (length($b) <=> length($a)) } keys %subs;
34 |   $re = qr/($re)/;
35 | 
36 |   # The order will be kept in matching because (from perlre):
37 |   # "Alternatives are tried from left to right, so the first alternative
38 |   # found for which the entire expression matches, is the one that is
39 |   # chosen"
40 | 
41 |   $string =~ s/$re/$subs{$1}/g;
42 |   return $string;
43 | 
44 | $BODY$ language plperl strict immutable;
45 | 


--------------------------------------------------------------------------------
/strings/utf8-truncate.sql:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Truncate the UTF-8 string argument at the given number of bytes,
 3 |  * removing any additional bytes at the end that would otherwise
 4 |  * be part of an incomplete multibyte sequence.
 5 |  */
 6 | CREATE FUNCTION utf8_truncate(str text, len int) returns text
 7 | as $$
 8 | select case when octet_length(str) <= len then str
 9 | else (
10 |    with bstr(s) as (select convert_to(str, 'UTF-8'))
11 |    select
12 |    case
13 |    when len>=1 and (get_byte(s, len) & 192) <> 128
14 |    then convert_from(substring(s, 1, len), 'UTF-8')
15 |    else
16 |      case
17 |      when len>=2 and (get_byte(s, len-1) & 192) <> 128
18 |      then convert_from(substring(s, 1, len-1), 'UTF-8')
19 |      else
20 |        case
21 |        when len>=3 and (get_byte(s, len-2) & 192) <> 128
22 |        then convert_from(substring(s, 1, len-2), 'UTF-8')
23 |        else
24 |          case
25 |          when len>=4 and (get_byte(s, len-3) & 192) <> 128
26 |          then convert_from(substring(s, 1, len-3), 'UTF-8')
27 |          else ''
28 |          end
29 |        end
30 |      end
31 |    end
32 |  from bstr)
33 |    end;
34 | $$ language sql strict immutable parallel safe;
35 | 


--------------------------------------------------------------------------------
/tsearch/dict_maxlen/Makefile:
--------------------------------------------------------------------------------
 1 | EXTENSION = dict_maxlen
 2 | EXTVERSION = 1.0
 3 | PG_CONFIG = pg_config
 4 | 
 5 | MODULE_big = dict_maxlen
 6 | OBJS = dict_maxlen.o
 7 | 
 8 | DATA = $(wildcard *.sql)
 9 | 
10 | PGXS := $(shell $(PG_CONFIG) --pgxs)
11 | include $(PGXS)
12 | 


--------------------------------------------------------------------------------
/tsearch/dict_maxlen/README.md:
--------------------------------------------------------------------------------
 1 | ## dict_maxlen
 2 | 
 3 | A dictionary (for PostgreSQL [text search engine](https://www.postgresql.org/docs/current/textsearch.html)) that filters out tokens longer than a given length.
 4 | 
 5 | ### Example of use:
 6 | 
 7 | Consider this query for a baseline comparison, using the english built-in
 8 | configuration:
 9 | 
10 | ```
11 | =# select to_tsvector('english', 'This is a long checksum that will be indexed: 81c3b1eccdb8e1ca7a5475aaf2f362ab3ec2ac4274974a12626d4bf603db4d6f');
12 |                                               to_tsvector
13 | -------------------------------------------------------------------------------------------------------
14 |  '81c3b1eccdb8e1ca7a5475aaf2f362ab3ec2ac4274974a12626d4bf603db4d6f':10 'checksum':5 'index':9 'long':4
15 | ```
16 | 
17 | Installation of the dictionary, in a new distinct text search configuration:
18 | 
19 | ```
20 | CREATE EXTENSION dict_maxlen;
21 | 
22 | CREATE TEXT SEARCH DICTIONARY dictmaxlen (
23 |   TEMPLATE = dictmaxlen_template,
24 |   LENGTH = 40 -- or another maximum number of characters
25 | );
26 | COMMENT ON TEXT SEARCH DICTIONARY dictmaxlen IS 'A dictionary to filter out long lexemes';
27 | 
28 | CREATE TEXT SEARCH CONFIGURATION mytsconf ( COPY = pg_catalog.english );
29 | 
30 | -- Map the dictionary to some of the token types produced by the parser
31 | 
32 | ALTER TEXT SEARCH CONFIGURATION mytsconf
33 |  ALTER MAPPING FOR asciiword, word
34 |   WITH dictmaxlen,english_stem;
35 | 
36 | ALTER TEXT SEARCH CONFIGURATION mytsconf
37 |  ALTER MAPPING FOR numword
38 |   WITH dictmaxlen,simple;
39 | 
40 | ```
41 | 
42 | Result with the dictionary installed and configured to filter out tokens longer than 40 characters:
43 | 
44 | ```
45 | 
46 | =# select to_tsvector('mytsconf', 'This is a long checksum that will NOT be indexed: 81c3b1eccdb8e1ca7a5475aaf2f362ab3ec2ac4274974a12626d4bf603db4d6f');
47 |            to_tsvector
48 | ---------------------------------
49 |  'checksum':5 'index':10 'long':4
50 | 
51 | 
52 | ```
53 | 


--------------------------------------------------------------------------------
/tsearch/dict_maxlen/dict_maxlen--1.0.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 3 | \echo Use "CREATE EXTENSION dictmaxlen" to load this file. \quit
 4 | 
 5 | CREATE FUNCTION dictmaxlen_init(internal)
 6 |         RETURNS internal
 7 |         AS 'MODULE_PATHNAME'
 8 |         LANGUAGE C STRICT;
 9 | 
10 | CREATE FUNCTION dictmaxlen_lexize(internal, internal, internal, internal)
11 |         RETURNS internal
12 |         AS 'MODULE_PATHNAME'
13 |         LANGUAGE C STRICT;
14 | 
15 | CREATE TEXT SEARCH TEMPLATE dictmaxlen_template (
16 |         LEXIZE = dictmaxlen_lexize,
17 | 	INIT   = dictmaxlen_init
18 | );
19 | 
20 | /*
21 | Instantiate with:
22 | 
23 | CREATE TEXT SEARCH DICTIONARY dictmaxlen (
24 | 	TEMPLATE = dictmaxlen_template,
25 | 	LENGTH = ?
26 | );
27 | 
28 | COMMENT ON TEXT SEARCH DICTIONARY dictmaxlen IS
29 |   'A dictionary to filter out long tokens';
30 | */
31 | 


--------------------------------------------------------------------------------
/tsearch/dict_maxlen/dict_maxlen.c:
--------------------------------------------------------------------------------
 1 | #include "postgres.h"
 2 | 
 3 | #include "commands/defrem.h"
 4 | #include "tsearch/ts_utils.h"
 5 | #include "mb/pg_wchar.h"
 6 | 
 7 | PG_MODULE_MAGIC;
 8 | 
 9 | PG_FUNCTION_INFO_V1(dictmaxlen_init);
10 | PG_FUNCTION_INFO_V1(dictmaxlen_lexize);
11 | 
12 | Datum dictmaxlen_lexize(PG_FUNCTION_ARGS);
13 | Datum dictmaxlen_init(PG_FUNCTION_ARGS);
14 | 
15 | typedef struct
16 | {
17 | 	int			maxlen;
18 | } DictMaxLen;
19 | 
20 | Datum
21 | dictmaxlen_init(PG_FUNCTION_ARGS)
22 | {
23 | 	List	   *options = (List *) PG_GETARG_POINTER(0);
24 | 	DictMaxLen   *d;
25 | 	ListCell   *l;
26 | 
27 | 	d = (DictMaxLen *) palloc0(sizeof(DictMaxLen));
28 | 
29 | 	d->maxlen = 50;				/* default */
30 | 
31 | 	foreach(l, options)
32 | 	{
33 | 		DefElem    *defel = (DefElem *) lfirst(l);
34 | 
35 | 		if (strcmp(defel->defname, "length") == 0)
36 | 		{
37 | 			d->maxlen = atoi(defGetString(defel));
38 | 		}
39 | 		else
40 | 		{
41 | 			ereport(ERROR,
42 | 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
43 | 					 errmsg("unrecognized dictionary parameter: \"%s\"",
44 | 							defel->defname)));
45 | 		}
46 | 	}
47 | 
48 | 	PG_RETURN_POINTER(d);
49 | }
50 | 
51 | Datum
52 | dictmaxlen_lexize(PG_FUNCTION_ARGS)
53 | {
54 | 	DictMaxLen	*d = (DictMaxLen *) PG_GETARG_POINTER(0);
55 | 	char	   	*token = (char *) PG_GETARG_POINTER(1);
56 | 	int			byte_length = PG_GETARG_INT32(2);
57 | 
58 | 	if (pg_mbstrlen_with_len(token, byte_length) > d->maxlen)
59 | 	{
60 | 		/* If the word is longer than our max length, return an empty
61 | 		 * lexeme */
62 | 		TSLexeme   *res = palloc0(sizeof(TSLexeme));
63 | 		/* res[0].lexeme = NULL; */       /* implied by palloc0() */
64 | 		PG_RETURN_POINTER(res);
65 | 	}
66 | 	else
67 | 	{
68 | 		/* If the word is short, pass it unmodified */
69 | 		PG_RETURN_POINTER(NULL);
70 | 	}
71 | }
72 | 


--------------------------------------------------------------------------------
/tsearch/dict_maxlen/dict_maxlen.control:
--------------------------------------------------------------------------------
1 | # dict_maxlen extension
2 | comment = 'text search template for a dictionary filtering out long words'
3 | default_version = '1.0'
4 | module_pathname = '$libdir/dict_maxlen'
5 | relocatable = true
6 | 


--------------------------------------------------------------------------------