├── .gitignore ├── LICENSE.md ├── README.md ├── admin └── db_creation_date.sql ├── diff └── diff-tables.sql ├── global_search ├── README.md └── global_search.sql ├── hamming_weight ├── Makefile ├── hamming_weight.c └── hamming_weight.sql ├── large_objects ├── lo_digest.sql └── lo_size.sql ├── pivots ├── dynamic-pivot-create.sql └── dynamic-pivot-refcursor.sql ├── psql-cli ├── psql-coproc-functions.sh ├── psql-edit-replace.sh ├── psql-quote-meta.bash ├── psqlrc-for-edit-replace └── tar-to-pg-copy.sh ├── strings ├── parse-option.sql ├── plperl │ ├── env_vars.sql │ └── multi_replace_plperl.sql └── utf8-truncate.sql └── tsearch └── dict_maxlen ├── Makefile ├── README.md ├── dict_maxlen--1.0.sql ├── dict_maxlen.c └── dict_maxlen.control /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore backup files 2 | *~ 3 | 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # Copyright and License 2 | 3 | Copyright (c) 2016-2024, Daniel Vérité 4 | 5 | Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies. 6 | 7 | In no event shall Daniel Vérité be liable to any party for direct, indirect, special, incidental, or consequential damages, including lost profits, arising out of the use of this software and its documentation, even if Daniel Vérité has been advised of the possibility of such damage. 8 | 9 | Daniel Vérité specifically disclaims any warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose. The software provided hereunder is on an "AS IS" basis, and Daniel Vérité has no obligations to provide maintenance, support, updates, enhancements, or modifications. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PostgreSQL functions 2 | 3 | A repository of custom PostgreSQL functions and extensions. 4 | 5 | ## admin/db\_creation\_date 6 | A `plperlu` function returning the creation date of a database, 7 | based on the creation time of the database directory 8 | under `$PGATA/base`. 9 | 10 | ## diff_tables 11 | A simple plpgsql function that takes two table names (through the 12 | `regclass` type), builds a query comparing their contents, runs 13 | it, and returns a set of diff-like results with the rows that differ. 14 | It does not require a primary key on tables to compare. 15 | 16 | ## dynamic_pivot 17 | Return a CURSOR pointing to pivoted results of a query passed as the 18 | 1st parameter, with sorted headers passed as a query as the 2nd 19 | parameter. 20 | See https://postgresql.verite.pro/blog/2018/06/19/crosstab-pivot.html for 21 | a lot of context about this function. 22 | 23 | ## global_search 24 | A `plpgsql` function that finds occurrences of a string or more 25 | generally perform any kind of text-based matching in all or some of the 26 | tables of an entire database. 27 | It returns the table, column name, `ctid` and column's value of the rows 28 | that match. 29 | The search can be limited to an array of tables and/or of 30 | schemas. Progress is optionally reported by emitting `raise info` 31 | messages. 32 | 33 | ## hamming_weight 34 | C functions that return the number of bits set to `1` in a bytea, int 35 | or bigint value. The `bytea` variant is available as a built-in function 36 | (named `bit_count`) since PostgreSQL 14. 37 | 38 | ## large_objects 39 | ### lo_size 40 | A plpgsql function that returns the size of a given large object. 41 | 42 | ### lo_digest 43 | A plperlu function that returns the digest (hash output) of a large 44 | object for any hash supported by perl's Digest module. 45 | 46 | ## strings/parse_option 47 | A simple function to parse name=value settings. 48 | 49 | ## strings/plperl/multi_replace 50 | Replace strings by other strings within a larger text, with 51 | Perl s// operator, in a single pass. 52 | Each string in the first array is replaced by the element at the same 53 | index in the second array. 54 | 55 | ## strings/utf8_truncate 56 | Truncate an UTF-8 string to a given number of bytes, respecting the 57 | constraint that any multibyte sequence at the end must be complete. 58 | 59 | ## tsearch/dict_maxlen 60 | A text search dictionary to filter out tokens longer than a given length. 61 | 62 | ## psql-cli 63 | psqlrc declarations, companion scripts, tricks for the psql command-line interpreter. 64 | -------------------------------------------------------------------------------- /admin/db_creation_date.sql: -------------------------------------------------------------------------------- 1 | /* 2 | * Return the creation date of a database based on its directory. 3 | * Requires superuser privileges. 4 | * Example: 5 | * SELECT datname, db_creation_date(oid) FROM pg_database; 6 | */ 7 | CREATE FUNCTION db_creation_date(oid) RETURNS timestamptz as $$ 8 | my $oid=shift; 9 | my $rv = spi_exec_query("SELECT setting FROM pg_settings WHERE name='data_directory'") 10 | or elog(ERROR, "cannot read 'data_directory' setting"); 11 | my $datadir = $rv->{rows}[0]->{setting}; 12 | my @info = stat("$datadir/base/$oid"); 13 | if (!@info) { 14 | elog(ERROR, "cannot stat database directory: $!"); 15 | } 16 | my $ctime = $info[10]; 17 | $rv = spi_exec_query("SELECT to_timestamp($ctime) as t") or elog(ERROR, "query error"); 18 | return $rv->{rows}[0]->{t}; 19 | $$ LANGUAGE plperlu; 20 | -------------------------------------------------------------------------------- /diff/diff-tables.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Take two table names (through the `regtable` type), 3 | build a query comparing their contents, execute it, and 4 | return a set of diff-like results with the rows that differ. 5 | */ 6 | CREATE FUNCTION diff_tables(t1 regclass, t2 regclass) 7 | returns TABLE("+/-" text, line text) 8 | AS $func$ 9 | BEGIN 10 | RETURN QUERY EXECUTE format($$ 11 | SELECT '+', d1.*::text FROM ( 12 | SELECT * FROM %s 13 | EXCEPT 14 | SELECT * FROM %s) AS d1 15 | UNION ALL 16 | SELECT '-', d2.*::text FROM ( 17 | SELECT * FROM %s 18 | EXCEPT 19 | SELECT * FROM %s) AS d2 20 | $$, t2, t1, t1, t2); 21 | END 22 | $func$ language plpgsql; 23 | -------------------------------------------------------------------------------- /global_search/README.md: -------------------------------------------------------------------------------- 1 | ## global_search 2 | 3 | ``` 4 | CREATE FUNCTION global_search( 5 | search_term text, 6 | comparator regproc default 'pg_catalog.texteq', -- comparison function 7 | tables text[] default null, 8 | schemas text[] default null, 9 | progress text default null, -- 'tables', 'hits', 'all' 10 | max_width int default -1 -- returned value's max width in chars, or -1 for unlimited 11 | ) 12 | RETURNS table(schemaname text, tablename text, columnname text, 13 | columnvalue text, rowctid tid) 14 | ``` 15 | 16 | `global_search` is a plpgsql function that finds `search_term` in all 17 | columns of all accessible tables. The list of schemas to scan by 18 | default are those accessible through the current `search_path` 19 | (except `pg_catalog` that must be explicitly added if desired). 20 | The `tables` or `schemas` parameters can be 21 | used to scan a specific subset of tables and/or schemas. 22 | 23 | The comparison is done with `comparator`, which must be the OID of a 24 | function taking two text arguments and returning a bool with 25 | the result of the match. 26 | It will be called with each column value (cast as `text`) being tested 27 | as its first argument, and `search_term` as its second argument. 28 | Typically, you should just pass the name of the function, and the implicit 29 | casting to `regproc` will take care of translating it into an 30 | OID. Other than `texteq`, built-in functions that may be passed directly 31 | are `textregexeq`, `texticregexeq`, `textlike`, `texticlike` 32 | for regular expressions and the SQL `like` operator (`ic` are the case 33 | insensitive variants). See also the the `\doS+` command in `psql` 34 | for more. 35 | 36 | The progress is optionally reported with `raise info` messages, immediately 37 | available to the caller, contrary to the function result that is available 38 | only on function completion. 39 | When `progress` is `tables` or `all`, each table searched into is reported. 40 | When `progress` is `hits` or `all`, each hit is reported. 41 | 42 | In all cases, the hits are returned by the function in the form of a 43 | table-like value: `(schemaname text, tablename text, columnname text, 44 | columnvalue text, rowctid tid)`. 45 | 46 | Beware that `ctid` are transient, since rows can be relocated 47 | physically. You may use the `REPEATABLE READ` transaction isolation mode to 48 | ensure that the corresponding version of the row is kept around until you're done 49 | with the `ctid`. 50 | 51 | The returned column values might be suppressed or truncated by setting 52 | a maximum width with the `max_width` argument. 53 | 54 | 55 | ### Examples: 56 | 57 | 58 | ``` 59 | -- Setup 60 | => CREATE TABLE tst(t text); 61 | => INSERT INTO tst VALUES('foo'),('bar'),('baz'),('barbaz'),('Foo'),(null); 62 | ``` 63 | 64 | #### Simple equality search: 65 | ``` 66 | => SELECT * FROM global_search('Foo'); 67 | 68 | schemaname | tablename | columnname | columnvalue | rowctid 69 | ------------+-----------+------------+-------------+--------- 70 | public | tst | t | Foo | (0,5) 71 | ``` 72 | 73 | 74 | #### Regular expression matching 75 | ```sql 76 | => SELECT * FROM global_search('^bar', comparator=>'textregexeq'); 77 | 78 | schemaname | tablename | columnname | columnvalue | rowctid 79 | ------------+-----------+------------+-------------+--------- 80 | public | tst | t | bar | (0,2) 81 | public | tst | t | barbaz | (0,4) 82 | (2 rows) 83 | ``` 84 | 85 | 86 | #### Case insensitive LIKE (equivalent to: column ILIKE search_term) 87 | ```sql 88 | => SELECT * FROM global_search('fo%', comparator=>'texticlike'); 89 | 90 | schemaname | tablename | columnname | columnvalue | rowctid 91 | ------------+-----------+------------+-------------+--------- 92 | public | tst | t | foo | (0,1) 93 | public | tst | t | Foo | (0,5) 94 | (2 rows) 95 | ``` 96 | 97 | Sometimes there are several functions with the same name. In the 98 | case of `texticlike`, the `citext` extension overloads this function 99 | with two variants that take a `citext` parameter instead of `text`. 100 | In that kind of case, you want to use the `regprocedure` cast with a 101 | function name qualified with arguments to disambiguate. 102 | 103 | For instance: 104 | 105 | ```sql 106 | SELECT * FROM global_search('fo%', comparator=>'texticlike(text,text)'::regprocedure); 107 | ``` 108 | 109 | #### Find all "incorrect" values with a custom function 110 | The following custom function finds values that do not conform 111 | to the Unicode NFC normalization 112 | Note that even though the second argument is not used in that case, 113 | it needs to be declared nonetheless. 114 | 115 | ```sql 116 | => CREATE FUNCTION pg_temp.check_normal_form(text,text) 117 | returns boolean as 118 | 'select $1 is not NFC normalized' -- requires Postgres 13 or newer 119 | language sql; 120 | 121 | => INSERT INTO tst VALUES (E'El Nin\u0303o'); 122 | => SELECT * FROM global_search(null, comparator=>'pg_temp.check_normal_form'); 123 | 124 | schemaname | tablename | columnname | columnvalue | rowctid 125 | ------------+-----------+------------+-------------+--------- 126 | public | tst | t | El Niño | (0,7) 127 | ``` 128 | 129 | 130 | #### Find references to the OID of a namespace 131 | That sort of query can be useful when exploring the catalogs. 132 | `2200` is the OID of the `public` namespace. 133 | ```sql 134 | => SELECT * FROM global_search( 135 | (select oid::text from pg_namespace where nspname='public'), 136 | schemas=>'{pg_catalog}' 137 | ); 138 | 139 | schemaname | tablename | columnname | columnvalue | rowctid 140 | ------------+----------------+--------------+-------------+--------- 141 | pg_catalog | pg_proc | pronamespace | 2200 | (95,5) 142 | pg_catalog | pg_description | objoid | 2200 | (28,58) 143 | pg_catalog | pg_namespace | oid | 2200 | (0,8) 144 | pg_catalog | pg_depend | refobjid | 2200 | (13,70) 145 | pg_catalog | pg_init_privs | objoid | 2200 | (2,27) 146 | (5 rows) 147 | ``` 148 | 149 | #### Case and accent insensitive global search 150 | 151 | Using an ICU collation for advanced non-bitwise equality tests. 152 | 153 | ``` 154 | -- create a collation that ignores accents and case 155 | => CREATE COLLATION nd ( 156 | provider = 'icu', 157 | locale = '@colStrength=primary', 158 | deterministic = false 159 | ); 160 | 161 | => CREATE FUNCTION pg_temp.ci_equal(text,text) 162 | returns boolean as 163 | 'select $1=$2 collate "nd"' 164 | language sql; 165 | 166 | => SELECT * FROM global_search('foo', comparator=>'pg_temp.ci_equal'); 167 | 168 | schemaname | tablename | columnname | columnvalue | rowctid 169 | ------------+-----------+------------+-------------+--------- 170 | public | tst | t | foo | (0,1) 171 | public | tst | t | Foo | (0,5) 172 | (2 rows) 173 | 174 | ``` 175 | -------------------------------------------------------------------------------- /global_search/global_search.sql: -------------------------------------------------------------------------------- 1 | CREATE or replace FUNCTION global_search( 2 | search_term text, 3 | comparator regproc default 'pg_catalog.texteq', -- comparison function 4 | tables text[] default null, 5 | schemas text[] default null, 6 | progress text default null, -- 'tables', 'hits', 'all' 7 | max_width int default -1 -- returned value's max width in chars, or -1 for unlimited 8 | ) 9 | RETURNS table(schemaname text, tablename text, columnname text, columnvalue text, rowctid tid) 10 | AS $$ 11 | DECLARE 12 | query text; 13 | clauses text[]; 14 | columns text[]; 15 | pos int; 16 | positions int[]; 17 | col_expr text; 18 | BEGIN 19 | IF schemas IS NULL THEN 20 | -- by default, exclude pg_catalog and non-readable schemas 21 | schemas := current_schemas(false); 22 | END IF; 23 | 24 | FOR schemaname,tablename IN 25 | -- select tables for which all columns are readable 26 | SELECT t.table_schema, t.table_name 27 | FROM information_schema.tables t 28 | JOIN information_schema.schemata s ON 29 | (s.schema_name=t.table_schema) 30 | WHERE (t.table_name=ANY(tables) OR tables is null) 31 | AND t.table_schema=ANY(schemas) 32 | AND t.table_type='BASE TABLE' 33 | AND EXISTS (SELECT 1 FROM information_schema.table_privileges p 34 | WHERE p.table_name=t.table_name 35 | AND p.table_schema=t.table_schema 36 | AND p.privilege_type='SELECT' 37 | ) 38 | LOOP 39 | IF (progress in ('tables','all')) THEN 40 | RAISE INFO '%', format('Searching globally in table: %I.%I', 41 | schemaname, tablename); 42 | END IF; 43 | 44 | -- Get lists of columns and per-column boolean expressions 45 | SELECT array_agg(column_name ORDER BY ordinal_position), 46 | array_agg(format('%s(cast(%I as text), %L)', comparator, column_name, search_term) 47 | ORDER BY ordinal_position) 48 | FROM information_schema.columns 49 | WHERE table_name=tablename 50 | AND table_schema=schemaname 51 | INTO columns, clauses; 52 | 53 | -- Main query to get each matching row and the ordinal positions of matching columns 54 | query := format('SELECT s.ctid, p from (SELECT ctid,' 55 | 'array_positions(array[%s],true) AS p FROM ONLY %I.%I) s' 56 | ' WHERE cardinality(p)>0', 57 | array_to_string(clauses, ','), schemaname, tablename ); 58 | 59 | FOR rowctid,positions IN EXECUTE query -- for each matching row 60 | LOOP 61 | FOREACH pos IN ARRAY positions -- for each matching field 62 | LOOP 63 | columnname := columns[pos]; 64 | IF (max_width <> 0) THEN -- fetch value only if needed 65 | IF (max_width > 0) THEN 66 | -- fetch a truncated value 67 | col_expr := format('left(%I,%s)', columnname, max_width); 68 | ELSE 69 | col_expr := format('%I', columnname); 70 | END IF; 71 | EXECUTE format('SELECT %s FROM %I.%I WHERE ctid=''%s''', 72 | col_expr, schemaname, tablename, rowctid) INTO columnvalue; 73 | ELSE 74 | columnvalue:=null; 75 | END IF; 76 | IF (progress in ('hits', 'all')) THEN 77 | RAISE INFO '%', format('Found in %I.%I.%I at ctid %s', 78 | schemaname, tablename, columnname, rowctid); 79 | END IF; 80 | RETURN NEXT; 81 | END LOOP; 82 | END LOOP; 83 | END LOOP; -- for each table 84 | END; 85 | $$ language plpgsql; 86 | -------------------------------------------------------------------------------- /hamming_weight/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for hamming_weight 2 | # Assumes that pg_config is installed 3 | # See https://www.postgresql.org/docs/current/static/app-pgconfig.html 4 | # and https://www.postgresql.org/docs/current/static/extend-pgxs.html 5 | # on how to extend PostgreSQL with custom functions in general 6 | 7 | ifndef PG_CONFIG 8 | PG_CONFIG = pg_config 9 | endif 10 | 11 | INCLUDEDIRS := -I. 12 | INCLUDEDIRS += -I$(shell $(PG_CONFIG) --includedir-server) 13 | INCLUDEDIRS += -I$(shell $(PG_CONFIG) --includedir) 14 | CFLAGS := $(shell $(PG_CONFIG) --cflags) 15 | CFLAGS_SL := $(shell $(PG_CONFIG) --cflags_sl) 16 | 17 | # If you are using shared libraries, make sure this location can be 18 | # found at runtime (see /etc/ld.so.conf and ldconfig command). 19 | LIBDIR = -L$(shell $(PG_CONFIG) --libdir) 20 | # This is where the shared object should be installed 21 | LIBINSTALL = $(shell $(PG_CONFIG) --pkglibdir) 22 | 23 | all: hamming_weight.so 24 | 25 | hamming_weight.so: hamming_weight.c Makefile 26 | gcc $(CFLAGS) $(CFLAGS_SL) $(INCLUDEDIRS) -o hamming_weight.o -c hamming_weight.c 27 | gcc -shared -o hamming_weight.so hamming_weight.o $(LIBDIR) -lpq -lm 28 | 29 | install: hamming_weight.so 30 | cp $? $(LIBINSTALL) 31 | -------------------------------------------------------------------------------- /hamming_weight/hamming_weight.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2020 Daniel VERITE 3 | * BSD license, see README.md 4 | */ 5 | 6 | #include "postgres.h" 7 | #include 8 | #include "fmgr.h" 9 | 10 | PG_MODULE_MAGIC; 11 | 12 | Datum hamming_weight_int4(PG_FUNCTION_ARGS); 13 | Datum hamming_weight_int8(PG_FUNCTION_ARGS); 14 | Datum hamming_weight_bytea(PG_FUNCTION_ARGS); 15 | 16 | /* number of bits for all 8-bit numbers */ 17 | static const int bitcount[256]={ 18 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 19 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 20 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 21 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 22 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 23 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 24 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 25 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 26 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 27 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 28 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 29 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 30 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 31 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 32 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 33 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 34 | }; 35 | 36 | /* SQL function: hamming_weight(bytea) returns int4 */ 37 | PG_FUNCTION_INFO_V1(hamming_weight_bytea); 38 | 39 | /* returns the Hamming weight of a bytea value (number of 1's bits) */ 40 | Datum 41 | hamming_weight_bytea(PG_FUNCTION_ARGS) 42 | { 43 | bytea *v = PG_GETARG_BYTEA_PP(0); 44 | int64 count = 0; 45 | int len, i; 46 | unsigned char* buf; 47 | 48 | len = VARSIZE_ANY_EXHDR(v); /* should be lower than 2^30 */ 49 | buf = (unsigned char*)VARDATA_ANY(v); 50 | for (i=0; i>24)&0xff] + bitcount[(val>>16)&0xff] + 65 | bitcount[(val>>8)&0xff] + bitcount[val&0xff]; 66 | PG_RETURN_INT32(count); 67 | } 68 | 69 | /* SQL function: hamming_weight(int8) returns int4 */ 70 | PG_FUNCTION_INFO_V1(hamming_weight_int8); 71 | 72 | /* returns the Hamming weight of an int4 value (number of 1's bits) */ 73 | Datum 74 | hamming_weight_int8(PG_FUNCTION_ARGS) 75 | { 76 | int64 val = PG_GETARG_INT64(0); 77 | int32 v32 = val & 0xffffffff; 78 | int count = bitcount[(v32>>24)&0xff] + bitcount[(v32>>16)&0xff] + 79 | bitcount[(v32>>8)&0xff] + bitcount[v32&0xff]; 80 | v32 = (val >> 32) & 0xffffffff; 81 | count += bitcount[(v32>>24)&0xff] + bitcount[(v32>>16)&0xff] + 82 | bitcount[(v32>>8)&0xff] + bitcount[v32&0xff]; 83 | PG_RETURN_INT32(count); 84 | } 85 | -------------------------------------------------------------------------------- /hamming_weight/hamming_weight.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Compute the Hamming weight (count of bits set to '1') 3 | for bytea, int4, int8 4 | */ 5 | CREATE FUNCTION hamming_weight(bytea) RETURNS bigint 6 | AS '$libdir/hamming_weight.so', 'hamming_weight_bytea' 7 | LANGUAGE C immutable strict; 8 | 9 | CREATE FUNCTION hamming_weight(int) RETURNS int 10 | AS '$libdir/hamming_weight.so', 'hamming_weight_int4' 11 | LANGUAGE C immutable strict; 12 | 13 | CREATE FUNCTION hamming_weight(bigint) RETURNS int 14 | AS '$libdir/hamming_weight.so', 'hamming_weight_int8' 15 | LANGUAGE C immutable strict; 16 | -------------------------------------------------------------------------------- /large_objects/lo_digest.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Return the digest (hash output) of a large object for 3 | any hash algorithm supported by perl's Digest module. 4 | 5 | Input parameters: 6 | - name of hash as text (see https://perldoc.perl.org/Digest.html) 7 | - OID of large object to digest 8 | - optional chunk size as int. Contents are digested one chunk at a time. 9 | */ 10 | CREATE FUNCTION lo_digest(text, oid, int default 2048) 11 | RETURNS bytea AS 12 | $$ 13 | use Digest; 14 | use strict; 15 | 16 | my $ctxt = Digest->new($_[0]); 17 | my $sz=$_[2]; 18 | elog(ERROR, "Invalid chunk size: $sz") if ($sz<=0); 19 | my $sth = spi_query("SELECT lo_open($_[1], 262144) as fd"); 20 | my $row = spi_fetchrow($sth); 21 | spi_cursor_close($sth); 22 | 23 | if ($row) { 24 | my $fd = $row->{fd}; 25 | my $bytes; 26 | my $plan = spi_prepare("SELECT loread($fd, $sz) as chunk"); 27 | do { 28 | $sth = spi_query_prepared($plan); 29 | $row = spi_fetchrow($sth); 30 | $bytes = decode_bytea($row->{chunk}); 31 | $ctxt->add($bytes); 32 | spi_cursor_close($sth); 33 | } while (length($bytes)>0); 34 | spi_exec_query("select lo_close($fd)"); 35 | spi_freeplan($plan); 36 | } 37 | return encode_bytea($ctxt->digest); 38 | $$ LANGUAGE plperlu; 39 | -------------------------------------------------------------------------------- /large_objects/lo_size.sql: -------------------------------------------------------------------------------- 1 | /* Return the size (bigint) of the large object passed as parameter */ 2 | CREATE FUNCTION lo_size(oid) RETURNS bigint 3 | AS $$ 4 | DECLARE 5 | fd integer; 6 | sz bigint; 7 | BEGIN 8 | fd := lo_open($1, 262144); -- INV_READ 9 | if (fd < 0) then 10 | raise exception 'Failed to open large object %', $1; 11 | end if; 12 | sz := lo_lseek64(fd, 0, 2); 13 | if (lo_close(fd) <> 0) then 14 | raise exception 'Failed to close large object %', $1; 15 | end if; 16 | return sz; 17 | END; 18 | $$ LANGUAGE plpgsql VOLATILE; 19 | -------------------------------------------------------------------------------- /pivots/dynamic-pivot-create.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Create a table or view for the pivoted results of a query 3 | passed as the 1st parameter, with sorted headers passed as a query 4 | as the 2nd parameter. 5 | 3rd parameter: name of the table or view 6 | 4th parameter: object type: 7 | 'v': view, 'tv': temp view, 't': table, 'tt': temp table, 'm': materialized view 8 | 9 | See https://postgresql.verite.pro/blog/2018/06/19/crosstab-pivot.html 10 | for a lot of context about this function. 11 | 12 | Example usage: 13 | CREATE TABLE tmp1 (row_id, key, val) AS 14 | VALUES 15 | (1, 'a', 1), 16 | (1, 'b', 2), 17 | (2, 'a', 3), 18 | (2, 'b', 4); 19 | 20 | SELECT dynamic_pivot_create( 21 | 'select row_id, key, val from tmp1', 22 | 'select distinct key from tmp1', 23 | 'm_tmp1', 'mv'); 24 | 25 | SELECT * FROM m_tmp1; 26 | */ 27 | CREATE OR REPLACE FUNCTION dynamic_pivot_create(central_query text, 28 | headers_query text, 29 | obj_name text, 30 | obj_type text default 'tv') 31 | RETURNS void AS 32 | $$ 33 | DECLARE 34 | left_column text; 35 | header_column text; 36 | value_column text; 37 | h_value text; 38 | headers_clause text; 39 | query text; 40 | j json; 41 | r record; 42 | i int:=1; 43 | BEGIN 44 | -- find the column names of the source query 45 | EXECUTE 'select row_to_json(_r.*) from (' || central_query || ') AS _r' into j; 46 | FOR r in SELECT * FROM json_each_text(j) 47 | LOOP 48 | IF (i=1) THEN left_column := r.key; 49 | ELSEIF (i=2) THEN header_column := r.key; 50 | ELSEIF (i=3) THEN value_column := r.key; 51 | END IF; 52 | i := i+1; 53 | END LOOP; 54 | 55 | -- build the dynamic transposition query, based on the canonical model 56 | -- (CASE WHEN...) 57 | FOR h_value in EXECUTE headers_query 58 | LOOP 59 | headers_clause := concat(headers_clause, 60 | format(chr(10)||',min(case when %I=%L then %I::text end) as %I', 61 | header_column, 62 | h_value, 63 | value_column, 64 | h_value )); 65 | END LOOP; 66 | 67 | query := format('CREATE %s %I AS SELECT %I %s FROM (select *,row_number() over() as rn from (%s) AS _c) as _d GROUP BY %I order by min(rn)', 68 | case obj_type 69 | when 't' then 'TABLE' 70 | when 'tt' then 'TEMP TABLE' 71 | when 'v' then 'VIEW' 72 | when 'tv' then 'TEMP VIEW' 73 | when 'mv' then 'MATERIALIZED VIEW' 74 | else 'VIEW' 75 | end, 76 | obj_name, 77 | left_column, 78 | headers_clause, 79 | central_query, 80 | left_column); 81 | 82 | -- RAISE NOTICE '%', query; 83 | EXECUTE query; 84 | 85 | END 86 | $$ LANGUAGE plpgsql; 87 | -------------------------------------------------------------------------------- /pivots/dynamic-pivot-refcursor.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Return a CURSOR pointing to pivoted results of a query 3 | passed as the 1st parameter, with sorted headers passed as a query 4 | as the 2nd parameter. 5 | The cursor name can be passed as an optional 3rd parameter, or a name 6 | will be automatically assigned. 7 | See https://postgresql.verite.pro/blog/2018/06/19/crosstab-pivot.html 8 | for a lot of context about this function. 9 | 10 | Example usage: 11 | CREATE TABLE tmp1 (row_id, key, val) AS 12 | VALUES 13 | (1, 'a', 1), 14 | (1, 'b', 2), 15 | (2, 'a', 3), 16 | (2, 'b', 4); 17 | 18 | BEGIN; 19 | 20 | SELECT dynamic_pivot( 21 | 'select row_id, key, val from tmp1', 22 | 'select distinct key from tmp1', 23 | 'tmpcur'); 24 | 25 | FETCH ALL FROM "tmpcur"; 26 | 27 | COMMIT; 28 | 29 | */ 30 | CREATE FUNCTION dynamic_pivot(central_query text, 31 | headers_query text, 32 | INOUT cname refcursor default null) 33 | RETURNS refcursor AS 34 | $$ 35 | DECLARE 36 | left_column text; 37 | header_column text; 38 | value_column text; 39 | h_value text; 40 | headers_clause text; 41 | query text; 42 | j json; 43 | r record; 44 | i int:=1; 45 | BEGIN 46 | -- find the column names of the source query 47 | EXECUTE 'select row_to_json(_r.*) from (' || central_query || ') AS _r' into j; 48 | FOR r in SELECT * FROM json_each_text(j) 49 | LOOP 50 | IF (i=1) THEN left_column := r.key; 51 | ELSEIF (i=2) THEN header_column := r.key; 52 | ELSEIF (i=3) THEN value_column := r.key; 53 | END IF; 54 | i := i+1; 55 | END LOOP; 56 | 57 | -- build the dynamic transposition quer, based on the canonical model 58 | -- (CASE WHEN...) 59 | FOR h_value in EXECUTE headers_query 60 | LOOP 61 | headers_clause := concat(headers_clause, 62 | format(chr(10)||',min(case when %I=%L then %I::text end) as %I', 63 | header_column, 64 | h_value, 65 | value_column, 66 | h_value )); 67 | END LOOP; 68 | 69 | query := format('SELECT %I %s FROM (select *,row_number() over() as rn from (%s) AS _c) as _d GROUP BY %I order by min(rn)', 70 | left_column, 71 | headers_clause, 72 | central_query, 73 | left_column); 74 | 75 | -- open the cursor so the caller can FETCH right away. 76 | -- if cname is not null it will be used as the name of the cursor, 77 | -- otherwise a name "" will be generated. 78 | OPEN cname FOR execute query; 79 | END 80 | $$ LANGUAGE plpgsql; 81 | -------------------------------------------------------------------------------- /psql-cli/psql-coproc-functions.sh: -------------------------------------------------------------------------------- 1 | # 2 | # Bash functions to use psql as a coprocess 3 | # 4 | 5 | # Pass psql arguments 6 | function psql_coproc 7 | { 8 | coproc PSQL { psql "$@" ; } 9 | } 10 | 11 | function get_uuid 12 | { 13 | if [ -f /proc/sys/kernel/random/uuid ]; then 14 | cat /proc/sys/kernel/random/uuid # linux-specific 15 | else 16 | uuidgen 17 | fi 18 | } 19 | 20 | function end_marker 21 | { 22 | echo "-- END RESULTS MARK -- $(get_uuid) --" 23 | } 24 | 25 | function psql_check_alive 26 | { 27 | if [[ -z "$PSQL_PID" ]]; then exit 1; fi 28 | } 29 | 30 | # Send one psql command and get back results 31 | function psql_command 32 | { 33 | end=$(end_marker) 34 | 35 | psql_check_alive 36 | echo "$1" >&${PSQL[1]} 37 | echo "\\echo '$end'" >&${PSQL[1]} 38 | 39 | psql_check_alive 40 | while read -r -u ${PSQL[0]} result 41 | do 42 | if [[ $result = $end ]]; then 43 | break 44 | fi 45 | echo $result 46 | done 47 | } 48 | 49 | function psql_quit 50 | { 51 | echo '\q' >&${PSQL[1]} 52 | } 53 | 54 | # Takes a list of queries to run in a transaction 55 | # Retry the entire transaction if a transient error 56 | # occurs 57 | function retriable_transaction 58 | { 59 | while true 60 | do 61 | psql_command "BEGIN;" 62 | for query in "$@" 63 | do 64 | results=$(psql_command "$query") 65 | # check for errors 66 | sqlstate=$(psql_command '\echo :SQLSTATE') 67 | case "$sqlstate" in 68 | 00000) 69 | echo "$results" # output results of $query 70 | ;; 71 | 57014 | 40001 | 40P01) 72 | # Rollback and retry on 73 | # query canceled, or serialization failure, or deadlock 74 | # see https://www.postgresql.org/docs/current/errcodes-appendix.html 75 | psql_command "ROLLBACK;" 76 | continue 2; # restart transaction at first query 77 | ;; 78 | *) 79 | # rollback and stop 80 | err=$(psql_command '\echo :LAST_ERROR_MESSAGE'); 81 | echo 1>&2 "SQL error: $sqlstate $err"; 82 | psql_command "ROLLBACK;" 83 | return 84 | ;; 85 | esac 86 | done 87 | psql_command "COMMIT;" 88 | break; 89 | done 90 | } 91 | -------------------------------------------------------------------------------- /psql-cli/psql-edit-replace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # A custom editor for psql that pre-processes the query string 4 | # to replace "* /* special comment */" with a list of columns. 5 | # The replacement options are: 6 | # * /*expand*/ : this text will be replaced by the list of columns of the query. 7 | # * /*except:col1,col2,...*/: this text will be replaced by the list of columns of the query, except those listed. 8 | # * /*except-type:bytea,jsonb,...*/ : this text will be replaced by the list of columns of the query, except those of the types listed. 9 | 10 | # Example: SELECT * /*expand*/ FROM users JOIN posts USING(user_id) :expand 11 | # (see the macro definition of :expand in psqlrc-for-edit-replace 12 | # or https://postgresql.verite.pro/blog/2022/02/21/psql-hack-select-except.html 13 | # for detailed explanations) 14 | 15 | # The columns are passed in a temporary file pointed to by 16 | # the PSQL_TMP_STRUCT environment variable. 17 | 18 | # Set up PSQL_EDITOR to point to that script. 19 | 20 | 21 | read -r line1 < "$1" 22 | rx='\*\s*/\*(expand|except:|except-type:).*\*/' 23 | if [[ $line1 =~ $rx && -r "$PSQL_TMP_STRUCT" ]]; then 24 | perl - $1 "$PSQL_TMP_STRUCT" << "EOP" 25 | require 5.014; 26 | use Text::CSV qw(csv); 27 | 28 | sub expand { 29 | # filter and format the list of columns 30 | my ($cols,$filter_type,$filter) = @_; 31 | # filter_type => undef:none, 0:by name, 1: by type 32 | my $qi = 1; # quote the columns (for case sensitive names and reserved keywords) 33 | if (defined $filter_type) { 34 | my @xcols = split /,/, $filter; # list of arguments inside the comment 35 | my %xhcols = map { $_=>1 } @xcols; 36 | $cols = [ grep { !defined $xhcols{$_->[$filter_type]} } @{$cols} ]; 37 | } 38 | return join ",\n\t", (map { $qi?('"' . $_->[0]=~ s/"/""/r . '"') : $_->[0]} 39 | @{$cols}); 40 | } 41 | 42 | my $cols = csv(in=>$ARGV[1], headers=>"skip", binary=>1); 43 | open(my $fi, "<", $ARGV[0]) or die "cannot open $ARGV[0]: $!"; 44 | my $lines = <$fi>; # 1st line of query 45 | 46 | my $rx = qr{^(.*)\*\s*/\*expand\*/(.*)$}; 47 | if ($lines =~ $rx) { 48 | # expand to all columns 49 | $lines = "$1" . expand($cols, undef, undef) . "\n$2"; 50 | } 51 | else { 52 | $rx = qr{^(.*)\*\s*/\*except:(.*)\*/(.*)$}; 53 | if ($lines =~ $rx) { 54 | # expand to all columns except those listed 55 | $lines = "$1" . expand($cols, 0, $2) . "\n$3"; 56 | } 57 | else { 58 | $rx = qr{^(.*)\*\s*/\*except-type:(.*)\*/(.*)$}; 59 | if ($lines =~ $rx) { 60 | # expand to all column except for the types listed 61 | $lines = "$1" . expand($cols, 1, $2) . "\n$3"; 62 | } 63 | } 64 | } 65 | # copy the rest of the lines 66 | do { 67 | $lines .= $_; 68 | } while (<$fi>); 69 | close $fi; 70 | # overwrite the file with the new query 71 | open (my $fo, ">", $ARGV[0]) or die "cannot open $ARGV[0] for writing: $!"; 72 | print $fo $lines; 73 | close $fo; 74 | EOP 75 | 76 | # When the replacement in the query buffer occurred, we could 77 | # return into psql at this point rather than going into the actual 78 | # editor. 79 | # But before version 13, psql won't display the modified 80 | # query when returning at this point, so it might seem opaque. 81 | # Let's always call the actual editor, but you may uncomment 82 | # the line below to skip it. 83 | 84 | # rm -f "$PSQL_TMP_STRUCT" ; exit 85 | fi 86 | rm -f "$PSQL_TMP_STRUCT" 87 | ${EDITOR:-vi} $* 88 | -------------------------------------------------------------------------------- /psql-cli/psql-quote-meta.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This function returns its arguments quoted to be safely injected 4 | # into a psql meta-command. 5 | # Note: it does not transform control characters (CR,FF,tab...) 6 | function quote_psql_meta 7 | { 8 | local t=${1//\\/\\\\} 9 | t=${t//\'/\\\'} 10 | echo "'$t'" 11 | } 12 | 13 | -------------------------------------------------------------------------------- /psql-cli/psqlrc-for-edit-replace: -------------------------------------------------------------------------------- 1 | -- psqlrc stanza 2 | -- for psql version 12 and newer 3 | 4 | -- Declare the :expand macro. 5 | -- This macro calls \gdesc with the query in the buffer, puts the 6 | -- result in a csv temporary file, and invoke a custom editor to 7 | -- transform the query buffer using this result and SQL comments 8 | -- inside the query 9 | -- see psql-edit-replace.sh for a custom editor implementation. 10 | 11 | -- EDIT THIS with your preferred path 12 | \setenv PSQL_EDITOR ~/bin/psql-edit-replace.sh 13 | 14 | \set expand ' \\set _tmpstruct `mktemp` \\setenv PSQL_TMP_STRUCT :_tmpstruct \\set QUIET on \\pset format csv \\x off \\pset tuples_only off \\o :_tmpstruct \\gdesc \\o \\pset format aligned \\set QUIET off \\e \\unset _tmpstruct' 15 | -------------------------------------------------------------------------------- /psql-cli/tar-to-pg-copy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Takes a tar file in standard input containing 'tablename.copy' files 4 | # in COPY text format. 5 | # Each file is \copy'ed with psql into 'tablename' 6 | # The import is done in a single transaction and without 7 | # extracting any file to disk 8 | 9 | set -e 10 | 11 | source psql-coproc-functions.sh 12 | 13 | psql_coproc -AtX -v ON_ERROR_STOP=1 --single-transaction 14 | 15 | fifo_names="$(mktemp -u)-$(get_uuid)" 16 | mkfifo -m 0600 $fifo_names 17 | 18 | fifo_contents="$(mktemp -u)-$(get_uuid)" 19 | mkfifo -m 0600 $fifo_contents 20 | 21 | function cleanup 22 | { 23 | rm -f $fifo_names $fifo_contents 24 | } 25 | 26 | trap cleanup EXIT 27 | 28 | end_files=$(end_marker) 29 | 30 | cat | ( 31 | tar --to-command "echo \$TAR_FILENAME >>$fifo_names; cat > $fifo_contents" \ 32 | -xjf - ; 33 | echo "$end_files" >$fifo_names 34 | ) & 35 | 36 | 37 | while read -r copyfilename < $fifo_names; do 38 | if [[ "$copyfilename" = "$end_files" ]]; then 39 | break 40 | else 41 | tablename=${copyfilename%.copy} 42 | echo "Importing $copyfilename into $tablename" 43 | psql_command "\\copy $tablename from $fifo_contents" 44 | fi 45 | done 46 | 47 | psql_check_alive 48 | echo '\q' >&${PSQL[1]} 49 | 50 | # Wait for completion of background processes 51 | wait 52 | -------------------------------------------------------------------------------- /strings/parse-option.sql: -------------------------------------------------------------------------------- 1 | /* 2 | * Parse a GUC as the C function ParseLongOption() from Postgres source 3 | * code in src/backend/utils/misc/guc.c 4 | * Input: string of the form 'name=value' 5 | * Output: name and value as a record. 6 | */ 7 | CREATE OR REPLACE FUNCTION 8 | parse_option(string text, name OUT text, value OUT text) 9 | RETURNS record 10 | as $$ 11 | declare 12 | p int := strpos(string, '='); 13 | begin 14 | if (p > 0) then 15 | name := replace(left(string, p-1), '-', '_'); 16 | value := substr(string, p+1); 17 | else 18 | name := replace(string, '-', '_'); 19 | value := NULL; 20 | end if; 21 | end 22 | $$ language plpgsql immutable; 23 | 24 | /* 25 | * Consider this less readable version if you want it in the 26 | * SQL language (it doesn't seem to be faster in that case). 27 | */ 28 | /* 29 | CREATE OR REPLACE FUNCTION 30 | parse_option(string text, name OUT text, value OUT text) 31 | RETURNS record 32 | as $$ 33 | select 34 | replace(left(string, greatest(strpos($1, '=')-1, 0)), '-', '_'), 35 | substr(string, strpos($1, '=')+1); 36 | $$ language sql immutable; 37 | */ 38 | -------------------------------------------------------------------------------- /strings/plperl/env_vars.sql: -------------------------------------------------------------------------------- 1 | /* plperl functions to get and set environment variables */ 2 | 3 | CREATE FUNCTION getenv(varname text) 4 | RETURNS text 5 | AS $BODY$ 6 | my ($var) = @_; 7 | 8 | $ENV{$var}; 9 | $BODY$ language plperl strict stable; 10 | 11 | CREATE FUNCTION setenv(varname text, value text) 12 | RETURNS text 13 | AS $BODY$ 14 | my ($var,$value) = @_; 15 | 16 | $ENV{$var}=$value; 17 | $BODY$ language plperl strict stable; 18 | -------------------------------------------------------------------------------- /strings/plperl/multi_replace_plperl.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Substitute substrings within a larger string, with Perl s// operator, 3 | in a single pass. Each element in @orig found in @string (scanned left 4 | to right) is replaced by the element at the same index in @repl. 5 | When multiple strings in the array match simultaneously, the longest one 6 | wins. 7 | */ 8 | CREATE OR REPLACE FUNCTION multi_replace(string text, orig text[], repl text[]) 9 | RETURNS text 10 | AS $BODY$ 11 | my ($string, $orig, $repl) = @_; 12 | my %subs; 13 | 14 | # Check that the arrays are of the same size, unidimensional, 15 | # and contain no null values. 16 | if (@$orig != @$repl) { 17 | elog(ERROR, "array sizes mismatch"); 18 | } 19 | if (ref @$orig[0] eq 'ARRAY' || ref @$repl[0] eq 'ARRAY') { 20 | elog(ERROR, "multi-dimensional arrays are not allowed"); 21 | } 22 | if (grep { !defined } (@$orig, @$repl)) { 23 | elog(ERROR, "null elements are not allowed"); 24 | } 25 | 26 | # Each element of $orig is a key in %subs to the element at the same 27 | # index in $repl 28 | @subs{@$orig} = @$repl; 29 | 30 | # Build a regexp of the form (s1|s2|...) 31 | # with the substrings sorted to match longest first 32 | my $re = join "|", map quotemeta, 33 | sort { (length($b) <=> length($a)) } keys %subs; 34 | $re = qr/($re)/; 35 | 36 | # The order will be kept in matching because (from perlre): 37 | # "Alternatives are tried from left to right, so the first alternative 38 | # found for which the entire expression matches, is the one that is 39 | # chosen" 40 | 41 | $string =~ s/$re/$subs{$1}/g; 42 | return $string; 43 | 44 | $BODY$ language plperl strict immutable; 45 | -------------------------------------------------------------------------------- /strings/utf8-truncate.sql: -------------------------------------------------------------------------------- 1 | /* 2 | * Truncate the UTF-8 string argument at the given number of bytes, 3 | * removing any additional bytes at the end that would otherwise 4 | * be part of an incomplete multibyte sequence. 5 | */ 6 | CREATE FUNCTION utf8_truncate(str text, len int) returns text 7 | as $$ 8 | select case when octet_length(str) <= len then str 9 | else ( 10 | with bstr(s) as (select convert_to(str, 'UTF-8')) 11 | select 12 | case 13 | when len>=1 and (get_byte(s, len) & 192) <> 128 14 | then convert_from(substring(s, 1, len), 'UTF-8') 15 | else 16 | case 17 | when len>=2 and (get_byte(s, len-1) & 192) <> 128 18 | then convert_from(substring(s, 1, len-1), 'UTF-8') 19 | else 20 | case 21 | when len>=3 and (get_byte(s, len-2) & 192) <> 128 22 | then convert_from(substring(s, 1, len-2), 'UTF-8') 23 | else 24 | case 25 | when len>=4 and (get_byte(s, len-3) & 192) <> 128 26 | then convert_from(substring(s, 1, len-3), 'UTF-8') 27 | else '' 28 | end 29 | end 30 | end 31 | end 32 | from bstr) 33 | end; 34 | $$ language sql strict immutable parallel safe; 35 | -------------------------------------------------------------------------------- /tsearch/dict_maxlen/Makefile: -------------------------------------------------------------------------------- 1 | EXTENSION = dict_maxlen 2 | EXTVERSION = 1.0 3 | PG_CONFIG = pg_config 4 | 5 | MODULE_big = dict_maxlen 6 | OBJS = dict_maxlen.o 7 | 8 | DATA = $(wildcard *.sql) 9 | 10 | PGXS := $(shell $(PG_CONFIG) --pgxs) 11 | include $(PGXS) 12 | -------------------------------------------------------------------------------- /tsearch/dict_maxlen/README.md: -------------------------------------------------------------------------------- 1 | ## dict_maxlen 2 | 3 | A dictionary (for PostgreSQL [text search engine](https://www.postgresql.org/docs/current/textsearch.html)) that filters out tokens longer than a given length. 4 | 5 | ### Example of use: 6 | 7 | Consider this query for a baseline comparison, using the english built-in 8 | configuration: 9 | 10 | ``` 11 | =# select to_tsvector('english', 'This is a long checksum that will be indexed: 81c3b1eccdb8e1ca7a5475aaf2f362ab3ec2ac4274974a12626d4bf603db4d6f'); 12 | to_tsvector 13 | ------------------------------------------------------------------------------------------------------- 14 | '81c3b1eccdb8e1ca7a5475aaf2f362ab3ec2ac4274974a12626d4bf603db4d6f':10 'checksum':5 'index':9 'long':4 15 | ``` 16 | 17 | Installation of the dictionary, in a new distinct text search configuration: 18 | 19 | ``` 20 | CREATE EXTENSION dict_maxlen; 21 | 22 | CREATE TEXT SEARCH DICTIONARY dictmaxlen ( 23 | TEMPLATE = dictmaxlen_template, 24 | LENGTH = 40 -- or another maximum number of characters 25 | ); 26 | COMMENT ON TEXT SEARCH DICTIONARY dictmaxlen IS 'A dictionary to filter out long lexemes'; 27 | 28 | CREATE TEXT SEARCH CONFIGURATION mytsconf ( COPY = pg_catalog.english ); 29 | 30 | -- Map the dictionary to some of the token types produced by the parser 31 | 32 | ALTER TEXT SEARCH CONFIGURATION mytsconf 33 | ALTER MAPPING FOR asciiword, word 34 | WITH dictmaxlen,english_stem; 35 | 36 | ALTER TEXT SEARCH CONFIGURATION mytsconf 37 | ALTER MAPPING FOR numword 38 | WITH dictmaxlen,simple; 39 | 40 | ``` 41 | 42 | Result with the dictionary installed and configured to filter out tokens longer than 40 characters: 43 | 44 | ``` 45 | 46 | =# select to_tsvector('mytsconf', 'This is a long checksum that will NOT be indexed: 81c3b1eccdb8e1ca7a5475aaf2f362ab3ec2ac4274974a12626d4bf603db4d6f'); 47 | to_tsvector 48 | --------------------------------- 49 | 'checksum':5 'index':10 'long':4 50 | 51 | 52 | ``` 53 | -------------------------------------------------------------------------------- /tsearch/dict_maxlen/dict_maxlen--1.0.sql: -------------------------------------------------------------------------------- 1 | 2 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 3 | \echo Use "CREATE EXTENSION dictmaxlen" to load this file. \quit 4 | 5 | CREATE FUNCTION dictmaxlen_init(internal) 6 | RETURNS internal 7 | AS 'MODULE_PATHNAME' 8 | LANGUAGE C STRICT; 9 | 10 | CREATE FUNCTION dictmaxlen_lexize(internal, internal, internal, internal) 11 | RETURNS internal 12 | AS 'MODULE_PATHNAME' 13 | LANGUAGE C STRICT; 14 | 15 | CREATE TEXT SEARCH TEMPLATE dictmaxlen_template ( 16 | LEXIZE = dictmaxlen_lexize, 17 | INIT = dictmaxlen_init 18 | ); 19 | 20 | /* 21 | Instantiate with: 22 | 23 | CREATE TEXT SEARCH DICTIONARY dictmaxlen ( 24 | TEMPLATE = dictmaxlen_template, 25 | LENGTH = ? 26 | ); 27 | 28 | COMMENT ON TEXT SEARCH DICTIONARY dictmaxlen IS 29 | 'A dictionary to filter out long tokens'; 30 | */ 31 | -------------------------------------------------------------------------------- /tsearch/dict_maxlen/dict_maxlen.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | 3 | #include "commands/defrem.h" 4 | #include "tsearch/ts_utils.h" 5 | #include "mb/pg_wchar.h" 6 | 7 | PG_MODULE_MAGIC; 8 | 9 | PG_FUNCTION_INFO_V1(dictmaxlen_init); 10 | PG_FUNCTION_INFO_V1(dictmaxlen_lexize); 11 | 12 | Datum dictmaxlen_lexize(PG_FUNCTION_ARGS); 13 | Datum dictmaxlen_init(PG_FUNCTION_ARGS); 14 | 15 | typedef struct 16 | { 17 | int maxlen; 18 | } DictMaxLen; 19 | 20 | Datum 21 | dictmaxlen_init(PG_FUNCTION_ARGS) 22 | { 23 | List *options = (List *) PG_GETARG_POINTER(0); 24 | DictMaxLen *d; 25 | ListCell *l; 26 | 27 | d = (DictMaxLen *) palloc0(sizeof(DictMaxLen)); 28 | 29 | d->maxlen = 50; /* default */ 30 | 31 | foreach(l, options) 32 | { 33 | DefElem *defel = (DefElem *) lfirst(l); 34 | 35 | if (strcmp(defel->defname, "length") == 0) 36 | { 37 | d->maxlen = atoi(defGetString(defel)); 38 | } 39 | else 40 | { 41 | ereport(ERROR, 42 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 43 | errmsg("unrecognized dictionary parameter: \"%s\"", 44 | defel->defname))); 45 | } 46 | } 47 | 48 | PG_RETURN_POINTER(d); 49 | } 50 | 51 | Datum 52 | dictmaxlen_lexize(PG_FUNCTION_ARGS) 53 | { 54 | DictMaxLen *d = (DictMaxLen *) PG_GETARG_POINTER(0); 55 | char *token = (char *) PG_GETARG_POINTER(1); 56 | int byte_length = PG_GETARG_INT32(2); 57 | 58 | if (pg_mbstrlen_with_len(token, byte_length) > d->maxlen) 59 | { 60 | /* If the word is longer than our max length, return an empty 61 | * lexeme */ 62 | TSLexeme *res = palloc0(sizeof(TSLexeme)); 63 | /* res[0].lexeme = NULL; */ /* implied by palloc0() */ 64 | PG_RETURN_POINTER(res); 65 | } 66 | else 67 | { 68 | /* If the word is short, pass it unmodified */ 69 | PG_RETURN_POINTER(NULL); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /tsearch/dict_maxlen/dict_maxlen.control: -------------------------------------------------------------------------------- 1 | # dict_maxlen extension 2 | comment = 'text search template for a dictionary filtering out long words' 3 | default_version = '1.0' 4 | module_pathname = '$libdir/dict_maxlen' 5 | relocatable = true 6 | --------------------------------------------------------------------------------