├── .gitignore ├── tsexact.control ├── Makefile ├── tsexact--1.0.sql ├── sql └── tsexact.sql ├── expected └── tsexact.out ├── README.md └── tsexact.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.o 3 | regression.diffs 4 | regression.out 5 | results 6 | -------------------------------------------------------------------------------- /tsexact.control: -------------------------------------------------------------------------------- 1 | # tsexact extension 2 | comment = 'full text search exact match function' 3 | default_version = '1.0' 4 | module_pathname = '$libdir/tsexact' 5 | relocatable = true 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # tsexact/Makefile 2 | 3 | MODULE_big = tsexact 4 | OBJS = tsexact.o 5 | EXTENSION = tsexact 6 | DATA = tsexact--1.0.sql 7 | REGRESS = tsexact 8 | 9 | PG_CONFIG = pg_config 10 | PGXS := $(shell $(PG_CONFIG) --pgxs) 11 | include $(PGXS) 12 | 13 | -------------------------------------------------------------------------------- /tsexact--1.0.sql: -------------------------------------------------------------------------------- 1 | /* tsexact/tsexact--1.0.sql */ 2 | 3 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 4 | \echo Use "CREATE EXTENSION tsexact" to load this file. \quit 5 | 6 | -- 7 | -- PostgreSQL code for TSEXACT. 
8 | -- 9 | 10 | CREATE FUNCTION ts_exact_match(tsvector, tsvector) 11 | RETURNS bool 12 | AS 'MODULE_PATHNAME' 13 | LANGUAGE C IMMUTABLE STRICT; 14 | 15 | CREATE FUNCTION ts_exact_match(tsvector, tsvector, text) 16 | RETURNS bool 17 | AS 'MODULE_PATHNAME' 18 | LANGUAGE C IMMUTABLE STRICT; 19 | 20 | CREATE FUNCTION ts_squeeze(tsvector) 21 | RETURNS tsvector 22 | AS 'MODULE_PATHNAME' 23 | LANGUAGE C IMMUTABLE STRICT; 24 | 25 | CREATE FUNCTION setweight(tsquery, text) 26 | RETURNS tsquery 27 | AS 'MODULE_PATHNAME', 'setweight_tsquery' 28 | LANGUAGE C IMMUTABLE STRICT; 29 | 30 | CREATE FUNCTION poslen(tsvector) 31 | RETURNS int 32 | AS 'MODULE_PATHNAME', 'poslen' 33 | LANGUAGE C IMMUTABLE STRICT; 34 | -------------------------------------------------------------------------------- /sql/tsexact.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION tsexact; 2 | 3 | SELECT ts_exact_match('a:2 b:3 c:5'::tsvector, 'a:1 b:2 c:4'::tsvector); 4 | SELECT ts_exact_match('a:2 b:3 c:4'::tsvector, 'a:1 b:2 c:4'::tsvector); 5 | SELECT ts_exact_match('a:2 b:3 c:4'::tsvector, 'a:1 b:2 c:3'::tsvector); 6 | SELECT ts_exact_match('a:1,2,6,7,8 b:3,4,5,9,10'::tsvector, 'a:1,2,3 b:4,5'::tsvector); 7 | SELECT ts_exact_match('a:1,2,6,7 b:3,4,5,8,9'::tsvector, 'a:1,2,3 b:4,5'::tsvector); 8 | 9 | SELECT ts_exact_match('a:1 b:2'::tsvector, 'a:1 b:2 c:2'::tsvector); 10 | SELECT ts_exact_match('a:1 c:2'::tsvector, 'a:1 b:2 c:2'::tsvector); 11 | SELECT ts_exact_match('a:1 d:2'::tsvector, 'a:1 b:2 c:2'::tsvector); 12 | SELECT ts_exact_match('a:1 b:2 c:2'::tsvector, 'a:1 b:2 c:2'::tsvector); 13 | SELECT ts_exact_match(''::tsvector, ''::tsvector); 14 | SELECT ts_exact_match('a:1'::tsvector, ''::tsvector); 15 | SELECT ts_exact_match(''::tsvector, 'a:1'::tsvector); 16 | 17 | SELECT ts_exact_match('a:2A b:3B c:5C'::tsvector, 'a:1 b:2 c:4'::tsvector, 'ABC'); 18 | SELECT ts_exact_match('a:2A b:3B c:5'::tsvector, 'a:1 b:2 c:4'::tsvector, 'ABC'); 19 | 
SELECT ts_exact_match('a:2 b:3 c:5'::tsvector, 'a:1A b:2B c:4C'::tsvector, 'D'); 20 | SELECT ts_exact_match('a:1A,4C b:2B,5 c:3A,6C'::tsvector, 'a:1A b:2B c:3C'::tsvector, 'CD'); 21 | SELECT ts_exact_match('a:1A,4C b:2C,5 c:3A,6B'::tsvector, 'a:1A b:2B c:3C'::tsvector, 'CD'); 22 | 23 | SELECT ts_squeeze('a:1,6 b:2,9 c:4'::tsvector); 24 | SELECT ts_squeeze('a:2,10 b:5,6 c:8 d:12'::tsvector); 25 | SELECT ts_squeeze('a:2A,10 b:5B,6C c:8 d:12A'::tsvector); 26 | 27 | SELECT setweight('a & b'::tsquery, 'A'); 28 | SELECT setweight('a:A & (b:B | c:C)'::tsquery, 'CD'); 29 | SELECT setweight('a:B | b:AD'::tsquery, ''); 30 | 31 | SELECT poslen('a:1 b:2'::tsvector); 32 | SELECT poslen('a:2A,10 b:5B,6C c:8 d:12A'::tsvector); 33 | SELECT poslen('a:1,2,6,7,8 b:3,4,5,9,10'::tsvector); 34 | -------------------------------------------------------------------------------- /expected/tsexact.out: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION tsexact; 2 | SELECT ts_exact_match('a:2 b:3 c:5'::tsvector, 'a:1 b:2 c:4'::tsvector); 3 | ts_exact_match 4 | ---------------- 5 | t 6 | (1 row) 7 | 8 | SELECT ts_exact_match('a:2 b:3 c:4'::tsvector, 'a:1 b:2 c:4'::tsvector); 9 | ts_exact_match 10 | ---------------- 11 | f 12 | (1 row) 13 | 14 | SELECT ts_exact_match('a:2 b:3 c:4'::tsvector, 'a:1 b:2 c:3'::tsvector); 15 | ts_exact_match 16 | ---------------- 17 | t 18 | (1 row) 19 | 20 | SELECT ts_exact_match('a:1,2,6,7,8 b:3,4,5,9,10'::tsvector, 'a:1,2,3 b:4,5'::tsvector); 21 | ts_exact_match 22 | ---------------- 23 | t 24 | (1 row) 25 | 26 | SELECT ts_exact_match('a:1,2,6,7 b:3,4,5,8,9'::tsvector, 'a:1,2,3 b:4,5'::tsvector); 27 | ts_exact_match 28 | ---------------- 29 | f 30 | (1 row) 31 | 32 | SELECT ts_exact_match('a:1 b:2'::tsvector, 'a:1 b:2 c:2'::tsvector); 33 | ts_exact_match 34 | ---------------- 35 | t 36 | (1 row) 37 | 38 | SELECT ts_exact_match('a:1 c:2'::tsvector, 'a:1 b:2 c:2'::tsvector); 39 | ts_exact_match 40 | 
---------------- 41 | t 42 | (1 row) 43 | 44 | SELECT ts_exact_match('a:1 d:2'::tsvector, 'a:1 b:2 c:2'::tsvector); 45 | ts_exact_match 46 | ---------------- 47 | f 48 | (1 row) 49 | 50 | SELECT ts_exact_match('a:1 b:2 c:2'::tsvector, 'a:1 b:2 c:2'::tsvector); 51 | ts_exact_match 52 | ---------------- 53 | t 54 | (1 row) 55 | 56 | SELECT ts_exact_match(''::tsvector, ''::tsvector); 57 | ts_exact_match 58 | ---------------- 59 | t 60 | (1 row) 61 | 62 | SELECT ts_exact_match('a:1'::tsvector, ''::tsvector); 63 | ts_exact_match 64 | ---------------- 65 | t 66 | (1 row) 67 | 68 | SELECT ts_exact_match(''::tsvector, 'a:1'::tsvector); 69 | ts_exact_match 70 | ---------------- 71 | f 72 | (1 row) 73 | 74 | SELECT ts_exact_match('a:2A b:3B c:5C'::tsvector, 'a:1 b:2 c:4'::tsvector, 'ABC'); 75 | ts_exact_match 76 | ---------------- 77 | t 78 | (1 row) 79 | 80 | SELECT ts_exact_match('a:2A b:3B c:5'::tsvector, 'a:1 b:2 c:4'::tsvector, 'ABC'); 81 | ts_exact_match 82 | ---------------- 83 | f 84 | (1 row) 85 | 86 | SELECT ts_exact_match('a:2 b:3 c:5'::tsvector, 'a:1A b:2B c:4C'::tsvector, 'D'); 87 | ts_exact_match 88 | ---------------- 89 | t 90 | (1 row) 91 | 92 | SELECT ts_exact_match('a:1A,4C b:2B,5 c:3A,6C'::tsvector, 'a:1A b:2B c:3C'::tsvector, 'CD'); 93 | ts_exact_match 94 | ---------------- 95 | t 96 | (1 row) 97 | 98 | SELECT ts_exact_match('a:1A,4C b:2C,5 c:3A,6B'::tsvector, 'a:1A b:2B c:3C'::tsvector, 'CD'); 99 | ts_exact_match 100 | ---------------- 101 | f 102 | (1 row) 103 | 104 | SELECT ts_squeeze('a:1,6 b:2,9 c:4'::tsvector); 105 | ts_squeeze 106 | ----------------------- 107 | 'a':1,4 'b':2,5 'c':3 108 | (1 row) 109 | 110 | SELECT ts_squeeze('a:2,10 b:5,6 c:8 d:12'::tsvector); 111 | ts_squeeze 112 | ----------------------------- 113 | 'a':1,5 'b':2,3 'c':4 'd':6 114 | (1 row) 115 | 116 | SELECT ts_squeeze('a:2A,10 b:5B,6C c:8 d:12A'::tsvector); 117 | ts_squeeze 118 | --------------------------------- 119 | 'a':1A,5 'b':2B,3C 'c':4 'd':6A 120 | (1 row) 121 | 122 | 
SELECT setweight('a & b'::tsquery, 'A'); 123 | setweight 124 | --------------- 125 | 'a':A & 'b':A 126 | (1 row) 127 | 128 | SELECT setweight('a:A & (b:B | c:C)'::tsquery, 'CD'); 129 | setweight 130 | ------------------------------ 131 | 'a':CD & ( 'b':CD | 'c':CD ) 132 | (1 row) 133 | 134 | SELECT setweight('a:B | b:AD'::tsquery, ''); 135 | setweight 136 | ----------- 137 | 'a' | 'b' 138 | (1 row) 139 | 140 | SELECT poslen('a:1 b:2'::tsvector); 141 | poslen 142 | -------- 143 | 2 144 | (1 row) 145 | 146 | SELECT poslen('a:2A,10 b:5B,6C c:8 d:12A'::tsvector); 147 | poslen 148 | -------- 149 | 6 150 | (1 row) 151 | 152 | SELECT poslen('a:1,2,6,7,8 b:3,4,5,9,10'::tsvector); 153 | poslen 154 | -------- 155 | 10 156 | (1 row) 157 | 158 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | TSExact – PostgreSQL fulltext search addon 2 | ========================================== 3 | 4 | Introduction 5 | ------------ 6 | 7 | TSExact – is a PostgreSQL extension with various helper function for fulltext 8 | search. Basically TSExact contains functions which emulate phrase search on 9 | PostgreSQL versions 9.5 and lower. If you're using PostgreSQL 9.6 and higher 10 | you should consider using builtin phrase search rather than TSExact. 11 | 12 | 13 | Authors 14 | ------- 15 | 16 | * Alexander Korotkov , Postgres Professional, Moscow, Russia 17 | 18 | Availability 19 | ------------ 20 | 21 | TSExact is released as an extension and not available in default PostgreSQL 22 | installation. It is available from 23 | [github](https://github.com/postgrespro/tsexact) 24 | under the same license as 25 | [PostgreSQL](http://www.postgresql.org/about/licence/) 26 | and supports PostgreSQL 9.0+. 27 | 28 | Installation 29 | ------------ 30 | 31 | Before build and install TSExact you should ensure following: 32 | 33 | * PostgreSQL version is 9.0 or higher. 
34 | * You have the PostgreSQL development package installed, or you built 35 | PostgreSQL from source. 36 | * Your PATH variable is configured so that the pg\_config command is available. 37 | 38 | A typical installation procedure may look like this: 39 | 40 | $ git clone https://github.com/postgrespro/tsexact.git 41 | $ cd tsexact 42 | $ make USE_PGXS=1 43 | $ sudo make USE_PGXS=1 install 44 | $ make USE_PGXS=1 installcheck 45 | $ psql DB -c "CREATE EXTENSION tsexact;" 46 | 47 | Usage 48 | ----- 49 | 50 | TSExact offers various helper functions, which are listed in the table below. In particular, these functions can be used for simple fulltext search. 51 | 52 | | Function | Return type | Description | 53 | | ----------------------------------------------------------------- | ----------- | ---------------------------------------------------------- | 54 | | ts_exact_match(document tsvector, fragment tsvector) | bool | Check if given fragment is present in document | 55 | | ts_exact_match(document tsvector, fragment tsvector, weight text) | bool | Check if given fragment is present in document with weight | 56 | | ts_squeeze(document tsvector) | tsvector | Remove empty positions from document | 57 | | setweight(query tsquery, weight text) | tsquery | Assign weight for each lexeme in tsquery | 58 | | poslen(document tsvector) | integer | Return total number of positions in document | 59 | 60 | The `ts_exact_match(tsvector, tsvector)` function checks whether the given fragment appears in the given document at some offset. 61 | 62 | # SELECT ts_exact_match('cat:3 fat:2 sad:4'::tsvector, 'cat:2 fat:1 sad:4'::tsvector); 63 | ts_exact_match 64 | ---------------- 65 | f 66 | (1 row) 67 | 68 | # SELECT ts_exact_match('cat:3 fat:2 sad:5'::tsvector, 'cat:2 fat:1 sad:4'::tsvector); 69 | ts_exact_match 70 | ---------------- 71 | t 72 | (1 row) 73 | 74 | `ts_exact_match(tsvector, tsvector)` ignores lexeme weights. 
`ts_exact_match(tsvector, tsvector, text)` only matches document positions that carry one of the given weights. Weights in the fragment are always ignored. 75 | 76 | # SELECT ts_exact_match('cat:3 fat:2 sad:5'::tsvector, 'cat:2 fat:1 sad:4'::tsvector, 'ABC'); 77 | ts_exact_match 78 | ---------------- 79 | f 80 | (1 row) 81 | 82 | # SELECT ts_exact_match('cat:3A fat:2B sad:5C'::tsvector, 'cat:2 fat:1 sad:4'::tsvector, 'ABC'); 83 | ts_exact_match 84 | ---------------- 85 | t 86 | (1 row) 87 | 88 | Since tsvectors can contain gaps in position numbering, it is advisable to remove those gaps using `ts_squeeze(tsvector)`. 89 | 90 | # SELECT ts_squeeze('cat:2,9 fat:1,6 sad:4'::tsvector); 91 | ts_squeeze 92 | ----------------------------- 93 | 'cat':2,5 'fat':1,4 'sad':3 94 | (1 row) 95 | 96 | Fulltext search indexes don't support the `ts_exact_match()` functions. Thus, it's useful to combine `ts_exact_match()` with a `tsvector @@ tsquery` expression in order to use indexed search. A complete example of phrase search may look as follows. 97 | 98 | -- Calculate tsvector using ts_squeeze() function in order to remove gaps in 99 | -- lexeme offsets. 100 | UPDATE tt SET ti = 101 | ts_squeeze( 102 | setweight(to_tsvector(coalesce(title,'')), 'A') || 103 | setweight(to_tsvector(coalesce(keyword,'')), 'B') || 104 | setweight(to_tsvector(coalesce(abstract,'')), 'C') || 105 | setweight(to_tsvector(coalesce(body,'')), 'D')); 106 | 107 | -- Search for phrase. The "tsvector @@ tsquery" operator is used for the indexed search, 108 | -- ts_exact_match() function is used to recheck an exact phrase match. 109 | SELECT * 110 | FROM tt 111 | WHERE tt.ti @@ plainto_tsquery('fat rat') AND 112 | ts_exact_match(tt.ti, ts_squeeze(to_tsvector('fat rat'))); 113 | 114 | 115 | `setweight(tsquery, text)` assigns the given weight to each lexeme of the tsquery. 
116 | 117 | # SELECT setweight('fat:A & (cat:B | rat:C)'::tsquery, 'CD'); 118 | setweight 119 | ------------------------------------ 120 | 'fat':CD & ( 'cat':CD | 'rat':CD ) 121 | (1 row) 122 | 123 | `poslen(tsvector)` returns total number of lexeme positions in tsvector. 124 | 125 | # SELECT poslen('cat:3,4,5,9,10 fat:1,2,6,7,8'::tsvector); 126 | poslen 127 | -------- 128 | 10 129 | (1 row) 130 | -------------------------------------------------------------------------------- /tsexact.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | 3 | #include "c.h" 4 | #include "fmgr.h" 5 | #include "tsearch/ts_cache.h" 6 | #include "tsearch/ts_type.h" 7 | #include "tsearch/ts_utils.h" 8 | #include "utils/memutils.h" 9 | 10 | PG_MODULE_MAGIC; 11 | 12 | PG_FUNCTION_INFO_V1(ts_exact_match); 13 | Datum ts_exact_match(PG_FUNCTION_ARGS); 14 | PG_FUNCTION_INFO_V1(ts_squeeze); 15 | Datum ts_squeeze(PG_FUNCTION_ARGS); 16 | PG_FUNCTION_INFO_V1(setweight_tsquery); 17 | Datum setweight_tsquery(PG_FUNCTION_ARGS); 18 | PG_FUNCTION_INFO_V1(poslen); 19 | Datum poslen(PG_FUNCTION_ARGS); 20 | 21 | 22 | typedef struct 23 | { 24 | WordEntry *arrb; 25 | WordEntry *arre; 26 | char *arrvalues; 27 | char *queryvalues; 28 | } CHKVAL; 29 | 30 | /* 31 | * is there value 'val' in array or not ? 
32 | */ 33 | static WordEntry * 34 | checkcondition_str(CHKVAL *chkval, WordEntry *val) 35 | { 36 | WordEntry *StopLow = chkval->arrb; 37 | WordEntry *StopHigh = chkval->arre; 38 | WordEntry *StopMiddle = StopHigh; 39 | int difference = -1; 40 | bool res = false; 41 | 42 | /* Loop invariant: StopLow <= val < StopHigh */ 43 | while (StopLow < StopHigh) 44 | { 45 | StopMiddle = StopLow + (StopHigh - StopLow) / 2; 46 | difference = tsCompareString(chkval->queryvalues + val->pos, val->len, 47 | chkval->arrvalues + StopMiddle->pos, StopMiddle->len, 48 | false); 49 | 50 | if (difference == 0) 51 | { 52 | res = true; 53 | break; 54 | } 55 | else if (difference > 0) 56 | StopLow = StopMiddle + 1; 57 | else 58 | StopHigh = StopMiddle; 59 | } 60 | 61 | if (res) 62 | return StopMiddle; 63 | else 64 | return NULL; 65 | } 66 | 67 | typedef struct 68 | { 69 | WordEntry *we; 70 | WordEntryPos *pos; 71 | int len, index; 72 | } OperandInfo; 73 | 74 | TSVector cachedQuery = NULL; 75 | OperandInfo *cachedOpInfo = NULL; 76 | int cachedOpInfoLen = 0; 77 | 78 | static int 79 | operandInfoCmp(const void *a1, const void *a2) 80 | { 81 | const OperandInfo *o1 = (const OperandInfo *)a1; 82 | const OperandInfo *o2 = (const OperandInfo *)a2; 83 | 84 | if (o1->index < o2->index) 85 | return -1; 86 | else if (o1->index == o2->index) 87 | return 0; 88 | else 89 | return 1; 90 | } 91 | 92 | /* 93 | * Convert weight mask into binary representation 94 | */ 95 | static uint8 96 | getWeightMask(text *weight) 97 | { 98 | uint8 weightMask = 0; 99 | char *w, *we; 100 | 101 | weightMask = 0; 102 | w = VARDATA_ANY(weight); 103 | we = w + VARSIZE_ANY_EXHDR(weight); 104 | 105 | while (w < we) 106 | { 107 | switch (*w) 108 | { 109 | case 'A': 110 | case 'a': 111 | weightMask |= (1 << 3); 112 | break; 113 | case 'B': 114 | case 'b': 115 | weightMask |= (1 << 2); 116 | break; 117 | case 'C': 118 | case 'c': 119 | weightMask |= (1 << 1); 120 | break; 121 | case 'D': 122 | case 'd': 123 | weightMask |= (1 << 0); 
124 | break; 125 | default: 126 | /* internal error */ 127 | elog(ERROR, "unrecognized weight: %d", *w); 128 | } 129 | w++; 130 | } 131 | return weightMask; 132 | } 133 | 134 | /* 135 | * Invalidate cache of query tsvector 136 | */ 137 | static void 138 | invalidateCache(TSVector query) 139 | { 140 | int len, i, j, k; 141 | OperandInfo *opInfo; 142 | WordEntry *we; 143 | 144 | if (cachedQuery && VARSIZE_ANY(query) == VARSIZE_ANY(cachedQuery) && 145 | !memcmp(query, cachedQuery, VARSIZE_ANY(query))) 146 | return; 147 | 148 | if (cachedQuery) 149 | free(cachedQuery); 150 | if (cachedOpInfo) 151 | free(cachedOpInfo); 152 | 153 | cachedQuery = (TSVector)malloc(VARSIZE_ANY(query)); 154 | memcpy(cachedQuery, query, VARSIZE_ANY(query)); 155 | 156 | we = ARRPTR(cachedQuery); 157 | 158 | cachedOpInfoLen = 0; 159 | for (i = 0; i < cachedQuery->size; i++) 160 | cachedOpInfoLen += POSDATALEN(cachedQuery, &we[i]); 161 | 162 | k = 0; 163 | opInfo = (OperandInfo *)malloc(sizeof(OperandInfo) * cachedOpInfoLen); 164 | for (i = 0; i < cachedQuery->size; i++) 165 | { 166 | WordEntryPos *pos = POSDATAPTR(cachedQuery, &we[i]); 167 | len = POSDATALEN(cachedQuery, &we[i]); 168 | for (j = 0; j < len; j++) 169 | { 170 | opInfo[k].we = &we[i]; 171 | opInfo[k].index = WEP_GETPOS(*pos); 172 | k++; pos++; 173 | } 174 | } 175 | qsort(opInfo, cachedOpInfoLen, sizeof(OperandInfo), operandInfoCmp); 176 | cachedOpInfo = opInfo; 177 | } 178 | 179 | /* 180 | * Checks if tsvector contains another tsvector 181 | */ 182 | Datum 183 | ts_exact_match(PG_FUNCTION_ARGS) 184 | { 185 | TSVector val = PG_GETARG_TSVECTOR(0); 186 | TSVector query = PG_GETARG_TSVECTOR(1); 187 | CHKVAL chkval; 188 | int i; 189 | uint8 weightMask; 190 | OperandInfo *opInfo; 191 | bool notFound = false; 192 | 193 | if (PG_NARGS() >= 3) 194 | weightMask = getWeightMask(PG_GETARG_TEXT_PP(2)); 195 | else 196 | weightMask = 0xF; 197 | 198 | invalidateCache(query); 199 | 200 | if (cachedOpInfoLen == 0) 201 | PG_RETURN_BOOL(true); 202 | 
203 | /* Find all lexemes of query tsvector */ 204 | 205 | chkval.arrb = ARRPTR(val); 206 | chkval.arre = chkval.arrb + val->size; 207 | chkval.arrvalues = STRPTR(val); 208 | chkval.queryvalues = STRPTR(cachedQuery); 209 | 210 | opInfo = cachedOpInfo; 211 | 212 | for (i = 0; i < cachedOpInfoLen; i++) 213 | { 214 | WordEntry *we; 215 | we = checkcondition_str(&chkval, opInfo[i].we); 216 | if (!we) 217 | { 218 | if (i == 0 || opInfo[i].index != opInfo[i - 1].index) 219 | notFound = true; 220 | if (notFound && (i == cachedOpInfoLen - 1 || opInfo[i].index != opInfo[i + 1].index)) 221 | PG_RETURN_BOOL(false); 222 | opInfo[i].pos = NULL; 223 | opInfo[i].len = 0; 224 | } 225 | else 226 | { 227 | notFound = false; 228 | opInfo[i].pos = POSDATAPTR(val, we); 229 | opInfo[i].len = POSDATALEN(val, we); 230 | } 231 | } 232 | 233 | /* Check lexemes have same order as in query */ 234 | 235 | while (opInfo[0].len > 0) 236 | { 237 | int offset = WEP_GETPOS(*opInfo[0].pos) - opInfo[0].index; 238 | notFound = false; 239 | 240 | if (!(weightMask & (1 << WEP_GETWEIGHT(*opInfo[0].pos)))) 241 | { 242 | opInfo[0].pos++; 243 | opInfo[0].len--; 244 | continue; 245 | } 246 | 247 | for (i = 0; i < cachedOpInfoLen; i++) 248 | { 249 | while (opInfo[i].len > 0 && WEP_GETPOS(*opInfo[i].pos) < offset + opInfo[i].index) 250 | { 251 | opInfo[i].pos++; 252 | opInfo[i].len--; 253 | } 254 | 255 | if (opInfo[i].len <= 0) 256 | { 257 | /* No more WEPs */ 258 | if (i == 0 || opInfo[i].index != opInfo[i - 1].index) 259 | notFound = true; 260 | if (notFound && (i == cachedOpInfoLen - 1 || opInfo[i].index != opInfo[i + 1].index)) 261 | PG_RETURN_BOOL(false); 262 | continue; 263 | } 264 | else 265 | { 266 | notFound = false; 267 | } 268 | 269 | if (WEP_GETPOS(*opInfo[i].pos) > offset + opInfo[i].index || !(weightMask & (1 << WEP_GETWEIGHT(*opInfo[i].pos)))) 270 | { 271 | /* No match */ 272 | if (i < cachedOpInfoLen - 1 && opInfo[i].index == opInfo[i + 1].index) 273 | continue; 274 | else 275 | break; 276 | } 
277 | else 278 | { 279 | /* Match: skip same offsets*/ 280 | while (i < cachedOpInfoLen - 1 && opInfo[i].index == opInfo[i + 1].index) 281 | i++; 282 | } 283 | } 284 | 285 | if (i == cachedOpInfoLen) 286 | { 287 | PG_RETURN_BOOL(true); 288 | } 289 | 290 | opInfo[0].pos++; 291 | opInfo[0].len--; 292 | } 293 | 294 | PG_RETURN_BOOL(false); 295 | } 296 | 297 | /* 298 | * Compare WEPs: position first, weight second. 299 | */ 300 | static int 301 | cmpPos(const void *a1, const void *a2) 302 | { 303 | const WordEntryPos **pos1, **pos2; 304 | uint16 w1, w2, p1, p2; 305 | 306 | pos1 = (const WordEntryPos **)a1; 307 | pos2 = (const WordEntryPos **)a2; 308 | 309 | p1 = WEP_GETPOS(**pos1); 310 | p2 = WEP_GETPOS(**pos2); 311 | 312 | if (p1 < p2) 313 | return -1; 314 | else if (p1 > p2) 315 | return 1; 316 | 317 | w1 = WEP_GETWEIGHT(**pos1); 318 | w2 = WEP_GETWEIGHT(**pos2); 319 | 320 | if (w1 < w2) 321 | return -1; 322 | else if (w1 == w2) 323 | return 0; 324 | else 325 | return 1; 326 | } 327 | 328 | /* 329 | * Calculate total length of positions 330 | */ 331 | static int 332 | getTotalPosLen(TSVector val) 333 | { 334 | int i, len = 0; 335 | WordEntry *we = ARRPTR(val); 336 | 337 | for (i = 0; i < val->size; i++) 338 | len += POSDATALEN(val, &we[i]); 339 | 340 | return len; 341 | } 342 | 343 | /* 344 | * Remove unused offsets from tsvector 345 | */ 346 | Datum 347 | ts_squeeze(PG_FUNCTION_ARGS) 348 | { 349 | TSVector val = PG_GETARG_TSVECTOR_COPY(0); 350 | WordEntry *we = ARRPTR(val); 351 | WordEntryPos **pos; 352 | int i, j, k, len = getTotalPosLen(val); 353 | uint16 p, prev_p; 354 | 355 | /* Put pointers to all WEPs into single array */ 356 | pos = (WordEntryPos **)palloc(sizeof(WordEntryPos *) * len); 357 | k = 0; 358 | for (i = 0; i < val->size; i++) 359 | { 360 | for (j = 0; j < POSDATALEN(val, &we[i]); j++) 361 | { 362 | pos[k] = POSDATAPTR(val, &we[i]) + j; 363 | k++; 364 | } 365 | } 366 | 367 | /* Sort WEPs */ 368 | qsort(pos, len, sizeof(WordEntryPos *), cmpPos); 369 
| 370 | /* Make positions ascending with step 1 */ 371 | p = 0; prev_p = 0; 372 | for (i = 0; i < len; i++) 373 | { 374 | if (WEP_GETPOS(*pos[i]) > prev_p) 375 | { 376 | p++; 377 | prev_p = WEP_GETPOS(*pos[i]); 378 | } 379 | WEP_SETPOS(*pos[i], p); 380 | } 381 | 382 | PG_RETURN_TSVECTOR(val); 383 | } 384 | 385 | /* 386 | * SQL-visible function for calculate total length of positions 387 | */ 388 | Datum 389 | poslen(PG_FUNCTION_ARGS) 390 | { 391 | TSVector val = PG_GETARG_TSVECTOR(0); 392 | 393 | PG_RETURN_INT32(getTotalPosLen(val)); 394 | } 395 | 396 | /* 397 | * Set same weights for all lexemes in tsquery 398 | */ 399 | Datum 400 | setweight_tsquery(PG_FUNCTION_ARGS) 401 | { 402 | TSQuery query = PG_GETARG_TSQUERY_COPY(0); 403 | uint8 weightMask; 404 | QueryItem *items; 405 | int i; 406 | 407 | items = GETQUERY(query); 408 | weightMask = getWeightMask(PG_GETARG_TEXT_PP(1)); 409 | for (i = 0; i < query->size; i++) 410 | { 411 | if (items[i].type == QI_VAL) 412 | { 413 | items[i].qoperand.weight = weightMask; 414 | } 415 | } 416 | PG_RETURN_TSQUERY(query); 417 | } 418 | --------------------------------------------------------------------------------