├── .gitignore ├── postgresql.conf ├── shared_ispell.control ├── Makefile ├── META.json ├── LICENSE ├── meson.build ├── sql ├── security.sql └── shared_ispell.sql ├── expected ├── security.out ├── security_1.out └── shared_ispell.out ├── shared_ispell--1.1.0.sql ├── src ├── shared_ispell.h └── shared_ispell.c └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | /log/ 4 | /results/ 5 | /tmp_check/ 6 | -------------------------------------------------------------------------------- /postgresql.conf: -------------------------------------------------------------------------------- 1 | shared_preload_libraries = 'shared_ispell' 2 | shared_ispell.max_size = 1MB 3 | -------------------------------------------------------------------------------- /shared_ispell.control: -------------------------------------------------------------------------------- 1 | # shared ispell dictionary 2 | comment = 'Provides shared ispell dictionaries.' 3 | default_version = '1.1.0' 4 | relocatable = true 5 | 6 | module_pathname = '$libdir/shared_ispell' 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # contrib/shared_ispell/Makefile 2 | 3 | MODULE_big = shared_ispell 4 | OBJS = src/shared_ispell.o 5 | 6 | EXTENSION = shared_ispell 7 | DATA = shared_ispell--1.1.0.sql 8 | 9 | REGRESS = security shared_ispell 10 | 11 | EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/postgresql.conf 12 | 13 | ifdef USE_PGXS 14 | PG_CONFIG = pg_config 15 | PGXS := $(shell $(PG_CONFIG) --pgxs) 16 | include $(PGXS) 17 | else 18 | subdir = contrib/shared_ispell 19 | top_builddir = ../.. 
20 | include $(top_builddir)/src/Makefile.global 21 | include $(top_srcdir)/contrib/contrib-global.mk 22 | endif 23 | 24 | -------------------------------------------------------------------------------- /META.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "shared_ispell", 3 | "abstract": "Provides a shared ispell dictionary - initialized once and stored in shared segment.", 4 | "description": "Allows you to allocate area within a shared segment and use it for ispell dictionaries.", 5 | "version": "1.1.0", 6 | "maintainer": "Tomas Vondra <tv@fuzzy.cz>", 7 | "license": "bsd", 8 | "prereqs": { 9 | "runtime": { 10 | "requires": { 11 | "PostgreSQL": "9.6.0" 12 | } 13 | } 14 | }, 15 | "provides": { 16 | "shared_ispell": { 17 | "file": "shared_ispell--1.1.0.sql", 18 | "version": "1.1.0" 19 | } 20 | }, 21 | "resources": { 22 | "repository": { 23 | "url": "https://github.com/tvondra/shared_ispell.git", 24 | "web": "http://github.com/tvondra/shared_ispell", 25 | "type": "git" 26 | } 27 | }, 28 | "tags" : ["ispell", "shared", "fulltext", "dictionary"], 29 | "meta-spec": { 30 | "version": "1.0.0", 31 | "url": "http://pgxn.org/meta/spec.txt" 32 | }, 33 | "release_status" : "testing" 34 | } 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2018, Postgres Professional 2 | Portions Copyright 2012, Tomas Vondra (tv@fuzzy.cz). All rights reserved. 3 | 4 | Permission to use, copy, modify, and distribute this software and its 5 | documentation for any purpose, without fee, and without a written agreement 6 | is hereby granted, provided that the above copyright notice and this 7 | paragraph and the following two paragraphs appear in all copies.
8 | 9 | IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR 10 | DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING 11 | LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS 12 | DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE 13 | POSSIBILITY OF SUCH DAMAGE. 14 | 15 | POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, 16 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 17 | AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 18 | ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO 19 | PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 20 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, Postgres Professional 2 | 3 | # Does not support the PGXS infrastructure at this time. Please, compile as part 4 | # of the contrib source tree. 5 | 6 | shared_ispell_sources = files( 7 | 'src' / 'shared_ispell.c' 8 | ) 9 | 10 | if host_system == 'windows' 11 | shared_ispell_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ 12 | '--NAME', 'shared_ispell', 13 | '--FILEDESC', 'shared_ispell - provides a shared ispell dictionary, i.e. 
a dictionary that\'s stored in shared segment.',]) 14 | endif 15 | 16 | shared_ispell = shared_module('shared_ispell', 17 | shared_ispell_sources, 18 | kwargs: contrib_mod_args, 19 | ) 20 | contrib_targets += shared_ispell 21 | 22 | install_data( 23 | 'shared_ispell.control', 24 | 'shared_ispell--1.1.0.sql', 25 | kwargs: contrib_data_args, 26 | ) 27 | 28 | tests += { 29 | 'name': 'shared_ispell', 30 | 'sd': meson.current_source_dir(), 31 | 'bd': meson.current_build_dir(), 32 | 'regress': { 33 | 'sql': [ 34 | 'security', 35 | 'shared_ispell', 36 | ], 37 | 'regress_args': ['--temp-config', files('postgresql.conf')], 38 | }, 39 | } 40 | -------------------------------------------------------------------------------- /sql/security.sql: -------------------------------------------------------------------------------- 1 | create type si_dicts_result as (dict_name VARCHAR, affix_name VARCHAR, words INT, affixes INT, bytes INT); 2 | 3 | create function shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) 4 | returns SETOF record as $$ 5 | declare 6 | qString varchar(4000); 7 | rec si_dicts_result; 8 | begin 9 | qString := 'select * from shared_ispell_dicts()'; 10 | for rec in execute qString loop 11 | return NEXT; 12 | end loop; 13 | return; 14 | end 15 | $$ language plpgsql; 16 | 17 | create extension shared_ispell; 18 | 19 | drop extension if exists shared_ispell; 20 | drop type si_dicts_result; 21 | drop function shared_ispell_dicts(); 22 | 23 | create type si_stoplists_result as (stop_name VARCHAR, words INT, bytes INT); 24 | 25 | create function shared_ispell_stoplists(OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) 26 | returns SETOF record as $$ 27 | declare 28 | rec si_stoplists_result; 29 | qString varchar(4000); 30 | begin 31 | qString := 'select * from shared_ispell_stoplists()'; 32 | for rec in execute qString loop 33 | return NEXT; 34 | end loop; 35 | return; 36 | end 37 | $$ language plpgsql; 
38 | 39 | create extension shared_ispell; 40 | 41 | drop extension if exists shared_ispell; 42 | drop type si_stoplists_result; 43 | drop function shared_ispell_stoplists(); 44 | -------------------------------------------------------------------------------- /expected/security.out: -------------------------------------------------------------------------------- 1 | create type si_dicts_result as (dict_name VARCHAR, affix_name VARCHAR, words INT, affixes INT, bytes INT); 2 | create function shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) 3 | returns SETOF record as $$ 4 | declare 5 | qString varchar(4000); 6 | rec si_dicts_result; 7 | begin 8 | qString := 'select * from shared_ispell_dicts()'; 9 | for rec in execute qString loop 10 | return NEXT; 11 | end loop; 12 | return; 13 | end 14 | $$ language plpgsql; 15 | create extension shared_ispell; 16 | ERROR: function "shared_ispell_dicts" already exists with same argument types 17 | drop extension if exists shared_ispell; 18 | NOTICE: extension "shared_ispell" does not exist, skipping 19 | drop type si_dicts_result; 20 | drop function shared_ispell_dicts(); 21 | create type si_stoplists_result as (stop_name VARCHAR, words INT, bytes INT); 22 | create function shared_ispell_stoplists(OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) 23 | returns SETOF record as $$ 24 | declare 25 | rec si_stoplists_result; 26 | qString varchar(4000); 27 | begin 28 | qString := 'select * from shared_ispell_stoplists()'; 29 | for rec in execute qString loop 30 | return NEXT; 31 | end loop; 32 | return; 33 | end 34 | $$ language plpgsql; 35 | create extension shared_ispell; 36 | ERROR: function "shared_ispell_stoplists" already exists with same argument types 37 | drop extension if exists shared_ispell; 38 | NOTICE: extension "shared_ispell" does not exist, skipping 39 | drop type si_stoplists_result; 40 | drop function shared_ispell_stoplists(); 41 | 
-------------------------------------------------------------------------------- /shared_ispell--1.1.0.sql: -------------------------------------------------------------------------------- 1 | CREATE FUNCTION shared_ispell_init(internal) 2 | RETURNS internal 3 | AS 'MODULE_PATHNAME', 'dispell_init' 4 | LANGUAGE C IMMUTABLE; 5 | 6 | CREATE FUNCTION shared_ispell_lexize(internal,internal,internal,internal) 7 | RETURNS internal 8 | AS 'MODULE_PATHNAME', 'dispell_lexize' 9 | LANGUAGE C IMMUTABLE; 10 | 11 | CREATE FUNCTION shared_ispell_reset() 12 | RETURNS void 13 | AS 'MODULE_PATHNAME', 'dispell_reset' 14 | LANGUAGE C IMMUTABLE; 15 | 16 | CREATE FUNCTION shared_ispell_mem_used() 17 | RETURNS integer 18 | AS 'MODULE_PATHNAME', 'dispell_mem_used' 19 | LANGUAGE C IMMUTABLE; 20 | 21 | CREATE FUNCTION shared_ispell_mem_available() 22 | RETURNS integer 23 | AS 'MODULE_PATHNAME', 'dispell_mem_available' 24 | LANGUAGE C IMMUTABLE; 25 | 26 | CREATE FUNCTION shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) 27 | RETURNS SETOF record 28 | AS 'MODULE_PATHNAME', 'dispell_list_dicts' 29 | LANGUAGE C IMMUTABLE; 30 | 31 | CREATE FUNCTION shared_ispell_stoplists( OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) 32 | RETURNS SETOF record 33 | AS 'MODULE_PATHNAME', 'dispell_list_stoplists' 34 | LANGUAGE C IMMUTABLE; 35 | 36 | CREATE TEXT SEARCH TEMPLATE shared_ispell ( 37 | INIT = shared_ispell_init, 38 | LEXIZE = shared_ispell_lexize 39 | ); 40 | 41 | /* 42 | CREATE TEXT SEARCH DICTIONARY czech_shared ( 43 | TEMPLATE = shared_ispell, 44 | DictFile = czech, 45 | AffFile = czech, 46 | StopWords = czech 47 | ); 48 | 49 | CREATE TEXT SEARCH CONFIGURATION public.czech_shared ( COPY = pg_catalog.simple ); 50 | 51 | ALTER TEXT SEARCH CONFIGURATION czech_shared 52 | ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, 53 | word, hword, hword_part 54 | WITH czech_shared; 55 | */ 
-------------------------------------------------------------------------------- /src/shared_ispell.h: -------------------------------------------------------------------------------- 1 | #ifndef __SHARED_ISPELL_H__ 2 | #define __SHARED_ISPELL_H__ 3 | 4 | #include "storage/lwlock.h" 5 | #include "utils/memutils.h" 6 | #include "utils/timestamp.h" 7 | #include "tsearch/dicts/spell.h" 8 | #include "tsearch/ts_public.h" 9 | 10 | /* This segment is initialized in the first process that accesses it (see 11 | * ispell_shmem_startup function). 12 | */ 13 | #define SEGMENT_NAME "shared_ispell" 14 | 15 | #define MAXLEN 255 16 | 17 | typedef struct SharedIspellDict 18 | { 19 | /* this is used for selecting the dictionary */ 20 | char *dictFile; 21 | char *affixFile; 22 | int nbytes; 23 | int nwords; 24 | 25 | /* next dictionary in the chain (essentially a linked list) */ 26 | struct SharedIspellDict *next; 27 | 28 | IspellDict dict; 29 | } SharedIspellDict; 30 | 31 | typedef struct SharedStopList 32 | { 33 | char *stopFile; 34 | int nbytes; 35 | 36 | struct SharedStopList *next; 37 | 38 | StopList stop; 39 | } SharedStopList; 40 | 41 | /* used to allocate memory in the shared segment */ 42 | typedef struct SegmentInfo 43 | { 44 | LWLockId lock; 45 | char *firstfree; /* first free address (always maxaligned) */ 46 | size_t available; /* free space remaining at firstfree */ 47 | instr_time lastReset; /* last reset of the dictionary */ 48 | 49 | /* the shared segment (info and data) */ 50 | SharedIspellDict *shdict; 51 | SharedStopList *shstop; 52 | } SegmentInfo; 53 | 54 | /* used to keep track of dictionary in each backend */ 55 | typedef struct DictInfo 56 | { 57 | instr_time lookup; 58 | 59 | char dictFile[MAXLEN]; 60 | char affixFile[MAXLEN]; 61 | char stopFile[MAXLEN]; 62 | 63 | /* We split word list and affix list. 64 | * In shdict we store a word list, word list will be stored in shared segment. 65 | * In dict we store an affix list in each process. 
66 | */ 67 | SharedIspellDict *shdict; 68 | IspellDict dict; 69 | SharedStopList *shstop; 70 | 71 | /* MemoryContext of dict local content */ 72 | MemoryContext infoCntx; 73 | } DictInfo; 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /expected/security_1.out: -------------------------------------------------------------------------------- 1 | create type si_dicts_result as (dict_name VARCHAR, affix_name VARCHAR, words INT, affixes INT, bytes INT); 2 | create function shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) 3 | returns SETOF record as $$ 4 | declare 5 | qString varchar(4000); 6 | rec si_dicts_result; 7 | begin 8 | qString := 'select * from shared_ispell_dicts()'; 9 | for rec in execute qString loop 10 | return NEXT; 11 | end loop; 12 | return; 13 | end 14 | $$ language plpgsql; 15 | create extension shared_ispell; 16 | ERROR: function "shared_ispell_dicts" already exists with same argument types 17 | CONTEXT: SQL statement "CREATE FUNCTION shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT) 18 | RETURNS SETOF record 19 | AS '$libdir/shared_ispell', 'dispell_list_dicts' 20 | LANGUAGE C IMMUTABLE" 21 | extension script file "shared_ispell--1.1.0.sql", near line 26 22 | drop extension if exists shared_ispell; 23 | NOTICE: extension "shared_ispell" does not exist, skipping 24 | drop type si_dicts_result; 25 | drop function shared_ispell_dicts(); 26 | create type si_stoplists_result as (stop_name VARCHAR, words INT, bytes INT); 27 | create function shared_ispell_stoplists(OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) 28 | returns SETOF record as $$ 29 | declare 30 | rec si_stoplists_result; 31 | qString varchar(4000); 32 | begin 33 | qString := 'select * from shared_ispell_stoplists()'; 34 | for rec in execute qString loop 35 | return NEXT; 36 | end loop; 37 | return; 38 | 
end 39 | $$ language plpgsql; 40 | create extension shared_ispell; 41 | ERROR: function "shared_ispell_stoplists" already exists with same argument types 42 | CONTEXT: SQL statement "CREATE FUNCTION shared_ispell_stoplists( OUT stop_name VARCHAR, OUT words INT, OUT bytes INT) 43 | RETURNS SETOF record 44 | AS '$libdir/shared_ispell', 'dispell_list_stoplists' 45 | LANGUAGE C IMMUTABLE" 46 | extension script file "shared_ispell--1.1.0.sql", near line 31 47 | drop extension if exists shared_ispell; 48 | NOTICE: extension "shared_ispell" does not exist, skipping 49 | drop type si_stoplists_result; 50 | drop function shared_ispell_stoplists(); 51 | -------------------------------------------------------------------------------- /sql/shared_ispell.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION shared_ispell; 2 | 3 | -- Test ISpell dictionary with ispell affix file 4 | CREATE TEXT SEARCH DICTIONARY shared_ispell ( 5 | Template=shared_ispell, 6 | DictFile=ispell_sample, 7 | AffFile=ispell_sample, 8 | Stopwords=english 9 | ); 10 | 11 | SELECT ts_lexize('shared_ispell', 'skies'); 12 | SELECT ts_lexize('shared_ispell', 'bookings'); 13 | SELECT ts_lexize('shared_ispell', 'booking'); 14 | SELECT ts_lexize('shared_ispell', 'foot'); 15 | SELECT ts_lexize('shared_ispell', 'foots'); 16 | SELECT ts_lexize('shared_ispell', 'rebookings'); 17 | SELECT ts_lexize('shared_ispell', 'rebooking'); 18 | SELECT ts_lexize('shared_ispell', 'unbookings'); 19 | SELECT ts_lexize('shared_ispell', 'unbooking'); 20 | SELECT ts_lexize('shared_ispell', 'unbook'); 21 | 22 | SELECT ts_lexize('shared_ispell', 'footklubber'); 23 | SELECT ts_lexize('shared_ispell', 'footballklubber'); 24 | SELECT ts_lexize('shared_ispell', 'ballyklubber'); 25 | SELECT ts_lexize('shared_ispell', 'footballyklubber'); 26 | 27 | -- Test ISpell dictionary with hunspell affix file 28 | CREATE TEXT SEARCH DICTIONARY shared_hunspell ( 29 | Template=shared_ispell, 30 | 
DictFile=ispell_sample, 31 | AffFile=hunspell_sample 32 | ); 33 | 34 | SELECT ts_lexize('shared_hunspell', 'skies'); 35 | SELECT ts_lexize('shared_hunspell', 'bookings'); 36 | SELECT ts_lexize('shared_hunspell', 'booking'); 37 | SELECT ts_lexize('shared_hunspell', 'foot'); 38 | SELECT ts_lexize('shared_hunspell', 'foots'); 39 | SELECT ts_lexize('shared_hunspell', 'rebookings'); 40 | SELECT ts_lexize('shared_hunspell', 'rebooking'); 41 | SELECT ts_lexize('shared_hunspell', 'unbookings'); 42 | SELECT ts_lexize('shared_hunspell', 'unbooking'); 43 | SELECT ts_lexize('shared_hunspell', 'unbook'); 44 | 45 | SELECT ts_lexize('shared_hunspell', 'footklubber'); 46 | SELECT ts_lexize('shared_hunspell', 'footballklubber'); 47 | SELECT ts_lexize('shared_hunspell', 'ballyklubber'); 48 | SELECT ts_lexize('shared_hunspell', 'footballyklubber'); 49 | 50 | SELECT dict_name, affix_name, words, affixes FROM shared_ispell_dicts(); 51 | SELECT stop_name, words FROM shared_ispell_stoplists(); 52 | 53 | SELECT shared_ispell_reset(); 54 | 55 | SELECT ts_lexize('shared_ispell', 'skies'); 56 | SELECT ts_lexize('shared_hunspell', 'skies'); 57 | SELECT ts_lexize('shared_hunspell', 'skies'); 58 | -------------------------------------------------------------------------------- /expected/shared_ispell.out: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION shared_ispell; 2 | -- Test ISpell dictionary with ispell affix file 3 | CREATE TEXT SEARCH DICTIONARY shared_ispell ( 4 | Template=shared_ispell, 5 | DictFile=ispell_sample, 6 | AffFile=ispell_sample, 7 | Stopwords=english 8 | ); 9 | SELECT ts_lexize('shared_ispell', 'skies'); 10 | ts_lexize 11 | ----------- 12 | {sky} 13 | (1 row) 14 | 15 | SELECT ts_lexize('shared_ispell', 'bookings'); 16 | ts_lexize 17 | ---------------- 18 | {booking,book} 19 | (1 row) 20 | 21 | SELECT ts_lexize('shared_ispell', 'booking'); 22 | ts_lexize 23 | ---------------- 24 | {booking,book} 25 | (1 row) 26 | 27 | 
SELECT ts_lexize('shared_ispell', 'foot'); 28 | ts_lexize 29 | ----------- 30 | {foot} 31 | (1 row) 32 | 33 | SELECT ts_lexize('shared_ispell', 'foots'); 34 | ts_lexize 35 | ----------- 36 | {foot} 37 | (1 row) 38 | 39 | SELECT ts_lexize('shared_ispell', 'rebookings'); 40 | ts_lexize 41 | ---------------- 42 | {booking,book} 43 | (1 row) 44 | 45 | SELECT ts_lexize('shared_ispell', 'rebooking'); 46 | ts_lexize 47 | ---------------- 48 | {booking,book} 49 | (1 row) 50 | 51 | SELECT ts_lexize('shared_ispell', 'unbookings'); 52 | ts_lexize 53 | ----------- 54 | {book} 55 | (1 row) 56 | 57 | SELECT ts_lexize('shared_ispell', 'unbooking'); 58 | ts_lexize 59 | ----------- 60 | {book} 61 | (1 row) 62 | 63 | SELECT ts_lexize('shared_ispell', 'unbook'); 64 | ts_lexize 65 | ----------- 66 | {book} 67 | (1 row) 68 | 69 | SELECT ts_lexize('shared_ispell', 'footklubber'); 70 | ts_lexize 71 | ---------------- 72 | {foot,klubber} 73 | (1 row) 74 | 75 | SELECT ts_lexize('shared_ispell', 'footballklubber'); 76 | ts_lexize 77 | ------------------------------------------------------ 78 | {footballklubber,foot,ball,klubber,football,klubber} 79 | (1 row) 80 | 81 | SELECT ts_lexize('shared_ispell', 'ballyklubber'); 82 | ts_lexize 83 | ---------------- 84 | {ball,klubber} 85 | (1 row) 86 | 87 | SELECT ts_lexize('shared_ispell', 'footballyklubber'); 88 | ts_lexize 89 | --------------------- 90 | {foot,ball,klubber} 91 | (1 row) 92 | 93 | -- Test ISpell dictionary with hunspell affix file 94 | CREATE TEXT SEARCH DICTIONARY shared_hunspell ( 95 | Template=shared_ispell, 96 | DictFile=ispell_sample, 97 | AffFile=hunspell_sample 98 | ); 99 | SELECT ts_lexize('shared_hunspell', 'skies'); 100 | ts_lexize 101 | ----------- 102 | {sky} 103 | (1 row) 104 | 105 | SELECT ts_lexize('shared_hunspell', 'bookings'); 106 | ts_lexize 107 | ---------------- 108 | {booking,book} 109 | (1 row) 110 | 111 | SELECT ts_lexize('shared_hunspell', 'booking'); 112 | ts_lexize 113 | ---------------- 114 | 
{booking,book} 115 | (1 row) 116 | 117 | SELECT ts_lexize('shared_hunspell', 'foot'); 118 | ts_lexize 119 | ----------- 120 | {foot} 121 | (1 row) 122 | 123 | SELECT ts_lexize('shared_hunspell', 'foots'); 124 | ts_lexize 125 | ----------- 126 | {foot} 127 | (1 row) 128 | 129 | SELECT ts_lexize('shared_hunspell', 'rebookings'); 130 | ts_lexize 131 | ---------------- 132 | {booking,book} 133 | (1 row) 134 | 135 | SELECT ts_lexize('shared_hunspell', 'rebooking'); 136 | ts_lexize 137 | ---------------- 138 | {booking,book} 139 | (1 row) 140 | 141 | SELECT ts_lexize('shared_hunspell', 'unbookings'); 142 | ts_lexize 143 | ----------- 144 | {book} 145 | (1 row) 146 | 147 | SELECT ts_lexize('shared_hunspell', 'unbooking'); 148 | ts_lexize 149 | ----------- 150 | {book} 151 | (1 row) 152 | 153 | SELECT ts_lexize('shared_hunspell', 'unbook'); 154 | ts_lexize 155 | ----------- 156 | {book} 157 | (1 row) 158 | 159 | SELECT ts_lexize('shared_hunspell', 'footklubber'); 160 | ts_lexize 161 | ---------------- 162 | {foot,klubber} 163 | (1 row) 164 | 165 | SELECT ts_lexize('shared_hunspell', 'footballklubber'); 166 | ts_lexize 167 | ------------------------------------------------------ 168 | {footballklubber,foot,ball,klubber,football,klubber} 169 | (1 row) 170 | 171 | SELECT ts_lexize('shared_hunspell', 'ballyklubber'); 172 | ts_lexize 173 | ---------------- 174 | {ball,klubber} 175 | (1 row) 176 | 177 | SELECT ts_lexize('shared_hunspell', 'footballyklubber'); 178 | ts_lexize 179 | --------------------- 180 | {foot,ball,klubber} 181 | (1 row) 182 | 183 | SELECT dict_name, affix_name, words, affixes FROM shared_ispell_dicts(); 184 | dict_name | affix_name | words | affixes 185 | ---------------+-----------------+-------+--------- 186 | ispell_sample | hunspell_sample | 8 | 7 187 | ispell_sample | ispell_sample | 8 | 7 188 | (2 rows) 189 | 190 | SELECT stop_name, words FROM shared_ispell_stoplists(); 191 | stop_name | words 192 | -----------+------- 193 | english | 127 194 | (1 
row) 195 | 196 | SELECT shared_ispell_reset(); 197 | shared_ispell_reset 198 | --------------------- 199 | 200 | (1 row) 201 | 202 | SELECT ts_lexize('shared_ispell', 'skies'); 203 | ts_lexize 204 | ----------- 205 | {sky} 206 | (1 row) 207 | 208 | SELECT ts_lexize('shared_hunspell', 'skies'); 209 | ts_lexize 210 | ----------- 211 | {sky} 212 | (1 row) 213 | 214 | SELECT ts_lexize('shared_hunspell', 'skies'); 215 | ts_lexize 216 | ----------- 217 | {sky} 218 | (1 row) 219 | 220 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Shared ISpell Dictionary 2 | ======================== 3 | This PostgreSQL extension provides a shared ispell dictionary, i.e. 4 | a dictionary that's stored in shared segment. The traditional ispell 5 | implementation means that each session initializes and stores the 6 | dictionary on its own, which means a lot of CPU/RAM is wasted. 7 | 8 | This extension allocates an area in shared segment (you have to 9 | choose the size in advance) and then loads the dictionary into it 10 | when it's used for the first time. 11 | 12 | If you need just snowball-type dictionaries, this extension is not 13 | really interesting for you. But if you really need an ispell 14 | dictionary, this may save you a lot of resources. 15 | 16 | 17 | Install 18 | ------- 19 | 20 | Before building and installing `shared_ispell` you should ensure the following: 21 | 22 | * PostgreSQL version is 9.6 or later. 23 | 24 | Installing the extension is quite simple.
In that case all you need to do is this: 25 | 26 | $ git clone git@github.com:postgrespro/shared_ispell.git 27 | $ cd shared_ispell 28 | $ make USE_PGXS=1 29 | $ make USE_PGXS=1 install 30 | 31 | and then (after connecting to the database) 32 | 33 | db=# CREATE EXTENSION shared_ispell; 34 | 35 | > **Important:** Don't forget to set the `PG_CONFIG` variable in case you want to test `shared_ispell` on a custom build of PostgreSQL. Read more [here](https://wiki.postgresql.org/wiki/Building_and_Installing_PostgreSQL_Extension_Modules). 36 | 37 | 38 | Config 39 | ------ 40 | Now the functions are created, but you still need to load the shared 41 | module. This needs to be done from postgresql.conf, as the module 42 | needs to allocate space in the shared memory segment. So add this to 43 | the config file (or update the current values) 44 | 45 | # libraries to load 46 | shared_preload_libraries = 'shared_ispell' 47 | 48 | # config of the shared memory 49 | shared_ispell.max_size = 32MB 50 | 51 | Yes, there's a single GUC variable that defines the maximum size of 52 | the shared segment. This is a hard limit, the shared segment is not 53 | extensible and you need to set it so that all the dictionaries fit 54 | into it and not much memory is wasted. 55 | 56 | To find out how much memory you actually need, use a large value 57 | (e.g. 200MB) and load all the dictionaries you want to use. Then use 58 | the shared_ispell_mem_used() function to find out how much memory 59 | was actually used (and set the max_size GUC variable accordingly). 60 | 61 | Don't set it exactly to that value, leave some free space there, 62 | so that you can reload the dictionaries without changing the GUC 63 | max_size limit (which requires a restart of the DB). Something 64 | like 512kB should be just fine. 65 | 66 | The shared segment can contain several dictionaries at the same time, 67 | the amount of memory is the only limit. There's no limit on number 68 | of dictionaries / words etc.
Just the max_size GUC variable. 69 | 70 | 71 | Using the dictionary 72 | -------------------- 73 | Technically, the extension defines a 'shared_ispell' template that 74 | you may use to define custom dictionaries. E.g. you may do this 75 | 76 | CREATE TEXT SEARCH DICTIONARY czech_shared ( 77 | TEMPLATE = shared_ispell, 78 | DictFile = czech, 79 | AffFile = czech, 80 | StopWords = czech 81 | ); 82 | 83 | CREATE TEXT SEARCH CONFIGURATION public.czech_shared 84 | ( COPY = pg_catalog.simple ); 85 | 86 | ALTER TEXT SEARCH CONFIGURATION czech_shared 87 | ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, 88 | word, hword, hword_part 89 | WITH czech_shared; 90 | 91 | and then do the usual stuff, e.g. 92 | 93 | db=# SELECT ts_lexize('czech_shared', 'automobile'); 94 | 95 | or whatever you want. 96 | 97 | 98 | Available functions 99 | ------------------- 100 | The extension provides five management functions, that allow you to 101 | manage and get info about the preloaded dictionaries. The first two 102 | functions 103 | 104 | shared_ispell_mem_used() 105 | shared_ispell_mem_available() 106 | 107 | allow you to get info about the shared segment (used and free memory) 108 | e.g. to properly size the segment (max_size). Then there are functions 109 | return list of dictionaries / stop lists loaded in the shared segment 110 | 111 | shared_ispell_dicts() 112 | shared_ispell_stoplists() 113 | 114 | e.g. like this 115 | 116 | db=# SELECT * FROM shared_ispell_dicts(); 117 | 118 | dict_name | affix_name | words | affixes | bytes 119 | -----------+------------+-------+---------+---------- 120 | bulgarian | bulgarian | 79267 | 12 | 7622128 121 | czech | czech | 96351 | 2544 | 12715000 122 | (2 rows) 123 | 124 | 125 | db=# SELECT * FROM shared_ispell_stoplists(); 126 | 127 | stop_name | words | bytes 128 | -----------+-------+------- 129 | czech | 259 | 4552 130 | (1 row) 131 | 132 | The last function allows you to reset the dictionary (e.g. 
so that you 133 | can reload the updated files from disk). The sessions that already use 134 | the dictionaries will be forced to reinitialize them (the first one 135 | will rebuild and copy them in the shared segment, the other ones will 136 | use this prepared data). 137 | 138 | db=# SELECT shared_ispell_reset(); 139 | 140 | That's all for now ... 141 | 142 | Changes from original version 143 | ----------------------------- 144 | The original version of this module is located in Tomas Vondra's 145 | [GitHub](https://github.com/tvondra/shared_ispell). That version does not handle 146 | affixes that require full regular expressions (regex_t, implemented in regex.h). 147 | 148 | This version of the module can handle those affixes with full regular 149 | expressions. To handle it the module loads and stores affix files in each 150 | session. The affix list is tiny and takes a little time and memory to parse. 151 | Actually this is Tomas's 152 | [idea](http://www.postgresql.org/message-id/56A5F3D5.9030702@2ndquadrant.com), 153 | but there is no related code in the GitHub. 154 | 155 | Author 156 | ------ 157 | Tomas Vondra [GitHub](https://github.com/tvondra) -------------------------------------------------------------------------------- /src/shared_ispell.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Shared ispell dictionary stored in a shared memory segment, so that 3 | * backends may save memory and CPU time. By default each connection 4 | * keeps a private copy of the dictionary, which is wasteful as the 5 | * dictionaries are copied in memory multiple times. The connections 6 | * also need to initialize the dictionary on their own, which may take 7 | * up to a few seconds.
12 | * 13 | * This extension is storing a single copy of the dictionary in a shared 14 | * memory so that all connections may use it, saving memory and CPU time. 15 | * 16 | * 17 | * The flow within the shared ispell may be slightly confusing, so this 18 | * is a brief summary of the main flows within the code. 19 | * 20 | * ===== shared segment init (postmaster startup) ===== 21 | * 22 | * _PG_init 23 | * -> ispell_shmem_startup (registered as a hook) 24 | * 25 | * ===== dictionary init (backend) ===== 26 | * 27 | * dispell_init 28 | * -> init_shared_dict 29 | * -> get_shared_dict 30 | * -> NIStartBuild 31 | * -> NIImportDictionary 32 | * -> NIImportAffixes 33 | * -> NISortDictionary 34 | * -> NISortAffixes 35 | * -> NIFinishBuild 36 | * -> sizeIspellDict 37 | * -> copyIspellDict 38 | * -> copySPNode 39 | * -> get_shared_stop_list 40 | * -> readstoplist 41 | * -> copyStopList 42 | * 43 | * ===== dictionary reinit after reset (backend) ===== 44 | * 45 | * dispell_lexize 46 | * -> timestamp of lookup < last reset 47 | * -> init_shared_dict 48 | * (see dispell_init above) 49 | * -> SharedNINormalizeWord 50 | */ 51 | 52 | #include "postgres.h" 53 | #include "miscadmin.h" 54 | #include "storage/ipc.h" 55 | #include "storage/shmem.h" 56 | 57 | #include "catalog/pg_collation_d.h" 58 | #include "commands/defrem.h" 59 | #include "tsearch/ts_locale.h" 60 | #include "utils/formatting.h" 61 | #include "access/htup_details.h" 62 | #include "funcapi.h" 63 | #include "utils/builtins.h" 64 | #include "utils/guc.h" 65 | 66 | #include "shared_ispell.h" 67 | #include "tsearch/dicts/spell.h" 68 | 69 | PG_MODULE_MAGIC; 70 | 71 | void _PG_init(void); 72 | 73 | /* Memory for dictionaries in kbytes */ 74 | static int max_ispell_mem_size_kb; 75 | 76 | /* Saved hook value for proper chaining */ 77 | static shmem_startup_hook_type prev_shmem_startup_hook = NULL; 78 | 79 | /* These are used to allocate data within shared segment */ 80 | static SegmentInfo *segment_info = NULL; 81 | 82 | 
static void ispell_shmem_startup(void); 83 | 84 | static char *shalloc(int bytes); 85 | static char *shstrcpy(const char *str); 86 | 87 | static SharedIspellDict *copyIspellDict(IspellDict *dict, char *dictFile, char *affixFile, int bytes, int words); 88 | static SharedStopList *copyStopList(StopList *list, char *stopFile, int bytes); 89 | 90 | static int sizeIspellDict(IspellDict *dict, char *dictFile, char *affixFile); 91 | static int sizeStopList(StopList *list, char *stopFile); 92 | 93 | /* 94 | * Get memory for dictionaries in bytes 95 | */ 96 | static Size 97 | max_ispell_mem_size() 98 | { 99 | return (Size) max_ispell_mem_size_kb * 1024L; 100 | } 101 | 102 | #if (PG_VERSION_NUM >= 150000) 103 | static shmem_request_hook_type prev_shmem_request_hook = NULL; 104 | static void shared_ispell_shmem_request(void); 105 | #endif 106 | 107 | /* 108 | * Module load callback 109 | */ 110 | void 111 | _PG_init(void) 112 | { 113 | if (!process_shared_preload_libraries_in_progress) { 114 | elog(ERROR, "shared_ispell has to be loaded using shared_preload_libraries"); 115 | return; 116 | } 117 | 118 | /* Define custom GUC variables. */ 119 | 120 | /* How much memory should we preallocate for the dictionaries (limits how many 121 | * dictionaries you can load into the shared segment). 
*/ 122 | DefineCustomIntVariable("shared_ispell.max_size", 123 | "amount of memory to pre-allocate for ispell dictionaries", 124 | NULL, 125 | &max_ispell_mem_size_kb, 126 | 50 * 1024, /* default 50MB */ 127 | 1024, /* min 1MB */ 128 | INT_MAX, 129 | PGC_POSTMASTER, 130 | GUC_UNIT_KB, 131 | NULL, 132 | NULL, 133 | NULL); 134 | 135 | EmitWarningsOnPlaceholders("shared_ispell"); 136 | 137 | #if PG_VERSION_NUM >= 150000 138 | prev_shmem_request_hook = shmem_request_hook; 139 | shmem_request_hook = shared_ispell_shmem_request; 140 | #else 141 | RequestAddinShmemSpace(max_ispell_mem_size()); 142 | 143 | #if PG_VERSION_NUM >= 90600 144 | RequestNamedLWLockTranche("shared_ispell", 1); 145 | #else 146 | RequestAddinLWLocks(1); 147 | #endif 148 | #endif 149 | 150 | /* Install hooks. */ 151 | prev_shmem_startup_hook = shmem_startup_hook; 152 | shmem_startup_hook = ispell_shmem_startup; 153 | } 154 | 155 | #if PG_VERSION_NUM >= 150000 156 | static void 157 | shared_ispell_shmem_request(void) 158 | { 159 | if (prev_shmem_request_hook) 160 | prev_shmem_request_hook(); 161 | 162 | RequestAddinShmemSpace(max_ispell_mem_size()); 163 | 164 | RequestNamedLWLockTranche("shared_ispell", 1); 165 | } 166 | #endif 167 | 168 | /* 169 | * Probably the most important part of the startup - initializes the 170 | * memory in shared memory segment (creates and initializes the 171 | * SegmentInfo data structure). 172 | * 173 | * This is called from a shmem_startup_hook (see _PG_init). 
174 | */ 175 | static void 176 | ispell_shmem_startup() 177 | { 178 | bool found = false; 179 | char *segment; 180 | 181 | if (prev_shmem_startup_hook) 182 | prev_shmem_startup_hook(); 183 | 184 | /* 185 | * Create or attach to the shared memory state, including hash table 186 | */ 187 | LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); 188 | 189 | segment = ShmemInitStruct(SEGMENT_NAME, max_ispell_mem_size(), &found); 190 | segment_info = (SegmentInfo *) segment; 191 | 192 | /* Was the shared memory segment already initialized? */ 193 | if (!found) 194 | { 195 | memset(segment, 0, max_ispell_mem_size()); 196 | 197 | #if PG_VERSION_NUM >= 90600 198 | segment_info->lock = &(GetNamedLWLockTranche("shared_ispell"))->lock; 199 | #else 200 | segment_info->lock = LWLockAssign(); 201 | #endif 202 | segment_info->firstfree = segment + MAXALIGN(sizeof(SegmentInfo)); 203 | segment_info->available = max_ispell_mem_size() - 204 | (int) (segment_info->firstfree - segment); 205 | 206 | INSTR_TIME_SET_CURRENT(segment_info->lastReset); 207 | } 208 | 209 | LWLockRelease(AddinShmemInitLock); 210 | } 211 | 212 | /* 213 | * This is called from backends that are looking up for a shared dictionary 214 | * definition using a filename with dictionary / affixes. 215 | * 216 | * This is called through dispell_init() which is responsible for proper locking 217 | * of the shared memory (using SegmentInfo->lock). 218 | */ 219 | static SharedIspellDict * 220 | get_shared_dict(char *words, char *affixes) 221 | { 222 | SharedIspellDict *dict = segment_info->shdict; 223 | 224 | while (dict != NULL) 225 | { 226 | if ((strcmp(dict->dictFile, words) == 0) && 227 | (strcmp(dict->affixFile, affixes) == 0)) 228 | return dict; 229 | dict = dict->next; 230 | } 231 | 232 | return NULL; 233 | } 234 | 235 | /* 236 | * This is called from backends that are looking up for a list of stop words 237 | * using a filename of the list. 
238 | * 239 | * This is called through dispell_init() which is responsible for proper locking 240 | * of the shared memory (using SegmentInfo->lock). 241 | */ 242 | static SharedStopList * 243 | get_shared_stop_list(char *stop) 244 | { 245 | SharedStopList *list = segment_info->shstop; 246 | 247 | while (list != NULL) 248 | { 249 | if (strcmp(list->stopFile, stop) == 0) 250 | return list; 251 | list = list->next; 252 | } 253 | 254 | return NULL; 255 | } 256 | 257 | /* 258 | * Cleares IspellDict fields which are used for store affix list. 259 | */ 260 | static void 261 | clean_dict_affix(IspellDict *dict) 262 | { 263 | dict->maffixes = 0; 264 | dict->naffixes = 0; 265 | dict->Affix = NULL; 266 | 267 | dict->Suffix = NULL; 268 | dict->Prefix = NULL; 269 | 270 | dict->AffixData = NULL; 271 | dict->lenAffixData = 0; 272 | dict->nAffixData = 0; 273 | 274 | dict->CompoundAffix = NULL; 275 | dict->CompoundAffixFlags = NULL; 276 | dict->nCompoundAffixFlag = 0; 277 | dict->mCompoundAffixFlag = 0; 278 | 279 | dict->avail = 0; 280 | } 281 | 282 | /* 283 | * Initializes the dictionary for use in backends - checks whether such dictionary 284 | * and list of stopwords is already used, and if not then parses it and loads it into 285 | * the shared segment. 286 | * 287 | * Function lookup if the dictionary (word list) is already loaded in the 288 | * shared segment. If not then loads the dictionary (word list). 289 | * Affix list is loaded to a current backend process. 290 | * 291 | * This is called through dispell_init() which is responsible for proper locking 292 | * of the shared memory (using SegmentInfo->lock). 
293 | */ 294 | static void 295 | init_shared_dict(DictInfo *info, MemoryContext infoCntx, 296 | char *dictFile, char *affFile, char *stopFile) 297 | { 298 | int size; 299 | SharedIspellDict *shdict = NULL; 300 | SharedStopList *shstop = NULL; 301 | MemoryContext oldctx; 302 | 303 | oldctx = MemoryContextSwitchTo(infoCntx); 304 | 305 | /* DICTIONARY + AFFIXES */ 306 | 307 | /* TODO This should probably check that the filenames are not NULL, and maybe that 308 | * it exists. Or maybe that's handled by the NIImport* functions. */ 309 | 310 | /* lookup if the dictionary (words and affixes) is already loaded in the shared segment */ 311 | shdict = get_shared_dict(dictFile, affFile); 312 | 313 | /* clear dict affix sources */ 314 | clean_dict_affix(&(info->dict)); 315 | 316 | /* load affix list */ 317 | NIStartBuild(&(info->dict)); 318 | NIImportAffixes(&(info->dict), get_tsearch_config_filename(affFile, "affix")); 319 | 320 | /* load the dictionary (word list) if not yet defined */ 321 | if (shdict == NULL) 322 | { 323 | IspellDict *dict; 324 | 325 | dict = (IspellDict *) palloc0(sizeof(IspellDict)); 326 | 327 | NIStartBuild(dict); 328 | NIImportDictionary(dict, get_tsearch_config_filename(dictFile, "dict")); 329 | 330 | dict->flagMode = info->dict.flagMode; 331 | dict->usecompound = info->dict.usecompound; 332 | 333 | dict->nCompoundAffixFlag = dict->mCompoundAffixFlag = 334 | info->dict.nCompoundAffixFlag; 335 | dict->CompoundAffixFlags = (CompoundAffixFlag *) palloc0( 336 | dict->nCompoundAffixFlag * sizeof(CompoundAffixFlag)); 337 | memcpy(dict->CompoundAffixFlags, info->dict.CompoundAffixFlags, 338 | dict->nCompoundAffixFlag * sizeof(CompoundAffixFlag)); 339 | 340 | /* 341 | * If affix->useFlagAliases == true then AffixData is generated 342 | * in NIImportAffixes(). Therefore we need to copy it. 
343 | */ 344 | if (info->dict.useFlagAliases) 345 | { 346 | int i; 347 | 348 | dict->useFlagAliases = true; 349 | dict->lenAffixData = info->dict.lenAffixData; 350 | dict->nAffixData = info->dict.nAffixData; 351 | dict->AffixData = (const char **) palloc0(dict->nAffixData * sizeof(char *)); 352 | 353 | for (i = 0; i < dict->nAffixData; i++) 354 | dict->AffixData[i] = pstrdup(info->dict.AffixData[i]); 355 | } 356 | 357 | NISortDictionary(dict); 358 | NIFinishBuild(dict); 359 | 360 | /* check available space in shared segment */ 361 | size = sizeIspellDict(dict, dictFile, affFile); 362 | if (size > segment_info->available) 363 | elog(ERROR, "shared dictionary %s.dict / %s.affix needs %d B, only %zd B available", 364 | dictFile, affFile, size, segment_info->available); 365 | 366 | /* fine, there's enough space - copy the dictionary */ 367 | shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell); 368 | shdict->dict.naffixes = info->dict.naffixes; 369 | 370 | /* add the new dictionary to the linked list (of SharedIspellDict structures) */ 371 | shdict->next = segment_info->shdict; 372 | segment_info->shdict = shdict; 373 | } 374 | /* continue load affix list to a current backend process */ 375 | 376 | /* NISortAffixes is used AffixData. 
Therefore we need to copy pointer */ 377 | info->dict.lenAffixData = shdict->dict.lenAffixData; 378 | info->dict.nAffixData = shdict->dict.nAffixData; 379 | info->dict.AffixData = shdict->dict.AffixData; 380 | info->dict.Dictionary = shdict->dict.Dictionary; 381 | NISortAffixes(&(info->dict)); 382 | NIFinishBuild(&(info->dict)); 383 | 384 | /* STOP WORDS */ 385 | 386 | /* lookup if the stop words are already loaded in the shared segment, but only if there 387 | * actually is a list */ 388 | if (stopFile && *stopFile) 389 | { 390 | shstop = get_shared_stop_list(stopFile); 391 | 392 | /* load the stopwords if not yet defined */ 393 | if (shstop == NULL) 394 | { 395 | StopList stoplist; 396 | 397 | readstoplist(stopFile, &stoplist, str_tolower); 398 | 399 | size = sizeStopList(&stoplist, stopFile); 400 | if (size > segment_info->available) 401 | elog(ERROR, "shared stoplist %s.stop needs %d B, only %zd B available", 402 | stopFile, size, segment_info->available); 403 | 404 | /* fine, there's enough space - copy the stoplist */ 405 | shstop = copyStopList(&stoplist, stopFile, size); 406 | 407 | /* add the new stopword list to the linked list (of SharedStopList structures) */ 408 | shstop->next = segment_info->shstop; 409 | segment_info->shstop = shstop; 410 | } 411 | } 412 | 413 | /* Now, fill the DictInfo structure for the backend (references to dictionary, 414 | * stopwords and the filenames). 
*/ 415 | 416 | info->shdict = shdict; 417 | info->shstop = shstop; 418 | INSTR_TIME_SET_CURRENT(info->lookup); 419 | 420 | memcpy(info->dictFile, dictFile, strlen(dictFile) + 1); 421 | memcpy(info->affixFile, affFile, strlen(affFile) + 1); 422 | if (stopFile != NULL) 423 | memcpy(info->stopFile, stopFile, strlen(stopFile) + 1); 424 | else 425 | memset(info->stopFile, 0, sizeof(info->stopFile)); 426 | 427 | MemoryContextSwitchTo(oldctx); 428 | /* save current context as long-lived */ 429 | info->infoCntx = infoCntx; 430 | } 431 | 432 | PG_FUNCTION_INFO_V1(dispell_init); 433 | PG_FUNCTION_INFO_V1(dispell_lexize); 434 | PG_FUNCTION_INFO_V1(dispell_reset); 435 | PG_FUNCTION_INFO_V1(dispell_mem_available); 436 | PG_FUNCTION_INFO_V1(dispell_mem_used); 437 | PG_FUNCTION_INFO_V1(dispell_list_dicts); 438 | PG_FUNCTION_INFO_V1(dispell_list_stoplists); 439 | 440 | /* 441 | * Resets the shared dictionary memory, i.e. removes all the dictionaries. This 442 | * is the only way to remove dictionaries from the memory - either when 443 | * a dictionary is no longer needed or needs to be reloaded (e.g. to update 444 | * list of words / affixes). 445 | */ 446 | Datum 447 | dispell_reset(PG_FUNCTION_ARGS) 448 | { 449 | LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); 450 | 451 | segment_info->shdict = NULL; 452 | segment_info->shstop = NULL; 453 | INSTR_TIME_SET_CURRENT(segment_info->lastReset); 454 | segment_info->firstfree = ((char*) segment_info) + MAXALIGN(sizeof(SegmentInfo)); 455 | segment_info->available = max_ispell_mem_size() - 456 | (int) (segment_info->firstfree - (char*) segment_info); 457 | 458 | memset(segment_info->firstfree, 0, segment_info->available); 459 | 460 | LWLockRelease(segment_info->lock); 461 | 462 | PG_RETURN_VOID(); 463 | } 464 | 465 | /* 466 | * Returns amount of 'free space' in the shared segment (usable for dictionaries). 
467 | */ 468 | Datum 469 | dispell_mem_available(PG_FUNCTION_ARGS) 470 | { 471 | int result = 0; 472 | LWLockAcquire(segment_info->lock, LW_SHARED); 473 | 474 | result = segment_info->available; 475 | 476 | LWLockRelease(segment_info->lock); 477 | 478 | PG_RETURN_INT32(result); 479 | } 480 | 481 | /* 482 | * Returns amount of 'occupied space' in the shared segment (used by current 483 | * dictionaries). 484 | */ 485 | Datum 486 | dispell_mem_used(PG_FUNCTION_ARGS) 487 | { 488 | int result = 0; 489 | 490 | LWLockAcquire(segment_info->lock, LW_SHARED); 491 | 492 | result = max_ispell_mem_size() - segment_info->available; 493 | 494 | LWLockRelease(segment_info->lock); 495 | 496 | PG_RETURN_INT32(result); 497 | } 498 | 499 | /* 500 | * This initializes a (shared) dictionary for a backend. The function receives 501 | * a list of options specified in the CREATE TEXT SEARCH DICTIONARY with ispell 502 | * template (http://www.postgresql.org/docs/9.3/static/sql-createtsdictionary.html). 503 | * 504 | * There are three allowed options: DictFile, AffFile, StopWords. The values 505 | * should match to filenames in `pg_config --sharedir` directory, ending with 506 | * .dict, .affix and .stop. 507 | * 508 | * The StopWords parameter is optional, the two other are required. 509 | * 510 | * If any of the filenames are incorrect, the call to init_shared_dict will fail. 511 | * 512 | * Do not call it directly - it saves current memory context as long-lived 513 | * context. 
514 | */ 515 | Datum 516 | dispell_init(PG_FUNCTION_ARGS) 517 | { 518 | List *dictoptions = (List *) PG_GETARG_POINTER(0); 519 | char *dictFile = NULL, 520 | *affFile = NULL, 521 | *stopFile = NULL; 522 | bool affloaded = false, 523 | dictloaded = false, 524 | stoploaded = false; 525 | ListCell *l; 526 | 527 | /* this is the result passed to dispell_lexize */ 528 | DictInfo *info = (DictInfo *) palloc0(sizeof(DictInfo)); 529 | 530 | foreach(l, dictoptions) 531 | { 532 | DefElem *defel = (DefElem *) lfirst(l); 533 | 534 | if (pg_strcasecmp(defel->defname, "DictFile") == 0) 535 | { 536 | if (dictloaded) 537 | ereport(ERROR, 538 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 539 | errmsg("multiple DictFile parameters"))); 540 | dictFile = defGetString(defel); 541 | dictloaded = true; 542 | } 543 | else if (pg_strcasecmp(defel->defname, "AffFile") == 0) 544 | { 545 | if (affloaded) 546 | ereport(ERROR, 547 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 548 | errmsg("multiple AffFile parameters"))); 549 | affFile = defGetString(defel); 550 | affloaded = true; 551 | } 552 | else if (pg_strcasecmp(defel->defname, "StopWords") == 0) 553 | { 554 | if (stoploaded) 555 | ereport(ERROR, 556 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 557 | errmsg("multiple StopWords parameters"))); 558 | stopFile = defGetString(defel); 559 | stoploaded = true; 560 | } 561 | else 562 | { 563 | ereport(ERROR, 564 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 565 | errmsg("unrecognized Ispell parameter: \"%s\"", 566 | defel->defname))); 567 | } 568 | } 569 | 570 | if (!affloaded) 571 | { 572 | ereport(ERROR, 573 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 574 | errmsg("missing AffFile parameter"))); 575 | } 576 | else if (!dictloaded) 577 | { 578 | ereport(ERROR, 579 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 580 | errmsg("missing DictFile parameter"))); 581 | } 582 | 583 | /* search if the dictionary is already initialized */ 584 | LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); 585 | 586 | /* 
587 | * Current context is a long lived context. Create child context to store 588 | * DictInfo internal data. 589 | */ 590 | info->infoCntx = AllocSetContextCreate(CurrentMemoryContext, 591 | "shared_ispell context", 592 | ALLOCSET_DEFAULT_SIZES); 593 | 594 | init_shared_dict(info, info->infoCntx, dictFile, affFile, stopFile); 595 | 596 | LWLockRelease(segment_info->lock); 597 | 598 | PG_RETURN_POINTER(info); 599 | } 600 | 601 | Datum 602 | dispell_lexize(PG_FUNCTION_ARGS) 603 | { 604 | DictInfo *info = (DictInfo *) PG_GETARG_POINTER(0); 605 | char *in = (char *) PG_GETARG_POINTER(1); 606 | int32 len = PG_GETARG_INT32(2); 607 | char *txt; 608 | TSLexeme *res; 609 | TSLexeme *ptr, 610 | *cptr; 611 | 612 | if (len <= 0) 613 | PG_RETURN_POINTER(NULL); 614 | 615 | txt = str_tolower(in, len, DEFAULT_COLLATION_OID); 616 | 617 | /* need to lock the segment in shared mode */ 618 | LWLockAcquire(segment_info->lock, LW_SHARED); 619 | 620 | /* do we need to reinit the dictionary? was the dict reset since the lookup */ 621 | if (INSTR_TIME_GET_MICROSEC(info->lookup) < 622 | INSTR_TIME_GET_MICROSEC(segment_info->lastReset)) 623 | { 624 | DictInfo saveInfo = *info; 625 | 626 | /* relock in exclusive mode */ 627 | LWLockRelease(segment_info->lock); 628 | LWLockAcquire(segment_info->lock, LW_EXCLUSIVE); 629 | 630 | /* 631 | * info is allocated in info->saveCntx, so that's why we use a copy of 632 | * info here 633 | */ 634 | 635 | MemoryContextReset(saveInfo.infoCntx); 636 | MemSet(info, 0, sizeof(*info)); 637 | 638 | init_shared_dict(info, saveInfo.infoCntx, saveInfo.dictFile, 639 | saveInfo.affixFile, saveInfo.stopFile); 640 | } 641 | 642 | res = NINormalizeWord(&(info->dict), txt); 643 | 644 | /* nothing found :-( */ 645 | if (res == NULL) 646 | { 647 | LWLockRelease(segment_info->lock); 648 | PG_RETURN_POINTER(NULL); 649 | } 650 | 651 | ptr = cptr = res; 652 | while (ptr->lexeme) 653 | { 654 | if (info->shstop && searchstoplist(&(info->shstop->stop), ptr->lexeme)) 655 | { 656 
| pfree(ptr->lexeme); 657 | ptr->lexeme = NULL; 658 | ptr++; 659 | } 660 | else 661 | { 662 | memcpy(cptr, ptr, sizeof(TSLexeme)); 663 | cptr++; 664 | ptr++; 665 | } 666 | } 667 | cptr->lexeme = NULL; 668 | 669 | LWLockRelease(segment_info->lock); 670 | 671 | PG_RETURN_POINTER(res); 672 | } 673 | 674 | /* 675 | * This 'allocates' memory in the shared segment - i.e. the memory is 676 | * already allocated and this just gives nbytes to the caller. This is 677 | * used exclusively by the 'copy' methods defined below. 678 | * 679 | * The memory is kept aligned thanks to MAXALIGN. Also, this assumes 680 | * the segment was locked properly by the caller. 681 | */ 682 | static char * 683 | shalloc(int bytes) 684 | { 685 | char *result; 686 | 687 | bytes = MAXALIGN(bytes); 688 | 689 | /* This shouldn't really happen, as the init_shared_dict checks the size 690 | * prior to copy. So let's just throw error here, as something went 691 | * obviously wrong. */ 692 | if (bytes > segment_info->available) 693 | elog(ERROR, "the shared segment (shared ispell) is too small"); 694 | 695 | result = segment_info->firstfree; 696 | segment_info->firstfree += bytes; 697 | segment_info->available -= bytes; 698 | 699 | memset(result, 0, bytes); 700 | 701 | return result; 702 | } 703 | 704 | /* 705 | * Copies a string into the shared segment - allocates memory and does memcpy. 706 | * 707 | * TODO This assumes the string is properly terminated (should be guaranteed 708 | * by the code that reads and parses the dictionary / affixes). 709 | */ 710 | static char * 711 | shstrcpy(const char *str) 712 | { 713 | char *tmp = shalloc(strlen(str) + 1); 714 | 715 | memcpy(tmp, str, strlen(str) + 1); 716 | 717 | return tmp; 718 | } 719 | 720 | /* 721 | * The following methods serve to do a "deep copy" of the parsed dictionary, 722 | * into the shared memory segment. 
For each structure this provides 'size' 723 | * and 'copy' functions to get the size first (for shalloc) and performing 724 | * the actual copy. 725 | */ 726 | 727 | /* SPNode - dictionary words */ 728 | 729 | static SPNode * 730 | copySPNode(SPNode *node) 731 | { 732 | int i; 733 | SPNode *copy = NULL; 734 | 735 | if (node == NULL) 736 | return NULL; 737 | 738 | copy = (SPNode *) shalloc(offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); 739 | memcpy(copy, node, offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); 740 | 741 | for (i = 0; i < node->length; i++) 742 | copy->data[i].node = copySPNode(node->data[i].node); 743 | 744 | return copy; 745 | } 746 | 747 | static int 748 | sizeSPNode(SPNode *node) 749 | { 750 | int i; 751 | int size = 0; 752 | 753 | if (node == NULL) 754 | return 0; 755 | 756 | size = MAXALIGN(offsetof(SPNode, data) + sizeof(SPNodeData) * node->length); 757 | 758 | for (i = 0; i < node->length; i++) 759 | size += sizeSPNode(node->data[i].node); 760 | 761 | return size; 762 | } 763 | 764 | /* StopList */ 765 | 766 | static SharedStopList * 767 | copyStopList(StopList *list, char *stopFile, int size) 768 | { 769 | int i; 770 | SharedStopList *copy = (SharedStopList *) shalloc(sizeof(SharedStopList)); 771 | 772 | copy->stop.len = list->len; 773 | copy->stop.stop = (char **) shalloc(sizeof(char *) * list->len); 774 | copy->stopFile = shstrcpy(stopFile); 775 | copy->nbytes = size; 776 | 777 | for (i = 0; i < list->len; i++) 778 | copy->stop.stop[i] = shstrcpy(list->stop[i]); 779 | 780 | return copy; 781 | } 782 | 783 | static int 784 | sizeStopList(StopList *list, char *stopFile) 785 | { 786 | int i; 787 | int size = MAXALIGN(sizeof(SharedStopList)); 788 | 789 | size += MAXALIGN(sizeof(char *) * list->len); 790 | size += MAXALIGN(strlen(stopFile) + 1); 791 | 792 | for (i = 0; i < list->len; i++) 793 | size += MAXALIGN(strlen(list->stop[i]) + 1); 794 | 795 | return size; 796 | } 797 | 798 | /* 799 | * Performs deep copy of the 
dictionary into the shared memory segment. 800 | * 801 | * It gets the populated Ispell Dictionary (dict) and copies all the data 802 | * using the 'copy' methods listed above. It also keeps the filenames so 803 | * that it's possible to lookup the dictionaries later. 804 | * 805 | * Function copies only word list. Affix list is loaded to a current process. 806 | */ 807 | static SharedIspellDict * 808 | copyIspellDict(IspellDict *dict, char *dictFile, char *affixFile, int size, int words) 809 | { 810 | int i; 811 | SharedIspellDict *copy = (SharedIspellDict *) shalloc(sizeof(SharedIspellDict)); 812 | 813 | copy->dictFile = shalloc(strlen(dictFile) + 1); 814 | copy->affixFile = shalloc(strlen(affixFile) + 1); 815 | 816 | strcpy(copy->dictFile, dictFile); 817 | strcpy(copy->affixFile, affixFile); 818 | 819 | copy->dict.Dictionary = copySPNode(dict->Dictionary); 820 | 821 | /* copy affix data */ 822 | copy->dict.nAffixData = dict->nAffixData; 823 | copy->dict.AffixData = (const char **) shalloc(sizeof(char *) * dict->nAffixData); 824 | for (i = 0; i < copy->dict.nAffixData; i++) 825 | copy->dict.AffixData[i] = shstrcpy(dict->AffixData[i]); 826 | 827 | copy->dict.flagMode = dict->flagMode; 828 | 829 | copy->nbytes = size; 830 | copy->nwords = words; 831 | 832 | return copy; 833 | } 834 | 835 | /* 836 | * Computes how much space is needed for a dictionary (word list) in the shared segment. 837 | * 838 | * Function does not compute space for a affix list since affix list is loaded 839 | * to a current process. 
840 | */ 841 | static int 842 | sizeIspellDict(IspellDict *dict, char *dictFile, char *affixFile) 843 | { 844 | int i; 845 | int size = MAXALIGN(sizeof(SharedIspellDict)); 846 | 847 | size += MAXALIGN(strlen(dictFile) + 1); 848 | size += MAXALIGN(strlen(affixFile) + 1); 849 | 850 | size += sizeSPNode(dict->Dictionary); 851 | 852 | /* copy affix data */ 853 | size += MAXALIGN(sizeof(char *) * dict->nAffixData); 854 | for (i = 0; i < dict->nAffixData; i++) 855 | size += MAXALIGN(sizeof(char) * strlen(dict->AffixData[i]) + 1); 856 | 857 | return size; 858 | } 859 | 860 | /* SRF function returning a list of shared dictionaries currently loaded in memory. */ 861 | Datum 862 | dispell_list_dicts(PG_FUNCTION_ARGS) 863 | { 864 | FuncCallContext *funcctx; 865 | TupleDesc tupdesc; 866 | SharedIspellDict *dict; 867 | 868 | /* init on the first call */ 869 | if (SRF_IS_FIRSTCALL()) 870 | { 871 | MemoryContext oldcontext; 872 | 873 | funcctx = SRF_FIRSTCALL_INIT(); 874 | oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); 875 | 876 | /* get a shared lock and then the first dictionary */ 877 | LWLockAcquire(segment_info->lock, LW_SHARED); 878 | funcctx->user_fctx = segment_info->shdict; 879 | 880 | /* Build a tuple descriptor for our result type */ 881 | if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) 882 | ereport(ERROR, 883 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 884 | errmsg("function returning record called in context " 885 | "that cannot accept type record"))); 886 | 887 | /* 888 | * generate attribute metadata needed later to produce tuples from raw 889 | * C strings 890 | */ 891 | funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); 892 | funcctx->tuple_desc = tupdesc; 893 | 894 | /* switch back to the old context */ 895 | MemoryContextSwitchTo(oldcontext); 896 | } 897 | 898 | /* init the context */ 899 | funcctx = SRF_PERCALL_SETUP(); 900 | 901 | /* check if we have more data */ 902 | if (funcctx->user_fctx != NULL) 903 | { 
904 | HeapTuple tuple; 905 | Datum result; 906 | Datum values[5]; 907 | bool nulls[5]; 908 | 909 | text *dictname, 910 | *affname; 911 | 912 | dict = (SharedIspellDict *) funcctx->user_fctx; 913 | funcctx->user_fctx = dict->next; 914 | 915 | memset(nulls, 0, sizeof(nulls)); 916 | 917 | dictname = cstring_to_text(dict->dictFile); 918 | affname = cstring_to_text(dict->affixFile); 919 | 920 | values[0] = PointerGetDatum(dictname); 921 | values[1] = PointerGetDatum(affname); 922 | values[2] = UInt32GetDatum(dict->nwords); 923 | values[3] = UInt32GetDatum(dict->dict.naffixes); 924 | values[4] = UInt32GetDatum(dict->nbytes); 925 | 926 | /* Build and return the tuple. */ 927 | tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); 928 | 929 | /* make the tuple into a datum */ 930 | result = HeapTupleGetDatum(tuple); 931 | 932 | /* Here we want to return another item: */ 933 | SRF_RETURN_NEXT(funcctx, result); 934 | } 935 | else 936 | { 937 | /* release the lock */ 938 | LWLockRelease(segment_info->lock); 939 | 940 | /* Here we are done returning items and just need to clean up: */ 941 | SRF_RETURN_DONE(funcctx); 942 | } 943 | } 944 | 945 | /* SRF function returning a list of shared stopword lists currently loaded in memory. 
*/ 946 | Datum 947 | dispell_list_stoplists(PG_FUNCTION_ARGS) 948 | { 949 | FuncCallContext *funcctx; 950 | TupleDesc tupdesc; 951 | SharedStopList *stoplist; 952 | 953 | /* init on the first call */ 954 | if (SRF_IS_FIRSTCALL()) 955 | { 956 | MemoryContext oldcontext; 957 | 958 | funcctx = SRF_FIRSTCALL_INIT(); 959 | oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); 960 | 961 | /* get a shared lock and then the first stop list */ 962 | LWLockAcquire(segment_info->lock, LW_SHARED); 963 | funcctx->user_fctx = segment_info->shstop; 964 | 965 | /* Build a tuple descriptor for our result type */ 966 | if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) 967 | ereport(ERROR, 968 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 969 | errmsg("function returning record called in context " 970 | "that cannot accept type record"))); 971 | 972 | /* 973 | * generate attribute metadata needed later to produce tuples from raw 974 | * C strings 975 | */ 976 | funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); 977 | funcctx->tuple_desc = tupdesc; 978 | 979 | /* switch back to the old context */ 980 | MemoryContextSwitchTo(oldcontext); 981 | } 982 | 983 | /* init the context */ 984 | funcctx = SRF_PERCALL_SETUP(); 985 | 986 | /* check if we have more data */ 987 | if (funcctx->user_fctx != NULL) 988 | { 989 | HeapTuple tuple; 990 | Datum result; 991 | Datum values[3]; 992 | bool nulls[3]; 993 | 994 | text *stopname; 995 | 996 | stoplist = (SharedStopList *) funcctx->user_fctx; 997 | funcctx->user_fctx = stoplist->next; 998 | 999 | memset(nulls, 0, sizeof(nulls)); 1000 | 1001 | stopname = cstring_to_text(stoplist->stopFile); 1002 | 1003 | values[0] = PointerGetDatum(stopname); 1004 | values[1] = UInt32GetDatum(stoplist->stop.len); 1005 | values[2] = UInt32GetDatum(stoplist->nbytes); 1006 | 1007 | /* Build and return the tuple. 
*/ 1008 | tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); 1009 | 1010 | /* make the tuple into a datum */ 1011 | result = HeapTupleGetDatum(tuple); 1012 | 1013 | /* Here we want to return another item: */ 1014 | SRF_RETURN_NEXT(funcctx, result); 1015 | } 1016 | else 1017 | { 1018 | /* release the lock */ 1019 | LWLockRelease(segment_info->lock); 1020 | 1021 | /* Here we are done returning items and just need to clean up: */ 1022 | SRF_RETURN_DONE(funcctx); 1023 | } 1024 | } 1025 | --------------------------------------------------------------------------------