├── .gitignore ├── Makefile ├── README.md ├── data ├── trgm.data └── trgm2.data ├── expected ├── pg_substring_trgm.out └── pg_trgm.out ├── pg_trgm--1.0--1.1.sql ├── pg_trgm--1.1--1.2.sql ├── pg_trgm--1.2--1.3.sql ├── pg_trgm--1.3.sql ├── pg_trgm--unpackaged--1.0.sql ├── pg_trgm.control ├── sql ├── pg_substring_trgm.sql └── pg_trgm.sql ├── trgm.h ├── trgm_gin.c ├── trgm_gist.c ├── trgm_op.c └── trgm_regexp.c /.gitignore: -------------------------------------------------------------------------------- 1 | /log/ 2 | /results/ 3 | /tmp_check/ 4 | /.deps/ 5 | *.o 6 | *.so -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # contrib/pg_trgm/Makefile 2 | 3 | MODULE_big = pg_trgm 4 | OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES) 5 | 6 | EXTENSION = pg_trgm 7 | DATA = pg_trgm--1.3.sql pg_trgm--1.0--1.1.sql pg_trgm--1.1--1.2.sql pg_trgm--1.2--1.3.sql pg_trgm--unpackaged--1.0.sql 8 | PGFILEDESC = "pg_trgm - trigram matching" 9 | 10 | REGRESS = pg_trgm pg_substring_trgm 11 | 12 | ifdef USE_PGXS 13 | PG_CONFIG = pg_config 14 | PGXS := $(shell $(PG_CONFIG) --pgxs) 15 | include $(PGXS) 16 | else 17 | subdir = contrib/pg_trgm 18 | top_builddir = ../.. 19 | include $(top_builddir)/src/Makefile.global 20 | include $(top_srcdir)/contrib/contrib-global.mk 21 | endif 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pg_trgm – text similarity measurement and index searching based on trigrams 2 | 3 | ## Introduction 4 | 5 | The pg_trgm module provides functions and operators for determining the 6 | similarity of alphanumeric text based on trigram matching, as well as index 7 | operator classes that support fast searching for similar strings. 
8 | 9 | A trigram is a group of three consecutive characters taken from a string. We can 10 | measure the similarity of two strings by counting the number of trigrams they 11 | share. This simple idea turns out to be very effective for measuring the 12 | similarity of words in many natural languages. 13 | 14 | The original module is located in 15 | [GitHub](https://github.com/postgres/postgres/tree/master/contrib/pg_trgm). This 16 | module provides a new function and new operators which provide fuzzy searching 17 | for a word in a text. 18 | 19 | **Note**. The functions of this module differ from those of the pg_trgm module 20 | included in PostgreSQL 9.6: the functions of this module have other 21 | names, and the module does not provide GUC parameters. 22 | 23 | ## License 24 | 25 | This module is available from [GitHub](https://github.com/postgrespro/pg_trgm_pro) 26 | under the same license as [PostgreSQL](http://www.postgresql.org/about/licence/) 27 | and supports PostgreSQL 9.4+. 28 | 29 | ## Installation 30 | 31 | Before building and installing pg_trgm, you should ensure the following: 32 | 33 | * PostgreSQL version is 9.4 or higher. 34 | 35 | A typical installation procedure may look like this: 36 | 37 | $ git clone https://github.com/postgrespro/pg_trgm_pro 38 | $ cd pg_trgm_pro 39 | $ make USE_PGXS=1 40 | $ sudo make USE_PGXS=1 install 41 | $ make USE_PGXS=1 installcheck 42 | $ psql DB -c "CREATE EXTENSION pg_trgm;" 43 | 44 | ## New functions and operators 45 | 46 | The pg_trgm module provides the following new functions. 47 | 48 | | Function | Returns | Description 49 | | -------------------------------- | ------- | --------------------------------------------------- 50 | | substring_similarity(text, text) | real | Returns a number that indicates how similar the first string is to the most similar word of the second string. The function searches the second string for the most similar word, not the most similar substring. 
The result ranges from zero (indicating that the two strings are completely dissimilar) to one (indicating that the first string is identical to one of the words of the second string). 51 | | show_substring_limit() | real | Returns the current substring similarity threshold that is used by the **<%** operator. 52 | | set_substring_limit(real) | real | Sets the current substring similarity threshold that is used by the **<%** operator. The threshold must be between 0 and 1 (default is 0.6). 53 | 54 | The module also provides a new operator. 55 | 56 | | Operator | Returns | Description 57 | | -------------- | ------- | --------------------------------------------------- 58 | | text <% text | boolean | Returns **true** if its arguments have a substring similarity that is greater than the current substring similarity threshold set by **set_substring_limit()**. 59 | 60 | GiST and GIN indexes support the operator **<%**. 61 | 62 | ## Examples 63 | 64 | Let us assume we have a **test_trgm** table: 65 | 66 | ```sql 67 | CREATE TABLE test_trgm (t text); 68 | ``` 69 | 70 | You can create a GiST index: 71 | 72 | ```sql 73 | CREATE INDEX trgm_idx ON test_trgm USING GIST (t gist_trgm_ops); 74 | ``` 75 | 76 | or a GIN index: 77 | 78 | ```sql 79 | CREATE INDEX trgm_idx ON test_trgm USING GIN (t gin_trgm_ops); 80 | ``` 81 | 82 | Now you can use an index on the **t** column for substring similarity. For example: 83 | 84 | ```sql 85 | SELECT t, substring_similarity('word', t) AS sml 86 | FROM test_trgm 87 | WHERE 'word' <% t 88 | ORDER BY sml DESC, t; 89 | ``` 90 | 91 | This will return all values in the text column that have a word which is 92 | sufficiently similar to `word`, sorted from best match to worst. The index will be used to make this a fast operation even over very large data sets. 
93 | 94 | ## Authors 95 | 96 | Oleg Bartunov 97 | 98 | Teodor Sigaev 99 | -------------------------------------------------------------------------------- /data/trgm.data: -------------------------------------------------------------------------------- 1 | qwertyu0001 2 | qwertyu0002 3 | qwertyu0003 4 | qwertyu0004 5 | qwertyu0005 6 | qwertyu0006 7 | qwertyu0007 8 | qwertyu0008 9 | qwertyu0009 10 | qwertyu0010 11 | qwertyu0011 12 | qwertyu0012 13 | qwertyu0013 14 | qwertyu0014 15 | qwertyu0015 16 | qwertyu0016 17 | qwertyu0017 18 | qwertyu0018 19 | qwertyu0019 20 | qwertyu0020 21 | qwertyu0021 22 | qwertyu0022 23 | qwertyu0023 24 | qwertyu0024 25 | qwertyu0025 26 | qwertyu0026 27 | qwertyu0027 28 | qwertyu0028 29 | qwertyu0029 30 | qwertyu0030 31 | qwertyu0031 32 | qwertyu0032 33 | qwertyu0033 34 | qwertyu0034 35 | qwertyu0035 36 | qwertyu0036 37 | qwertyu0037 38 | qwertyu0038 39 | qwertyu0039 40 | qwertyu0040 41 | qwertyu0041 42 | qwertyu0042 43 | qwertyu0043 44 | qwertyu0044 45 | qwertyu0045 46 | qwertyu0046 47 | qwertyu0047 48 | qwertyu0048 49 | qwertyu0049 50 | qwertyu0050 51 | qwertyu0051 52 | qwertyu0052 53 | qwertyu0053 54 | qwertyu0054 55 | qwertyu0055 56 | qwertyu0056 57 | qwertyu0057 58 | qwertyu0058 59 | qwertyu0059 60 | qwertyu0060 61 | qwertyu0061 62 | qwertyu0062 63 | qwertyu0063 64 | qwertyu0064 65 | qwertyu0065 66 | qwertyu0066 67 | qwertyu0067 68 | qwertyu0068 69 | qwertyu0069 70 | qwertyu0070 71 | qwertyu0071 72 | qwertyu0072 73 | qwertyu0073 74 | qwertyu0074 75 | qwertyu0075 76 | qwertyu0076 77 | qwertyu0077 78 | qwertyu0078 79 | qwertyu0079 80 | qwertyu0080 81 | qwertyu0081 82 | qwertyu0082 83 | qwertyu0083 84 | qwertyu0084 85 | qwertyu0085 86 | qwertyu0086 87 | qwertyu0087 88 | qwertyu0088 89 | qwertyu0089 90 | qwertyu0090 91 | qwertyu0091 92 | qwertyu0092 93 | qwertyu0093 94 | qwertyu0094 95 | qwertyu0095 96 | qwertyu0096 97 | qwertyu0097 98 | qwertyu0098 99 | qwertyu0099 100 | qwertyu0100 101 | qwertyu0101 102 | qwertyu0102 103 | qwertyu0103 
104 | qwertyu0104 105 | qwertyu0105 106 | qwertyu0106 107 | qwertyu0107 108 | qwertyu0108 109 | qwertyu0109 110 | qwertyu0110 111 | qwertyu0111 112 | qwertyu0112 113 | qwertyu0113 114 | qwertyu0114 115 | qwertyu0115 116 | qwertyu0116 117 | qwertyu0117 118 | qwertyu0118 119 | qwertyu0119 120 | qwertyu0120 121 | qwertyu0121 122 | qwertyu0122 123 | qwertyu0123 124 | qwertyu0124 125 | qwertyu0125 126 | qwertyu0126 127 | qwertyu0127 128 | qwertyu0128 129 | qwertyu0129 130 | qwertyu0130 131 | qwertyu0131 132 | qwertyu0132 133 | qwertyu0133 134 | qwertyu0134 135 | qwertyu0135 136 | qwertyu0136 137 | qwertyu0137 138 | qwertyu0138 139 | qwertyu0139 140 | qwertyu0140 141 | qwertyu0141 142 | qwertyu0142 143 | qwertyu0143 144 | qwertyu0144 145 | qwertyu0145 146 | qwertyu0146 147 | qwertyu0147 148 | qwertyu0148 149 | qwertyu0149 150 | qwertyu0150 151 | qwertyu0151 152 | qwertyu0152 153 | qwertyu0153 154 | qwertyu0154 155 | qwertyu0155 156 | qwertyu0156 157 | qwertyu0157 158 | qwertyu0158 159 | qwertyu0159 160 | qwertyu0160 161 | qwertyu0161 162 | qwertyu0162 163 | qwertyu0163 164 | qwertyu0164 165 | qwertyu0165 166 | qwertyu0166 167 | qwertyu0167 168 | qwertyu0168 169 | qwertyu0169 170 | qwertyu0170 171 | qwertyu0171 172 | qwertyu0172 173 | qwertyu0173 174 | qwertyu0174 175 | qwertyu0175 176 | qwertyu0176 177 | qwertyu0177 178 | qwertyu0178 179 | qwertyu0179 180 | qwertyu0180 181 | qwertyu0181 182 | qwertyu0182 183 | qwertyu0183 184 | qwertyu0184 185 | qwertyu0185 186 | qwertyu0186 187 | qwertyu0187 188 | qwertyu0188 189 | qwertyu0189 190 | qwertyu0190 191 | qwertyu0191 192 | qwertyu0192 193 | qwertyu0193 194 | qwertyu0194 195 | qwertyu0195 196 | qwertyu0196 197 | qwertyu0197 198 | qwertyu0198 199 | qwertyu0199 200 | qwertyu0200 201 | qwertyu0201 202 | qwertyu0202 203 | qwertyu0203 204 | qwertyu0204 205 | qwertyu0205 206 | qwertyu0206 207 | qwertyu0207 208 | qwertyu0208 209 | qwertyu0209 210 | qwertyu0210 211 | qwertyu0211 212 | qwertyu0212 213 | qwertyu0213 214 | qwertyu0214 
215 | qwertyu0215 216 | qwertyu0216 217 | qwertyu0217 218 | qwertyu0218 219 | qwertyu0219 220 | qwertyu0220 221 | qwertyu0221 222 | qwertyu0222 223 | qwertyu0223 224 | qwertyu0224 225 | qwertyu0225 226 | qwertyu0226 227 | qwertyu0227 228 | qwertyu0228 229 | qwertyu0229 230 | qwertyu0230 231 | qwertyu0231 232 | qwertyu0232 233 | qwertyu0233 234 | qwertyu0234 235 | qwertyu0235 236 | qwertyu0236 237 | qwertyu0237 238 | qwertyu0238 239 | qwertyu0239 240 | qwertyu0240 241 | qwertyu0241 242 | qwertyu0242 243 | qwertyu0243 244 | qwertyu0244 245 | qwertyu0245 246 | qwertyu0246 247 | qwertyu0247 248 | qwertyu0248 249 | qwertyu0249 250 | qwertyu0250 251 | qwertyu0251 252 | qwertyu0252 253 | qwertyu0253 254 | qwertyu0254 255 | qwertyu0255 256 | qwertyu0256 257 | qwertyu0257 258 | qwertyu0258 259 | qwertyu0259 260 | qwertyu0260 261 | qwertyu0261 262 | qwertyu0262 263 | qwertyu0263 264 | qwertyu0264 265 | qwertyu0265 266 | qwertyu0266 267 | qwertyu0267 268 | qwertyu0268 269 | qwertyu0269 270 | qwertyu0270 271 | qwertyu0271 272 | qwertyu0272 273 | qwertyu0273 274 | qwertyu0274 275 | qwertyu0275 276 | qwertyu0276 277 | qwertyu0277 278 | qwertyu0278 279 | qwertyu0279 280 | qwertyu0280 281 | qwertyu0281 282 | qwertyu0282 283 | qwertyu0283 284 | qwertyu0284 285 | qwertyu0285 286 | qwertyu0286 287 | qwertyu0287 288 | qwertyu0288 289 | qwertyu0289 290 | qwertyu0290 291 | qwertyu0291 292 | qwertyu0292 293 | qwertyu0293 294 | qwertyu0294 295 | qwertyu0295 296 | qwertyu0296 297 | qwertyu0297 298 | qwertyu0298 299 | qwertyu0299 300 | qwertyu0300 301 | qwertyu0301 302 | qwertyu0302 303 | qwertyu0303 304 | qwertyu0304 305 | qwertyu0305 306 | qwertyu0306 307 | qwertyu0307 308 | qwertyu0308 309 | qwertyu0309 310 | qwertyu0310 311 | qwertyu0311 312 | qwertyu0312 313 | qwertyu0313 314 | qwertyu0314 315 | qwertyu0315 316 | qwertyu0316 317 | qwertyu0317 318 | qwertyu0318 319 | qwertyu0319 320 | qwertyu0320 321 | qwertyu0321 322 | qwertyu0322 323 | qwertyu0323 324 | qwertyu0324 325 | qwertyu0325 
326 | qwertyu0326 327 | qwertyu0327 328 | qwertyu0328 329 | qwertyu0329 330 | qwertyu0330 331 | qwertyu0331 332 | qwertyu0332 333 | qwertyu0333 334 | qwertyu0334 335 | qwertyu0335 336 | qwertyu0336 337 | qwertyu0337 338 | qwertyu0338 339 | qwertyu0339 340 | qwertyu0340 341 | qwertyu0341 342 | qwertyu0342 343 | qwertyu0343 344 | qwertyu0344 345 | qwertyu0345 346 | qwertyu0346 347 | qwertyu0347 348 | qwertyu0348 349 | qwertyu0349 350 | qwertyu0350 351 | qwertyu0351 352 | qwertyu0352 353 | qwertyu0353 354 | qwertyu0354 355 | qwertyu0355 356 | qwertyu0356 357 | qwertyu0357 358 | qwertyu0358 359 | qwertyu0359 360 | qwertyu0360 361 | qwertyu0361 362 | qwertyu0362 363 | qwertyu0363 364 | qwertyu0364 365 | qwertyu0365 366 | qwertyu0366 367 | qwertyu0367 368 | qwertyu0368 369 | qwertyu0369 370 | qwertyu0370 371 | qwertyu0371 372 | qwertyu0372 373 | qwertyu0373 374 | qwertyu0374 375 | qwertyu0375 376 | qwertyu0376 377 | qwertyu0377 378 | qwertyu0378 379 | qwertyu0379 380 | qwertyu0380 381 | qwertyu0381 382 | qwertyu0382 383 | qwertyu0383 384 | qwertyu0384 385 | qwertyu0385 386 | qwertyu0386 387 | qwertyu0387 388 | qwertyu0388 389 | qwertyu0389 390 | qwertyu0390 391 | qwertyu0391 392 | qwertyu0392 393 | qwertyu0393 394 | qwertyu0394 395 | qwertyu0395 396 | qwertyu0396 397 | qwertyu0397 398 | qwertyu0398 399 | qwertyu0399 400 | qwertyu0400 401 | qwertyu0401 402 | qwertyu0402 403 | qwertyu0403 404 | qwertyu0404 405 | qwertyu0405 406 | qwertyu0406 407 | qwertyu0407 408 | qwertyu0408 409 | qwertyu0409 410 | qwertyu0410 411 | qwertyu0411 412 | qwertyu0412 413 | qwertyu0413 414 | qwertyu0414 415 | qwertyu0415 416 | qwertyu0416 417 | qwertyu0417 418 | qwertyu0418 419 | qwertyu0419 420 | qwertyu0420 421 | qwertyu0421 422 | qwertyu0422 423 | qwertyu0423 424 | qwertyu0424 425 | qwertyu0425 426 | qwertyu0426 427 | qwertyu0427 428 | qwertyu0428 429 | qwertyu0429 430 | qwertyu0430 431 | qwertyu0431 432 | qwertyu0432 433 | qwertyu0433 434 | qwertyu0434 435 | qwertyu0435 436 | qwertyu0436 
437 | qwertyu0437 438 | qwertyu0438 439 | qwertyu0439 440 | qwertyu0440 441 | qwertyu0441 442 | qwertyu0442 443 | qwertyu0443 444 | qwertyu0444 445 | qwertyu0445 446 | qwertyu0446 447 | qwertyu0447 448 | qwertyu0448 449 | qwertyu0449 450 | qwertyu0450 451 | qwertyu0451 452 | qwertyu0452 453 | qwertyu0453 454 | qwertyu0454 455 | qwertyu0455 456 | qwertyu0456 457 | qwertyu0457 458 | qwertyu0458 459 | qwertyu0459 460 | qwertyu0460 461 | qwertyu0461 462 | qwertyu0462 463 | qwertyu0463 464 | qwertyu0464 465 | qwertyu0465 466 | qwertyu0466 467 | qwertyu0467 468 | qwertyu0468 469 | qwertyu0469 470 | qwertyu0470 471 | qwertyu0471 472 | qwertyu0472 473 | qwertyu0473 474 | qwertyu0474 475 | qwertyu0475 476 | qwertyu0476 477 | qwertyu0477 478 | qwertyu0478 479 | qwertyu0479 480 | qwertyu0480 481 | qwertyu0481 482 | qwertyu0482 483 | qwertyu0483 484 | qwertyu0484 485 | qwertyu0485 486 | qwertyu0486 487 | qwertyu0487 488 | qwertyu0488 489 | qwertyu0489 490 | qwertyu0490 491 | qwertyu0491 492 | qwertyu0492 493 | qwertyu0493 494 | qwertyu0494 495 | qwertyu0495 496 | qwertyu0496 497 | qwertyu0497 498 | qwertyu0498 499 | qwertyu0499 500 | qwertyu0500 501 | qwertyu0501 502 | qwertyu0502 503 | qwertyu0503 504 | qwertyu0504 505 | qwertyu0505 506 | qwertyu0506 507 | qwertyu0507 508 | qwertyu0508 509 | qwertyu0509 510 | qwertyu0510 511 | qwertyu0511 512 | qwertyu0512 513 | qwertyu0513 514 | qwertyu0514 515 | qwertyu0515 516 | qwertyu0516 517 | qwertyu0517 518 | qwertyu0518 519 | qwertyu0519 520 | qwertyu0520 521 | qwertyu0521 522 | qwertyu0522 523 | qwertyu0523 524 | qwertyu0524 525 | qwertyu0525 526 | qwertyu0526 527 | qwertyu0527 528 | qwertyu0528 529 | qwertyu0529 530 | qwertyu0530 531 | qwertyu0531 532 | qwertyu0532 533 | qwertyu0533 534 | qwertyu0534 535 | qwertyu0535 536 | qwertyu0536 537 | qwertyu0537 538 | qwertyu0538 539 | qwertyu0539 540 | qwertyu0540 541 | qwertyu0541 542 | qwertyu0542 543 | qwertyu0543 544 | qwertyu0544 545 | qwertyu0545 546 | qwertyu0546 547 | qwertyu0547 
548 | qwertyu0548 549 | qwertyu0549 550 | qwertyu0550 551 | qwertyu0551 552 | qwertyu0552 553 | qwertyu0553 554 | qwertyu0554 555 | qwertyu0555 556 | qwertyu0556 557 | qwertyu0557 558 | qwertyu0558 559 | qwertyu0559 560 | qwertyu0560 561 | qwertyu0561 562 | qwertyu0562 563 | qwertyu0563 564 | qwertyu0564 565 | qwertyu0565 566 | qwertyu0566 567 | qwertyu0567 568 | qwertyu0568 569 | qwertyu0569 570 | qwertyu0570 571 | qwertyu0571 572 | qwertyu0572 573 | qwertyu0573 574 | qwertyu0574 575 | qwertyu0575 576 | qwertyu0576 577 | qwertyu0577 578 | qwertyu0578 579 | qwertyu0579 580 | qwertyu0580 581 | qwertyu0581 582 | qwertyu0582 583 | qwertyu0583 584 | qwertyu0584 585 | qwertyu0585 586 | qwertyu0586 587 | qwertyu0587 588 | qwertyu0588 589 | qwertyu0589 590 | qwertyu0590 591 | qwertyu0591 592 | qwertyu0592 593 | qwertyu0593 594 | qwertyu0594 595 | qwertyu0595 596 | qwertyu0596 597 | qwertyu0597 598 | qwertyu0598 599 | qwertyu0599 600 | qwertyu0600 601 | qwertyu0601 602 | qwertyu0602 603 | qwertyu0603 604 | qwertyu0604 605 | qwertyu0605 606 | qwertyu0606 607 | qwertyu0607 608 | qwertyu0608 609 | qwertyu0609 610 | qwertyu0610 611 | qwertyu0611 612 | qwertyu0612 613 | qwertyu0613 614 | qwertyu0614 615 | qwertyu0615 616 | qwertyu0616 617 | qwertyu0617 618 | qwertyu0618 619 | qwertyu0619 620 | qwertyu0620 621 | qwertyu0621 622 | qwertyu0622 623 | qwertyu0623 624 | qwertyu0624 625 | qwertyu0625 626 | qwertyu0626 627 | qwertyu0627 628 | qwertyu0628 629 | qwertyu0629 630 | qwertyu0630 631 | qwertyu0631 632 | qwertyu0632 633 | qwertyu0633 634 | qwertyu0634 635 | qwertyu0635 636 | qwertyu0636 637 | qwertyu0637 638 | qwertyu0638 639 | qwertyu0639 640 | qwertyu0640 641 | qwertyu0641 642 | qwertyu0642 643 | qwertyu0643 644 | qwertyu0644 645 | qwertyu0645 646 | qwertyu0646 647 | qwertyu0647 648 | qwertyu0648 649 | qwertyu0649 650 | qwertyu0650 651 | qwertyu0651 652 | qwertyu0652 653 | qwertyu0653 654 | qwertyu0654 655 | qwertyu0655 656 | qwertyu0656 657 | qwertyu0657 658 | qwertyu0658 
659 | qwertyu0659 660 | qwertyu0660 661 | qwertyu0661 662 | qwertyu0662 663 | qwertyu0663 664 | qwertyu0664 665 | qwertyu0665 666 | qwertyu0666 667 | qwertyu0667 668 | qwertyu0668 669 | qwertyu0669 670 | qwertyu0670 671 | qwertyu0671 672 | qwertyu0672 673 | qwertyu0673 674 | qwertyu0674 675 | qwertyu0675 676 | qwertyu0676 677 | qwertyu0677 678 | qwertyu0678 679 | qwertyu0679 680 | qwertyu0680 681 | qwertyu0681 682 | qwertyu0682 683 | qwertyu0683 684 | qwertyu0684 685 | qwertyu0685 686 | qwertyu0686 687 | qwertyu0687 688 | qwertyu0688 689 | qwertyu0689 690 | qwertyu0690 691 | qwertyu0691 692 | qwertyu0692 693 | qwertyu0693 694 | qwertyu0694 695 | qwertyu0695 696 | qwertyu0696 697 | qwertyu0697 698 | qwertyu0698 699 | qwertyu0699 700 | qwertyu0700 701 | qwertyu0701 702 | qwertyu0702 703 | qwertyu0703 704 | qwertyu0704 705 | qwertyu0705 706 | qwertyu0706 707 | qwertyu0707 708 | qwertyu0708 709 | qwertyu0709 710 | qwertyu0710 711 | qwertyu0711 712 | qwertyu0712 713 | qwertyu0713 714 | qwertyu0714 715 | qwertyu0715 716 | qwertyu0716 717 | qwertyu0717 718 | qwertyu0718 719 | qwertyu0719 720 | qwertyu0720 721 | qwertyu0721 722 | qwertyu0722 723 | qwertyu0723 724 | qwertyu0724 725 | qwertyu0725 726 | qwertyu0726 727 | qwertyu0727 728 | qwertyu0728 729 | qwertyu0729 730 | qwertyu0730 731 | qwertyu0731 732 | qwertyu0732 733 | qwertyu0733 734 | qwertyu0734 735 | qwertyu0735 736 | qwertyu0736 737 | qwertyu0737 738 | qwertyu0738 739 | qwertyu0739 740 | qwertyu0740 741 | qwertyu0741 742 | qwertyu0742 743 | qwertyu0743 744 | qwertyu0744 745 | qwertyu0745 746 | qwertyu0746 747 | qwertyu0747 748 | qwertyu0748 749 | qwertyu0749 750 | qwertyu0750 751 | qwertyu0751 752 | qwertyu0752 753 | qwertyu0753 754 | qwertyu0754 755 | qwertyu0755 756 | qwertyu0756 757 | qwertyu0757 758 | qwertyu0758 759 | qwertyu0759 760 | qwertyu0760 761 | qwertyu0761 762 | qwertyu0762 763 | qwertyu0763 764 | qwertyu0764 765 | qwertyu0765 766 | qwertyu0766 767 | qwertyu0767 768 | qwertyu0768 769 | qwertyu0769 
770 | qwertyu0770 771 | qwertyu0771 772 | qwertyu0772 773 | qwertyu0773 774 | qwertyu0774 775 | qwertyu0775 776 | qwertyu0776 777 | qwertyu0777 778 | qwertyu0778 779 | qwertyu0779 780 | qwertyu0780 781 | qwertyu0781 782 | qwertyu0782 783 | qwertyu0783 784 | qwertyu0784 785 | qwertyu0785 786 | qwertyu0786 787 | qwertyu0787 788 | qwertyu0788 789 | qwertyu0789 790 | qwertyu0790 791 | qwertyu0791 792 | qwertyu0792 793 | qwertyu0793 794 | qwertyu0794 795 | qwertyu0795 796 | qwertyu0796 797 | qwertyu0797 798 | qwertyu0798 799 | qwertyu0799 800 | qwertyu0800 801 | qwertyu0801 802 | qwertyu0802 803 | qwertyu0803 804 | qwertyu0804 805 | qwertyu0805 806 | qwertyu0806 807 | qwertyu0807 808 | qwertyu0808 809 | qwertyu0809 810 | qwertyu0810 811 | qwertyu0811 812 | qwertyu0812 813 | qwertyu0813 814 | qwertyu0814 815 | qwertyu0815 816 | qwertyu0816 817 | qwertyu0817 818 | qwertyu0818 819 | qwertyu0819 820 | qwertyu0820 821 | qwertyu0821 822 | qwertyu0822 823 | qwertyu0823 824 | qwertyu0824 825 | qwertyu0825 826 | qwertyu0826 827 | qwertyu0827 828 | qwertyu0828 829 | qwertyu0829 830 | qwertyu0830 831 | qwertyu0831 832 | qwertyu0832 833 | qwertyu0833 834 | qwertyu0834 835 | qwertyu0835 836 | qwertyu0836 837 | qwertyu0837 838 | qwertyu0838 839 | qwertyu0839 840 | qwertyu0840 841 | qwertyu0841 842 | qwertyu0842 843 | qwertyu0843 844 | qwertyu0844 845 | qwertyu0845 846 | qwertyu0846 847 | qwertyu0847 848 | qwertyu0848 849 | qwertyu0849 850 | qwertyu0850 851 | qwertyu0851 852 | qwertyu0852 853 | qwertyu0853 854 | qwertyu0854 855 | qwertyu0855 856 | qwertyu0856 857 | qwertyu0857 858 | qwertyu0858 859 | qwertyu0859 860 | qwertyu0860 861 | qwertyu0861 862 | qwertyu0862 863 | qwertyu0863 864 | qwertyu0864 865 | qwertyu0865 866 | qwertyu0866 867 | qwertyu0867 868 | qwertyu0868 869 | qwertyu0869 870 | qwertyu0870 871 | qwertyu0871 872 | qwertyu0872 873 | qwertyu0873 874 | qwertyu0874 875 | qwertyu0875 876 | qwertyu0876 877 | qwertyu0877 878 | qwertyu0878 879 | qwertyu0879 880 | qwertyu0880 
881 | qwertyu0881 882 | qwertyu0882 883 | qwertyu0883 884 | qwertyu0884 885 | qwertyu0885 886 | qwertyu0886 887 | qwertyu0887 888 | qwertyu0888 889 | qwertyu0889 890 | qwertyu0890 891 | qwertyu0891 892 | qwertyu0892 893 | qwertyu0893 894 | qwertyu0894 895 | qwertyu0895 896 | qwertyu0896 897 | qwertyu0897 898 | qwertyu0898 899 | qwertyu0899 900 | qwertyu0900 901 | qwertyu0901 902 | qwertyu0902 903 | qwertyu0903 904 | qwertyu0904 905 | qwertyu0905 906 | qwertyu0906 907 | qwertyu0907 908 | qwertyu0908 909 | qwertyu0909 910 | qwertyu0910 911 | qwertyu0911 912 | qwertyu0912 913 | qwertyu0913 914 | qwertyu0914 915 | qwertyu0915 916 | qwertyu0916 917 | qwertyu0917 918 | qwertyu0918 919 | qwertyu0919 920 | qwertyu0920 921 | qwertyu0921 922 | qwertyu0922 923 | qwertyu0923 924 | qwertyu0924 925 | qwertyu0925 926 | qwertyu0926 927 | qwertyu0927 928 | qwertyu0928 929 | qwertyu0929 930 | qwertyu0930 931 | qwertyu0931 932 | qwertyu0932 933 | qwertyu0933 934 | qwertyu0934 935 | qwertyu0935 936 | qwertyu0936 937 | qwertyu0937 938 | qwertyu0938 939 | qwertyu0939 940 | qwertyu0940 941 | qwertyu0941 942 | qwertyu0942 943 | qwertyu0943 944 | qwertyu0944 945 | qwertyu0945 946 | qwertyu0946 947 | qwertyu0947 948 | qwertyu0948 949 | qwertyu0949 950 | qwertyu0950 951 | qwertyu0951 952 | qwertyu0952 953 | qwertyu0953 954 | qwertyu0954 955 | qwertyu0955 956 | qwertyu0956 957 | qwertyu0957 958 | qwertyu0958 959 | qwertyu0959 960 | qwertyu0960 961 | qwertyu0961 962 | qwertyu0962 963 | qwertyu0963 964 | qwertyu0964 965 | qwertyu0965 966 | qwertyu0966 967 | qwertyu0967 968 | qwertyu0968 969 | qwertyu0969 970 | qwertyu0970 971 | qwertyu0971 972 | qwertyu0972 973 | qwertyu0973 974 | qwertyu0974 975 | qwertyu0975 976 | qwertyu0976 977 | qwertyu0977 978 | qwertyu0978 979 | qwertyu0979 980 | qwertyu0980 981 | qwertyu0981 982 | qwertyu0982 983 | qwertyu0983 984 | qwertyu0984 985 | qwertyu0985 986 | qwertyu0986 987 | qwertyu0987 988 | qwertyu0988 989 | qwertyu0989 990 | qwertyu0990 991 | qwertyu0991 
992 | qwertyu0992 993 | qwertyu0993 994 | qwertyu0994 995 | qwertyu0995 996 | qwertyu0996 997 | qwertyu0997 998 | qwertyu0998 999 | qwertyu0999 1000 | qwertyu1000 1001 | -------------------------------------------------------------------------------- /data/trgm2.data: -------------------------------------------------------------------------------- 1 | Baikal 2 | Baikaluobbal 3 | Lake Baikal 4 | Baikalakko 5 | Baikal Business Centre 6 | Baikal Listvyanka Hotel 7 | Baikal Airfield 8 | Baikalovo 9 | Transbaikalia 10 | Baikal Mountains 11 | Baikal Hotel Moscow 12 | Zabaikalie 13 | Pribaikalskaya 14 | Baikal Plaza 15 | Rubaikale 16 | Tandobai Algad 17 | Daikalay 18 | Bakall 19 | Stubaital 20 | Neustift im Stubaital 21 | Anonyme Appartments Stubaital 22 | Barkaladja Pool 23 | Awabakal Nature Reserve 24 | Awabakal Field Studies Centre 25 | Barkala 26 | Bailallie 27 | Barkala Park 28 | Purba Kalaujan 29 | Nabakalas 30 | Barkal 31 | Baikanthapur 32 | Baikarjhuti 33 | Baika 34 | Baikari 35 | Bakalia Char 36 | Dakshin Bakalia 37 | Purba Kalmegha 38 | Efreytor-Bakalovo 39 | Baykalsko 40 | Baykal 41 | Baskaltsi 42 | Bakalite 43 | Bajkal 44 | Efrejtor Bakalovo 45 | Kampong Bakaladong 46 | Riacho do Sambaibal 47 | Sambaibal 48 | Barkalabava 49 | Zabaykal 50 | Bakalar Lake 51 | Kaikalahun Indian Reserve 25 52 | Tumba-Kalamba 53 | Kamba-Kalele 54 | Boyagbakala 55 | Bombakalo 56 | Batikalengbe 57 | Bakalukudu 58 | Bakalawa 59 | Bakala 60 | Matamba-Kalenge 61 | Kusu-Bakali 62 | Kambakala 63 | Bakali 64 | Abakalu 65 | Bonagbakala 66 | Bakalua 67 | Bikala Madila 68 | Bikala 69 | Bumba-Kaloki 70 | Tumba-Kalunga 71 | Kabankala 72 | Mambakala 73 | Tumba-Kalumba 74 | Kabakala 75 | Bikalabwa 76 | Bomba-Kalende 77 | Mwalaba-Kalamba 78 | Matamba-Kalenga 79 | Bumba-Kalumba 80 | Bikalange 81 | Kabikala 82 | Mubikale 83 | Kanampumba-Kalawa 84 | Tshiabakale 85 | Bakaly 86 | Bakalongo 87 | Bakale 88 | Bakala Koupi 89 | Bambakala 90 | Bakalou 91 | Tsibakala 92 | Kimbakala 93 | Dabakalakoro 94 | 
Dabakala 95 | Bakalafoulou 96 | Ngao Bakala 97 | Mobaika 98 | Baimalou 99 | Xibaitaling 100 | Baikai 101 | Baikang 102 | Baitaling 103 | Baikan 104 | Baimaling Linchang 105 | Baimalong 106 | Baikanzui 107 | Baiyali 108 | Baimaling 109 | Baimalang Donggang 110 | Baikangshuoma 111 | Baitaliao 112 | Taikale 113 | Babainale 114 | Bailale 115 | Baibale 116 | Baiwale 117 | Baikangnei 118 | Baitali 119 | Xiabaikan 120 | Bailalong 121 | Baimaluo 122 | Baikacun 123 | Baisala 124 | Bailalin 125 | Baimala 126 | Baidalong 127 | Dabaika 128 | Caikalong 129 | Cuobaikacun 130 | Baikadangcun 131 | Baimalin 132 | Subaika 133 | Gabakkale 134 | Barkallou 135 | Embatkala 136 | Bodega Tabaibal 137 | Golba Kalo 138 | Haikala 139 | Kaikale 140 | Waikaloulevu 141 | Waikalou Creek 142 | Waikalou 143 | Ndelaikalou 144 | Ndelaikalokalo 145 | Bay of Backaland 146 | Bankali 147 | Ker Samba Kalla 148 | Demba Kali 149 | Bakalarr 150 | Baipal 151 | Kalibakalako 152 | Dalabakala 153 | Bikal 154 | Sembaikan 155 | Praikalogu 156 | Tanjung Ompaikalio 157 | Bonebabakal 158 | Tanjung Batikala 159 | Pulau Bakalanpauno 160 | Teluk Bakalan 161 | Bakaltua Bank 162 | Bakalrejo 163 | Bakalan 164 | Sungai Bakaladiyan 165 | Bakal 166 | Buku Baikole 167 | Pulau Baika 168 | Tanjung Bakalinga 169 | Pulau Bakalan 170 | Desa Bakalan 171 | Kebakkalang 172 | Ngambakalang 173 | Mota Sabakal 174 | Bakalan Lor 175 | Babakalo 176 | Buyu Rapanbakalai 177 | Kalimundubakalan 178 | Bakalpokok 179 | Bakaldukuh 180 | Tanabakal 181 | Tanjung Aikaluin 182 | Desa Bakalrejo 183 | Bakalan Kidul 184 | Desa Kebakalan 185 | Kebakalan 186 | Bakalan Kulon 187 | Gunung Bakalan 188 | Kalibakal 189 | Bakaljaya 190 | Trobakal 191 | Bakalan Wetan 192 | Desa Bakal 193 | Alue Bakkala 194 | Uruk Bakal 195 | Bakalbuah 196 | Kwala Bakala 197 | Bakal Lama 198 | Bakal Julu 199 | Bakal Batu 200 | Moncong Baika 201 | Sampangbakalan 202 | Bakalam 203 | Desa Bakalankrapyak 204 | Lebakkalapa Tonggoh 205 | Trembakal 206 | Bakalan Tengah 207 | Kali 
Bakalan 208 | Desa Cemengbakalan 209 | Desa Bakalanpule 210 | Gunung Bakal 211 | Desa Tambakkalisogo 212 | Tambakkalisogo 213 | Desa Bakalanrayung 214 | Salu Bakalaeng 215 | Bakalaeng 216 | Danau Bakalan 217 | Selat Bakalan 218 | Selat Bakalanpauno 219 | Laikalanda 220 | Bakalinga 221 | Tanjung Mbakalang 222 | Desa Bakalankrajan 223 | Bakalan Dua 224 | Kali Purbakala 225 | Desa Bakalanwringinpitu 226 | Tukad Kubakal 227 | Praikalangga 228 | Banjar Kubakal 229 | Eat Bakal 230 | Sungai Bakala 231 | Kombakalada 232 | Sori Rabakalo 233 | Kahambikalela 234 | Baikarara 235 | Baikapaka 236 | Tukad Bakalan 237 | Teluk Haludubakal 238 | Yabakalewa 239 | Praikalumbang 240 | Waikalowo 241 | Praikalubu 242 | Loko Praikalubu 243 | Ramuk Ombakalada 244 | Praikalebung 245 | Praikaleka 246 | Andabakal 247 | Praikalau 248 | Praikalokat 249 | Praikalimbung 250 | Bambakalo 251 | Leubakkalian 252 | Pematang Baitalimbangan 253 | Lebakalil 254 | Gereba Kaler 255 | Krajan Bakalan 256 | Bakalan Barat 257 | Muarabakal 258 | Umbulan Maharobakal 259 | Bakaldalam 260 | Talang Bakal 261 | Pematang Bakalpanang 262 | Baidaloen 263 | Jatibakal 264 | Tubu Bakalekuk 265 | Dola Peimambakal 266 | Bakalang 267 | Teluk Bakalang 268 | Salu Baidale 269 | Bakalerek 270 | Ile Bakalibu 271 | Parbakalan 272 | Praikalembu 273 | Palindi Laikali 274 | Praikalu 275 | Sori Labakalate 276 | Air Bakal-kecil 277 | Sungaikalung 278 | Sungaikalong 279 | Pematang Bakalpanjang 280 | Payabakal 281 | Waikala 282 | Sungaikali 283 | Sungai Pebakalan 284 | Parit Membakal 285 | Bakalpakebo 286 | Baikat Abu Jaraban 287 | Maikalganj 288 | Maikala Range 289 | Bakalha 290 | Baitalpur 291 | Baikanthpur 292 | Baihal 293 | Barkala Reserved Forest 294 | Babaipalli 295 | Kaikalapettai 296 | Kambainallur 297 | Bakkalale 298 | Kaikalui 299 | Baijalpur 300 | Nehalla Bankalah Reserved Forest 301 | Barkala Rao 302 | Barkali 303 | Baidal 304 | Barkaleh 305 | Darreh Pumba Kal 306 | Bahkalleh 307 | Wibakale 308 | Gaikali 309 | Gagaba Kalo 310 
| Barkalare 311 | Bakkalmal 312 | Gora Bakalyadyr 313 | Rodnik Bakalybulak 314 | Urochishche Bakaly 315 | Sopka Bakaly 316 | Gory Bakaly 317 | Bugor Arba-Kalgan 318 | Ozero Baykal 319 | Kolodets Tabakkalgan 320 | Walangivattu Vaikal 321 | Vattevaikal Anicut 322 | Vaikali Tevar Kulam 323 | Vaikalitevan Kulam 324 | Vaikaladichchenai 325 | Uchchodaikallu 326 | Sellapattu Vaikal 327 | Savata Vaikal 328 | Puttadivali Vaikal 329 | Palukadu Vaikal 330 | Mulaikallu Kulam 331 | Koraikallimadu 332 | Koraikalapu Kulam 333 | Karaiyamullivaikal 334 | Karaivaikal Kulam 335 | Kanawali Vaikal 336 | Habakkala 337 | Chalam Vaikal Aru 338 | Ambakala Wewa 339 | Alaikallupoddakulam 340 | Alaikallupodda Alankulam 341 | Akamadi Vaikal 342 | Alaikalluppodda Kulam 343 | Vaikaliththevakulam 344 | Baikole 345 | Sidi Mohammed el Bakali 346 | Sidi Mohammed Bakkal 347 | Sidi Bakal 348 | Oulad el Bakkal 349 | Zaouia Oulad Bakal 350 | Azib el Bakkali 351 | Tombakala 352 | Malaikaly 353 | Ambadikala 354 | Bakalica 355 | Bakalnica 356 | Abankala 357 | Kombakala 358 | Bawkalut 359 | Bakaleko 360 | Bawkalut Chaung 361 | Baukala 362 | Cerro Bainaltzin 363 | Sungai Bakal 364 | Bukit Ubaibalih 365 | Kampong Sombakal 366 | Kampung Lebai Ali 367 | Batikal 368 | Bakalalan Airport 369 | Maikali 370 | Bakalum 371 | Bakalambani 372 | Abakaliki 373 | Tsaunin Maikalaji 374 | Baikaha 375 | Llano Limbaika 376 | Barkald 377 | Barkald stasjon 378 | Barkaleitet 379 | Barkaldfossen 380 | Barkaldvola 381 | Bakkalegskardet 382 | Baikajavri 383 | Barkalden 384 | Bakkalia 385 | Siljabaika 386 | Aikaluokta 387 | Blombakkali 388 | Bavkalasis 389 | Baikajohka 390 | Bakkalykkja 391 | Bakalauri 392 | Bakalauri1 393 | Bakalauri2 394 | Bakalauri3 395 | Bakalauri4 396 | Bakalauri5 397 | Bakalauri6 398 | Bakalauri7 399 | Bakalauri8 400 | Bakalauri9 401 | Bakalsen 402 | Baiyaldi 403 | Naikala 404 | Baikanda 405 | Barkalne 406 | Bakalipur 407 | Bakaldum 408 | Raikal 409 | Baikatte 410 | Maikal 411 | Bakalbhar 412 | Waikalabubu Bay 
413 | Baikai Island 414 | Abikal 415 | Boikalakalawa Bay 416 | Maikal River 417 | Bakalao Asibi Point 418 | Bankal 419 | Bakalod Island 420 | Bakalao Point 421 | Bakalan River 422 | Bakal Dos 423 | Bakal Uno 424 | Daang Bakal 425 | Bankal School 426 | Bakal Tres 427 | Kabankalan City Public Plaza 428 | Ranra Tabai Algad 429 | Bairkal Jabal 430 | Bairkal Dhora 431 | Bairkal 432 | Zaibai Algad 433 | Gulba Kalle 434 | Ragha Bakalzai 435 | Dabbarkal Sar 436 | Tabai Algad 437 | Haikalzai 438 | Wuchobai Algad 439 | Jabba Kalai 440 | Goth Soba Kaloi 441 | Baikar Tsarai 442 | Dudgaikal 443 | Baixale Kamar 444 | Zebai Algad 445 | Bakal Khel 446 | Goth Haikal 447 | Haikal 448 | Jaba Kalle 449 | Bakalovina 450 | Salabaikasy 451 | Guba Kalita 452 | Guba Kalgalaksha 453 | Guba Kaldo 454 | Bakalovo 455 | Baykalovo 456 | Baskalino 457 | Sopka Barkaleptskaya 458 | Bakalovskaya Ferma 459 | Bakalinskiy Rayon 460 | Sovkhoz Bakalinskiy 461 | Bakalinskiy 462 | Bakaldy 463 | Bakaldinskoye 464 | Urochishche Bakaldikha 465 | Zabaykalovskiy 466 | Barkalova 467 | Barkalovka 468 | Gora Barkalova 469 | Gora Barkalyu 470 | Bikalamakhi 471 | Stantsiya Bakal 472 | Baykalovskiy Rayon 473 | Baykalovskiy 474 | Baykalovsk 475 | Bakalda 476 | Boloto Malyy Baykal 477 | Boloto Baykal 478 | Zabaykalka 479 | Stantsiya Baykal 480 | Baykalo-Amurskaya Zheleznaya Doroga 481 | Kolkhoz Krasnyy Baykal 482 | Zaliv Baykal 483 | Bakalino 484 | Ovrag Bakalda 485 | Bakaldovshchina 486 | Prud Novyy Baykal 487 | Bakaleyka 488 | Bakalka 489 | Bakaly TV Mast 490 | Urochishche Bakalovo 491 | Kambaika 492 | Maloye Baykalovo 493 | Bakalinskiy Leskhoz 494 | Bikalikha 495 | Kordon Barkalo 496 | Sanatoriy Baykal 497 | Port Baykal 498 | Baykalikha 499 | Polevoy Stan Baykal 500 | Bakalovka 501 | Ramada Makkah Shubaika 502 | Mount Tohebakala 503 | Tambakale Island 504 | Mbanitambaika Island 505 | Mbakalaka Island 506 | Kumbakale 507 | Kaikaloka 508 | Kelesaikal 509 | Nasb Gabakallah 510 | Jabal Barkal 511 | Jabal Abakallah 512 | 
Al Barkali 513 | Shabakal Abbass 514 | Mabaikuli 515 | Bambakalema 516 | Bambakalia 517 | Baiwala 518 | Babakalia 519 | Baikama 520 | Bankalol 521 | Kundebakali 522 | Yumbaikamadu 523 | Tabakali 524 | Daba Kalharereh 525 | Barkale 526 | Bakalshile 527 | Bakaloolay 528 | Buur Bakaley 529 | Bakaley 530 | Buur Bakale 531 | Bakalaale 532 | Jabal Mobakali 533 | Khor Bakallii 534 | Korombaital 535 | Ambakali 536 | Ba Kaliin 537 | Mbay Bakala 538 | Tagobikala 539 | Fayzabadkala 540 | Aghbai Allazy 541 | Aghbai Alikagar 542 | Gora Fayzabadkala 543 | Daraikalot 544 | Aghbai Alakisirak 545 | Beikala 546 | Foho Berbakalau 547 | Mota Caicabaisala 548 | Sungai Utabailale 549 | Urochishche Bakalarnyn-Ayasy 550 | Urochishche Batkali 551 | Khrebet Batkali 552 | Ras Barkallah 553 | Babakale 554 | Fabrikalar 555 | Bakalukalu Shan 556 | Bakalukalu 557 | Laikala 558 | Waikalakaka 559 | Columbus Bakalar Municipal Airport 560 | Bakalar Library 561 | Bakkala Cemetery 562 | Clifton T Barkalow Elementary School 563 | Barkalow Hollow 564 | Kailuapuhi Waikalua Homesteads 565 | Kawaikalia Gulch 566 | Waikalae 567 | Waikaloa Stream 568 | Waikalua-Loko Fish Pond 569 | Halekou Waikaluakai Homesteads 570 | East Waikalua 571 | Omar Haikal Islamic Academy 572 | Bakalar Air Force Base (historical) 573 | Koshbakaly 574 | Bagkalen 575 | Gora Baikara 576 | Mfumbaika 577 | Mbakalungu 578 | Chumbaika 579 | Ntombankala School 580 | Bakalabwa Pans 581 | Khobai al Janhra 582 | Holiday Inn Dubai Al Barsha 583 | Novotel Dubai Al Barsha 584 | Doubletree Res.Dubai-Al Barsha 585 | Doubletree By Hilton Hotel and Apartments Dubai Al Barsha 586 | Doubletree By Hilton Dubai Al Barsha Hotel and Res 587 | Park Inn By Radisson Dubai Al Barsha 588 | Ramee Rose Hotel Dubai Al Barsha 589 | Aparthotel Adagio Premium Dubai Al Barsha 590 | Ataikala 591 | Selman Marrakech 592 | Riad Ain Marrakech 593 | Taj Palace Marrakech 594 | Delano Marrakech 595 | Pullman Marrakech Palmeraie Resort And Spa 596 | Lalla Calipau Marrakech 
597 | Hotel Fashion Marrakech 598 | Four Seasons Resort Marrakech 599 | Adama Resort Marrakech 600 | Pullman Marrakech Palmeraie Re 601 | Ramada Resort Marrakech Douar Al Hana 602 | Hotel Zahia Marrakech 603 | Hotel Marrakech Le Tichka 604 | Le Chems Marrakech 605 | Beachcomber Royal Palm Marrakech 606 | Residence Marrakech 607 | Riad Hermes Marrakech 608 | Riad La Lune De Marrakech 609 | Hotel Marrakech Le Sangho Privilege 610 | Tempoo Hotel Marrakech 611 | Ag Hotel & Spa Marrakech 612 | Palm Appart Club Marrakech 613 | Hotel Ibis Moussafir Marrakech Palmeraie 614 | Ibis Marrakech Gare Voyageurs 615 | Marrakech Ryads Parc And Spa 616 | Terra Mia Marrakech Riad 617 | Residence Dar Lamia Marrakech 618 | Pullman Marrakech Palmeraie Rs 619 | Moussaf Marrakech Centre Gare 620 | Tempoo Hotel Marrakech Adults Only 621 | Sahara Palace Marrakech 622 | Moroccan House Marrakech 623 | El Andalouss And Spa Marrakech 624 | Suite Novotel Marrakech Rs 625 | Dar Catalina Marrakech Hotel Non Refundable Room 626 | Marrakech Hotel 627 | Oued Tammarrakech 628 | Tammarrakech 629 | Cercle de Marrakech-Banlieue 630 | Marrakech-Tensift-Al Haouz 631 | Koudia Marrakech 632 | Hotel Tichka Salam Marrakech 633 | L'Atlas Marrakech 634 | Royal Mirage Deluxe Marrakech 635 | Golden Tulip Farah Marrakech 636 | Ryad Mogador Marrakech 637 | Coralia Club Marrakech Palmariva 638 | La Sultana Marrakech 639 | Marrakech-Medina 640 | Marrakech 641 | Museum of Marrakech 642 | Douar Marrakechiyinc 643 | Ibis Marrakech Centre Gare 644 | Golden Tulip Rawabi Marrakech 645 | Murano Resort Marrakech 646 | Marrakech Garden Hotel 647 | Pullman Marrakech Palmerai Resort & Spa 648 | The Pearl Marrakech 649 | Palais Calipau Marrakech 650 | Hostal Equity Point Marrakech 651 | Sofitel Marrakech Lounge And Spa 652 | Pullman Marrakech Hotel And Spa 653 | Sofitel Marrakech Palais Imperial 654 | Hotel Ibis Moussafir Marrakech Centre Gare 655 | Red Hotel Marrakech 656 | Riad Zenith Marrakech 657 | Ksar Catalina Marrakech 
Hotel 658 | Blue Sea Hotel Marrakech Ryads Parc & Spa 659 | Bluebay Marrakech 660 | Pullman Marrakech Palmeraie Resort & Spa Hotel 661 | Riad Litzy Marrakech 662 | Sultana Hotel & Spa Marrakech 663 | Albatros Club Marrakech 664 | Hotel Sangho Club Marrakech 665 | Suite Novotel Marrakech Hotel 666 | Riad Utopia Suites & Spa Marrakech 667 | Riad Fatinat Marrakech 668 | Riad Dar El Aila Marrakech 669 | Es Saadi And Casino De Marrakech 670 | Dar Catalina Marrakech Hotel 671 | Grace Marrakech 672 | Marrakesh Apartments 673 | Marrakesh Country Club 674 | Koudiat Lmerrakechiyine 675 | Sidi Mohammed el Marrakchi 676 | Marrakesh 677 | Marrakchien 678 | Marrakchia 679 | Marrakesh Menara Airport 680 | Marrakesh Hua Hin Resort & Spa 681 | Marrakesh Hua Hin Resort And Spa 682 | Marrakesh Resort And Spa (Pool Suite) 683 | Marrakesh Huahin Resort & Spa 684 | Ibis Moussafir Marrakesh Centre Gare Hotel 685 | Maerak-chi 686 | Dar Hammou Ben Merrakchi 687 | Lalla el Marakchia 688 | Khrebet Marrakh 689 | Sungai Maru Kechil 690 | Marrache 691 | Goth Marracha 692 | Maramech Hill 693 | Maramech Woods Nature Preserve 694 | Oued Karakech 695 | Samarra School 696 | Jangal-e Marakeh Sar 697 | -------------------------------------------------------------------------------- /expected/pg_substring_trgm.out: -------------------------------------------------------------------------------- 1 | CREATE TABLE test_trgm2(t text COLLATE "C"); 2 | set extra_float_digits = 0; 3 | \copy test_trgm2 from 'data/trgm2.data' 4 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 5 | t | sml 6 | -------------------------------------+---------- 7 | Baykal | 1 8 | Boloto Baykal | 1 9 | Boloto Malyy Baykal | 1 10 | Kolkhoz Krasnyy Baykal | 1 11 | Ozero Baykal | 1 12 | Polevoy Stan Baykal | 1 13 | Port Baykal | 1 14 | Prud Novyy Baykal | 1 15 | Sanatoriy Baykal | 1 16 | Stantsiya Baykal | 1 17 | Zaliv Baykal | 1 18 | Baykalikha | 0.857143 19 | 
Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 20 | Baykalovo | 0.857143 21 | Baykalovsk | 0.857143 22 | Baykalovskiy | 0.857143 23 | Baykalovskiy Rayon | 0.857143 24 | Baykalsko | 0.857143 25 | Maloye Baykalovo | 0.857143 26 | Zabaykal | 0.714286 27 | (20 rows) 28 | 29 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; 30 | t | sml 31 | ------------------------------+----- 32 | Kabankala | 1 33 | Kabankalan City Public Plaza | 0.9 34 | Abankala | 0.7 35 | Ntombankala School | 0.6 36 | (4 rows) 37 | 38 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 39 | t | sml 40 | -------------------------------------+---------- 41 | Baykal | 1 42 | Boloto Baykal | 1 43 | Boloto Malyy Baykal | 1 44 | Kolkhoz Krasnyy Baykal | 1 45 | Ozero Baykal | 1 46 | Polevoy Stan Baykal | 1 47 | Port Baykal | 1 48 | Prud Novyy Baykal | 1 49 | Sanatoriy Baykal | 1 50 | Stantsiya Baykal | 1 51 | Zaliv Baykal | 1 52 | Baykalikha | 0.857143 53 | Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 54 | Baykalovo | 0.857143 55 | Baykalovsk | 0.857143 56 | Baykalovskiy | 0.857143 57 | Baykalovskiy Rayon | 0.857143 58 | Baykalsko | 0.857143 59 | Maloye Baykalovo | 0.857143 60 | Zabaykal | 0.714286 61 | (20 rows) 62 | 63 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 64 | t | sml 65 | ------------------------------+----- 66 | Kabankala | 1 67 | Kabankalan City Public Plaza | 0.9 68 | Abankala | 0.7 69 | Ntombankala School | 0.6 70 | (4 rows) 71 | 72 | create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops); 73 | set enable_seqscan=off; 74 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 75 | t | sml 76 | -------------------------------------+---------- 77 | Baykal | 1 78 | Boloto Baykal | 1 79 | Boloto Malyy Baykal | 1 80 | Kolkhoz Krasnyy Baykal | 
1 81 | Ozero Baykal | 1 82 | Polevoy Stan Baykal | 1 83 | Port Baykal | 1 84 | Prud Novyy Baykal | 1 85 | Sanatoriy Baykal | 1 86 | Stantsiya Baykal | 1 87 | Zaliv Baykal | 1 88 | Baykalikha | 0.857143 89 | Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 90 | Baykalovo | 0.857143 91 | Baykalovsk | 0.857143 92 | Baykalovskiy | 0.857143 93 | Baykalovskiy Rayon | 0.857143 94 | Baykalsko | 0.857143 95 | Maloye Baykalovo | 0.857143 96 | Zabaykal | 0.714286 97 | (20 rows) 98 | 99 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; 100 | t | sml 101 | ------------------------------+----- 102 | Kabankala | 1 103 | Kabankalan City Public Plaza | 0.9 104 | Abankala | 0.7 105 | Ntombankala School | 0.6 106 | (4 rows) 107 | 108 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 109 | t | sml 110 | -------------------------------------+---------- 111 | Baykal | 1 112 | Boloto Baykal | 1 113 | Boloto Malyy Baykal | 1 114 | Kolkhoz Krasnyy Baykal | 1 115 | Ozero Baykal | 1 116 | Polevoy Stan Baykal | 1 117 | Port Baykal | 1 118 | Prud Novyy Baykal | 1 119 | Sanatoriy Baykal | 1 120 | Stantsiya Baykal | 1 121 | Zaliv Baykal | 1 122 | Baykalikha | 0.857143 123 | Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 124 | Baykalovo | 0.857143 125 | Baykalovsk | 0.857143 126 | Baykalovskiy | 0.857143 127 | Baykalovskiy Rayon | 0.857143 128 | Baykalsko | 0.857143 129 | Maloye Baykalovo | 0.857143 130 | Zabaykal | 0.714286 131 | (20 rows) 132 | 133 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 134 | t | sml 135 | ------------------------------+----- 136 | Kabankala | 1 137 | Kabankalan City Public Plaza | 0.9 138 | Abankala | 0.7 139 | Ntombankala School | 0.6 140 | (4 rows) 141 | 142 | drop index trgm_idx2; 143 | create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops); 144 | set enable_seqscan=off; 
145 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 146 | t | sml 147 | -------------------------------------+---------- 148 | Baykal | 1 149 | Boloto Baykal | 1 150 | Boloto Malyy Baykal | 1 151 | Kolkhoz Krasnyy Baykal | 1 152 | Ozero Baykal | 1 153 | Polevoy Stan Baykal | 1 154 | Port Baykal | 1 155 | Prud Novyy Baykal | 1 156 | Sanatoriy Baykal | 1 157 | Stantsiya Baykal | 1 158 | Zaliv Baykal | 1 159 | Baykalikha | 0.857143 160 | Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 161 | Baykalovo | 0.857143 162 | Baykalovsk | 0.857143 163 | Baykalovskiy | 0.857143 164 | Baykalovskiy Rayon | 0.857143 165 | Baykalsko | 0.857143 166 | Maloye Baykalovo | 0.857143 167 | Zabaykal | 0.714286 168 | (20 rows) 169 | 170 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; 171 | t | sml 172 | ------------------------------+----- 173 | Kabankala | 1 174 | Kabankalan City Public Plaza | 0.9 175 | Abankala | 0.7 176 | Ntombankala School | 0.6 177 | (4 rows) 178 | 179 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 180 | t | sml 181 | -------------------------------------+---------- 182 | Baykal | 1 183 | Boloto Baykal | 1 184 | Boloto Malyy Baykal | 1 185 | Kolkhoz Krasnyy Baykal | 1 186 | Ozero Baykal | 1 187 | Polevoy Stan Baykal | 1 188 | Port Baykal | 1 189 | Prud Novyy Baykal | 1 190 | Sanatoriy Baykal | 1 191 | Stantsiya Baykal | 1 192 | Zaliv Baykal | 1 193 | Baykalikha | 0.857143 194 | Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 195 | Baykalovo | 0.857143 196 | Baykalovsk | 0.857143 197 | Baykalovskiy | 0.857143 198 | Baykalovskiy Rayon | 0.857143 199 | Baykalsko | 0.857143 200 | Maloye Baykalovo | 0.857143 201 | Zabaykal | 0.714286 202 | (20 rows) 203 | 204 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 205 | t | sml 
206 | ------------------------------+----- 207 | Kabankala | 1 208 | Kabankalan City Public Plaza | 0.9 209 | Abankala | 0.7 210 | Ntombankala School | 0.6 211 | (4 rows) 212 | 213 | select set_substring_limit(0.5); 214 | set_substring_limit 215 | --------------------- 216 | 0.5 217 | (1 row) 218 | 219 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 220 | t | sml 221 | -------------------------------------+---------- 222 | Baykal | 1 223 | Boloto Baykal | 1 224 | Boloto Malyy Baykal | 1 225 | Kolkhoz Krasnyy Baykal | 1 226 | Ozero Baykal | 1 227 | Polevoy Stan Baykal | 1 228 | Port Baykal | 1 229 | Prud Novyy Baykal | 1 230 | Sanatoriy Baykal | 1 231 | Stantsiya Baykal | 1 232 | Zaliv Baykal | 1 233 | Baykalikha | 0.857143 234 | Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 235 | Baykalovo | 0.857143 236 | Baykalovsk | 0.857143 237 | Baykalovskiy | 0.857143 238 | Baykalovskiy Rayon | 0.857143 239 | Baykalsko | 0.857143 240 | Maloye Baykalovo | 0.857143 241 | Zabaykal | 0.714286 242 | Bakal Batu | 0.571429 243 | Zabaykalka | 0.571429 244 | Zabaykalovskiy | 0.571429 245 | (23 rows) 246 | 247 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; 248 | t | sml 249 | ----------------------------------+---------- 250 | Kabankala | 1 251 | Kabankalan City Public Plaza | 0.9 252 | Abankala | 0.7 253 | Ntombankala School | 0.6 254 | Kabakala | 0.583333 255 | Nehalla Bankalah Reserved Forest | 0.5 256 | (6 rows) 257 | 258 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 259 | t | sml 260 | -------------------------------------+---------- 261 | Baykal | 1 262 | Boloto Baykal | 1 263 | Boloto Malyy Baykal | 1 264 | Kolkhoz Krasnyy Baykal | 1 265 | Ozero Baykal | 1 266 | Polevoy Stan Baykal | 1 267 | Port Baykal | 1 268 | Prud Novyy Baykal | 1 269 | Sanatoriy Baykal | 1 270 | Stantsiya Baykal | 1 
271 | Zaliv Baykal | 1 272 | Baykalikha | 0.857143 273 | Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 274 | Baykalovo | 0.857143 275 | Baykalovsk | 0.857143 276 | Baykalovskiy | 0.857143 277 | Baykalovskiy Rayon | 0.857143 278 | Baykalsko | 0.857143 279 | Maloye Baykalovo | 0.857143 280 | Zabaykal | 0.714286 281 | Bakal Batu | 0.571429 282 | Zabaykalka | 0.571429 283 | Zabaykalovskiy | 0.571429 284 | (23 rows) 285 | 286 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 287 | t | sml 288 | ----------------------------------+---------- 289 | Kabankala | 1 290 | Kabankalan City Public Plaza | 0.9 291 | Abankala | 0.7 292 | Ntombankala School | 0.6 293 | Kabakala | 0.583333 294 | Nehalla Bankalah Reserved Forest | 0.5 295 | (6 rows) 296 | 297 | select set_substring_limit(0.3); 298 | set_substring_limit 299 | --------------------- 300 | 0.3 301 | (1 row) 302 | 303 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 304 | t | sml 305 | -----------------------------------------------------------+---------- 306 | Baykal | 1 307 | Boloto Baykal | 1 308 | Boloto Malyy Baykal | 1 309 | Kolkhoz Krasnyy Baykal | 1 310 | Ozero Baykal | 1 311 | Polevoy Stan Baykal | 1 312 | Port Baykal | 1 313 | Prud Novyy Baykal | 1 314 | Sanatoriy Baykal | 1 315 | Stantsiya Baykal | 1 316 | Zaliv Baykal | 1 317 | Baykalikha | 0.857143 318 | Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 319 | Baykalovo | 0.857143 320 | Baykalovsk | 0.857143 321 | Baykalovskiy | 0.857143 322 | Baykalovskiy Rayon | 0.857143 323 | Baykalsko | 0.857143 324 | Maloye Baykalovo | 0.857143 325 | Zabaykal | 0.714286 326 | Bakal Batu | 0.571429 327 | Zabaykalka | 0.571429 328 | Zabaykalovskiy | 0.571429 329 | Air Bakal-kecil | 0.444444 330 | Bakal | 0.444444 331 | Bakal Dos | 0.444444 332 | Bakal Julu | 0.444444 333 | Bakal Khel | 0.444444 334 | Bakal Lama | 0.444444 335 | Bakal Tres | 0.444444 336 
| Bakal Uno | 0.444444 337 | Daang Bakal | 0.444444 338 | Desa Bakal | 0.444444 339 | Eat Bakal | 0.444444 340 | Gunung Bakal | 0.444444 341 | Sidi Bakal | 0.444444 342 | Stantsiya Bakal | 0.444444 343 | Sungai Bakal | 0.444444 344 | Talang Bakal | 0.444444 345 | Uruk Bakal | 0.444444 346 | Zaouia Oulad Bakal | 0.444444 347 | Al Barkali | 0.428571 348 | Aparthotel Adagio Premium Dubai Al Barsha | 0.428571 349 | Baikal Business Centre | 0.428571 350 | Bay of Backaland | 0.428571 351 | Boikalakalawa Bay | 0.428571 352 | Doubletree By Hilton Dubai Al Barsha Hotel and Res | 0.428571 353 | Doubletree By Hilton Hotel and Apartments Dubai Al Barsha | 0.428571 354 | Doubletree Res.Dubai-Al Barsha | 0.428571 355 | Holiday Inn Dubai Al Barsha | 0.428571 356 | Jabal Barkal | 0.428571 357 | Novotel Dubai Al Barsha | 0.428571 358 | Park Inn By Radisson Dubai Al Barsha | 0.428571 359 | Ramee Rose Hotel Dubai Al Barsha | 0.428571 360 | Waikalabubu Bay | 0.428571 361 | Baikal | 0.4 362 | Baikal Airfield | 0.4 363 | Baikal Hotel Moscow | 0.4 364 | Baikal Listvyanka Hotel | 0.4 365 | Baikal Mountains | 0.4 366 | Baikal Plaza | 0.4 367 | Bajkal | 0.4 368 | Bankal | 0.4 369 | Bankal School | 0.4 370 | Barkal | 0.4 371 | Lake Baikal | 0.4 372 | Mbay Bakala | 0.4 373 | Oulad el Bakkal | 0.4 374 | Sidi Mohammed Bakkal | 0.4 375 | Bairkal | 0.363636 376 | Bairkal Dhora | 0.363636 377 | Bairkal Jabal | 0.363636 378 | Batikal | 0.363636 379 | Bakala | 0.333333 380 | Bakala Koupi | 0.333333 381 | Bakalaale | 0.333333 382 | Bakalabwa Pans | 0.333333 383 | Bakalaeng | 0.333333 384 | Bakalafoulou | 0.333333 385 | Bakalalan Airport | 0.333333 386 | Bakalam | 0.333333 387 | Bakalambani | 0.333333 388 | Bakalan | 0.333333 389 | Bakalan Barat | 0.333333 390 | Bakalan Dua | 0.333333 391 | Bakalan Kidul | 0.333333 392 | Bakalan Kulon | 0.333333 393 | Bakalan Lor | 0.333333 394 | Bakalan River | 0.333333 395 | Bakalan Tengah | 0.333333 396 | Bakalan Wetan | 0.333333 397 | Bakalang | 0.333333 398 | 
Bakalao Asibi Point | 0.333333 399 | Bakalao Point | 0.333333 400 | Bakalar Air Force Base (historical) | 0.333333 401 | Bakalar Lake | 0.333333 402 | Bakalar Library | 0.333333 403 | Bakalarr | 0.333333 404 | Bakalauri | 0.333333 405 | Bakalauri1 | 0.333333 406 | Bakalauri2 | 0.333333 407 | Bakalauri3 | 0.333333 408 | Bakalauri4 | 0.333333 409 | Bakalauri5 | 0.333333 410 | Bakalauri6 | 0.333333 411 | Bakalauri7 | 0.333333 412 | Bakalauri8 | 0.333333 413 | Bakalauri9 | 0.333333 414 | Bakalawa | 0.333333 415 | Bakalbhar | 0.333333 416 | Bakalbuah | 0.333333 417 | Bakalda | 0.333333 418 | Bakaldalam | 0.333333 419 | Bakaldinskoye | 0.333333 420 | Bakaldovshchina | 0.333333 421 | Bakaldukuh | 0.333333 422 | Bakaldum | 0.333333 423 | Bakaldy | 0.333333 424 | Bakale | 0.333333 425 | Bakaleko | 0.333333 426 | Bakalerek | 0.333333 427 | Bakaley | 0.333333 428 | Bakaleyka | 0.333333 429 | Bakalha | 0.333333 430 | Bakali | 0.333333 431 | Bakalia Char | 0.333333 432 | Bakalica | 0.333333 433 | Bakalinga | 0.333333 434 | Bakalino | 0.333333 435 | Bakalinskiy | 0.333333 436 | Bakalinskiy Leskhoz | 0.333333 437 | Bakalinskiy Rayon | 0.333333 438 | Bakalipur | 0.333333 439 | Bakalite | 0.333333 440 | Bakaljaya | 0.333333 441 | Bakalka | 0.333333 442 | Bakall | 0.333333 443 | Bakalnica | 0.333333 444 | Bakalod Island | 0.333333 445 | Bakalongo | 0.333333 446 | Bakaloolay | 0.333333 447 | Bakalou | 0.333333 448 | Bakalovina | 0.333333 449 | Bakalovka | 0.333333 450 | Bakalovo | 0.333333 451 | Bakalovskaya Ferma | 0.333333 452 | Bakalpakebo | 0.333333 453 | Bakalpokok | 0.333333 454 | Bakalrejo | 0.333333 455 | Bakalsen | 0.333333 456 | Bakalshile | 0.333333 457 | Bakaltua Bank | 0.333333 458 | Bakalua | 0.333333 459 | Bakalukalu | 0.333333 460 | Bakalukalu Shan | 0.333333 461 | Bakalukudu | 0.333333 462 | Bakalum | 0.333333 463 | Bakaly | 0.333333 464 | Bakaly TV Mast | 0.333333 465 | Buur Bakale | 0.333333 466 | Buur Bakaley | 0.333333 467 | Columbus Bakalar Municipal Airport | 
0.333333 468 | Dakshin Bakalia | 0.333333 469 | Danau Bakalan | 0.333333 470 | Desa Bakalan | 0.333333 471 | Desa Bakalankrajan | 0.333333 472 | Desa Bakalankrapyak | 0.333333 473 | Desa Bakalanpule | 0.333333 474 | Desa Bakalanrayung | 0.333333 475 | Desa Bakalanwringinpitu | 0.333333 476 | Desa Bakalrejo | 0.333333 477 | Efrejtor Bakalovo | 0.333333 478 | Efreytor-Bakalovo | 0.333333 479 | Gora Bakalyadyr | 0.333333 480 | Gory Bakaly | 0.333333 481 | Gunung Bakalan | 0.333333 482 | Ile Bakalibu | 0.333333 483 | Kali Bakalan | 0.333333 484 | Kampong Bakaladong | 0.333333 485 | Khor Bakallii | 0.333333 486 | Krajan Bakalan | 0.333333 487 | Kusu-Bakali | 0.333333 488 | Kwala Bakala | 0.333333 489 | Ngao Bakala | 0.333333 490 | Ovrag Bakalda | 0.333333 491 | Pematang Bakalpanang | 0.333333 492 | Pematang Bakalpanjang | 0.333333 493 | Pulau Bakalan | 0.333333 494 | Pulau Bakalanpauno | 0.333333 495 | Ragha Bakalzai | 0.333333 496 | Rodnik Bakalybulak | 0.333333 497 | Salu Bakalaeng | 0.333333 498 | Selat Bakalan | 0.333333 499 | Selat Bakalanpauno | 0.333333 500 | Sidi Mohammed el Bakali | 0.333333 501 | Sopka Bakaly | 0.333333 502 | Sovkhoz Bakalinskiy | 0.333333 503 | Sungai Bakala | 0.333333 504 | Sungai Bakaladiyan | 0.333333 505 | Tanjung Bakalinga | 0.333333 506 | Teluk Bakalan | 0.333333 507 | Teluk Bakalang | 0.333333 508 | Tubu Bakalekuk | 0.333333 509 | Tukad Bakalan | 0.333333 510 | Urochishche Bakalarnyn-Ayasy | 0.333333 511 | Urochishche Bakaldikha | 0.333333 512 | Urochishche Bakalovo | 0.333333 513 | Urochishche Bakaly | 0.333333 514 | Bakkalmal | 0.307692 515 | Alue Bakkala | 0.3 516 | Azib el Bakkali | 0.3 517 | Ba Kaliin | 0.3 518 | Bagkalen | 0.3 519 | Bahkalleh | 0.3 520 | Baikalakko | 0.3 521 | Baikalovo | 0.3 522 | Baikaluobbal | 0.3 523 | Bakkala Cemetery | 0.3 524 | Bakkalale | 0.3 525 | Bakkalegskardet | 0.3 526 | Bakkalia | 0.3 527 | Bakkalykkja | 0.3 528 | Bankali | 0.3 529 | Bankalol | 0.3 530 | Barkala | 0.3 531 | Barkala Park | 0.3 532 | 
Barkala Rao | 0.3 533 | Barkala Reserved Forest | 0.3 534 | Barkalabava | 0.3 535 | Barkaladja Pool | 0.3 536 | Barkalare | 0.3 537 | Barkald | 0.3 538 | Barkald stasjon | 0.3 539 | Barkalden | 0.3 540 | Barkaldfossen | 0.3 541 | Barkaldvola | 0.3 542 | Barkale | 0.3 543 | Barkaleh | 0.3 544 | Barkaleitet | 0.3 545 | Barkali | 0.3 546 | Barkallou | 0.3 547 | Barkalne | 0.3 548 | Barkalova | 0.3 549 | Barkalovka | 0.3 550 | Barkalow Hollow | 0.3 551 | Baskalino | 0.3 552 | Baskaltsi | 0.3 553 | Baukala | 0.3 554 | Bavkalasis | 0.3 555 | Bawkalut | 0.3 556 | Bawkalut Chaung | 0.3 557 | Bikal | 0.3 558 | Clifton T Barkalow Elementary School | 0.3 559 | Gora Barkalova | 0.3 560 | Gora Barkalyu | 0.3 561 | Khrebet Batkali | 0.3 562 | Kordon Barkalo | 0.3 563 | Nehalla Bankalah Reserved Forest | 0.3 564 | Ras Barkallah | 0.3 565 | Sopka Barkaleptskaya | 0.3 566 | Urochishche Batkali | 0.3 567 | (261 rows) 568 | 569 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; 570 | t | sml 571 | ----------------------------------+---------- 572 | Kabankala | 1 573 | Kabankalan City Public Plaza | 0.9 574 | Abankala | 0.7 575 | Ntombankala School | 0.6 576 | Kabakala | 0.583333 577 | Nehalla Bankalah Reserved Forest | 0.5 578 | Kabikala | 0.461538 579 | Mwalaba-Kalamba | 0.454545 580 | Bakala Koupi | 0.4 581 | Bankal | 0.4 582 | Bankal School | 0.4 583 | Bankali | 0.4 584 | Bankalol | 0.4 585 | Jabba Kalai | 0.4 586 | Kanampumba-Kalawa | 0.4 587 | Purba Kalaujan | 0.4 588 | Tumba-Kalamba | 0.4 589 | Daba Kalharereh | 0.363636 590 | Gagaba Kalo | 0.363636 591 | Jaba Kalle | 0.363636 592 | Dabakala | 0.333333 593 | Dalabakala | 0.333333 594 | Kambakala | 0.333333 595 | Ker Samba Kalla | 0.333333 596 | Fayzabadkala | 0.307692 597 | Gora Fayzabadkala | 0.307692 598 | Guba Kalgalaksha | 0.307692 599 | Habakkala | 0.307692 600 | Kaikalahun Indian Reserve 25 | 0.307692 601 | Kaikalapettai | 0.307692 602 | Alue Bakkala | 0.3 
603 | Ambadikala | 0.3 604 | Ambakala Wewa | 0.3 605 | Ataikala | 0.3 606 | Ba Kaliin | 0.3 607 | Bakala | 0.3 608 | Bakkala Cemetery | 0.3 609 | Bambakala | 0.3 610 | Barkala | 0.3 611 | Barkala Park | 0.3 612 | Barkala Rao | 0.3 613 | Barkala Reserved Forest | 0.3 614 | Baukala | 0.3 615 | Beikala | 0.3 616 | Bikala | 0.3 617 | Bikala Madila | 0.3 618 | Bomba-Kalende | 0.3 619 | Bonagbakala | 0.3 620 | Boyagbakala | 0.3 621 | Bugor Arba-Kalgan | 0.3 622 | Bumba-Kaloki | 0.3 623 | Bumba-Kalumba | 0.3 624 | Darreh Pumba Kal | 0.3 625 | Demba Kali | 0.3 626 | Embatkala | 0.3 627 | Gereba Kaler | 0.3 628 | Golba Kalo | 0.3 629 | Goth Soba Kaloi | 0.3 630 | Guba Kaldo | 0.3 631 | Guba Kalita | 0.3 632 | Gulba Kalle | 0.3 633 | Haikala | 0.3 634 | Kali Bakalan | 0.3 635 | Kali Purbakala | 0.3 636 | Kalibakal | 0.3 637 | Kalibakalako | 0.3 638 | Kalimundubakalan | 0.3 639 | Kamba-Kalele | 0.3 640 | Kimbakala | 0.3 641 | Kombakala | 0.3 642 | Kwala Bakala | 0.3 643 | Laikala | 0.3 644 | Maikala Range | 0.3 645 | Mambakala | 0.3 646 | Matamba-Kalenga | 0.3 647 | Matamba-Kalenge | 0.3 648 | Mbay Bakala | 0.3 649 | Mount Tohebakala | 0.3 650 | Naikala | 0.3 651 | Ngao Bakala | 0.3 652 | Purba Kalmegha | 0.3 653 | Sungai Bakala | 0.3 654 | Tagobikala | 0.3 655 | Tanjung Batikala | 0.3 656 | Tombakala | 0.3 657 | Tsibakala | 0.3 658 | Tumba-Kalumba | 0.3 659 | Tumba-Kalunga | 0.3 660 | Waikala | 0.3 661 | (89 rows) 662 | 663 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 664 | t | sml 665 | -----------------------------------------------------------+---------- 666 | Baykal | 1 667 | Boloto Baykal | 1 668 | Boloto Malyy Baykal | 1 669 | Kolkhoz Krasnyy Baykal | 1 670 | Ozero Baykal | 1 671 | Polevoy Stan Baykal | 1 672 | Port Baykal | 1 673 | Prud Novyy Baykal | 1 674 | Sanatoriy Baykal | 1 675 | Stantsiya Baykal | 1 676 | Zaliv Baykal | 1 677 | Baykalikha | 0.857143 678 | Baykalo-Amurskaya Zheleznaya Doroga | 
0.857143 679 | Baykalovo | 0.857143 680 | Baykalovsk | 0.857143 681 | Baykalovskiy | 0.857143 682 | Baykalovskiy Rayon | 0.857143 683 | Baykalsko | 0.857143 684 | Maloye Baykalovo | 0.857143 685 | Zabaykal | 0.714286 686 | Bakal Batu | 0.571429 687 | Zabaykalka | 0.571429 688 | Zabaykalovskiy | 0.571429 689 | Air Bakal-kecil | 0.444444 690 | Bakal | 0.444444 691 | Bakal Dos | 0.444444 692 | Bakal Julu | 0.444444 693 | Bakal Khel | 0.444444 694 | Bakal Lama | 0.444444 695 | Bakal Tres | 0.444444 696 | Bakal Uno | 0.444444 697 | Daang Bakal | 0.444444 698 | Desa Bakal | 0.444444 699 | Eat Bakal | 0.444444 700 | Gunung Bakal | 0.444444 701 | Sidi Bakal | 0.444444 702 | Stantsiya Bakal | 0.444444 703 | Sungai Bakal | 0.444444 704 | Talang Bakal | 0.444444 705 | Uruk Bakal | 0.444444 706 | Zaouia Oulad Bakal | 0.444444 707 | Al Barkali | 0.428571 708 | Aparthotel Adagio Premium Dubai Al Barsha | 0.428571 709 | Baikal Business Centre | 0.428571 710 | Bay of Backaland | 0.428571 711 | Boikalakalawa Bay | 0.428571 712 | Doubletree By Hilton Dubai Al Barsha Hotel and Res | 0.428571 713 | Doubletree By Hilton Hotel and Apartments Dubai Al Barsha | 0.428571 714 | Doubletree Res.Dubai-Al Barsha | 0.428571 715 | Holiday Inn Dubai Al Barsha | 0.428571 716 | Jabal Barkal | 0.428571 717 | Novotel Dubai Al Barsha | 0.428571 718 | Park Inn By Radisson Dubai Al Barsha | 0.428571 719 | Ramee Rose Hotel Dubai Al Barsha | 0.428571 720 | Waikalabubu Bay | 0.428571 721 | Baikal | 0.4 722 | Baikal Airfield | 0.4 723 | Baikal Hotel Moscow | 0.4 724 | Baikal Listvyanka Hotel | 0.4 725 | Baikal Mountains | 0.4 726 | Baikal Plaza | 0.4 727 | Bajkal | 0.4 728 | Bankal | 0.4 729 | Bankal School | 0.4 730 | Barkal | 0.4 731 | Lake Baikal | 0.4 732 | Mbay Bakala | 0.4 733 | Oulad el Bakkal | 0.4 734 | Sidi Mohammed Bakkal | 0.4 735 | Bairkal | 0.363636 736 | Bairkal Dhora | 0.363636 737 | Bairkal Jabal | 0.363636 738 | Batikal | 0.363636 739 | Bakala | 0.333333 740 | Bakala Koupi | 0.333333 741 | 
Bakalaale | 0.333333 742 | Bakalabwa Pans | 0.333333 743 | Bakalaeng | 0.333333 744 | Bakalafoulou | 0.333333 745 | Bakalalan Airport | 0.333333 746 | Bakalam | 0.333333 747 | Bakalambani | 0.333333 748 | Bakalan | 0.333333 749 | Bakalan Barat | 0.333333 750 | Bakalan Dua | 0.333333 751 | Bakalan Kidul | 0.333333 752 | Bakalan Kulon | 0.333333 753 | Bakalan Lor | 0.333333 754 | Bakalan River | 0.333333 755 | Bakalan Tengah | 0.333333 756 | Bakalan Wetan | 0.333333 757 | Bakalang | 0.333333 758 | Bakalao Asibi Point | 0.333333 759 | Bakalao Point | 0.333333 760 | Bakalar Air Force Base (historical) | 0.333333 761 | Bakalar Lake | 0.333333 762 | Bakalar Library | 0.333333 763 | Bakalarr | 0.333333 764 | Bakalauri | 0.333333 765 | Bakalauri1 | 0.333333 766 | Bakalauri2 | 0.333333 767 | Bakalauri3 | 0.333333 768 | Bakalauri4 | 0.333333 769 | Bakalauri5 | 0.333333 770 | Bakalauri6 | 0.333333 771 | Bakalauri7 | 0.333333 772 | Bakalauri8 | 0.333333 773 | Bakalauri9 | 0.333333 774 | Bakalawa | 0.333333 775 | Bakalbhar | 0.333333 776 | Bakalbuah | 0.333333 777 | Bakalda | 0.333333 778 | Bakaldalam | 0.333333 779 | Bakaldinskoye | 0.333333 780 | Bakaldovshchina | 0.333333 781 | Bakaldukuh | 0.333333 782 | Bakaldum | 0.333333 783 | Bakaldy | 0.333333 784 | Bakale | 0.333333 785 | Bakaleko | 0.333333 786 | Bakalerek | 0.333333 787 | Bakaley | 0.333333 788 | Bakaleyka | 0.333333 789 | Bakalha | 0.333333 790 | Bakali | 0.333333 791 | Bakalia Char | 0.333333 792 | Bakalica | 0.333333 793 | Bakalinga | 0.333333 794 | Bakalino | 0.333333 795 | Bakalinskiy | 0.333333 796 | Bakalinskiy Leskhoz | 0.333333 797 | Bakalinskiy Rayon | 0.333333 798 | Bakalipur | 0.333333 799 | Bakalite | 0.333333 800 | Bakaljaya | 0.333333 801 | Bakalka | 0.333333 802 | Bakall | 0.333333 803 | Bakalnica | 0.333333 804 | Bakalod Island | 0.333333 805 | Bakalongo | 0.333333 806 | Bakaloolay | 0.333333 807 | Bakalou | 0.333333 808 | Bakalovina | 0.333333 809 | Bakalovka | 0.333333 810 | Bakalovo | 0.333333 
811 | Bakalovskaya Ferma | 0.333333 812 | Bakalpakebo | 0.333333 813 | Bakalpokok | 0.333333 814 | Bakalrejo | 0.333333 815 | Bakalsen | 0.333333 816 | Bakalshile | 0.333333 817 | Bakaltua Bank | 0.333333 818 | Bakalua | 0.333333 819 | Bakalukalu | 0.333333 820 | Bakalukalu Shan | 0.333333 821 | Bakalukudu | 0.333333 822 | Bakalum | 0.333333 823 | Bakaly | 0.333333 824 | Bakaly TV Mast | 0.333333 825 | Buur Bakale | 0.333333 826 | Buur Bakaley | 0.333333 827 | Columbus Bakalar Municipal Airport | 0.333333 828 | Dakshin Bakalia | 0.333333 829 | Danau Bakalan | 0.333333 830 | Desa Bakalan | 0.333333 831 | Desa Bakalankrajan | 0.333333 832 | Desa Bakalankrapyak | 0.333333 833 | Desa Bakalanpule | 0.333333 834 | Desa Bakalanrayung | 0.333333 835 | Desa Bakalanwringinpitu | 0.333333 836 | Desa Bakalrejo | 0.333333 837 | Efrejtor Bakalovo | 0.333333 838 | Efreytor-Bakalovo | 0.333333 839 | Gora Bakalyadyr | 0.333333 840 | Gory Bakaly | 0.333333 841 | Gunung Bakalan | 0.333333 842 | Ile Bakalibu | 0.333333 843 | Kali Bakalan | 0.333333 844 | Kampong Bakaladong | 0.333333 845 | Khor Bakallii | 0.333333 846 | Krajan Bakalan | 0.333333 847 | Kusu-Bakali | 0.333333 848 | Kwala Bakala | 0.333333 849 | Ngao Bakala | 0.333333 850 | Ovrag Bakalda | 0.333333 851 | Pematang Bakalpanang | 0.333333 852 | Pematang Bakalpanjang | 0.333333 853 | Pulau Bakalan | 0.333333 854 | Pulau Bakalanpauno | 0.333333 855 | Ragha Bakalzai | 0.333333 856 | Rodnik Bakalybulak | 0.333333 857 | Salu Bakalaeng | 0.333333 858 | Selat Bakalan | 0.333333 859 | Selat Bakalanpauno | 0.333333 860 | Sidi Mohammed el Bakali | 0.333333 861 | Sopka Bakaly | 0.333333 862 | Sovkhoz Bakalinskiy | 0.333333 863 | Sungai Bakala | 0.333333 864 | Sungai Bakaladiyan | 0.333333 865 | Tanjung Bakalinga | 0.333333 866 | Teluk Bakalan | 0.333333 867 | Teluk Bakalang | 0.333333 868 | Tubu Bakalekuk | 0.333333 869 | Tukad Bakalan | 0.333333 870 | Urochishche Bakalarnyn-Ayasy | 0.333333 871 | Urochishche Bakaldikha | 0.333333 872 
| Urochishche Bakalovo | 0.333333 873 | Urochishche Bakaly | 0.333333 874 | Bakkalmal | 0.307692 875 | Alue Bakkala | 0.3 876 | Azib el Bakkali | 0.3 877 | Ba Kaliin | 0.3 878 | Bagkalen | 0.3 879 | Bahkalleh | 0.3 880 | Baikalakko | 0.3 881 | Baikalovo | 0.3 882 | Baikaluobbal | 0.3 883 | Bakkala Cemetery | 0.3 884 | Bakkalale | 0.3 885 | Bakkalegskardet | 0.3 886 | Bakkalia | 0.3 887 | Bakkalykkja | 0.3 888 | Bankali | 0.3 889 | Bankalol | 0.3 890 | Barkala | 0.3 891 | Barkala Park | 0.3 892 | Barkala Rao | 0.3 893 | Barkala Reserved Forest | 0.3 894 | Barkalabava | 0.3 895 | Barkaladja Pool | 0.3 896 | Barkalare | 0.3 897 | Barkald | 0.3 898 | Barkald stasjon | 0.3 899 | Barkalden | 0.3 900 | Barkaldfossen | 0.3 901 | Barkaldvola | 0.3 902 | Barkale | 0.3 903 | Barkaleh | 0.3 904 | Barkaleitet | 0.3 905 | Barkali | 0.3 906 | Barkallou | 0.3 907 | Barkalne | 0.3 908 | Barkalova | 0.3 909 | Barkalovka | 0.3 910 | Barkalow Hollow | 0.3 911 | Baskalino | 0.3 912 | Baskaltsi | 0.3 913 | Baukala | 0.3 914 | Bavkalasis | 0.3 915 | Bawkalut | 0.3 916 | Bawkalut Chaung | 0.3 917 | Bikal | 0.3 918 | Clifton T Barkalow Elementary School | 0.3 919 | Gora Barkalova | 0.3 920 | Gora Barkalyu | 0.3 921 | Khrebet Batkali | 0.3 922 | Kordon Barkalo | 0.3 923 | Nehalla Bankalah Reserved Forest | 0.3 924 | Ras Barkallah | 0.3 925 | Sopka Barkaleptskaya | 0.3 926 | Urochishche Batkali | 0.3 927 | (261 rows) 928 | 929 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 930 | t | sml 931 | ----------------------------------+---------- 932 | Kabankala | 1 933 | Kabankalan City Public Plaza | 0.9 934 | Abankala | 0.7 935 | Ntombankala School | 0.6 936 | Kabakala | 0.583333 937 | Nehalla Bankalah Reserved Forest | 0.5 938 | Kabikala | 0.461538 939 | Mwalaba-Kalamba | 0.454545 940 | Bakala Koupi | 0.4 941 | Bankal | 0.4 942 | Bankal School | 0.4 943 | Bankali | 0.4 944 | Bankalol | 0.4 945 | Jabba Kalai | 0.4 946 | 
Kanampumba-Kalawa | 0.4 947 | Purba Kalaujan | 0.4 948 | Tumba-Kalamba | 0.4 949 | Daba Kalharereh | 0.363636 950 | Gagaba Kalo | 0.363636 951 | Jaba Kalle | 0.363636 952 | Dabakala | 0.333333 953 | Dalabakala | 0.333333 954 | Kambakala | 0.333333 955 | Ker Samba Kalla | 0.333333 956 | Fayzabadkala | 0.307692 957 | Gora Fayzabadkala | 0.307692 958 | Guba Kalgalaksha | 0.307692 959 | Habakkala | 0.307692 960 | Kaikalahun Indian Reserve 25 | 0.307692 961 | Kaikalapettai | 0.307692 962 | Alue Bakkala | 0.3 963 | Ambadikala | 0.3 964 | Ambakala Wewa | 0.3 965 | Ataikala | 0.3 966 | Ba Kaliin | 0.3 967 | Bakala | 0.3 968 | Bakkala Cemetery | 0.3 969 | Bambakala | 0.3 970 | Barkala | 0.3 971 | Barkala Park | 0.3 972 | Barkala Rao | 0.3 973 | Barkala Reserved Forest | 0.3 974 | Baukala | 0.3 975 | Beikala | 0.3 976 | Bikala | 0.3 977 | Bikala Madila | 0.3 978 | Bomba-Kalende | 0.3 979 | Bonagbakala | 0.3 980 | Boyagbakala | 0.3 981 | Bugor Arba-Kalgan | 0.3 982 | Bumba-Kaloki | 0.3 983 | Bumba-Kalumba | 0.3 984 | Darreh Pumba Kal | 0.3 985 | Demba Kali | 0.3 986 | Embatkala | 0.3 987 | Gereba Kaler | 0.3 988 | Golba Kalo | 0.3 989 | Goth Soba Kaloi | 0.3 990 | Guba Kaldo | 0.3 991 | Guba Kalita | 0.3 992 | Gulba Kalle | 0.3 993 | Haikala | 0.3 994 | Kali Bakalan | 0.3 995 | Kali Purbakala | 0.3 996 | Kalibakal | 0.3 997 | Kalibakalako | 0.3 998 | Kalimundubakalan | 0.3 999 | Kamba-Kalele | 0.3 1000 | Kimbakala | 0.3 1001 | Kombakala | 0.3 1002 | Kwala Bakala | 0.3 1003 | Laikala | 0.3 1004 | Maikala Range | 0.3 1005 | Mambakala | 0.3 1006 | Matamba-Kalenga | 0.3 1007 | Matamba-Kalenge | 0.3 1008 | Mbay Bakala | 0.3 1009 | Mount Tohebakala | 0.3 1010 | Naikala | 0.3 1011 | Ngao Bakala | 0.3 1012 | Purba Kalmegha | 0.3 1013 | Sungai Bakala | 0.3 1014 | Tagobikala | 0.3 1015 | Tanjung Batikala | 0.3 1016 | Tombakala | 0.3 1017 | Tsibakala | 0.3 1018 | Tumba-Kalumba | 0.3 1019 | Tumba-Kalunga | 0.3 1020 | Waikala | 0.3 1021 | (89 rows) 1022 | 1023 | 
-------------------------------------------------------------------------------- /pg_trgm--1.0--1.1.sql: -------------------------------------------------------------------------------- 1 | /* contrib/pg_trgm/pg_trgm--1.0--1.1.sql */ 2 | 3 | -- complain if script is sourced in psql, rather than via ALTER EXTENSION 4 | \echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.1'" to load this file. \quit 5 | 6 | ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD 7 | OPERATOR 5 pg_catalog.~ (text, text), 8 | OPERATOR 6 pg_catalog.~* (text, text); 9 | 10 | ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD 11 | OPERATOR 5 pg_catalog.~ (text, text), 12 | OPERATOR 6 pg_catalog.~* (text, text); 13 | -------------------------------------------------------------------------------- /pg_trgm--1.1--1.2.sql: -------------------------------------------------------------------------------- 1 | /* contrib/pg_trgm/pg_trgm--1.1--1.2.sql */ 2 | 3 | -- complain if script is sourced in psql, rather than via ALTER EXTENSION 4 | \echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.2'" to load this file. \quit 5 | 6 | CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal) 7 | RETURNS "char" 8 | AS 'MODULE_PATHNAME' 9 | LANGUAGE C IMMUTABLE STRICT; 10 | 11 | ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD 12 | FUNCTION 6 (text, text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal); 13 | -------------------------------------------------------------------------------- /pg_trgm--1.2--1.3.sql: -------------------------------------------------------------------------------- 1 | /* contrib/pg_trgm/pg_trgm--1.2--1.3.sql */ 2 | 3 | -- complain if script is sourced in psql, rather than via ALTER EXTENSION 4 | \echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.3'" to load this file. 
\quit 5 | 6 | CREATE FUNCTION set_substring_limit(float4) 7 | RETURNS float4 8 | AS 'MODULE_PATHNAME' 9 | LANGUAGE C STRICT VOLATILE; 10 | 11 | CREATE FUNCTION show_substring_limit() 12 | RETURNS float4 13 | AS 'MODULE_PATHNAME' 14 | LANGUAGE C STRICT STABLE; 15 | 16 | CREATE FUNCTION substring_similarity(text,text) 17 | RETURNS float4 18 | AS 'MODULE_PATHNAME' 19 | LANGUAGE C STRICT IMMUTABLE; 20 | 21 | CREATE FUNCTION substring_similarity_op(text,text) 22 | RETURNS bool 23 | AS 'MODULE_PATHNAME' 24 | LANGUAGE C STRICT STABLE; -- stable because depends on trgm_substring_limit 25 | 26 | CREATE FUNCTION substring_similarity_commutator_op(text,text) 27 | RETURNS bool 28 | AS 'MODULE_PATHNAME' 29 | LANGUAGE C STRICT STABLE; -- stable because depends on trgm_substring_limit 30 | 31 | CREATE OPERATOR <% ( 32 | LEFTARG = text, 33 | RIGHTARG = text, 34 | PROCEDURE = substring_similarity_op, 35 | COMMUTATOR = '%>', 36 | RESTRICT = contsel, 37 | JOIN = contjoinsel 38 | ); 39 | 40 | CREATE OPERATOR %> ( 41 | LEFTARG = text, 42 | RIGHTARG = text, 43 | PROCEDURE = substring_similarity_commutator_op, 44 | COMMUTATOR = '<%', 45 | RESTRICT = contsel, 46 | JOIN = contjoinsel 47 | ); 48 | 49 | ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD 50 | OPERATOR 7 %> (text, text); 51 | 52 | ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD 53 | OPERATOR 7 %> (text, text); 54 | -------------------------------------------------------------------------------- /pg_trgm--1.3.sql: -------------------------------------------------------------------------------- 1 | /* contrib/pg_trgm/pg_trgm--1.3.sql */ 2 | 3 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 4 | \echo Use "CREATE EXTENSION pg_trgm" to load this file.
\quit 5 | 6 | CREATE FUNCTION set_limit(float4) 7 | RETURNS float4 8 | AS 'MODULE_PATHNAME' 9 | LANGUAGE C STRICT VOLATILE; 10 | 11 | CREATE FUNCTION show_limit() 12 | RETURNS float4 13 | AS 'MODULE_PATHNAME' 14 | LANGUAGE C STRICT STABLE; 15 | 16 | CREATE FUNCTION show_trgm(text) 17 | RETURNS _text 18 | AS 'MODULE_PATHNAME' 19 | LANGUAGE C STRICT IMMUTABLE; 20 | 21 | CREATE FUNCTION similarity(text,text) 22 | RETURNS float4 23 | AS 'MODULE_PATHNAME' 24 | LANGUAGE C STRICT IMMUTABLE; 25 | 26 | CREATE FUNCTION similarity_op(text,text) 27 | RETURNS bool 28 | AS 'MODULE_PATHNAME' 29 | LANGUAGE C STRICT STABLE; -- stable because depends on trgm_limit 30 | 31 | CREATE OPERATOR % ( 32 | LEFTARG = text, 33 | RIGHTARG = text, 34 | PROCEDURE = similarity_op, 35 | COMMUTATOR = '%', 36 | RESTRICT = contsel, 37 | JOIN = contjoinsel 38 | ); 39 | 40 | CREATE FUNCTION set_substring_limit(float4) 41 | RETURNS float4 42 | AS 'MODULE_PATHNAME' 43 | LANGUAGE C STRICT VOLATILE; 44 | 45 | CREATE FUNCTION show_substring_limit() 46 | RETURNS float4 47 | AS 'MODULE_PATHNAME' 48 | LANGUAGE C STRICT STABLE; 49 | 50 | CREATE FUNCTION substring_similarity(text,text) 51 | RETURNS float4 52 | AS 'MODULE_PATHNAME' 53 | LANGUAGE C STRICT IMMUTABLE; 54 | 55 | CREATE FUNCTION substring_similarity_op(text,text) 56 | RETURNS bool 57 | AS 'MODULE_PATHNAME' 58 | LANGUAGE C STRICT STABLE; -- stable because depends on trgm_substring_limit 59 | 60 | CREATE FUNCTION substring_similarity_commutator_op(text,text) 61 | RETURNS bool 62 | AS 'MODULE_PATHNAME' 63 | LANGUAGE C STRICT STABLE; -- stable because depends on trgm_substring_limit 64 | 65 | CREATE OPERATOR <% ( 66 | LEFTARG = text, 67 | RIGHTARG = text, 68 | PROCEDURE = substring_similarity_op, 69 | COMMUTATOR = '%>', 70 | RESTRICT = contsel, 71 | JOIN = contjoinsel 72 | ); 73 | 74 | CREATE OPERATOR %> ( 75 | LEFTARG = text, 76 | RIGHTARG = text, 77 | PROCEDURE = substring_similarity_commutator_op, 78 | COMMUTATOR = '<%', 79 | RESTRICT = contsel, 80 | 
JOIN = contjoinsel 81 | ); 82 | 83 | CREATE FUNCTION similarity_dist(text,text) 84 | RETURNS float4 85 | AS 'MODULE_PATHNAME' 86 | LANGUAGE C STRICT IMMUTABLE; 87 | 88 | CREATE OPERATOR <-> ( 89 | LEFTARG = text, 90 | RIGHTARG = text, 91 | PROCEDURE = similarity_dist, 92 | COMMUTATOR = '<->' 93 | ); 94 | 95 | -- gist key 96 | CREATE FUNCTION gtrgm_in(cstring) 97 | RETURNS gtrgm 98 | AS 'MODULE_PATHNAME' 99 | LANGUAGE C STRICT IMMUTABLE; 100 | 101 | CREATE FUNCTION gtrgm_out(gtrgm) 102 | RETURNS cstring 103 | AS 'MODULE_PATHNAME' 104 | LANGUAGE C STRICT IMMUTABLE; 105 | 106 | CREATE TYPE gtrgm ( 107 | INTERNALLENGTH = -1, 108 | INPUT = gtrgm_in, 109 | OUTPUT = gtrgm_out 110 | ); 111 | 112 | -- support functions for gist 113 | CREATE FUNCTION gtrgm_consistent(internal,text,smallint,oid,internal) 114 | RETURNS bool 115 | AS 'MODULE_PATHNAME' 116 | LANGUAGE C IMMUTABLE STRICT; 117 | 118 | CREATE FUNCTION gtrgm_distance(internal,text,smallint,oid,internal) 119 | RETURNS float8 120 | AS 'MODULE_PATHNAME' 121 | LANGUAGE C IMMUTABLE STRICT; 122 | 123 | CREATE FUNCTION gtrgm_compress(internal) 124 | RETURNS internal 125 | AS 'MODULE_PATHNAME' 126 | LANGUAGE C IMMUTABLE STRICT; 127 | 128 | CREATE FUNCTION gtrgm_decompress(internal) 129 | RETURNS internal 130 | AS 'MODULE_PATHNAME' 131 | LANGUAGE C IMMUTABLE STRICT; 132 | 133 | CREATE FUNCTION gtrgm_penalty(internal,internal,internal) 134 | RETURNS internal 135 | AS 'MODULE_PATHNAME' 136 | LANGUAGE C IMMUTABLE STRICT; 137 | 138 | CREATE FUNCTION gtrgm_picksplit(internal, internal) 139 | RETURNS internal 140 | AS 'MODULE_PATHNAME' 141 | LANGUAGE C IMMUTABLE STRICT; 142 | 143 | CREATE FUNCTION gtrgm_union(internal, internal) 144 | RETURNS gtrgm 145 | AS 'MODULE_PATHNAME' 146 | LANGUAGE C IMMUTABLE STRICT; 147 | 148 | CREATE FUNCTION gtrgm_same(gtrgm, gtrgm, internal) 149 | RETURNS internal 150 | AS 'MODULE_PATHNAME' 151 | LANGUAGE C IMMUTABLE STRICT; 152 | 153 | -- create the operator class for gist 154 | CREATE OPERATOR CLASS 
gist_trgm_ops 155 | FOR TYPE text USING gist 156 | AS 157 | OPERATOR 1 % (text, text), 158 | FUNCTION 1 gtrgm_consistent (internal, text, smallint, oid, internal), 159 | FUNCTION 2 gtrgm_union (internal, internal), 160 | FUNCTION 3 gtrgm_compress (internal), 161 | FUNCTION 4 gtrgm_decompress (internal), 162 | FUNCTION 5 gtrgm_penalty (internal, internal, internal), 163 | FUNCTION 6 gtrgm_picksplit (internal, internal), 164 | FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal), 165 | STORAGE gtrgm; 166 | 167 | -- Add operators and support functions that are new in 9.1. We do it like 168 | -- this, leaving them "loose" in the operator family rather than bound into 169 | -- the gist_trgm_ops opclass, because that's the only state that can be 170 | -- reproduced during an upgrade from 9.0 (see pg_trgm--unpackaged--1.0.sql). 171 | 172 | ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD 173 | OPERATOR 2 <-> (text, text) FOR ORDER BY pg_catalog.float_ops, 174 | OPERATOR 3 pg_catalog.~~ (text, text), 175 | OPERATOR 4 pg_catalog.~~* (text, text), 176 | FUNCTION 8 (text, text) gtrgm_distance (internal, text, smallint, oid, internal); 177 | 178 | -- Add operators that are new in 9.3. 179 | 180 | ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD 181 | OPERATOR 5 pg_catalog.~ (text, text), 182 | OPERATOR 6 pg_catalog.~* (text, text); 183 | 184 | -- Add operators that are new in 9.6 (pg_trgm 1.3). 
185 | 186 | ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD 187 | OPERATOR 7 %> (text, text); 188 | 189 | -- support functions for gin 190 | CREATE FUNCTION gin_extract_value_trgm(text, internal) 191 | RETURNS internal 192 | AS 'MODULE_PATHNAME' 193 | LANGUAGE C IMMUTABLE STRICT; 194 | 195 | CREATE FUNCTION gin_extract_query_trgm(text, internal, int2, internal, internal, internal, internal) 196 | RETURNS internal 197 | AS 'MODULE_PATHNAME' 198 | LANGUAGE C IMMUTABLE STRICT; 199 | 200 | CREATE FUNCTION gin_trgm_consistent(internal, int2, text, int4, internal, internal, internal, internal) 201 | RETURNS bool 202 | AS 'MODULE_PATHNAME' 203 | LANGUAGE C IMMUTABLE STRICT; 204 | 205 | -- create the operator class for gin 206 | CREATE OPERATOR CLASS gin_trgm_ops 207 | FOR TYPE text USING gin 208 | AS 209 | OPERATOR 1 % (text, text), 210 | FUNCTION 1 btint4cmp (int4, int4), 211 | FUNCTION 2 gin_extract_value_trgm (text, internal), 212 | FUNCTION 3 gin_extract_query_trgm (text, internal, int2, internal, internal, internal, internal), 213 | FUNCTION 4 gin_trgm_consistent (internal, int2, text, int4, internal, internal, internal, internal), 214 | STORAGE int4; 215 | 216 | -- Add operators that are new in 9.1. 217 | 218 | ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD 219 | OPERATOR 3 pg_catalog.~~ (text, text), 220 | OPERATOR 4 pg_catalog.~~* (text, text); 221 | 222 | -- Add operators that are new in 9.3. 223 | 224 | ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD 225 | OPERATOR 5 pg_catalog.~ (text, text), 226 | OPERATOR 6 pg_catalog.~* (text, text); 227 | 228 | -- Add functions that are new in 9.6 (pg_trgm 1.2). 
229 | 230 | CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal) 231 | RETURNS "char" 232 | AS 'MODULE_PATHNAME' 233 | LANGUAGE C IMMUTABLE STRICT; 234 | 235 | ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD 236 | FUNCTION 6 (text,text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal); 237 | 238 | -- Add operators that are new in 9.6 (pg_trgm 1.3). 239 | 240 | ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD 241 | OPERATOR 7 %> (text, text); 242 | -------------------------------------------------------------------------------- /pg_trgm--unpackaged--1.0.sql: -------------------------------------------------------------------------------- 1 | /* contrib/pg_trgm/pg_trgm--unpackaged--1.0.sql */ 2 | 3 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 4 | \echo Use "CREATE EXTENSION pg_trgm FROM unpackaged" to load this file. \quit 5 | 6 | ALTER EXTENSION pg_trgm ADD function set_limit(real); 7 | ALTER EXTENSION pg_trgm ADD function show_limit(); 8 | ALTER EXTENSION pg_trgm ADD function show_trgm(text); 9 | ALTER EXTENSION pg_trgm ADD function similarity(text,text); 10 | ALTER EXTENSION pg_trgm ADD function similarity_op(text,text); 11 | ALTER EXTENSION pg_trgm ADD operator %(text,text); 12 | ALTER EXTENSION pg_trgm ADD type gtrgm; 13 | ALTER EXTENSION pg_trgm ADD function gtrgm_in(cstring); 14 | ALTER EXTENSION pg_trgm ADD function gtrgm_out(gtrgm); 15 | ALTER EXTENSION pg_trgm ADD function gtrgm_consistent(internal,text,integer,oid,internal); 16 | ALTER EXTENSION pg_trgm ADD function gtrgm_compress(internal); 17 | ALTER EXTENSION pg_trgm ADD function gtrgm_decompress(internal); 18 | ALTER EXTENSION pg_trgm ADD function gtrgm_penalty(internal,internal,internal); 19 | ALTER EXTENSION pg_trgm ADD function gtrgm_picksplit(internal,internal); 20 | ALTER EXTENSION pg_trgm ADD function gtrgm_union(bytea,internal); 21 | ALTER EXTENSION pg_trgm ADD function 
gtrgm_same(gtrgm,gtrgm,internal); 22 | ALTER EXTENSION pg_trgm ADD operator family gist_trgm_ops using gist; 23 | ALTER EXTENSION pg_trgm ADD operator class gist_trgm_ops using gist; 24 | ALTER EXTENSION pg_trgm ADD operator family gin_trgm_ops using gin; 25 | ALTER EXTENSION pg_trgm ADD operator class gin_trgm_ops using gin; 26 | 27 | -- These functions had different names/signatures in 9.0. We can't just 28 | -- drop and recreate them because they are linked into the GIN opclass, 29 | -- so we need some ugly hacks. 30 | 31 | -- First, absorb them into the extension under their old names. 32 | 33 | ALTER EXTENSION pg_trgm ADD function gin_extract_trgm(text, internal); 34 | ALTER EXTENSION pg_trgm ADD function gin_extract_trgm(text, internal, int2, internal, internal); 35 | ALTER EXTENSION pg_trgm ADD function gin_trgm_consistent(internal,smallint,text,integer,internal,internal); 36 | 37 | -- Fix the names, and then do CREATE OR REPLACE to adjust the function 38 | -- bodies to be correct (ie, reference the correct C symbol). 39 | 40 | ALTER FUNCTION gin_extract_trgm(text, internal) 41 | RENAME TO gin_extract_value_trgm; 42 | CREATE OR REPLACE FUNCTION gin_extract_value_trgm(text, internal) 43 | RETURNS internal 44 | AS 'MODULE_PATHNAME' 45 | LANGUAGE C IMMUTABLE STRICT; 46 | 47 | ALTER FUNCTION gin_extract_trgm(text, internal, int2, internal, internal) 48 | RENAME TO gin_extract_query_trgm; 49 | CREATE OR REPLACE FUNCTION gin_extract_query_trgm(text, internal, int2, internal, internal) 50 | RETURNS internal 51 | AS 'MODULE_PATHNAME' 52 | LANGUAGE C IMMUTABLE STRICT; 53 | 54 | -- gin_trgm_consistent didn't change name. 55 | 56 | -- Last, fix the parameter lists by means of direct UPDATE on the pg_proc 57 | -- entries. This is ugly as can be, but there's no other way to do it 58 | -- while preserving the identities (OIDs) of the functions. 
59 | 60 | UPDATE pg_catalog.pg_proc 61 | SET pronargs = 7, proargtypes = '25 2281 21 2281 2281 2281 2281' 62 | WHERE oid = 'gin_extract_query_trgm(text,internal,int2,internal,internal)'::pg_catalog.regprocedure; 63 | 64 | UPDATE pg_catalog.pg_proc 65 | SET pronargs = 8, proargtypes = '2281 21 25 23 2281 2281 2281 2281' 66 | WHERE oid = 'gin_trgm_consistent(internal,smallint,text,integer,internal,internal)'::pg_catalog.regprocedure; 67 | 68 | 69 | -- These were not in 9.0: 70 | 71 | CREATE FUNCTION similarity_dist(text,text) 72 | RETURNS float4 73 | AS 'MODULE_PATHNAME' 74 | LANGUAGE C STRICT IMMUTABLE; 75 | 76 | CREATE OPERATOR <-> ( 77 | LEFTARG = text, 78 | RIGHTARG = text, 79 | PROCEDURE = similarity_dist, 80 | COMMUTATOR = '<->' 81 | ); 82 | 83 | CREATE FUNCTION gtrgm_distance(internal,text,int,oid) 84 | RETURNS float8 85 | AS 'MODULE_PATHNAME' 86 | LANGUAGE C IMMUTABLE STRICT; 87 | 88 | -- Add new stuff to the operator classes. See comment in pg_trgm--1.0.sql. 89 | 90 | ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD 91 | OPERATOR 2 <-> (text, text) FOR ORDER BY pg_catalog.float_ops, 92 | OPERATOR 3 pg_catalog.~~ (text, text), 93 | OPERATOR 4 pg_catalog.~~* (text, text), 94 | FUNCTION 8 (text, text) gtrgm_distance (internal, text, int, oid); 95 | 96 | ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD 97 | OPERATOR 3 pg_catalog.~~ (text, text), 98 | OPERATOR 4 pg_catalog.~~* (text, text); 99 | -------------------------------------------------------------------------------- /pg_trgm.control: -------------------------------------------------------------------------------- 1 | # pg_trgm extension 2 | comment = 'text similarity measurement and index searching based on trigrams' 3 | default_version = '1.3' 4 | module_pathname = '$libdir/pg_trgm' 5 | relocatable = true 6 | -------------------------------------------------------------------------------- /sql/pg_substring_trgm.sql: -------------------------------------------------------------------------------- 1 | 
CREATE TABLE test_trgm2(t text COLLATE "C"); 2 | set extra_float_digits = 0; 3 | \copy test_trgm2 from 'data/trgm2.data' 4 | 5 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 6 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; 7 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 8 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 9 | 10 | create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops); 11 | set enable_seqscan=off; 12 | 13 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 14 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; 15 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 16 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 17 | 18 | drop index trgm_idx2; 19 | create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops); 20 | set enable_seqscan=off; 21 | 22 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 23 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; 24 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 25 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 26 | 27 | select set_substring_limit(0.5); 28 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 29 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t 
order by sml desc, t; 30 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 31 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 32 | 33 | select set_substring_limit(0.3); 34 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; 35 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; 36 | select t,substring_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; 37 | select t,substring_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; 38 | -------------------------------------------------------------------------------- /sql/pg_trgm.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION pg_trgm; 2 | set extra_float_digits = 0; 3 | select show_trgm(''); 4 | select show_trgm('(*&^$@%@'); 5 | select show_trgm('a b c'); 6 | select show_trgm(' a b c '); 7 | select show_trgm('aA bB cC'); 8 | select show_trgm(' aA bB cC '); 9 | select show_trgm('a b C0*%^'); 10 | 11 | select similarity('wow','WOWa '); 12 | select similarity('wow',' WOW '); 13 | 14 | select similarity('---', '####---'); 15 | 16 | CREATE TABLE test_trgm(t text COLLATE "C"); 17 | 18 | \copy test_trgm from 'data/trgm.data' 19 | 20 | select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t; 21 | select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t; 22 | select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t; 23 | select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2; 24 | 25 | create index trgm_idx on test_trgm using gist (t gist_trgm_ops); 26 | set enable_seqscan=off; 
27 | 28 | select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t; 29 | select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t; 30 | select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t; 31 | explain (costs off) 32 | select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2; 33 | select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2; 34 | 35 | drop index trgm_idx; 36 | create index trgm_idx on test_trgm using gin (t gin_trgm_ops); 37 | set enable_seqscan=off; 38 | 39 | select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t; 40 | select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t; 41 | select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t; 42 | 43 | create table test2(t text COLLATE "C"); 44 | insert into test2 values ('abcdef'); 45 | insert into test2 values ('quark'); 46 | insert into test2 values (' z foo bar'); 47 | create index test2_idx_gin on test2 using gin (t gin_trgm_ops); 48 | set enable_seqscan=off; 49 | explain (costs off) 50 | select * from test2 where t like '%BCD%'; 51 | explain (costs off) 52 | select * from test2 where t ilike '%BCD%'; 53 | select * from test2 where t like '%BCD%'; 54 | select * from test2 where t like '%bcd%'; 55 | select * from test2 where t like E'%\\bcd%'; 56 | select * from test2 where t ilike '%BCD%'; 57 | select * from test2 where t ilike 'qua%'; 58 | select * from test2 where t like '%z foo bar%'; 59 | select * from test2 where t like ' z foo%'; 60 | explain (costs off) 61 | select * from test2 where t ~ '[abc]{3}'; 62 | explain (costs off) 63 | select * from test2 where t ~* 'DEF'; 64 | select * from test2 where t ~ '[abc]{3}'; 65 | select * from test2 where t ~ 
'a[bc]+d'; 66 | select * from test2 where t ~ '(abc)*$'; 67 | select * from test2 where t ~* 'DEF'; 68 | select * from test2 where t ~ 'dEf'; 69 | select * from test2 where t ~* '^q'; 70 | select * from test2 where t ~* '[abc]{3}[def]{3}'; 71 | select * from test2 where t ~* 'ab[a-z]{3}'; 72 | select * from test2 where t ~* '(^| )qua'; 73 | select * from test2 where t ~ 'q.*rk$'; 74 | select * from test2 where t ~ 'q'; 75 | select * from test2 where t ~ '[a-z]{3}'; 76 | select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}'; 77 | select * from test2 where t ~ 'z foo bar'; 78 | select * from test2 where t ~ ' z foo bar'; 79 | select * from test2 where t ~ ' z foo bar'; 80 | select * from test2 where t ~ ' z foo'; 81 | drop index test2_idx_gin; 82 | create index test2_idx_gist on test2 using gist (t gist_trgm_ops); 83 | set enable_seqscan=off; 84 | explain (costs off) 85 | select * from test2 where t like '%BCD%'; 86 | explain (costs off) 87 | select * from test2 where t ilike '%BCD%'; 88 | select * from test2 where t like '%BCD%'; 89 | select * from test2 where t like '%bcd%'; 90 | select * from test2 where t like E'%\\bcd%'; 91 | select * from test2 where t ilike '%BCD%'; 92 | select * from test2 where t ilike 'qua%'; 93 | select * from test2 where t like '%z foo bar%'; 94 | select * from test2 where t like ' z foo%'; 95 | explain (costs off) 96 | select * from test2 where t ~ '[abc]{3}'; 97 | explain (costs off) 98 | select * from test2 where t ~* 'DEF'; 99 | select * from test2 where t ~ '[abc]{3}'; 100 | select * from test2 where t ~ 'a[bc]+d'; 101 | select * from test2 where t ~ '(abc)*$'; 102 | select * from test2 where t ~* 'DEF'; 103 | select * from test2 where t ~ 'dEf'; 104 | select * from test2 where t ~* '^q'; 105 | select * from test2 where t ~* '[abc]{3}[def]{3}'; 106 | select * from test2 where t ~* 'ab[a-z]{3}'; 107 | select * from test2 where t ~* '(^| )qua'; 108 | select * from test2 where t ~ 'q.*rk$'; 109 | select * from test2 where t ~ 'q'; 
110 | select * from test2 where t ~ '[a-z]{3}'; 111 | select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}'; 112 | select * from test2 where t ~ 'z foo bar'; 113 | select * from test2 where t ~ ' z foo bar'; 114 | select * from test2 where t ~ ' z foo bar'; 115 | select * from test2 where t ~ ' z foo'; 116 | -------------------------------------------------------------------------------- /trgm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * contrib/pg_trgm/trgm.h 3 | */ 4 | #ifndef __TRGM_H__ 5 | #define __TRGM_H__ 6 | 7 | #include "access/gist.h" 8 | #include "access/itup.h" 9 | #include "storage/bufpage.h" 10 | 11 | /* 12 | * Options ... but note that trgm_regexp.c effectively assumes these values 13 | * of LPADDING and RPADDING. 14 | */ 15 | #define LPADDING 2 16 | #define RPADDING 1 17 | #define KEEPONLYALNUM 18 | /* 19 | * Caution: IGNORECASE macro means that trigrams are case-insensitive. 20 | * If this macro is disabled, the ~* and ~~* operators must be removed from 21 | * the operator classes, because we can't handle case-insensitive wildcard 22 | * search with case-sensitive trigrams. Failure to do this will result in 23 | * "cannot handle ~*(~~*) with case-sensitive trigrams" errors. 24 | */ 25 | #define IGNORECASE 26 | #define DIVUNION 27 | 28 | /* operator strategy numbers */ 29 | #define SimilarityStrategyNumber 1 30 | #define DistanceStrategyNumber 2 31 | #define LikeStrategyNumber 3 32 | #define ILikeStrategyNumber 4 33 | #define RegExpStrategyNumber 5 34 | #define RegExpICaseStrategyNumber 6 35 | #define SubstringSimilarityStrategyNumber 7 36 | 37 | typedef char trgm[3]; 38 | 39 | #define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) ) 40 | #define CMPPCHAR(a,b,i) CMPCHAR( *(((const char*)(a))+i), *(((const char*)(b))+i) ) 41 | #define CMPTRGM(a,b) ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? 
CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) ) 42 | 43 | #define CPTRGM(a,b) do { \ 44 | *(((char*)(a))+0) = *(((char*)(b))+0); \ 45 | *(((char*)(a))+1) = *(((char*)(b))+1); \ 46 | *(((char*)(a))+2) = *(((char*)(b))+2); \ 47 | } while(0) 48 | 49 | #ifdef KEEPONLYALNUM 50 | #define ISWORDCHR(c) (t_isalpha(c) || t_isdigit(c)) 51 | #define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') ) 52 | #else 53 | #define ISWORDCHR(c) (!t_isspace(c)) 54 | #define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) ) 55 | #endif 56 | #define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) ) 57 | 58 | #define ISESCAPECHAR(x) (*(x) == '\\') /* Wildcard escape character */ 59 | #define ISWILDCARDCHAR(x) (*(x) == '_' || *(x) == '%') /* Wildcard 60 | * meta-character */ 61 | 62 | typedef struct 63 | { 64 | int32 vl_len_; /* varlena header (do not touch directly!) */ 65 | uint8 flag; 66 | char data[FLEXIBLE_ARRAY_MEMBER]; 67 | } TRGM; 68 | 69 | #define TRGMHDRSIZE (VARHDRSZ + sizeof(uint8)) 70 | 71 | /* gist */ 72 | #define BITBYTE 8 73 | #define SIGLENINT 3 /* >122 => key will toast, so very slow!!!
*/ 74 | #define SIGLEN ( sizeof(int)*SIGLENINT ) 75 | 76 | #define SIGLENBIT (SIGLEN*BITBYTE - 1) /* see makesign */ 77 | 78 | typedef char BITVEC[SIGLEN]; 79 | typedef char *BITVECP; 80 | 81 | #define LOOPBYTE \ 82 | for(i=0;i<SIGLEN;i++) 83 | 84 | #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) ) 85 | #define GETBITBYTE(x,i) ( (((char)(x)) >> (i)) & 0x01 ) 86 | #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) 87 | #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) ) 88 | #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 ) 89 | 90 | #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT) 91 | #define HASH(sign, val) SETBIT((sign), HASHVAL(val)) 92 | 93 | #define ARRKEY 0x01 94 | #define SIGNKEY 0x02 95 | #define ALLISTRUE 0x04 96 | 97 | #define ISARRKEY(x) ( ((TRGM*)x)->flag & ARRKEY ) 98 | #define ISSIGNKEY(x) ( ((TRGM*)x)->flag & SIGNKEY ) 99 | #define ISALLTRUE(x) ( ((TRGM*)x)->flag & ALLISTRUE ) 100 | 101 | #define CALCGTSIZE(flag, len) ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) 102 | #define GETSIGN(x) ( (BITVECP)( (char*)x+TRGMHDRSIZE ) ) 103 | #define GETARR(x) ( (trgm*)( (char*)x+TRGMHDRSIZE ) ) 104 | #define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) ) 105 | 106 | /* 107 | * If DIVUNION is defined then similarity formula is: 108 | * count / (len1 + len2 - count) 109 | * else if DIVUNION is not defined then similarity formula is: 110 | * count / max(len1, len2) 111 | */ 112 | #ifdef DIVUNION 113 | #define CALCSML(count, len1, len2) ((float4) (count)) / ((float4) ((len1) + (len2) - (count))) 114 | #else 115 | #define CALCSML(count, len1, len2) ((float4) (count)) / ((float4) (((len1) > (len2)) ?
(len1) : (len2))) 116 | #endif 117 | 118 | typedef struct TrgmPackedGraph TrgmPackedGraph; 119 | 120 | extern double trgm_limit; 121 | extern double trgm_substring_limit; 122 | 123 | extern uint32 trgm2int(trgm *ptr); 124 | extern void compact_trigram(trgm *tptr, char *str, int bytelen); 125 | extern TRGM *generate_trgm(char *str, int slen); 126 | extern TRGM *generate_wildcard_trgm(const char *str, int slen); 127 | extern float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact); 128 | extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2); 129 | extern bool *trgm_presence_map(TRGM *query, TRGM *key); 130 | extern TRGM *createTrgmNFA(text *text_re, Oid collation, 131 | TrgmPackedGraph **graph, MemoryContext rcontext); 132 | extern bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check); 133 | 134 | #endif /* __TRGM_H__ */ 135 | -------------------------------------------------------------------------------- /trgm_gin.c: -------------------------------------------------------------------------------- 1 | /* 2 | * contrib/pg_trgm/trgm_gin.c 3 | */ 4 | #include "postgres.h" 5 | 6 | #include "trgm.h" 7 | 8 | #include "access/gin.h" 9 | #include "fmgr.h" 10 | #if PG_VERSION_NUM >= 90500 11 | /* 12 | * We have to include skey.h in pre-9.5 to use StrategyNumber type 13 | */ 14 | #include "access/stratnum.h" 15 | #else 16 | #include "access/skey.h" 17 | #endif 18 | 19 | PG_FUNCTION_INFO_V1(gin_extract_trgm); 20 | PG_FUNCTION_INFO_V1(gin_extract_value_trgm); 21 | PG_FUNCTION_INFO_V1(gin_extract_query_trgm); 22 | PG_FUNCTION_INFO_V1(gin_trgm_consistent); 23 | PG_FUNCTION_INFO_V1(gin_trgm_triconsistent); 24 | 25 | /* 26 | * This function can only be called if a pre-9.1 version of the GIN operator 27 | * class definition is present in the catalogs (probably as a consequence 28 | * of upgrade-in-place). Cope. 
 */
Datum
gin_extract_trgm(PG_FUNCTION_ARGS)
{
	/* three arguments: we were called as the value-extraction function */
	if (PG_NARGS() == 3)
		return gin_extract_value_trgm(fcinfo);
	/* seven arguments: we were called as the query-extraction function */
	if (PG_NARGS() == 7)
		return gin_extract_query_trgm(fcinfo);
	elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
	PG_RETURN_NULL();
}

/*
 * Extract the trigrams of an indexed text value.
 *
 * Argument 0 is the text value, argument 1 receives the number of entries.
 * Returns a palloc'd array with one int32 Datum per trigram, or NULL (with
 * *nentries = 0) when the value yields no trigrams.
 */
Datum
gin_extract_value_trgm(PG_FUNCTION_ARGS)
{
	text	   *val = (text *) PG_GETARG_TEXT_P(0);
	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
	Datum	   *entries = NULL;
	TRGM	   *trg;
	int32		trglen;

	*nentries = 0;

	trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
	trglen = ARRNELEM(trg);

	if (trglen > 0)
	{
		trgm	   *ptr;
		int32		i;

		*nentries = trglen;
		entries = (Datum *) palloc(sizeof(Datum) * trglen);

		/* convert each trigram to its integer form */
		ptr = GETARR(trg);
		for (i = 0; i < trglen; i++)
		{
			int32		item = trgm2int(ptr);

			entries[i] = Int32GetDatum(item);
			ptr++;
		}
	}

	PG_RETURN_POINTER(entries);
}

/*
 * Extract the trigrams of a query, according to the search strategy.
 *
 * Similarity strategies use the plain trigram set, LIKE/ILIKE use the
 * wildcard-aware extraction, and the regex strategies go through
 * createTrgmNFA(), whose packed graph is handed back through extra_data.
 * Whenever no trigram can be extracted, *searchMode is set to
 * GIN_SEARCH_MODE_ALL.
 */
Datum
gin_extract_query_trgm(PG_FUNCTION_ARGS)
{
	text	   *val = (text *) PG_GETARG_TEXT_P(0);
	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
	StrategyNumber strategy = PG_GETARG_UINT16(2);

	/* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);

	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
	Datum	   *entries = NULL;
	TRGM	   *trg;
	int32		trglen;
	trgm	   *ptr;
	TrgmPackedGraph *graph;
	int32		i;

	switch (strategy)
	{
		case SimilarityStrategyNumber:
		case SubstringSimilarityStrategyNumber:
			trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
			break;
		case ILikeStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case LikeStrategyNumber:

			/*
			 * For wildcard search we extract all the trigrams that every
			 * potentially-matching string must include.
			 */
			trg = generate_wildcard_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
			break;
		case RegExpICaseStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case RegExpStrategyNumber:
			trg = createTrgmNFA(val, PG_GET_COLLATION(),
								&graph, CurrentMemoryContext);
			if (trg && ARRNELEM(trg) > 0)
			{
				/*
				 * Successful regex processing: store NFA-like graph as
				 * extra_data.  GIN API requires an array of nentries
				 * Pointers, but we just put the same value in each element.
				 */
				trglen = ARRNELEM(trg);
				*extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
				for (i = 0; i < trglen; i++)
					(*extra_data)[i] = (Pointer) graph;
			}
			else
			{
				/* No result: have to do full index scan. */
				*nentries = 0;
				*searchMode = GIN_SEARCH_MODE_ALL;
				PG_RETURN_POINTER(entries);
			}
			break;
		default:
			elog(ERROR, "unrecognized strategy number: %d", strategy);
			trg = NULL;			/* keep compiler quiet */
			break;
	}

	trglen = ARRNELEM(trg);
	*nentries = trglen;

	if (trglen > 0)
	{
		/* convert each trigram to its integer form */
		entries = (Datum *) palloc(sizeof(Datum) * trglen);
		ptr = GETARR(trg);
		for (i = 0; i < trglen; i++)
		{
			int32		item = trgm2int(ptr);

			entries[i] = Int32GetDatum(item);
			ptr++;
		}
	}

	/*
	 * If no trigram was extracted then we have to scan the whole index.
	 */
	if (trglen == 0)
		*searchMode = GIN_SEARCH_MODE_ALL;

	PG_RETURN_POINTER(entries);
}

/*
 * Boolean consistency check.
 *
 * check[] tells which of the nkeys query trigrams are present in the
 * candidate item.  Every strategy handled here sets *recheck = true.
 */
Datum
gin_trgm_consistent(PG_FUNCTION_ARGS)
{
	bool	   *check = (bool *) PG_GETARG_POINTER(0);
	StrategyNumber strategy = PG_GETARG_UINT16(1);

	/* text    *query = PG_GETARG_TEXT_P(2); */
	int32		nkeys = PG_GETARG_INT32(3);
	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
	bool		res;
	int32		i,
				ntrue;
	double		nlimit;

	/* All cases served by this function are inexact */
	*recheck = true;

	switch (strategy)
	{
		case SimilarityStrategyNumber:
		case SubstringSimilarityStrategyNumber:
			/* each similarity operator has its own threshold */
			nlimit = (strategy == SimilarityStrategyNumber) ?
				trgm_limit : trgm_substring_limit;

			/* Count the matches */
			ntrue = 0;
			for (i = 0; i < nkeys; i++)
			{
				if (check[i])
					ntrue++;
			}

			/*--------------------
			 * If DIVUNION is defined then similarity formula is:
			 * c / (len1 + len2 - c)
			 * where c is number of common trigrams and it stands as ntrue in
			 * this code.  Here we don't know value of len2 but we can assume
			 * that c (ntrue) is a lower bound of len2, so upper bound of
			 * similarity is:
			 * c / (len1 + c - c)  => c / len1
			 * If DIVUNION is not defined then similarity formula is:
			 * c / max(len1, len2)
			 * And again, c (ntrue) is a lower bound of len2, but c <= len1
			 * just by definition and, consequently, upper bound of
			 * similarity is just c / len1.
			 * So, independently on DIVUNION the upper bound formula is the same.
			 */
			res = (nkeys == 0) ? false :
				(((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
			break;
		case ILikeStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case LikeStrategyNumber:
			/* Check if all extracted trigrams are present. */
			res = true;
			for (i = 0; i < nkeys; i++)
			{
				if (!check[i])
				{
					res = false;
					break;
				}
			}
			break;
		case RegExpICaseStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case RegExpStrategyNumber:
			if (nkeys < 1)
			{
				/* Regex processing gave no result: do full index scan */
				res = true;
			}
			else
				/* every extra_data element holds the same graph pointer */
				res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
										 check);
			break;
		default:
			elog(ERROR, "unrecognized strategy number: %d", strategy);
			res = false;		/* keep compiler quiet */
			break;
	}

	PG_RETURN_BOOL(res);
}

/*
 * In all cases, GIN_TRUE is at least as favorable to inclusion as
 * GIN_MAYBE. If no better option is available, simply treat
 * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
 * consistent function.
 */
Datum
gin_trgm_triconsistent(PG_FUNCTION_ARGS)
{
	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
	StrategyNumber strategy = PG_GETARG_UINT16(1);

	/* text    *query = PG_GETARG_TEXT_P(2); */
	int32		nkeys = PG_GETARG_INT32(3);
	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
	GinTernaryValue res = GIN_MAYBE;
	int32		i,
				ntrue;
	bool	   *boolcheck;
	double		nlimit;

	switch (strategy)
	{
		case SimilarityStrategyNumber:
		case SubstringSimilarityStrategyNumber:
			/* each similarity operator has its own threshold */
			nlimit = (strategy == SimilarityStrategyNumber) ?
				trgm_limit : trgm_substring_limit;

			/* Count the matches; GIN_MAYBE counts as a match */
			ntrue = 0;
			for (i = 0; i < nkeys; i++)
			{
				if (check[i] != GIN_FALSE)
					ntrue++;
			}

			/*
			 * See comment in gin_trgm_consistent() about upper bound formula
			 */
			res = (nkeys == 0)
				? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
							   ? GIN_MAYBE : GIN_FALSE);
			break;
		case ILikeStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case LikeStrategyNumber:
			/* Check if all extracted trigrams are present. */
			res = GIN_MAYBE;
			for (i = 0; i < nkeys; i++)
			{
				if (check[i] == GIN_FALSE)
				{
					res = GIN_FALSE;
					break;
				}
			}
			break;
		case RegExpICaseStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case RegExpStrategyNumber:
			if (nkeys < 1)
			{
				/* Regex processing gave no result: do full index scan */
				res = GIN_MAYBE;
			}
			else
			{
				/*
				 * As trigramsMatchGraph implements a monotonic boolean function,
				 * promoting all GIN_MAYBE keys to GIN_TRUE will give a
				 * conservative result.
				 */
				boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
				for (i = 0; i < nkeys; i++)
					boolcheck[i] = (check[i] != GIN_FALSE);
				if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
										boolcheck))
					res = GIN_FALSE;
				pfree(boolcheck);
			}
			break;
		default:
			elog(ERROR, "unrecognized strategy number: %d", strategy);
			res = GIN_FALSE;	/* keep compiler quiet */
			break;
	}

	/* All cases served by this function are inexact */
	Assert(res != GIN_TRUE);
	PG_RETURN_GIN_TERNARY_VALUE(res);
}

--------------------------------------------------------------------------------
/trgm_gist.c:
--------------------------------------------------------------------------------
/*
 * contrib/pg_trgm/trgm_gist.c
 */
#include "postgres.h"

#include "trgm.h"

#include "fmgr.h"
#if PG_VERSION_NUM >= 90500
/*
 * The StrategyNumber type is taken from stratnum.h on 9.5 and later;
 * pre-9.5 releases have to include skey.h to get it.
 */
#include "access/stratnum.h"
#else
#include "access/skey.h"
#endif

/* Per-scan cache kept in fn_extra by gtrgm_consistent */
typedef struct
{
	/* most recent inputs to gtrgm_consistent */
	StrategyNumber strategy;
	text	   *query;
	/* extracted trigrams for query */
	TRGM	   *trigrams;
	/* if a regex operator, the extracted graph */
	TrgmPackedGraph *graph;

	/*
	 * The "query" and "trigrams" are stored in the same palloc block as this
	 * cache struct, at MAXALIGN'ed offsets.  The graph however isn't.
	 */
} gtrgm_consistent_cache;

/* Fetch the TRGM key stored at position pos of a GistEntryVector */
#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))


PG_FUNCTION_INFO_V1(gtrgm_in);
PG_FUNCTION_INFO_V1(gtrgm_out);
PG_FUNCTION_INFO_V1(gtrgm_compress);
PG_FUNCTION_INFO_V1(gtrgm_decompress);
PG_FUNCTION_INFO_V1(gtrgm_consistent);
PG_FUNCTION_INFO_V1(gtrgm_distance);
PG_FUNCTION_INFO_V1(gtrgm_union);
PG_FUNCTION_INFO_V1(gtrgm_same);
PG_FUNCTION_INFO_V1(gtrgm_penalty);
PG_FUNCTION_INFO_V1(gtrgm_picksplit);

/* Number of one-bits in an unsigned byte */
static const uint8 number_of_ones[256] = {
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};


/* Input function of the gtrgm type: always raises "not implemented" */
Datum
gtrgm_in(PG_FUNCTION_ARGS)
{
	elog(ERROR, "not implemented");
	PG_RETURN_DATUM(0);
}

/* Output function of the gtrgm type: always raises "not implemented" */
Datum
gtrgm_out(PG_FUNCTION_ARGS)
{
	elog(ERROR, "not implemented");
	PG_RETURN_DATUM(0);
}

/*
 * Build the signature of a trigram array: clear the bit vector, set bit
 * number SIGLENBIT, then set the hashed bit of every trigram in "a".
 */
static void
makesign(BITVECP sign, TRGM *a)
{
	int32		k,
				len = ARRNELEM(a);
	trgm	   *ptr = GETARR(a);
	int32		tmp = 0;

	MemSet((void *) sign, 0, sizeof(BITVEC));
	SETBIT(sign, SIGLENBIT);	/* set last unused bit */
	for (k = 0; k < len; k++)
	{
		/* copy the trigram into an int32 so it can be hashed */
		CPTRGM(((char *) &tmp), ptr + k);
		HASH(sign, tmp);
	}
}

/*
 * GiST compress method.
 *
 * A leaf key (text) is replaced by its trigram array.  A signature key in
 * which every byte is 0xff is replaced by a header-only ALLISTRUE key.
 * Anything else is returned unchanged.
 */
Datum
gtrgm_compress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *retval = entry;

	if (entry->leafkey)
	{							/* trgm */
		TRGM	   *res;
		text	   *val = DatumGetTextP(entry->key);

		res = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(res),
					  entry->rel, entry->page,
					  entry->offset, false);
	}
	else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
			 !ISALLTRUE(DatumGetPointer(entry->key)))
	{
		int32		i,
					len;
		TRGM	   *res;
		BITVECP		sign = GETSIGN(DatumGetPointer(entry->key));

		/* any byte that is not all ones: keep the entry as it is */
		LOOPBYTE
		{
			if ((sign[i] & 0xff) != 0xff)
				PG_RETURN_POINTER(retval);
		}

		/* every bit is set: store just the header with the ALLISTRUE flag */
		len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
		res = (TRGM *) palloc(len);
		SET_VARSIZE(res, len);
		res->flag = SIGNKEY | ALLISTRUE;

		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(res),
					  entry->rel, entry->page,
					  entry->offset, false);
	}
	PG_RETURN_POINTER(retval);
}

/*
 * GiST decompress method: hand back a new entry only when DatumGetTextP()
 * returned a pointer different from the stored key.
 */
Datum
gtrgm_decompress(PG_FUNCTION_ARGS)
{
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *retval;
	text	   *key;

	key = DatumGetTextP(entry->key);

	if (key != (text *) DatumGetPointer(entry->key))
	{
		/* need to pass back the decompressed item */
		retval = palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(key),
					  entry->rel, entry->page, entry->offset, entry->leafkey);
		PG_RETURN_POINTER(retval);
	}
	else
	{
		/* we can return the entry as-is */
		PG_RETURN_POINTER(entry);
	}
}

| static int32 169 | cnt_sml_sign_common(TRGM *qtrg, BITVECP sign) 170 | { 171 | int32 count = 0; 172 | int32 k, 173 | len = ARRNELEM(qtrg); 174 | trgm *ptr = GETARR(qtrg); 175 | int32 tmp = 0; 176 | 177 | for (k = 0; k < len; k++) 178 | { 179 | CPTRGM(((char *) &tmp), ptr + k); 180 | count += GETBIT(sign, HASHVAL(tmp)); 181 | } 182 | 183 | return count; 184 | } 185 | 186 | Datum 187 | gtrgm_consistent(PG_FUNCTION_ARGS) 188 | { 189 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); 190 | text *query = PG_GETARG_TEXT_P(1); 191 | StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); 192 | 193 | /* Oid subtype = PG_GETARG_OID(3); */ 194 | bool *recheck = (bool *) PG_GETARG_POINTER(4); 195 | TRGM *key = (TRGM *) DatumGetPointer(entry->key); 196 | TRGM *qtrg; 197 | bool res; 198 | Size querysize = VARSIZE(query); 199 | gtrgm_consistent_cache *cache; 200 | double nlimit; 201 | 202 | /* 203 | * We keep the extracted trigrams in cache, because trigram extraction is 204 | * relatively CPU-expensive. When trying to reuse a cached value, check 205 | * strategy number not just query itself, because trigram extraction 206 | * depends on strategy. 207 | * 208 | * The cached structure is a single palloc chunk containing the 209 | * gtrgm_consistent_cache header, then the input query (starting at a 210 | * MAXALIGN boundary), then the TRGM value (also starting at a MAXALIGN 211 | * boundary). However we don't try to include the regex graph (if any) in 212 | * that struct. (XXX currently, this approach can leak regex graphs 213 | * across index rescans. Not clear if that's worth fixing.) 
214 | */ 215 | cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra; 216 | if (cache == NULL || 217 | cache->strategy != strategy || 218 | VARSIZE(cache->query) != querysize || 219 | memcmp((char *) cache->query, (char *) query, querysize) != 0) 220 | { 221 | gtrgm_consistent_cache *newcache; 222 | TrgmPackedGraph *graph = NULL; 223 | Size qtrgsize; 224 | 225 | switch (strategy) 226 | { 227 | case SimilarityStrategyNumber: 228 | case SubstringSimilarityStrategyNumber: 229 | qtrg = generate_trgm(VARDATA(query), 230 | querysize - VARHDRSZ); 231 | break; 232 | case ILikeStrategyNumber: 233 | #ifndef IGNORECASE 234 | elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); 235 | #endif 236 | /* FALL THRU */ 237 | case LikeStrategyNumber: 238 | qtrg = generate_wildcard_trgm(VARDATA(query), 239 | querysize - VARHDRSZ); 240 | break; 241 | case RegExpICaseStrategyNumber: 242 | #ifndef IGNORECASE 243 | elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); 244 | #endif 245 | /* FALL THRU */ 246 | case RegExpStrategyNumber: 247 | qtrg = createTrgmNFA(query, PG_GET_COLLATION(), 248 | &graph, fcinfo->flinfo->fn_mcxt); 249 | /* just in case an empty array is returned ... */ 250 | if (qtrg && ARRNELEM(qtrg) <= 0) 251 | { 252 | pfree(qtrg); 253 | qtrg = NULL; 254 | } 255 | break; 256 | default: 257 | elog(ERROR, "unrecognized strategy number: %d", strategy); 258 | qtrg = NULL; /* keep compiler quiet */ 259 | break; 260 | } 261 | 262 | qtrgsize = qtrg ? 
VARSIZE(qtrg) : 0; 263 | 264 | newcache = (gtrgm_consistent_cache *) 265 | MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, 266 | MAXALIGN(sizeof(gtrgm_consistent_cache)) + 267 | MAXALIGN(querysize) + 268 | qtrgsize); 269 | 270 | newcache->strategy = strategy; 271 | newcache->query = (text *) 272 | ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache))); 273 | memcpy((char *) newcache->query, (char *) query, querysize); 274 | if (qtrg) 275 | { 276 | newcache->trigrams = (TRGM *) 277 | ((char *) newcache->query + MAXALIGN(querysize)); 278 | memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize); 279 | /* release qtrg in case it was made in fn_mcxt */ 280 | pfree(qtrg); 281 | } 282 | else 283 | newcache->trigrams = NULL; 284 | newcache->graph = graph; 285 | 286 | if (cache) 287 | pfree(cache); 288 | fcinfo->flinfo->fn_extra = (void *) newcache; 289 | cache = newcache; 290 | } 291 | 292 | qtrg = cache->trigrams; 293 | 294 | switch (strategy) 295 | { 296 | case SimilarityStrategyNumber: 297 | case SubstringSimilarityStrategyNumber: 298 | /* Similarity search is exact. Substring similarity search is inexact */ 299 | *recheck = (strategy == SubstringSimilarityStrategyNumber); 300 | nlimit = (strategy == SimilarityStrategyNumber) ? 301 | trgm_limit : trgm_substring_limit; 302 | 303 | if (GIST_LEAF(entry)) 304 | { /* all leafs contains orig trgm */ 305 | /* 306 | * Prevent gcc optimizing the tmpsml variable using volatile 307 | * keyword. Otherwise comparison of nlimit and tmpsml may give 308 | * wrong results. 
309 | */ 310 | float4 volatile tmpsml = cnt_sml(qtrg, key, *recheck); 311 | 312 | /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */ 313 | res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml >= nlimit); 314 | } 315 | else if (ISALLTRUE(key)) 316 | { /* non-leaf contains signature */ 317 | res = true; 318 | } 319 | else 320 | { /* non-leaf contains signature */ 321 | int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key)); 322 | int32 len = ARRNELEM(qtrg); 323 | 324 | if (len == 0) 325 | res = false; 326 | else 327 | res = (((((float8) count) / ((float8) len))) >= nlimit); 328 | } 329 | break; 330 | case ILikeStrategyNumber: 331 | #ifndef IGNORECASE 332 | elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); 333 | #endif 334 | /* FALL THRU */ 335 | case LikeStrategyNumber: 336 | /* Wildcard search is inexact */ 337 | *recheck = true; 338 | 339 | /* 340 | * Check if all the extracted trigrams can be present in child 341 | * nodes. 342 | */ 343 | if (GIST_LEAF(entry)) 344 | { /* all leafs contains orig trgm */ 345 | res = trgm_contained_by(qtrg, key); 346 | } 347 | else if (ISALLTRUE(key)) 348 | { /* non-leaf contains signature */ 349 | res = true; 350 | } 351 | else 352 | { /* non-leaf contains signature */ 353 | int32 k, 354 | tmp = 0, 355 | len = ARRNELEM(qtrg); 356 | trgm *ptr = GETARR(qtrg); 357 | BITVECP sign = GETSIGN(key); 358 | 359 | res = true; 360 | for (k = 0; k < len; k++) 361 | { 362 | CPTRGM(((char *) &tmp), ptr + k); 363 | if (!GETBIT(sign, HASHVAL(tmp))) 364 | { 365 | res = false; 366 | break; 367 | } 368 | } 369 | } 370 | break; 371 | case RegExpICaseStrategyNumber: 372 | #ifndef IGNORECASE 373 | elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); 374 | #endif 375 | /* FALL THRU */ 376 | case RegExpStrategyNumber: 377 | /* Regexp search is inexact */ 378 | *recheck = true; 379 | 380 | /* Check regex match as much as we can with available info */ 381 | if (qtrg) 382 | { 383 | if (GIST_LEAF(entry)) 384 | { /* all leafs contains orig 
trgm */ 385 | bool *check; 386 | 387 | check = trgm_presence_map(qtrg, key); 388 | res = trigramsMatchGraph(cache->graph, check); 389 | pfree(check); 390 | } 391 | else if (ISALLTRUE(key)) 392 | { /* non-leaf contains signature */ 393 | res = true; 394 | } 395 | else 396 | { /* non-leaf contains signature */ 397 | int32 k, 398 | tmp = 0, 399 | len = ARRNELEM(qtrg); 400 | trgm *ptr = GETARR(qtrg); 401 | BITVECP sign = GETSIGN(key); 402 | bool *check; 403 | 404 | /* 405 | * GETBIT() tests may give false positives, due to limited 406 | * size of the sign array. But since trigramsMatchGraph() 407 | * implements a monotone boolean function, false positives 408 | * in the check array can't lead to false negative answer. 409 | * So we can apply trigramsMatchGraph despite uncertainty, 410 | * and that usefully improves the quality of the search. 411 | */ 412 | check = (bool *) palloc(len * sizeof(bool)); 413 | for (k = 0; k < len; k++) 414 | { 415 | CPTRGM(((char *) &tmp), ptr + k); 416 | check[k] = GETBIT(sign, HASHVAL(tmp)); 417 | } 418 | res = trigramsMatchGraph(cache->graph, check); 419 | pfree(check); 420 | } 421 | } 422 | else 423 | { 424 | /* trigram-free query must be rechecked everywhere */ 425 | res = true; 426 | } 427 | break; 428 | default: 429 | elog(ERROR, "unrecognized strategy number: %d", strategy); 430 | res = false; /* keep compiler quiet */ 431 | break; 432 | } 433 | 434 | PG_RETURN_BOOL(res); 435 | } 436 | 437 | Datum 438 | gtrgm_distance(PG_FUNCTION_ARGS) 439 | { 440 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); 441 | text *query = PG_GETARG_TEXT_P(1); 442 | StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); 443 | 444 | /* Oid subtype = PG_GETARG_OID(3); */ 445 | TRGM *key = (TRGM *) DatumGetPointer(entry->key); 446 | TRGM *qtrg; 447 | float8 res; 448 | Size querysize = VARSIZE(query); 449 | char *cache = (char *) fcinfo->flinfo->fn_extra; 450 | 451 | /* 452 | * Cache the generated trigrams across multiple calls with the same 
query. 453 | */ 454 | if (cache == NULL || 455 | VARSIZE(cache) != querysize || 456 | memcmp(cache, query, querysize) != 0) 457 | { 458 | char *newcache; 459 | 460 | qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ); 461 | 462 | newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, 463 | MAXALIGN(querysize) + 464 | VARSIZE(qtrg)); 465 | 466 | memcpy(newcache, query, querysize); 467 | memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg)); 468 | 469 | if (cache) 470 | pfree(cache); 471 | fcinfo->flinfo->fn_extra = newcache; 472 | cache = newcache; 473 | } 474 | 475 | qtrg = (TRGM *) (cache + MAXALIGN(querysize)); 476 | 477 | switch (strategy) 478 | { 479 | case DistanceStrategyNumber: 480 | if (GIST_LEAF(entry)) 481 | { /* all leafs contains orig trgm */ 482 | /* 483 | * Prevent gcc optimizing the sml variable using volatile 484 | * keyword. Otherwise res can differ from the 485 | * substring_similarity_dist_op() function. 486 | */ 487 | float4 volatile sml = cnt_sml(qtrg, key, false); 488 | res = 1.0 - sml; 489 | } 490 | else if (ISALLTRUE(key)) 491 | { /* all leafs contains orig trgm */ 492 | res = 0.0; 493 | } 494 | else 495 | { /* non-leaf contains signature */ 496 | int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key)); 497 | int32 len = ARRNELEM(qtrg); 498 | 499 | res = (len == 0) ? 
-1.0 : 1.0 - ((float8) count) / ((float8) len); 500 | } 501 | break; 502 | default: 503 | elog(ERROR, "unrecognized strategy number: %d", strategy); 504 | res = 0; /* keep compiler quiet */ 505 | break; 506 | } 507 | 508 | PG_RETURN_FLOAT8(res); 509 | } 510 | 511 | static int32 512 | unionkey(BITVECP sbase, TRGM *add) 513 | { 514 | int32 i; 515 | 516 | if (ISSIGNKEY(add)) 517 | { 518 | BITVECP sadd = GETSIGN(add); 519 | 520 | if (ISALLTRUE(add)) 521 | return 1; 522 | 523 | LOOPBYTE 524 | sbase[i] |= sadd[i]; 525 | } 526 | else 527 | { 528 | trgm *ptr = GETARR(add); 529 | int32 tmp = 0; 530 | 531 | for (i = 0; i < ARRNELEM(add); i++) 532 | { 533 | CPTRGM(((char *) &tmp), ptr + i); 534 | HASH(sbase, tmp); 535 | } 536 | } 537 | return 0; 538 | } 539 | 540 | 541 | Datum 542 | gtrgm_union(PG_FUNCTION_ARGS) 543 | { 544 | GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); 545 | int32 len = entryvec->n; 546 | int *size = (int *) PG_GETARG_POINTER(1); 547 | BITVEC base; 548 | int32 i; 549 | int32 flag = 0; 550 | TRGM *result; 551 | 552 | MemSet((void *) base, 0, sizeof(BITVEC)); 553 | for (i = 0; i < len; i++) 554 | { 555 | if (unionkey(base, GETENTRY(entryvec, i))) 556 | { 557 | flag = ALLISTRUE; 558 | break; 559 | } 560 | } 561 | 562 | flag |= SIGNKEY; 563 | len = CALCGTSIZE(flag, 0); 564 | result = (TRGM *) palloc(len); 565 | SET_VARSIZE(result, len); 566 | result->flag = flag; 567 | if (!ISALLTRUE(result)) 568 | memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC)); 569 | *size = len; 570 | 571 | PG_RETURN_POINTER(result); 572 | } 573 | 574 | Datum 575 | gtrgm_same(PG_FUNCTION_ARGS) 576 | { 577 | TRGM *a = (TRGM *) PG_GETARG_POINTER(0); 578 | TRGM *b = (TRGM *) PG_GETARG_POINTER(1); 579 | bool *result = (bool *) PG_GETARG_POINTER(2); 580 | 581 | if (ISSIGNKEY(a)) 582 | { /* then b also ISSIGNKEY */ 583 | if (ISALLTRUE(a) && ISALLTRUE(b)) 584 | *result = true; 585 | else if (ISALLTRUE(a)) 586 | *result = false; 587 | else if (ISALLTRUE(b)) 
588 | *result = false; 589 | else 590 | { 591 | int32 i; 592 | BITVECP sa = GETSIGN(a), 593 | sb = GETSIGN(b); 594 | 595 | *result = true; 596 | LOOPBYTE 597 | { 598 | if (sa[i] != sb[i]) 599 | { 600 | *result = false; 601 | break; 602 | } 603 | } 604 | } 605 | } 606 | else 607 | { /* a and b ISARRKEY */ 608 | int32 lena = ARRNELEM(a), 609 | lenb = ARRNELEM(b); 610 | 611 | if (lena != lenb) 612 | *result = false; 613 | else 614 | { 615 | trgm *ptra = GETARR(a), 616 | *ptrb = GETARR(b); 617 | int32 i; 618 | 619 | *result = true; 620 | for (i = 0; i < lena; i++) 621 | if (CMPTRGM(ptra + i, ptrb + i)) 622 | { 623 | *result = false; 624 | break; 625 | } 626 | } 627 | } 628 | 629 | PG_RETURN_POINTER(result); 630 | } 631 | 632 | static int32 633 | sizebitvec(BITVECP sign) 634 | { 635 | int32 size = 0, 636 | i; 637 | 638 | LOOPBYTE 639 | size += number_of_ones[(unsigned char) sign[i]]; 640 | return size; 641 | } 642 | 643 | static int 644 | hemdistsign(BITVECP a, BITVECP b) 645 | { 646 | int i, 647 | diff, 648 | dist = 0; 649 | 650 | LOOPBYTE 651 | { 652 | diff = (unsigned char) (a[i] ^ b[i]); 653 | dist += number_of_ones[diff]; 654 | } 655 | return dist; 656 | } 657 | 658 | static int 659 | hemdist(TRGM *a, TRGM *b) 660 | { 661 | if (ISALLTRUE(a)) 662 | { 663 | if (ISALLTRUE(b)) 664 | return 0; 665 | else 666 | return SIGLENBIT - sizebitvec(GETSIGN(b)); 667 | } 668 | else if (ISALLTRUE(b)) 669 | return SIGLENBIT - sizebitvec(GETSIGN(a)); 670 | 671 | return hemdistsign(GETSIGN(a), GETSIGN(b)); 672 | } 673 | 674 | Datum 675 | gtrgm_penalty(PG_FUNCTION_ARGS) 676 | { 677 | GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */ 678 | GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1); 679 | float *penalty = (float *) PG_GETARG_POINTER(2); 680 | TRGM *origval = (TRGM *) DatumGetPointer(origentry->key); 681 | TRGM *newval = (TRGM *) DatumGetPointer(newentry->key); 682 | BITVECP orig = GETSIGN(origval); 683 | 684 | *penalty = 0.0; 685 | 686 | if 
(ISARRKEY(newval)) 687 | { 688 | char *cache = (char *) fcinfo->flinfo->fn_extra; 689 | TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(sizeof(BITVEC))); 690 | Size newvalsize = VARSIZE(newval); 691 | BITVECP sign; 692 | 693 | /* 694 | * Cache the sign data across multiple calls with the same newval. 695 | */ 696 | if (cache == NULL || 697 | VARSIZE(cachedVal) != newvalsize || 698 | memcmp(cachedVal, newval, newvalsize) != 0) 699 | { 700 | char *newcache; 701 | 702 | newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, 703 | MAXALIGN(sizeof(BITVEC)) + 704 | newvalsize); 705 | 706 | makesign((BITVECP) newcache, newval); 707 | 708 | cachedVal = (TRGM *) (newcache + MAXALIGN(sizeof(BITVEC))); 709 | memcpy(cachedVal, newval, newvalsize); 710 | 711 | if (cache) 712 | pfree(cache); 713 | fcinfo->flinfo->fn_extra = newcache; 714 | cache = newcache; 715 | } 716 | 717 | sign = (BITVECP) cache; 718 | 719 | if (ISALLTRUE(origval)) 720 | *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1); 721 | else 722 | *penalty = hemdistsign(sign, orig); 723 | } 724 | else 725 | *penalty = hemdist(origval, newval); 726 | PG_RETURN_POINTER(penalty); 727 | } 728 | 729 | typedef struct 730 | { 731 | bool allistrue; 732 | BITVEC sign; 733 | } CACHESIGN; 734 | 735 | static void 736 | fillcache(CACHESIGN *item, TRGM *key) 737 | { 738 | item->allistrue = false; 739 | if (ISARRKEY(key)) 740 | makesign(item->sign, key); 741 | else if (ISALLTRUE(key)) 742 | item->allistrue = true; 743 | else 744 | memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC)); 745 | } 746 | 747 | #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) 748 | typedef struct 749 | { 750 | OffsetNumber pos; 751 | int32 cost; 752 | } SPLITCOST; 753 | 754 | static int 755 | comparecost(const void *a, const void *b) 756 | { 757 | if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost) 758 | return 0; 759 | else 760 | return (((const SPLITCOST *) a)->cost > 
((const SPLITCOST *) b)->cost) ? 1 : -1; 761 | } 762 | 763 | 764 | static int 765 | hemdistcache(CACHESIGN *a, CACHESIGN *b) 766 | { 767 | if (a->allistrue) 768 | { 769 | if (b->allistrue) 770 | return 0; 771 | else 772 | return SIGLENBIT - sizebitvec(b->sign); 773 | } 774 | else if (b->allistrue) 775 | return SIGLENBIT - sizebitvec(a->sign); 776 | 777 | return hemdistsign(a->sign, b->sign); 778 | } 779 | 780 | Datum 781 | gtrgm_picksplit(PG_FUNCTION_ARGS) 782 | { 783 | GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); 784 | OffsetNumber maxoff = entryvec->n - 2; 785 | GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); 786 | OffsetNumber k, 787 | j; 788 | TRGM *datum_l, 789 | *datum_r; 790 | BITVECP union_l, 791 | union_r; 792 | int32 size_alpha, 793 | size_beta; 794 | int32 size_waste, 795 | waste = -1; 796 | int32 nbytes; 797 | OffsetNumber seed_1 = 0, 798 | seed_2 = 0; 799 | OffsetNumber *left, 800 | *right; 801 | BITVECP ptr; 802 | int i; 803 | CACHESIGN *cache; 804 | SPLITCOST *costvector; 805 | 806 | /* cache the sign data for each existing item */ 807 | cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2)); 808 | for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k)) 809 | fillcache(&cache[k], GETENTRY(entryvec, k)); 810 | 811 | /* now find the two furthest-apart items */ 812 | for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) 813 | { 814 | for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) 815 | { 816 | size_waste = hemdistcache(&(cache[j]), &(cache[k])); 817 | if (size_waste > waste) 818 | { 819 | waste = size_waste; 820 | seed_1 = k; 821 | seed_2 = j; 822 | } 823 | } 824 | } 825 | 826 | /* just in case we didn't make a selection ... 
*/ 827 | if (seed_1 == 0 || seed_2 == 0) 828 | { 829 | seed_1 = 1; 830 | seed_2 = 2; 831 | } 832 | 833 | /* initialize the result vectors */ 834 | nbytes = (maxoff + 2) * sizeof(OffsetNumber); 835 | v->spl_left = left = (OffsetNumber *) palloc(nbytes); 836 | v->spl_right = right = (OffsetNumber *) palloc(nbytes); 837 | v->spl_nleft = 0; 838 | v->spl_nright = 0; 839 | 840 | /* form initial .. */ 841 | if (cache[seed_1].allistrue) 842 | { 843 | datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); 844 | SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); 845 | datum_l->flag = SIGNKEY | ALLISTRUE; 846 | } 847 | else 848 | { 849 | datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0)); 850 | SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0)); 851 | datum_l->flag = SIGNKEY; 852 | memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC)); 853 | } 854 | if (cache[seed_2].allistrue) 855 | { 856 | datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); 857 | SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); 858 | datum_r->flag = SIGNKEY | ALLISTRUE; 859 | } 860 | else 861 | { 862 | datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0)); 863 | SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0)); 864 | datum_r->flag = SIGNKEY; 865 | memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC)); 866 | } 867 | 868 | union_l = GETSIGN(datum_l); 869 | union_r = GETSIGN(datum_r); 870 | maxoff = OffsetNumberNext(maxoff); 871 | fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff)); 872 | /* sort before ... 
*/ 873 | costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); 874 | for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) 875 | { 876 | costvector[j - 1].pos = j; 877 | size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j])); 878 | size_beta = hemdistcache(&(cache[seed_2]), &(cache[j])); 879 | costvector[j - 1].cost = abs(size_alpha - size_beta); 880 | } 881 | qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); 882 | 883 | for (k = 0; k < maxoff; k++) 884 | { 885 | j = costvector[k].pos; 886 | if (j == seed_1) 887 | { 888 | *left++ = j; 889 | v->spl_nleft++; 890 | continue; 891 | } 892 | else if (j == seed_2) 893 | { 894 | *right++ = j; 895 | v->spl_nright++; 896 | continue; 897 | } 898 | 899 | if (ISALLTRUE(datum_l) || cache[j].allistrue) 900 | { 901 | if (ISALLTRUE(datum_l) && cache[j].allistrue) 902 | size_alpha = 0; 903 | else 904 | size_alpha = SIGLENBIT - sizebitvec( 905 | (cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign) 906 | ); 907 | } 908 | else 909 | size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l)); 910 | 911 | if (ISALLTRUE(datum_r) || cache[j].allistrue) 912 | { 913 | if (ISALLTRUE(datum_r) && cache[j].allistrue) 914 | size_beta = 0; 915 | else 916 | size_beta = SIGLENBIT - sizebitvec( 917 | (cache[j].allistrue) ? 
GETSIGN(datum_r) : GETSIGN(cache[j].sign) 918 | ); 919 | } 920 | else 921 | size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r)); 922 | 923 | if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) 924 | { 925 | if (ISALLTRUE(datum_l) || cache[j].allistrue) 926 | { 927 | if (!ISALLTRUE(datum_l)) 928 | MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC)); 929 | } 930 | else 931 | { 932 | ptr = cache[j].sign; 933 | LOOPBYTE 934 | union_l[i] |= ptr[i]; 935 | } 936 | *left++ = j; 937 | v->spl_nleft++; 938 | } 939 | else 940 | { 941 | if (ISALLTRUE(datum_r) || cache[j].allistrue) 942 | { 943 | if (!ISALLTRUE(datum_r)) 944 | MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC)); 945 | } 946 | else 947 | { 948 | ptr = cache[j].sign; 949 | LOOPBYTE 950 | union_r[i] |= ptr[i]; 951 | } 952 | *right++ = j; 953 | v->spl_nright++; 954 | } 955 | } 956 | 957 | *right = *left = FirstOffsetNumber; 958 | v->spl_ldatum = PointerGetDatum(datum_l); 959 | v->spl_rdatum = PointerGetDatum(datum_r); 960 | 961 | PG_RETURN_POINTER(v); 962 | } 963 | -------------------------------------------------------------------------------- /trgm_op.c: -------------------------------------------------------------------------------- 1 | /* 2 | * contrib/pg_trgm/trgm_op.c 3 | */ 4 | #include "postgres.h" 5 | 6 | #include 7 | 8 | #include "trgm.h" 9 | 10 | #include "catalog/pg_type.h" 11 | #include "tsearch/ts_locale.h" 12 | #include "utils/lsyscache.h" 13 | #include "utils/memutils.h" 14 | #include "utils/pg_crc.h" 15 | 16 | #if PG_VERSION_NUM >= 90500 17 | /* 18 | * We have to keep same checksum algorithm as in pre-9.5 in order to be 19 | * pg_upgradeable. 
20 | */ 21 | #define INIT_CRC32 INIT_LEGACY_CRC32 22 | #define FIN_CRC32 FIN_LEGACY_CRC32 23 | #define COMP_CRC32 COMP_LEGACY_CRC32 24 | #endif 25 | 26 | PG_MODULE_MAGIC; 27 | 28 | double trgm_limit = 0.3f; 29 | double trgm_substring_limit = 0.6f; 30 | 31 | PG_FUNCTION_INFO_V1(set_limit); 32 | PG_FUNCTION_INFO_V1(show_limit); 33 | PG_FUNCTION_INFO_V1(set_substring_limit); 34 | PG_FUNCTION_INFO_V1(show_substring_limit); 35 | PG_FUNCTION_INFO_V1(show_trgm); 36 | PG_FUNCTION_INFO_V1(similarity); 37 | PG_FUNCTION_INFO_V1(substring_similarity); 38 | PG_FUNCTION_INFO_V1(similarity_dist); 39 | PG_FUNCTION_INFO_V1(similarity_op); 40 | PG_FUNCTION_INFO_V1(substring_similarity_op); 41 | PG_FUNCTION_INFO_V1(substring_similarity_commutator_op); 42 | 43 | /* Trigram with position */ 44 | typedef struct 45 | { 46 | trgm trg; 47 | int index; 48 | } pos_trgm; 49 | 50 | Datum 51 | set_limit(PG_FUNCTION_ARGS) 52 | { 53 | float4 nlimit = PG_GETARG_FLOAT4(0); 54 | 55 | if (nlimit < 0 || nlimit > 1.0) 56 | elog(ERROR, "wrong limit, should be between 0 and 1"); 57 | trgm_limit = nlimit; 58 | 59 | PG_RETURN_FLOAT4(trgm_limit); 60 | } 61 | 62 | Datum 63 | show_limit(PG_FUNCTION_ARGS) 64 | { 65 | PG_RETURN_FLOAT4(trgm_limit); 66 | } 67 | 68 | Datum 69 | set_substring_limit(PG_FUNCTION_ARGS) 70 | { 71 | float4 nlimit = PG_GETARG_FLOAT4(0); 72 | 73 | if (nlimit < 0 || nlimit > 1.0) 74 | elog(ERROR, "wrong limit, should be between 0 and 1"); 75 | trgm_substring_limit = nlimit; 76 | PG_RETURN_FLOAT4(trgm_substring_limit); 77 | } 78 | 79 | Datum 80 | show_substring_limit(PG_FUNCTION_ARGS) 81 | { 82 | PG_RETURN_FLOAT4(trgm_substring_limit); 83 | } 84 | 85 | static int 86 | comp_trgm(const void *a, const void *b) 87 | { 88 | return CMPTRGM(a, b); 89 | } 90 | 91 | static int 92 | unique_array(trgm *a, int len) 93 | { 94 | trgm *curend, 95 | *tmp; 96 | 97 | curend = tmp = a; 98 | while (tmp - a < len) 99 | if (CMPTRGM(tmp, curend)) 100 | { 101 | curend++; 102 | CPTRGM(curend, tmp); 103 | tmp++; 104 
| } 105 | else 106 | tmp++; 107 | 108 | return curend + 1 - a; 109 | } 110 | 111 | /* 112 | * Finds first word in string, returns pointer to the word, 113 | * endword points to the character after word 114 | */ 115 | static char * 116 | find_word(char *str, int lenstr, char **endword, int *charlen) 117 | { 118 | char *beginword = str; 119 | 120 | while (beginword - str < lenstr && !ISWORDCHR(beginword)) 121 | beginword += pg_mblen(beginword); 122 | 123 | if (beginword - str >= lenstr) 124 | return NULL; 125 | 126 | *endword = beginword; 127 | *charlen = 0; 128 | while (*endword - str < lenstr && ISWORDCHR(*endword)) 129 | { 130 | *endword += pg_mblen(*endword); 131 | (*charlen)++; 132 | } 133 | 134 | return beginword; 135 | } 136 | 137 | /* 138 | * Reduce a trigram (three possibly multi-byte characters) to a trgm, 139 | * which is always exactly three bytes. If we have three single-byte 140 | * characters, we just use them as-is; otherwise we form a hash value. 141 | */ 142 | void 143 | compact_trigram(trgm *tptr, char *str, int bytelen) 144 | { 145 | if (bytelen == 3) 146 | { 147 | CPTRGM(tptr, str); 148 | } 149 | else 150 | { 151 | pg_crc32 crc; 152 | 153 | INIT_CRC32(crc); 154 | COMP_CRC32(crc, str, bytelen); 155 | FIN_CRC32(crc); 156 | 157 | /* 158 | * use only 3 upper bytes from crc, hope, it's good enough hashing 159 | */ 160 | CPTRGM(tptr, &crc); 161 | } 162 | } 163 | 164 | /* 165 | * Adds trigrams from words (already padded). 
166 | */ 167 | static trgm * 168 | make_trigrams(trgm *tptr, char *str, int bytelen, int charlen) 169 | { 170 | char *ptr = str; 171 | 172 | if (charlen < 3) 173 | return tptr; 174 | 175 | if (bytelen > charlen) 176 | { 177 | /* Find multibyte character boundaries and apply compact_trigram */ 178 | int lenfirst = pg_mblen(str), 179 | lenmiddle = pg_mblen(str + lenfirst), 180 | lenlast = pg_mblen(str + lenfirst + lenmiddle); 181 | 182 | while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen) 183 | { 184 | compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast); 185 | 186 | ptr += lenfirst; 187 | tptr++; 188 | 189 | lenfirst = lenmiddle; 190 | lenmiddle = lenlast; 191 | lenlast = pg_mblen(ptr + lenfirst + lenmiddle); 192 | } 193 | } 194 | else 195 | { 196 | /* Fast path when there are no multibyte characters */ 197 | Assert(bytelen == charlen); 198 | 199 | while (ptr - str < bytelen - 2 /* number of trigrams = strlen - 2 */ ) 200 | { 201 | CPTRGM(tptr, ptr); 202 | ptr++; 203 | tptr++; 204 | } 205 | } 206 | 207 | return tptr; 208 | } 209 | 210 | /* 211 | * Make array of trigrams without sorting and removing duplicate items. 212 | * 213 | * trg: where to return the array of trigrams. 214 | * str: source string, of length slen bytes. 215 | * 216 | * Returns length of the generated array. 
217 | */ 218 | static int 219 | generate_trgm_only(trgm *trg, char *str, int slen) 220 | { 221 | trgm *tptr; 222 | char *buf; 223 | int charlen, 224 | bytelen; 225 | char *bword, 226 | *eword; 227 | 228 | if (slen + LPADDING + RPADDING < 3 || slen == 0) 229 | return 0; 230 | 231 | tptr = trg; 232 | 233 | /* Allocate a buffer for case-folded, blank-padded words */ 234 | buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4); 235 | 236 | if (LPADDING > 0) 237 | { 238 | *buf = ' '; 239 | if (LPADDING > 1) 240 | *(buf + 1) = ' '; 241 | } 242 | 243 | eword = str; 244 | while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL) 245 | { 246 | #ifdef IGNORECASE 247 | bword = lowerstr_with_len(bword, eword - bword); 248 | bytelen = strlen(bword); 249 | #else 250 | bytelen = eword - bword; 251 | #endif 252 | 253 | memcpy(buf + LPADDING, bword, bytelen); 254 | 255 | #ifdef IGNORECASE 256 | pfree(bword); 257 | #endif 258 | 259 | buf[LPADDING + bytelen] = ' '; 260 | buf[LPADDING + bytelen + 1] = ' '; 261 | 262 | /* 263 | * count trigrams 264 | */ 265 | tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING, 266 | charlen + LPADDING + RPADDING); 267 | } 268 | 269 | pfree(buf); 270 | 271 | return tptr - trg; 272 | } 273 | 274 | /* 275 | * Guard against possible overflow in the palloc requests below. (We 276 | * don't worry about the additive constants, since palloc can detect 277 | * requests that are a little above MaxAllocSize --- we just need to 278 | * prevent integer overflow in the multiplications.) 279 | */ 280 | static void 281 | protect_out_of_mem(int slen) 282 | { 283 | if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) || 284 | (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length())) 285 | ereport(ERROR, 286 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), 287 | errmsg("out of memory"))); 288 | } 289 | 290 | /* 291 | * Make array of trigrams with sorting and removing duplicate items. 
292 | * 293 | * str: source string, of length slen bytes. 294 | * 295 | * Returns the sorted array of unique trigrams. 296 | */ 297 | TRGM * 298 | generate_trgm(char *str, int slen) 299 | { 300 | TRGM *trg; 301 | int len; 302 | 303 | protect_out_of_mem(slen); 304 | 305 | trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); 306 | trg->flag = ARRKEY; 307 | 308 | len = generate_trgm_only(GETARR(trg), str, slen); 309 | SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); 310 | 311 | if (len == 0) 312 | return trg; 313 | 314 | /* 315 | * Make trigrams unique. 316 | */ 317 | if (len > 1) 318 | { 319 | qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); 320 | len = unique_array(GETARR(trg), len); 321 | } 322 | 323 | SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); 324 | 325 | return trg; 326 | } 327 | 328 | /* 329 | * Make array of positional trigrams from two trigram arrays trg1 and trg2. 330 | * 331 | * trg1: trigram array of search pattern, of length len1. trg1 is required 332 | * word which positions don't matter and replaced with -1. 333 | * trg2: trigram array of text, of length len2. trg2 is haystack where we 334 | * search and have to store its positions. 335 | * 336 | * Returns concatenated trigram array. 337 | */ 338 | static pos_trgm * 339 | make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2) 340 | { 341 | pos_trgm *result; 342 | int i, len = len1 + len2; 343 | 344 | result = (pos_trgm *) palloc(sizeof(pos_trgm) * len); 345 | 346 | for (i = 0; i < len1; i++) 347 | { 348 | memcpy(&result[i].trg, &trg1[i], sizeof(trgm)); 349 | result[i].index = -1; 350 | } 351 | 352 | for (i = 0; i < len2; i++) 353 | { 354 | memcpy(&result[i + len1].trg, &trg2[i], sizeof(trgm)); 355 | result[i + len1].index = i; 356 | } 357 | 358 | return result; 359 | } 360 | 361 | /* 362 | * Compare position trigrams: compare trigrams first and position second. 
363 | */ 364 | static int 365 | comp_ptrgm(const void *v1, const void *v2) 366 | { 367 | const pos_trgm *p1 = (const pos_trgm *)v1; 368 | const pos_trgm *p2 = (const pos_trgm *)v2; 369 | int cmp; 370 | 371 | cmp = CMPTRGM(p1->trg, p2->trg); 372 | if (cmp != 0) 373 | return cmp; 374 | 375 | if (p1->index < p2->index) 376 | return -1; 377 | else if (p1->index == p2->index) 378 | return 0; 379 | else 380 | return 1; 381 | } 382 | 383 | /* 384 | * Iterative search function which calculates maximum similarity with word in 385 | * the string. But maximum similarity is calculated only if check_only == false. 386 | * 387 | * trg2indexes: array which stores indexes of the array "found". 388 | * found: array which stores true of false values. 389 | * ulen1: count of unique trigrams of array "trg1". 390 | * len2: length of array "trg2" and array "trg2indexes". 391 | * len: length of the array "found". 392 | * check_only: if true then only check existaince of similar search pattern in 393 | * text. 394 | * 395 | * Returns substring similarity. 
*/
static float4
iterate_substring_similarity(int *trg2indexes,
							 bool *found,
							 int ulen1,
							 int len2,
							 int len,
							 bool check_only)
{
	int		   *lastpos,
				i,
				ulen2 = 0,
				count = 0,
				upper = -1,
				lower = -1;
	float4		smlr_cur,
				smlr_max = 0.0f;

	/*
	 * Memorise last position of each trigram.  The memset sets every byte
	 * to 0xff, so each entry starts out as -1, meaning "not seen".
	 */
	lastpos = (int *) palloc(sizeof(int) * len);
	memset(lastpos, -1, sizeof(int) * len);

	/*
	 * Walk over the text trigrams in order.  [lower, upper] is the current
	 * window of positions; ulen2 and count track the number of distinct
	 * trigrams in it and how many of those also occur in the pattern.
	 */
	for (i = 0; i < len2; i++)
	{
		/* Get index of next trigram */
		int			trgindex = trg2indexes[i];

		/*
		 * Update last position of this trigram.  Nothing is tracked until
		 * the window has been opened by a trigram found in the pattern.
		 */
		if (lower >= 0 || found[trgindex])
		{
			if (lastpos[trgindex] < 0)
			{
				ulen2++;
				if (found[trgindex])
					count++;
			}
			lastpos[trgindex] = i;
		}

		/* Adjust upper bound if this trigram is present in required substring */
		if (found[trgindex])
		{
			int			prev_lower,
						tmp_ulen2,
						tmp_lower,
						tmp_count;

			upper = i;
			if (lower == -1)
			{
				/* first matching trigram: the window opens here */
				lower = i;
				ulen2 = 1;
			}

			smlr_cur = CALCSML(count, ulen1, ulen2);

			/*
			 * Also try to adjust lower bound for greater similarity: drop
			 * positions from the front one at a time and keep the best
			 * window seen.
			 */
			tmp_count = count;
			tmp_ulen2 = ulen2;
			prev_lower = lower;
			for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++)
			{
				float		smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
				int			tmp_trgindex;

				if (smlr_tmp > smlr_cur)
				{
					smlr_cur = smlr_tmp;
					ulen2 = tmp_ulen2;
					lower = tmp_lower;
					count = tmp_count;
				}

				/*
				 * if we only check that substring similarity is greater than
				 * trgm_substring_limit we do not need to calculate
				 * a maximum similarity.
				 */
				if (check_only && smlr_cur >= trgm_substring_limit)
					break;

				/*
				 * Dropping position tmp_lower removes its trigram from the
				 * window only if this was the trigram's last occurrence.
				 */
				tmp_trgindex = trg2indexes[tmp_lower];
				if (lastpos[tmp_trgindex] == tmp_lower)
				{
					tmp_ulen2--;
					if (found[tmp_trgindex])
						tmp_count--;
				}
			}

			smlr_max = Max(smlr_max, smlr_cur);

			/*
			 * if we only check that substring similarity is greater than
			 * trgm_substring_limit we do not need to calculate a
			 * maximum similarity
			 */
			if (check_only && smlr_max >= trgm_substring_limit)
				break;

			/*
			 * Forget the trigrams whose last occurrence lies in the part of
			 * the window that was just cut off, [prev_lower, lower).
			 */
			for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++)
			{
				int			tmp_trgindex;
				tmp_trgindex = trg2indexes[tmp_lower];
				if (lastpos[tmp_trgindex] == tmp_lower)
					lastpos[tmp_trgindex] = -1;
			}
		}
	}

	pfree(lastpos);

	return smlr_max;
}

/*
 * Calculate substring similarity.
 * This function prepares two arrays: "trg2indexes" and "found". Then these
 * arrays are used to calculate substring similarity using
 * iterate_substring_similarity().
 *
 * "trg2indexes" is an array which stores indexes into the array "found".
 * In other words:
 * trg2indexes[j] = i;
 * found[i] = true (or false);
 * If found[i] == true then trigram trg2[j] is present in array "trg1".
 * If found[i] == false then trigram trg2[j] is not present in array "trg1".
 *
 * str1: search pattern string, of length slen1 bytes.
 * str2: text in which we are looking for a word, of length slen2 bytes.
 * check_only: if true then only check existence of a similar search pattern
 * in the text.
 *
 * Returns substring similarity.
528 | */ 529 | static float4 530 | calc_substring_similarity(char *str1, int slen1, char *str2, int slen2, 531 | bool check_only) 532 | { 533 | bool *found; 534 | pos_trgm *ptrg; 535 | trgm *trg1; 536 | trgm *trg2; 537 | int len1, 538 | len2, 539 | len, 540 | i, 541 | j, 542 | ulen1; 543 | int *trg2indexes; 544 | float4 result; 545 | 546 | protect_out_of_mem(slen1 + slen2); 547 | 548 | /* Make positional trigrams */ 549 | trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3); 550 | trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3); 551 | 552 | len1 = generate_trgm_only(trg1, str1, slen1); 553 | len2 = generate_trgm_only(trg2, str2, slen2); 554 | 555 | ptrg = make_positional_trgm(trg1, len1, trg2, len2); 556 | len = len1 + len2; 557 | qsort(ptrg, len, sizeof(pos_trgm), comp_ptrgm); 558 | 559 | pfree(trg1); 560 | pfree(trg2); 561 | 562 | /* 563 | * Merge positional trigrams array: enumerate each trigram and find its 564 | * presence in required word. 565 | */ 566 | trg2indexes = (int *) palloc(sizeof(int) * len2); 567 | found = (bool *) palloc0(sizeof(bool) * len); 568 | 569 | ulen1 = 0; 570 | j = 0; 571 | for (i = 0; i < len; i++) 572 | { 573 | if (i > 0) 574 | { 575 | int cmp = CMPTRGM(ptrg[i - 1].trg, ptrg[i].trg); 576 | if (cmp != 0) 577 | { 578 | if (found[j]) 579 | ulen1++; 580 | j++; 581 | } 582 | } 583 | 584 | if (ptrg[i].index >= 0) 585 | { 586 | trg2indexes[ptrg[i].index] = j; 587 | } 588 | else 589 | { 590 | found[j] = true; 591 | } 592 | } 593 | if (found[j]) 594 | ulen1++; 595 | 596 | /* Run iterative procedure to find maximum similarity with word */ 597 | result = iterate_substring_similarity(trg2indexes, found, ulen1, len2, len, 598 | check_only); 599 | 600 | pfree(trg2indexes); 601 | pfree(found); 602 | pfree(ptrg); 603 | 604 | return result; 605 | } 606 | 607 | 608 | /* 609 | * Extract the next non-wildcard part of a search string, i.e. a word bounded 610 | * by '_' or '%' meta-characters, non-word characters or string end. 
*
* str: source string, of length lenstr bytes (need not be null-terminated)
* buf: where to return the substring (must be long enough)
* *bytelen: receives byte length of the found substring
* *charlen: receives character length of the found substring
*
* Returns pointer to end+1 of the found substring in the source string.
* Returns NULL if no word found (in which case buf, bytelen, charlen not set)
*
* If the found word is bounded by non-word characters or string boundaries
* then this function will include corresponding padding spaces into buf.
*/
static const char *
get_wildcard_part(const char *str, int lenstr,
				  char *buf, int *bytelen, int *charlen)
{
	const char *beginword = str;
	const char *endword;
	char	   *s = buf;
	bool		in_leading_wildcard_meta = false;
	bool		in_trailing_wildcard_meta = false;
	bool		in_escape = false;
	int			clen;

	/*
	 * Find the first word character, remembering whether preceding character
	 * was wildcard meta-character.  Note that the in_escape state persists
	 * from this loop to the next one, since we may exit at a word character
	 * that is in_escape.
	 */
	while (beginword - str < lenstr)
	{
		if (in_escape)
		{
			if (ISWORDCHR(beginword))
				break;
			/* an escaped non-word character is skipped like any other */
			in_escape = false;
			in_leading_wildcard_meta = false;
		}
		else
		{
			if (ISESCAPECHAR(beginword))
				in_escape = true;
			else if (ISWILDCARDCHAR(beginword))
				in_leading_wildcard_meta = true;
			else if (ISWORDCHR(beginword))
				break;
			else
				in_leading_wildcard_meta = false;
		}
		beginword += pg_mblen(beginword);
	}

	/*
	 * Handle string end.
	 */
	if (beginword - str >= lenstr)
		return NULL;

	/*
	 * Add left padding spaces if preceding character wasn't wildcard
	 * meta-character.
	 */
	*charlen = 0;
	if (!in_leading_wildcard_meta)
	{
		if (LPADDING > 0)
		{
			*s++ = ' ';
			(*charlen)++;
			if (LPADDING > 1)
			{
				*s++ = ' ';
				(*charlen)++;
			}
		}
	}

	/*
	 * Copy data into buf until wildcard meta-character, non-word character or
	 * string boundary.  Strip escapes during copy.
	 */
	endword = beginword;
	while (endword - str < lenstr)
	{
		clen = pg_mblen(endword);
		if (in_escape)
		{
			if (ISWORDCHR(endword))
			{
				/* escaped word character: copy it without the escape */
				memcpy(s, endword, clen);
				(*charlen)++;
				s += clen;
			}
			else
			{
				/*
				 * Back up endword to the escape character when stopping at an
				 * escaped char, so that subsequent get_wildcard_part will
				 * restart from the escape character.  We assume here that
				 * escape chars are single-byte.
				 */
				endword--;
				break;
			}
			in_escape = false;
		}
		else
		{
			if (ISESCAPECHAR(endword))
				in_escape = true;
			else if (ISWILDCARDCHAR(endword))
			{
				in_trailing_wildcard_meta = true;
				break;
			}
			else if (ISWORDCHR(endword))
			{
				memcpy(s, endword, clen);
				(*charlen)++;
				s += clen;
			}
			else
				break;
		}
		endword += clen;
	}

	/*
	 * Add right padding spaces if next character isn't wildcard
	 * meta-character.
	 */
	if (!in_trailing_wildcard_meta)
	{
		if (RPADDING > 0)
		{
			*s++ = ' ';
			(*charlen)++;
			if (RPADDING > 1)
			{
				*s++ = ' ';
				(*charlen)++;
			}
		}
	}

	/* report how many bytes were written to buf, padding included */
	*bytelen = s - buf;
	return endword;
}

/*
 * Generates trigrams for wildcard search string.
 *
 * Returns array of trigrams that must occur in any string that matches the
 * wildcard string.
For example, given pattern "a%bcd%" the trigrams 766 | * " a", "bcd" would be extracted. 767 | */ 768 | TRGM * 769 | generate_wildcard_trgm(const char *str, int slen) 770 | { 771 | TRGM *trg; 772 | char *buf, 773 | *buf2; 774 | trgm *tptr; 775 | int len, 776 | charlen, 777 | bytelen; 778 | const char *eword; 779 | 780 | protect_out_of_mem(slen); 781 | 782 | trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) *3); 783 | trg->flag = ARRKEY; 784 | SET_VARSIZE(trg, TRGMHDRSIZE); 785 | 786 | if (slen + LPADDING + RPADDING < 3 || slen == 0) 787 | return trg; 788 | 789 | tptr = GETARR(trg); 790 | 791 | /* Allocate a buffer for blank-padded, but not yet case-folded, words */ 792 | buf = palloc(sizeof(char) * (slen + 4)); 793 | 794 | /* 795 | * Extract trigrams from each substring extracted by get_wildcard_part. 796 | */ 797 | eword = str; 798 | while ((eword = get_wildcard_part(eword, slen - (eword - str), 799 | buf, &bytelen, &charlen)) != NULL) 800 | { 801 | #ifdef IGNORECASE 802 | buf2 = lowerstr_with_len(buf, bytelen); 803 | bytelen = strlen(buf2); 804 | #else 805 | buf2 = buf; 806 | #endif 807 | 808 | /* 809 | * count trigrams 810 | */ 811 | tptr = make_trigrams(tptr, buf2, bytelen, charlen); 812 | 813 | #ifdef IGNORECASE 814 | pfree(buf2); 815 | #endif 816 | } 817 | 818 | pfree(buf); 819 | 820 | if ((len = tptr - GETARR(trg)) == 0) 821 | return trg; 822 | 823 | /* 824 | * Make trigrams unique. 
825 | */ 826 | if (len > 1) 827 | { 828 | qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm); 829 | len = unique_array(GETARR(trg), len); 830 | } 831 | 832 | SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); 833 | 834 | return trg; 835 | } 836 | 837 | uint32 838 | trgm2int(trgm *ptr) 839 | { 840 | uint32 val = 0; 841 | 842 | val |= *(((unsigned char *) ptr)); 843 | val <<= 8; 844 | val |= *(((unsigned char *) ptr) + 1); 845 | val <<= 8; 846 | val |= *(((unsigned char *) ptr) + 2); 847 | 848 | return val; 849 | } 850 | 851 | Datum 852 | show_trgm(PG_FUNCTION_ARGS) 853 | { 854 | text *in = PG_GETARG_TEXT_P(0); 855 | TRGM *trg; 856 | Datum *d; 857 | ArrayType *a; 858 | trgm *ptr; 859 | int i; 860 | 861 | trg = generate_trgm(VARDATA(in), VARSIZE(in) - VARHDRSZ); 862 | d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg))); 863 | 864 | for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++) 865 | { 866 | text *item = (text *) palloc(VARHDRSZ + Max(12, pg_database_encoding_max_length() * 3)); 867 | 868 | if (pg_database_encoding_max_length() > 1 && !ISPRINTABLETRGM(ptr)) 869 | { 870 | snprintf(VARDATA(item), 12, "0x%06x", trgm2int(ptr)); 871 | SET_VARSIZE(item, VARHDRSZ + strlen(VARDATA(item))); 872 | } 873 | else 874 | { 875 | SET_VARSIZE(item, VARHDRSZ + 3); 876 | CPTRGM(VARDATA(item), ptr); 877 | } 878 | d[i] = PointerGetDatum(item); 879 | } 880 | 881 | a = construct_array( 882 | d, 883 | ARRNELEM(trg), 884 | TEXTOID, 885 | -1, 886 | false, 887 | 'i' 888 | ); 889 | 890 | for (i = 0; i < ARRNELEM(trg); i++) 891 | pfree(DatumGetPointer(d[i])); 892 | 893 | pfree(d); 894 | pfree(trg); 895 | PG_FREE_IF_COPY(in, 0); 896 | 897 | PG_RETURN_POINTER(a); 898 | } 899 | 900 | float4 901 | cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact) 902 | { 903 | trgm *ptr1, 904 | *ptr2; 905 | int count = 0; 906 | int len1, 907 | len2; 908 | 909 | ptr1 = GETARR(trg1); 910 | ptr2 = GETARR(trg2); 911 | 912 | len1 = ARRNELEM(trg1); 913 | len2 = ARRNELEM(trg2); 914 | 915 | /* explicit test 
is needed to avoid 0/0 division when both lengths are 0 */ 916 | if (len1 <= 0 || len2 <= 0) 917 | return (float4) 0.0; 918 | 919 | while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2) 920 | { 921 | int res = CMPTRGM(ptr1, ptr2); 922 | 923 | if (res < 0) 924 | ptr1++; 925 | else if (res > 0) 926 | ptr2++; 927 | else 928 | { 929 | ptr1++; 930 | ptr2++; 931 | count++; 932 | } 933 | } 934 | 935 | /* 936 | * If inexact then len2 is equal to count, because we don't know actual 937 | * length of second string in inexact search and we can assume that count 938 | * is a lower bound of len2. 939 | */ 940 | return CALCSML(count, len1, inexact ? count : len2); 941 | } 942 | 943 | 944 | /* 945 | * Returns whether trg2 contains all trigrams in trg1. 946 | * This relies on the trigram arrays being sorted. 947 | */ 948 | bool 949 | trgm_contained_by(TRGM *trg1, TRGM *trg2) 950 | { 951 | trgm *ptr1, 952 | *ptr2; 953 | int len1, 954 | len2; 955 | 956 | ptr1 = GETARR(trg1); 957 | ptr2 = GETARR(trg2); 958 | 959 | len1 = ARRNELEM(trg1); 960 | len2 = ARRNELEM(trg2); 961 | 962 | while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2) 963 | { 964 | int res = CMPTRGM(ptr1, ptr2); 965 | 966 | if (res < 0) 967 | return false; 968 | else if (res > 0) 969 | ptr2++; 970 | else 971 | { 972 | ptr1++; 973 | ptr2++; 974 | } 975 | } 976 | if (ptr1 - GETARR(trg1) < len1) 977 | return false; 978 | else 979 | return true; 980 | } 981 | 982 | /* 983 | * Return a palloc'd boolean array showing, for each trigram in "query", 984 | * whether it is present in the trigram array "key". 985 | * This relies on the "key" array being sorted, but "query" need not be. 
986 | */ 987 | bool * 988 | trgm_presence_map(TRGM *query, TRGM *key) 989 | { 990 | bool *result; 991 | trgm *ptrq = GETARR(query), 992 | *ptrk = GETARR(key); 993 | int lenq = ARRNELEM(query), 994 | lenk = ARRNELEM(key), 995 | i; 996 | 997 | result = (bool *) palloc0(lenq * sizeof(bool)); 998 | 999 | /* for each query trigram, do a binary search in the key array */ 1000 | for (i = 0; i < lenq; i++) 1001 | { 1002 | int lo = 0; 1003 | int hi = lenk; 1004 | 1005 | while (lo < hi) 1006 | { 1007 | int mid = (lo + hi) / 2; 1008 | int res = CMPTRGM(ptrq, ptrk + mid); 1009 | 1010 | if (res < 0) 1011 | hi = mid; 1012 | else if (res > 0) 1013 | lo = mid + 1; 1014 | else 1015 | { 1016 | result[i] = true; 1017 | break; 1018 | } 1019 | } 1020 | ptrq++; 1021 | } 1022 | 1023 | return result; 1024 | } 1025 | 1026 | Datum 1027 | similarity(PG_FUNCTION_ARGS) 1028 | { 1029 | text *in1 = PG_GETARG_TEXT_P(0); 1030 | text *in2 = PG_GETARG_TEXT_P(1); 1031 | TRGM *trg1, 1032 | *trg2; 1033 | float4 res; 1034 | 1035 | trg1 = generate_trgm(VARDATA(in1), VARSIZE(in1) - VARHDRSZ); 1036 | trg2 = generate_trgm(VARDATA(in2), VARSIZE(in2) - VARHDRSZ); 1037 | 1038 | res = cnt_sml(trg1, trg2, false); 1039 | 1040 | pfree(trg1); 1041 | pfree(trg2); 1042 | PG_FREE_IF_COPY(in1, 0); 1043 | PG_FREE_IF_COPY(in2, 1); 1044 | 1045 | PG_RETURN_FLOAT4(res); 1046 | } 1047 | 1048 | Datum 1049 | substring_similarity(PG_FUNCTION_ARGS) 1050 | { 1051 | text *in1 = PG_GETARG_TEXT_PP(0); 1052 | text *in2 = PG_GETARG_TEXT_PP(1); 1053 | float4 res; 1054 | 1055 | res = calc_substring_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), 1056 | VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), 1057 | false); 1058 | 1059 | PG_FREE_IF_COPY(in1, 0); 1060 | PG_FREE_IF_COPY(in2, 1); 1061 | PG_RETURN_FLOAT4(res); 1062 | } 1063 | 1064 | Datum 1065 | similarity_dist(PG_FUNCTION_ARGS) 1066 | { 1067 | float4 res = DatumGetFloat4(DirectFunctionCall2(similarity, 1068 | PG_GETARG_DATUM(0), 1069 | PG_GETARG_DATUM(1))); 1070 | 1071 | 
PG_RETURN_FLOAT4(1.0 - res); 1072 | } 1073 | 1074 | Datum 1075 | similarity_op(PG_FUNCTION_ARGS) 1076 | { 1077 | float4 res = DatumGetFloat4(DirectFunctionCall2(similarity, 1078 | PG_GETARG_DATUM(0), 1079 | PG_GETARG_DATUM(1))); 1080 | 1081 | PG_RETURN_BOOL(res >= trgm_limit); 1082 | } 1083 | 1084 | Datum 1085 | substring_similarity_op(PG_FUNCTION_ARGS) 1086 | { 1087 | text *in1 = PG_GETARG_TEXT_PP(0); 1088 | text *in2 = PG_GETARG_TEXT_PP(1); 1089 | float4 res; 1090 | 1091 | res = calc_substring_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), 1092 | VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), 1093 | true); 1094 | 1095 | PG_FREE_IF_COPY(in1, 0); 1096 | PG_FREE_IF_COPY(in2, 1); 1097 | PG_RETURN_BOOL(res >= trgm_substring_limit); 1098 | } 1099 | 1100 | Datum 1101 | substring_similarity_commutator_op(PG_FUNCTION_ARGS) 1102 | { 1103 | text *in1 = PG_GETARG_TEXT_PP(0); 1104 | text *in2 = PG_GETARG_TEXT_PP(1); 1105 | float4 res; 1106 | 1107 | res = calc_substring_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), 1108 | VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), 1109 | true); 1110 | 1111 | PG_FREE_IF_COPY(in1, 0); 1112 | PG_FREE_IF_COPY(in2, 1); 1113 | PG_RETURN_BOOL(res >= trgm_substring_limit); 1114 | } 1115 | --------------------------------------------------------------------------------