├── .circleci └── config.yml ├── .clang-format ├── .dockerignore ├── .gitignore ├── CMakeLists.txt ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── docs ├── Administration.md ├── Aggregations.md ├── CNAME ├── Chinese.md ├── Clients.md ├── Commands.md ├── Configuring.md ├── DESIGN.md ├── Escaping.md ├── Extensions.md ├── Highlight.md ├── Overview.md ├── Query_Syntax.md ├── Quick_Start.md ├── Scoring.md ├── Sorting.md ├── Stemming.md ├── Stopwords.md ├── Synonyms.md ├── Tags.md ├── Threading.md ├── commands.yaml ├── concurrency.png ├── design │ ├── gc.md │ └── indexing.md ├── go_client.md ├── img │ ├── favicon.png │ ├── latency.png │ └── throughput.png ├── index.md ├── java_client.md ├── logo.png ├── logo_small.png ├── payloads.md └── python_client.md ├── mkdocs.yml ├── ramp.yml ├── requirements.txt ├── src ├── Makefile ├── aggregate │ ├── aggregate.h │ ├── aggregate_exec.c │ ├── aggregate_plan.c │ ├── aggregate_plan.h │ ├── aggregate_request.c │ ├── expr │ │ ├── Makefile │ │ ├── expression.c │ │ ├── expression.h │ │ ├── lexer.c │ │ ├── lexer.rl │ │ ├── parser-toplevel.c │ │ ├── parser.c.inc │ │ ├── parser.h │ │ ├── parser.y │ │ └── token.h │ ├── filter.c │ ├── functions │ │ ├── date.c │ │ ├── function.c │ │ ├── function.h │ │ ├── math.c │ │ └── string.c │ ├── group_by.c │ ├── project.h │ ├── projector.c │ ├── reducer.c │ ├── reducer.h │ └── reducers │ │ ├── count.c │ │ ├── count_distinct.c │ │ ├── deviation.c │ │ ├── first_value.c │ │ ├── minmax.c │ │ ├── quantile.c │ │ ├── random_sample.c │ │ ├── sum.c │ │ └── to_list.c ├── benchmark │ ├── Makefile │ ├── benchmark.c │ ├── redisearch │ │ └── __init__.py │ ├── shakespeare.py │ └── time_sample.h ├── buffer.c ├── buffer.h ├── byte_offsets.c ├── byte_offsets.h ├── cndict_loader.c ├── cndict_loader.h ├── commands.h ├── concurrent_ctx.c ├── concurrent_ctx.h ├── config.c ├── config.h ├── cursor.c ├── cursor.h ├── debug_commads.c ├── debug_commads.h ├── dep │ ├── LICENSE │ ├── bloom │ │ ├── Makefile │ │ ├── 
README.md │ │ ├── contrib │ │ │ ├── MurmurHash2.c │ │ │ ├── bloom.c │ │ │ ├── bloom.h │ │ │ └── murmurhash2.h │ │ ├── sb.c │ │ └── sb.h │ ├── cndict │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── bundle_friso.py │ │ ├── cn_t2s.json │ │ ├── cndict_data.c │ │ ├── friso.ini │ │ ├── gen_simp_trad.py │ │ ├── lex │ │ │ ├── friso.lex.ini │ │ │ ├── lex-admin.lex │ │ │ ├── lex-cemixed.lex │ │ │ ├── lex-chars.lex │ │ │ ├── lex-cn-mz.lex │ │ │ ├── lex-cn-place.lex │ │ │ ├── lex-company.lex │ │ │ ├── lex-dname-1.lex │ │ │ ├── lex-dname-2.lex │ │ │ ├── lex-ecmixed.lex │ │ │ ├── lex-en-pun.lex │ │ │ ├── lex-en.lex │ │ │ ├── lex-festival.lex │ │ │ ├── lex-flname.lex │ │ │ ├── lex-food.lex │ │ │ ├── lex-lang.lex │ │ │ ├── lex-ln-adorn.lex │ │ │ ├── lex-lname.lex │ │ │ ├── lex-main.lex │ │ │ ├── lex-nation.lex │ │ │ ├── lex-net.lex │ │ │ ├── lex-org.lex │ │ │ ├── lex-sname.lex │ │ │ ├── lex-stopword.lex │ │ │ ├── lex-touris.lex │ │ │ └── lex-units.lex │ │ └── read_friso.py │ ├── friso │ │ ├── CMakeLists.txt │ │ ├── LICENSE.md │ │ ├── Makefile │ │ ├── Makefile.RediSearch │ │ ├── friso.c │ │ ├── friso.h │ │ ├── friso_API.h │ │ ├── friso_GBK.c │ │ ├── friso_UTF8.c │ │ ├── friso_array.c │ │ ├── friso_ctype.c │ │ ├── friso_ctype.h │ │ ├── friso_hash.c │ │ ├── friso_lexicon.c │ │ ├── friso_link.c │ │ ├── friso_simptrad.h │ │ └── friso_string.c │ ├── hll │ │ ├── LICENSE │ │ ├── hll.c │ │ └── hll.h │ ├── libnu │ │ ├── LICENSE │ │ ├── Makefile │ │ ├── README.md │ │ ├── casemap.h │ │ ├── casemap_internal.h │ │ ├── cesu8.c │ │ ├── cesu8.h │ │ ├── cesu8_internal.h │ │ ├── config.h │ │ ├── defines.h │ │ ├── ducet.c │ │ ├── ducet.h │ │ ├── extra.c │ │ ├── extra.h │ │ ├── gen │ │ │ ├── README │ │ │ ├── _ducet.c │ │ │ ├── _ducet_switch.c │ │ │ ├── _tofold.c │ │ │ ├── _tolower.c │ │ │ └── _toupper.c │ │ ├── libnu.h │ │ ├── mph.h │ │ ├── strcoll.c │ │ ├── strcoll.h │ │ ├── strcoll_internal.h │ │ ├── strings.c │ │ ├── strings.h │ │ ├── tofold.c │ │ ├── tolower.c │ │ ├── toupper.c │ │ ├── udb.h │ │ ├── 
utf16.c │ │ ├── utf16.h │ │ ├── utf16_internal.h │ │ ├── utf16be.c │ │ ├── utf16be.h │ │ ├── utf16he.c │ │ ├── utf16he.h │ │ ├── utf16le.c │ │ ├── utf16le.h │ │ ├── utf32.c │ │ ├── utf32.h │ │ ├── utf32_internal.h │ │ ├── utf32be.c │ │ ├── utf32be.h │ │ ├── utf32he.c │ │ ├── utf32he.h │ │ ├── utf32le.c │ │ ├── utf32le.h │ │ ├── utf8.c │ │ ├── utf8.h │ │ ├── utf8_internal.h │ │ ├── validate.c │ │ ├── validate.h │ │ ├── version.c │ │ └── version.h │ ├── miniz │ │ ├── Makefile │ │ ├── miniz.c │ │ └── miniz.h │ ├── snowball │ │ ├── AUTHORS │ │ ├── CMakeLists.txt │ │ ├── COPYING │ │ ├── Makefile │ │ ├── README │ │ ├── examples │ │ │ └── stemwords.c │ │ ├── include │ │ │ └── libstemmer.h │ │ ├── libstemmer │ │ │ ├── libstemmer.c │ │ │ ├── libstemmer_c.in │ │ │ ├── libstemmer_utf8.c │ │ │ ├── modules.h │ │ │ ├── modules.txt │ │ │ ├── modules_utf8.h │ │ │ └── modules_utf8.txt │ │ ├── mkinc.mak │ │ ├── mkinc_utf8.mak │ │ ├── runtime │ │ │ ├── api.c │ │ │ ├── api.h │ │ │ ├── header.h │ │ │ └── utilities.c │ │ └── src_c │ │ │ ├── stem_ISO_8859_1_danish.c │ │ │ ├── stem_ISO_8859_1_danish.h │ │ │ ├── stem_ISO_8859_1_dutch.c │ │ │ ├── stem_ISO_8859_1_dutch.h │ │ │ ├── stem_ISO_8859_1_english.c │ │ │ ├── stem_ISO_8859_1_english.h │ │ │ ├── stem_ISO_8859_1_finnish.c │ │ │ ├── stem_ISO_8859_1_finnish.h │ │ │ ├── stem_ISO_8859_1_french.c │ │ │ ├── stem_ISO_8859_1_french.h │ │ │ ├── stem_ISO_8859_1_german.c │ │ │ ├── stem_ISO_8859_1_german.h │ │ │ ├── stem_ISO_8859_1_italian.c │ │ │ ├── stem_ISO_8859_1_italian.h │ │ │ ├── stem_ISO_8859_1_norwegian.c │ │ │ ├── stem_ISO_8859_1_norwegian.h │ │ │ ├── stem_ISO_8859_1_porter.c │ │ │ ├── stem_ISO_8859_1_porter.h │ │ │ ├── stem_ISO_8859_1_portuguese.c │ │ │ ├── stem_ISO_8859_1_portuguese.h │ │ │ ├── stem_ISO_8859_1_spanish.c │ │ │ ├── stem_ISO_8859_1_spanish.h │ │ │ ├── stem_ISO_8859_1_swedish.c │ │ │ ├── stem_ISO_8859_1_swedish.h │ │ │ ├── stem_ISO_8859_2_hungarian.c │ │ │ ├── stem_ISO_8859_2_hungarian.h │ │ │ ├── stem_ISO_8859_2_romanian.c 
│ │ │ ├── stem_ISO_8859_2_romanian.h │ │ │ ├── stem_KOI8_R_russian.c │ │ │ ├── stem_KOI8_R_russian.h │ │ │ ├── stem_UTF_8_arabic.c │ │ │ ├── stem_UTF_8_arabic.h │ │ │ ├── stem_UTF_8_danish.c │ │ │ ├── stem_UTF_8_danish.h │ │ │ ├── stem_UTF_8_dutch.c │ │ │ ├── stem_UTF_8_dutch.h │ │ │ ├── stem_UTF_8_english.c │ │ │ ├── stem_UTF_8_english.h │ │ │ ├── stem_UTF_8_finnish.c │ │ │ ├── stem_UTF_8_finnish.h │ │ │ ├── stem_UTF_8_french.c │ │ │ ├── stem_UTF_8_french.h │ │ │ ├── stem_UTF_8_german.c │ │ │ ├── stem_UTF_8_german.h │ │ │ ├── stem_UTF_8_hungarian.c │ │ │ ├── stem_UTF_8_hungarian.h │ │ │ ├── stem_UTF_8_italian.c │ │ │ ├── stem_UTF_8_italian.h │ │ │ ├── stem_UTF_8_norwegian.c │ │ │ ├── stem_UTF_8_norwegian.h │ │ │ ├── stem_UTF_8_porter.c │ │ │ ├── stem_UTF_8_porter.h │ │ │ ├── stem_UTF_8_portuguese.c │ │ │ ├── stem_UTF_8_portuguese.h │ │ │ ├── stem_UTF_8_romanian.c │ │ │ ├── stem_UTF_8_romanian.h │ │ │ ├── stem_UTF_8_russian.c │ │ │ ├── stem_UTF_8_russian.h │ │ │ ├── stem_UTF_8_spanish.c │ │ │ ├── stem_UTF_8_spanish.h │ │ │ ├── stem_UTF_8_swedish.c │ │ │ ├── stem_UTF_8_swedish.h │ │ │ ├── stem_UTF_8_tamil.c │ │ │ ├── stem_UTF_8_tamil.h │ │ │ ├── stem_UTF_8_turkish.c │ │ │ └── stem_UTF_8_turkish.h │ ├── thpool │ │ ├── thpool.c │ │ └── thpool.h │ └── triemap │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── Makefile │ │ ├── README.md │ │ ├── test │ │ ├── Makefile │ │ ├── benchmark.c │ │ ├── crc16.c │ │ ├── minunit.h │ │ ├── test.c │ │ └── time_sample.h │ │ ├── triemap.c │ │ └── triemap.h ├── doc_table.c ├── doc_table.h ├── document.c ├── document.h ├── document_basic.c ├── err.h ├── ext │ ├── Makefile │ ├── default.c │ └── default.h ├── extension.c ├── extension.h ├── forward_index.c ├── forward_index.h ├── fragmenter.c ├── fragmenter.h ├── gc.c ├── gc.h ├── geo_index.c ├── geo_index.h ├── highlight.h ├── highlight_processor.c ├── id_filter.c ├── id_filter.h ├── id_list.c ├── id_list.h ├── index.c ├── index.h ├── index_iterator.h ├── index_result.c ├── index_result.h ├── 
indexer.c ├── indexer.h ├── inverted_index.c ├── inverted_index.h ├── leakcheck.supp ├── module-init │ └── module-init.c ├── module.c ├── module.h ├── numeric_filter.c ├── numeric_filter.h ├── numeric_index.c ├── numeric_index.h ├── offset_vector.c ├── print_version.c ├── pytest │ ├── CMakeLists.txt │ ├── Makefile │ ├── base_case.py │ ├── games.json.bz2 │ ├── hotels.py │ ├── rmtest.config │ ├── test.py │ ├── test_aggregate.py │ ├── test_aof.py │ ├── test_cn.py │ ├── test_conditional_updates.py │ ├── test_cursors.py │ ├── test_doctable.py │ ├── test_ext.py │ ├── test_fuzz.py │ ├── test_fuzzy.py │ ├── test_gc.py │ ├── test_safemode.py │ ├── test_scorers.py │ ├── test_summarize.py │ ├── test_synonyms.py │ ├── test_tags.py │ └── test_wideschema.py ├── qint.c ├── qint.h ├── query.c ├── query.h ├── query_node.h ├── query_parser │ ├── Makefile │ ├── lexer.c │ ├── lexer.rl │ ├── parse.h │ ├── parser-toplevel.c │ ├── parser.c.inc │ ├── parser.h │ ├── parser.y │ ├── template.c.tpl │ └── tokenizer.h ├── query_plan.c ├── query_plan.h ├── redis.conf ├── redis_index.c ├── redis_index.h ├── redisearch.h ├── redismodule.h ├── result_processor.c ├── result_processor.h ├── rmalloc.h ├── rmutil │ ├── CMDPARSE.md │ ├── CMakeLists.txt │ ├── Makefile │ ├── alloc.c │ ├── alloc.h │ ├── cmdparse.c │ ├── cmdparse.h │ ├── heap.c │ ├── heap.h │ ├── logging.h │ ├── periodic.c │ ├── periodic.h │ ├── priority_queue.c │ ├── priority_queue.h │ ├── sds.c │ ├── sds.h │ ├── sdsalloc.h │ ├── strings.c │ ├── strings.h │ ├── test.h │ ├── test_cmdparse.c │ ├── test_heap.c │ ├── test_periodic.c │ ├── test_priority_queue.c │ ├── test_util.h │ ├── test_vector.c │ ├── util.c │ ├── util.h │ ├── vector.c │ └── vector.h ├── run_valgrind.sh ├── search_ctx.h ├── search_options.h ├── search_request.c ├── search_request.h ├── sortable.c ├── sortable.h ├── spec.c ├── spec.h ├── stemmer.c ├── stemmer.h ├── stopwords.c ├── stopwords.h ├── summarize_spec.c ├── summarize_spec.h ├── synonym_map.c ├── synonym_map.h ├── 
tag_index.c ├── tag_index.h ├── tests │ ├── CMakeLists.txt │ ├── Makefile │ ├── bench-decoder.c │ ├── cn_sample.txt │ ├── ext-example │ │ ├── Makefile │ │ ├── example.c │ │ └── example.h │ ├── genesis.txt │ ├── quantile_data.txt │ ├── test_aggregate.c │ ├── test_arr.c │ ├── test_array.c │ ├── test_blkalloc.c │ ├── test_cntokenize.c │ ├── test_expr.c │ ├── test_extensions.c │ ├── test_index.c │ ├── test_khtable.c │ ├── test_qint.c │ ├── test_quantile.c │ ├── test_query.c │ ├── test_range.c │ ├── test_result_processor.c │ ├── test_stemmer.c │ ├── test_stopwords.c │ ├── test_summarize.c │ ├── test_synonym_map.c │ ├── test_tag_index.c │ ├── test_tokenize.c │ ├── test_trie.c │ ├── test_util.h │ ├── test_value.c │ ├── time_sample.h │ └── titles.csv ├── tokenize.c ├── tokenize.h ├── tokenize_cn.c ├── toksep.h ├── trie │ ├── Makefile │ ├── levenshtein.c │ ├── levenshtein.h │ ├── rune_util.c │ ├── rune_util.h │ ├── sparse_vector.c │ ├── sparse_vector.h │ ├── trie.c │ ├── trie.h │ ├── trie_type.c │ └── trie_type.h ├── util │ ├── Makefile │ ├── arr.h │ ├── arr_rm_alloc.h │ ├── array.c │ ├── array.h │ ├── block_alloc.c │ ├── block_alloc.h │ ├── fnv.c │ ├── fnv.h │ ├── heap.c │ ├── heap.h │ ├── khash.h │ ├── khtable.c │ ├── khtable.h │ ├── logging.c │ ├── logging.h │ ├── mempool.c │ ├── mempool.h │ ├── minmax.h │ ├── minmax_heap.c │ ├── minmax_heap.h │ ├── misc.c │ ├── misc.h │ ├── quantile.c │ ├── quantile.h │ └── strconv.h ├── value.c ├── value.h ├── varint.c ├── varint.h └── version.h └── srcutil ├── gen_parser_toplevel.py ├── lemon.c ├── lempar.c └── make-parser.mk /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | working_directory: ~/repo 5 | 6 | docker: 7 | - image: circleci/python:latest 8 | - image: gcc:latest 9 | 10 | steps: 11 | - checkout 12 | 13 | # Download and cache dependencies 14 | - restore_cache: 15 | keys: 16 | - v1-dependencies-{{ checksum "requirements.txt" }} 17 | 
# fallback to using the latest cache if no exact match is found 18 | - v1-dependencies- 19 | 20 | - run: 21 | name: install dependencies 22 | command: | 23 | python3 -m venv venv 24 | . venv/bin/activate 25 | pip install -r requirements.txt 26 | 27 | - save_cache: 28 | paths: 29 | - ./venv 30 | key: v1-dependencies-{{ checksum "requirements.txt" }} 31 | 32 | - run: 33 | name: run build 34 | command: make 35 | 36 | # 37 | # Currently disabled due to several remaining python3 porting issues 38 | # 39 | # - run: 40 | # name: run tests 41 | # command: make PYTHON=python3 test 42 | # 43 | # - store_artifacts: 44 | # path: test-reports 45 | # destination: test-reports 46 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/*.rdb* 2 | **/*.aof* 3 | **/*.rdb 4 | **/*.out 5 | **/*.md 6 | **/*.zip 7 | **/*.gz 8 | **/*.so 9 | **/*.o 10 | **/*.a 11 | .git 12 | site 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !/**/ 3 | !**/*.h 4 | !**/*.c 5 | !**/*.py 6 | !**/*.md 7 | !.gitignore 8 | !**/Makefile 9 | !**/*.yml 10 | !**/*.yaml 11 | !**/*.mak 12 | !**/CMakeLists.txt 13 | !/cndict 14 | /site 15 | /build* 16 | !/debian/* 17 | 18 | src/dep/snowball/libstemmer/mkinc.mak 19 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM redis:latest as builder 2 | 3 | ENV LIBDIR /usr/lib/redis/modules 4 | ENV DEPS "python python-setuptools python-pip wget unzip build-essential git" 5 | # Set up a build environment 6 | RUN set -ex;\ 7 | deps="$DEPS";\ 8 | apt-get update; \ 9 | apt-get install -y --no-install-recommends $deps;\ 10 | pip install rmtest; 11 | 12 | # Build the source 13 | ADD . 
/REDISEARCH 14 | WORKDIR /REDISEARCH 15 | RUN set -ex;\ 16 | make clean; \ 17 | deps="$DEPS";\ 18 | make all -j 4; \ 19 | make test; 20 | 21 | # Package the runner 22 | FROM redis:latest 23 | ENV LIBDIR /usr/lib/redis/modules 24 | WORKDIR /data 25 | RUN set -ex;\ 26 | mkdir -p "$LIBDIR"; 27 | COPY --from=builder /REDISEARCH/src/redisearch.so "$LIBDIR" 28 | 29 | CMD ["redis-server", "--loadmodule", "/usr/lib/redis/modules/redisearch.so"] 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | $(MAKE) -C ./src all 3 | 4 | test: 5 | $(MAKE) -C ./src $@ 6 | 7 | clean: 8 | $(MAKE) -C ./src $@ 9 | 10 | distclean: 11 | $(MAKE) -C ./src $@ 12 | .PHONY: distclean 13 | 14 | package: all 15 | $(MAKE) -C ./src package 16 | .PHONY: package 17 | 18 | buildall: 19 | $(MAKE) -C ./src $@ 20 | 21 | deploydocs: 22 | mkdocs gh-deploy 23 | .PHONY: deploydocs 24 | 25 | staticlib: 26 | $(MAKE) -C ./src $@ 27 | 28 | # Builds a small utility that outputs the current version 29 | print_version: 30 | $(MAKE) -C ./src print_version 31 | 32 | docker: distclean print_version 33 | docker build . -t goodform/redisearch 34 | 35 | docker_push: docker 36 | docker push goodform/redisearch:latest 37 | docker tag goodform/redisearch:latest goodform/redisearch:`./src/print_version` 38 | docker push goodform/redisearch:`./src/print_version` 39 | 40 | # The targets below only delegate to ./src or run external tools; none of them names a file 41 | # produced in this directory, so declare them phony to keep a stray file from masking them. 42 | .PHONY: all test clean buildall staticlib print_version docker docker_push 43 | -------------------------------------------------------------------------------- /docs/Administration.md: -------------------------------------------------------------------------------- 1 | # RediSearch Administration Guide 2 | 3 | RediSearch doesn't require any configuration to work, but there are a few things worth noting when running RediSearch on top of Redis. 4 | 5 | ## Persistence 6 | 7 | RediSearch supports both RDB and AOF based persistence. 
For a pure RDB set-up, nothing special is needed beyond the standard Redis RDB configuration. 8 | 9 | ### AOF Persistence 10 | 11 | While RediSearch supports working with AOF based persistence, as of version 1.1.0 it **does not support** "classic AOF" mode, which uses AOF rewriting. Instead, it only supports AOF with RDB preamble mode. In this mode, rewriting the AOF log just creates an RDB file, which is appended to. 12 | 13 | To enable AOF persistence with RediSearch, add the two following lines to your redis.conf: 14 | 15 | ``` 16 | appendonly yes 17 | aof-use-rdb-preamble yes 18 | ``` 19 | 20 | ## Master/Slave Replication 21 | 22 | RediSearch supports replication inherently, and using a master/slave set-up, you can use slaves for high availability. On top of that, slaves can be used for searching, to load-balance read traffic. 23 | 24 | ## Cluster Support 25 | 26 | RediSearch will not work correctly on a cluster. 27 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | redisearch.io 2 | -------------------------------------------------------------------------------- /docs/Clients.md: -------------------------------------------------------------------------------- 1 | # RediSearch Client Libraries 2 | 3 | RediSearch has several client libraries, written by the module authors and community members - abstracting the API in different programming languages. 4 | 5 | While it is possible and simple to use the raw Redis commands API, in most cases it's easier to just use a client library abstracting it. 
6 | 7 | ## Currently available Libraries 8 | 9 | | Language | Library | Author | License | Comments | 10 | |---|---|---|---|---| 11 | | Python | [redisearch-py](https://github.com/RedisLabs/redisearch-py) | Redis Labs | BSD | Usually the most up-to-date client library | 12 | | Java | [JRediSearch](https://github.com/RedisLabs/JRediSearch) | Redis Labs | BSD | | 13 | | Go | [redisearch-go](https://github.com/RedisLabs/redisearch-go) | Redis Labs | BSD | Incomplete API | 14 | | JavaScript | [RedRediSearch](https://github.com/stockholmux/redredisearch) | Kyle J. Davis | MIT | Partial API, compatible with [Reds](https://github.com/tj/reds) | 15 | | C# | [NRediSearch](https://libraries.io/nuget/NRediSearch) | Marc Gravell | MIT | Part of StackExchange.Redis | 16 | | PHP | [redisearch-php](https://github.com/ethanhann/redisearch-php) | Ethan Hann | MIT | | 17 | | Ruby on Rails | [redi_search_rails](https://github.com/dmitrypol/redi_search_rails) | Dmitry Polyakovsky | MIT | | 18 | | Ruby | [redisearch-rb](https://github.com/vruizext/redisearch-rb) | Victor Ruiz | MIT | | 19 | -------------------------------------------------------------------------------- /docs/Escaping.md: -------------------------------------------------------------------------------- 1 | # Controlling Text Tokenization and Escaping 2 | 3 | At the moment, RediSearch uses a very simple tokenizer for documents and a slightly more sophisticated tokenizer for queries. Both allow a degree of control over string escaping and tokenization. 4 | 5 | Note: There is a different mechanism for tokenizing text and tag fields; this document refers only to text fields. For tag fields please refer to the [Tag Fields](/Tags) documentation. 6 | 7 | ## The rules of text field tokenization 8 | 9 | 1. All punctuation marks and whitespaces (besides underscores) separate the document and queries into tokens. e.g. any character of `,.<>{}[]"':;!@#$%^&*()-+=~` will break the text into terms. 
So the text `foo-bar.baz...bag` will be tokenized into `[foo, bar, baz, bag]` 10 | 11 | 2. Escaping separators in both queries and documents is done by prepending a backslash to any separator. e.g. the text `hello\-world hello-world` will be tokenized as `[hello-world, hello, world]`. **NOTE** that in most languages you will need an extra backslash when formatting the document or query, to signify an actual backslash, so the actual text in redis-cli for example, will be entered as `hello\\-world`. 12 | 13 | 3. Underscores (`_`) are not used as separators in either document or query. So the text `hello_world` will remain as is after tokenization. 14 | 15 | 4. Repeating spaces or punctuation marks are stripped. 16 | 17 | 5. In Latin characters, everything gets converted to lowercase. 18 | -------------------------------------------------------------------------------- /docs/Quick_Start.md: -------------------------------------------------------------------------------- 1 | 2 | # Quick Start Guide for RediSearch 3 | 4 | ## Running with Docker 5 | 6 | ```sh 7 | docker run -p 6379:6379 goodform/redisearch:latest 8 | ``` 9 | 10 | ## Building and running from source 11 | 12 | ```sh 13 | git clone https://github.com/goodform/RediSearch.git 14 | cd RediSearch/src 15 | make all 16 | 17 | # Assuming you have a Redis build from the unstable branch: 18 | /path/to/redis-server --loadmodule ./redisearch.so 19 | ``` 20 | 21 | ## Creating an index with fields and weights (default weight is 1.0) 22 | 23 | ``` 24 | 127.0.0.1:6379> FT.CREATE myIdx SCHEMA title TEXT WEIGHT 5.0 body TEXT url TEXT 25 | OK 26 | 27 | ``` 28 | 29 | ## Adding documents to the index 30 | ``` 31 | 127.0.0.1:6379> FT.ADD myIdx doc1 1.0 FIELDS title "hello world" body "lorem ipsum" url "http://redis.io" 32 | OK 33 | ``` 34 | 35 | ## Searching the index 36 | 37 | ``` 38 | 127.0.0.1:6379> FT.SEARCH myIdx "hello world" LIMIT 0 10 39 | 1) (integer) 1 40 | 2) "doc1" 41 | 3) 1) "title" 42 | 2) "hello world" 43 | 3) 
"body" 44 | 4) "lorem ipsum" 45 | 5) "url" 46 | 6) "http://redis.io" 47 | ``` 48 | 49 | !!! note 50 | Input is expected to be valid utf-8 or ASCII. The engine cannot handle wide character unicode at the moment. 51 | 52 | 53 | ## Dropping the index 54 | 55 | ``` 56 | 127.0.0.1:6379> FT.DROP myIdx 57 | OK 58 | ``` 59 | 60 | ## Adding and getting Auto-complete suggestions 61 | 62 | ``` 63 | 127.0.0.1:6379> FT.SUGADD autocomplete "hello world" 100 64 | OK 65 | 66 | 127.0.0.1:6379> FT.SUGGET autocomplete "he" 67 | 1) "hello world" 68 | 69 | ``` 70 | -------------------------------------------------------------------------------- /docs/Stopwords.md: -------------------------------------------------------------------------------- 1 | # Stop-Words 2 | 3 | RediSearch has a pre-defined default list of [stop-words](https://en.wikipedia.org/wiki/Stop_words). These are words that are usually so common that they do not add much information to search, but take up a lot of space and CPU time in the index. 4 | 5 | When indexing, stop-words are discarded and not indexed. When searching, they are also ignored and treated as if they were not sent to the query processor. This is done when parsing the query. 6 | 7 | At the moment, the default stop-word list applies to all full-text indexes in all languages and can be overridden manually at index creation time. 8 | 9 | ## Default stop-word list 10 | 11 | The following words are treated as stop-words by default: 12 | 13 | ``` 14 | a, is, the, an, and, are, as, at, be, but, by, for, 15 | if, in, into, it, no, not, of, on, or, such, that, their, 16 | then, there, these, they, this, to, was, will, with 17 | ``` 18 | 19 | ## Overriding the default stop-words 20 | 21 | Stop-words for an index can be defined (or disabled completely) on index creation using the `STOPWORDS` argument in the [FT.CREATE](/Commands/#ftcreate) command. 22 | 23 | The format is `STOPWORDS {number} {stopword} ...` where number is the number of stopwords given. 
The `STOPWORDS` argument must come before the `SCHEMA` argument. For example: 24 | 25 | ``` 26 | FT.CREATE myIndex STOPWORDS 3 foo bar baz SCHEMA title TEXT body TEXT 27 | ``` 28 | 29 | ## Disabling stop-words completely 30 | 31 | Disabling stopwords completely can be done by passing `STOPWORDS 0` on `FT.CREATE`. 32 | 33 | 34 | ## Avoiding stop-word detection in search queries 35 | 36 | In rare use cases, where queries are very long and are guaranteed by the client application to not contain stopwords, it is possible to avoid checking for them when parsing the query, by passing the `NOSTOPWORDS` option to `FT.SEARCH`. This saves some CPU time and is only worth it if the query has dozens or more terms in it. Using this without verifying that the query doesn't contain stop-words might result in empty queries. 37 | -------------------------------------------------------------------------------- /docs/commands.yaml: -------------------------------------------------------------------------------- 1 | FT.CREATE: 2 | summary: Create an index with the given schema (field names and their scoring weights) 3 | complexity: O(1) 4 | arguments: 5 | - 6 | comment: the index name 7 | name: index_name 8 | type: key 9 | - 10 | name: 11 | - field 12 | - score | NUMERIC 13 | type: 14 | - string 15 | - double | enum 16 | - enum: 17 | - NUMERIC 18 | comment: 19 | pairs of field name and relative weight in scoring. 20 | The weight is a double, but does not need to be normalized. 21 | 22 | since: 0.1 23 | returns: 24 | type: status 25 | value: OK on success, error otherwise 26 | 27 | FT.ADD: 28 | complexity: O(1) 29 | arguments: 30 | - 31 | comment: the index name 32 | name: index_name 33 | type: key 34 | - 35 | name: docId 36 | type: string 37 | - 38 | name: score 39 | type: double 40 | comment: The document's score, between 0.0 and 1.0 41 | - 42 | command: LANGUAGE 43 | name: 44 | - lang 45 | type: 46 | - string 47 | optional: true 48 | - 49 | name: nosave 50 | type: enum 51 | enum: [NOSAVE] 52 | - 53 | 54 | index docId score [LANGUAGE lang] [NOSAVE] FIELDS .... 
55 | 56 | 57 | -------------------------------------------------------------------------------- /docs/concurrency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goodform/RediSearch/c1d54f8195734394a2ec8b14d18b8feffd0bb081/docs/concurrency.png -------------------------------------------------------------------------------- /docs/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goodform/RediSearch/c1d54f8195734394a2ec8b14d18b8feffd0bb081/docs/img/favicon.png -------------------------------------------------------------------------------- /docs/img/latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goodform/RediSearch/c1d54f8195734394a2ec8b14d18b8feffd0bb081/docs/img/latency.png -------------------------------------------------------------------------------- /docs/img/throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goodform/RediSearch/c1d54f8195734394a2ec8b14d18b8feffd0bb081/docs/img/throughput.png -------------------------------------------------------------------------------- /docs/java_client.md: -------------------------------------------------------------------------------- 1 | # JRediSearch - RediSearch Java Client 2 | 3 | [https://github.com/RedisLabs/JRediSearch](https://github.com/RedisLabs/JRediSearch) 4 | 5 | ## Overview 6 | 7 | JRediSearch is a Java library abstracting the API of the RediSearch Redis module, that implements a powerful in-memory search engine inside Redis. 8 | 9 | See full documentation at [https://github.com/RedisLabs/JRediSearch](https://github.com/RedisLabs/JRediSearch). 
10 | 11 | ## Usage example 12 | 13 | Initializing the client: 14 | 15 | ```java 16 | 17 | import io.redisearch.client.Client; 18 | import io.redisearch.Document; 19 | import io.redisearch.SearchResult; 20 | import io.redisearch.Query; 21 | import io.redisearch.Schema; 22 | 23 | ... 24 | 25 | Client client = new Client("testing", "localhost", 6379); 26 | 27 | ``` 28 | 29 | Defining a schema for an index and creating it: 30 | 31 | ```java 32 | 33 | Schema sc = new Schema() 34 | .addTextField("title", 5.0) 35 | .addTextField("body", 1.0) 36 | .addNumericField("price"); 37 | 38 | client.createIndex(sc, Client.IndexOptions.Default()); 39 | 40 | ``` 41 | 42 | Adding documents to the index: 43 | 44 | ```java 45 | Map<String, Object> fields = new HashMap<>(); 46 | fields.put("title", "hello world"); 47 | fields.put("body", "lorem ipsum"); 48 | fields.put("price", 1337); 49 | 50 | client.addDocument("doc1", fields); 51 | 52 | ``` 53 | 54 | Searching the index: 55 | 56 | ```java 57 | 58 | // Creating a complex query 59 | Query q = new Query("hello world") 60 | .addFilter(new Query.NumericFilter("price", 0, 1000)) 61 | .limit(0,5); 62 | 63 | // actual search 64 | SearchResult res = client.search(q); 65 | 66 | 67 | ``` 68 | 69 | --- 70 | 71 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goodform/RediSearch/c1d54f8195734394a2ec8b14d18b8feffd0bb081/docs/logo.png -------------------------------------------------------------------------------- /docs/logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goodform/RediSearch/c1d54f8195734394a2ec8b14d18b8feffd0bb081/docs/logo_small.png -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | 
site_name: RediSearch Documentation 2 | site_url: http://redisearch.io 3 | repo_url: https://github.com/goodform/RediSearch 4 | 5 | 6 | google_analytics: 7 | - 'UA-92003007-8' 8 | - 'auto' 9 | markdown_extensions: 10 | - codehilite 11 | - toc(permalink=true, separator="_") 12 | - admonition 13 | 14 | use_directory_urls: true 15 | theme: 16 | name: 'material' 17 | language: 'en' 18 | logo: 'logo_small.png' 19 | favicon: 'img/favicon.png' 20 | palette: 21 | primary: 'indigo' 22 | accent: 'red' 23 | font: 24 | text: 'Roboto' 25 | code: 'Roboto Mono' 26 | feature: 27 | tabs: false 28 | pages: 29 | - 'Home': 'index.md' 30 | - 'Quick Start': 'Quick_Start.md' 31 | - 'Command Reference': 'Commands.md' 32 | - 'Configuration': 'Configuring.md' 33 | - 'Administration': "Administration.md" 34 | - Reference: 35 | - 'Query Syntax': 'Query_Syntax.md' 36 | - 'Stop-Words': 'Stopwords.md' 37 | - 'Aggregations (NEW!)': 'Aggregations.md' 38 | - 'Tokenization and Escaping': 'Escaping.md' 39 | - 'Sortable Values': 'Sorting.md' 40 | - 'Tag Fields': 'Tags.md' 41 | - 'Highlighting Results': Highlight.md 42 | - 'Scoring Documents': 'Scoring.md' 43 | - 'Extension API': Extensions.md 44 | - 'Stemming Support': Stemming.md 45 | - 'Synonyms Support': Synonyms.md 46 | - 'Document Payloads': payloads.md 47 | - Clients: 48 | - 'Client Libraries': 'Clients.md' 49 | - 'Python API': python_client.md 50 | - 'Java API': java_client.md 51 | - 'Go API': go_client.md 52 | 53 | - Design Documents: 54 | - 'Garbage Collection': 'design/gc.md' 55 | - Articles: 56 | - 'Multi-Threading in RediSearch': Threading.md 57 | 58 | - 'Chinese Support': 'Chinese.md' 59 | 60 | #google_analytics: ['UA-89573912-1', 'redisearch'] 61 | -------------------------------------------------------------------------------- /ramp.yml: -------------------------------------------------------------------------------- 1 | display_name: RediSearch 2 | author: goodform 3 | email: freeandopenredismodules@gmail.com 4 | description: High 
performance search index for Redis 5 | homepage: https://github.com/goodform/RediSearch 6 | license: AGPLv3 7 | command_line_args: "" 8 | min_redis_version: "4.0" 9 | min_redis_pack_version: "5.0" 10 | capabilities: 11 | - types 12 | - no_multi_key 13 | - backup_restore 14 | - failover_migrate 15 | - persistence_aof 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | redis 2 | rmtest 3 | -------------------------------------------------------------------------------- /src/aggregate/expr/Makefile: -------------------------------------------------------------------------------- 1 | SRCUTIL = ../../../srcutil 2 | PARSER_SYMBOL_PREFIX=RSExprParser 3 | 4 | include $(SRCUTIL)/make-parser.mk 5 | -------------------------------------------------------------------------------- /src/aggregate/expr/parser-toplevel.c: -------------------------------------------------------------------------------- 1 | #define Parse RSExprParser_Parse 2 | #define ParseTrace RSExprParser_ParseTrace 3 | #define ParseAlloc RSExprParser_ParseAlloc 4 | #define ParseFree RSExprParser_ParseFree 5 | #define ParseInit RSExprParser_ParseInit 6 | #define ParseFinalize RSExprParser_ParseFinalize 7 | #define ParseStackPeack RSExprParser_ParseStackPeack 8 | #include "parser.c.inc" 9 | -------------------------------------------------------------------------------- /src/aggregate/expr/parser.h: -------------------------------------------------------------------------------- 1 | #define AND 1 2 | #define OR 2 3 | #define NOT 3 4 | #define EQ 4 5 | #define NE 5 6 | #define LT 6 7 | #define LE 7 8 | #define GT 8 9 | #define GE 9 10 | #define PLUS 10 11 | #define MINUS 11 12 | #define DIVIDE 12 13 | #define TIMES 13 14 | #define MOD 14 15 | #define POW 15 16 | #define LP 16 17 | #define RP 17 18 | #define PROPERTY 18 19 | #define SYMBOL 19 20 | #define STRING 20 21 | #define 
NUMBER 21 22 | #define ARGLIST 22 23 | #define COMMA 23 24 | -------------------------------------------------------------------------------- /src/aggregate/expr/token.h: -------------------------------------------------------------------------------- 1 | #ifndef RS_AGGREGATE_TOKEN_H_ 2 | #define RS_AGGREGATE_TOKEN_H_ 3 | #include 4 | #include "expression.h" 5 | /* A query-specific tokenizer, that reads symbols like quots, pipes, etc */ 6 | typedef struct { 7 | const char *raw; 8 | size_t len; 9 | char *pos; 10 | 11 | char *errorMsg; 12 | 13 | RSExpr *root; 14 | int ok; 15 | 16 | } RSExprParseCtx; 17 | 18 | /* A token in the process of parsing a query. Unlike the document tokenizer, it 19 | works iteratively and is not callback based. */ 20 | typedef struct { 21 | const char *s; 22 | int len; 23 | int pos; 24 | double numval; 25 | } RSExprToken; 26 | 27 | #endif -------------------------------------------------------------------------------- /src/aggregate/filter.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | typedef struct { 8 | RSExpr *exp; 9 | RSSortingTable *sortables; 10 | RSExprEvalCtx ctx; 11 | RSValue val; 12 | } FilterCtx; 13 | 14 | static FilterCtx *NewFilterCtx() { 15 | FilterCtx *ret = malloc(sizeof(*ret)); 16 | return ret; 17 | } 18 | 19 | void Filter_Free(ResultProcessor *p) { 20 | FilterCtx *pc = p->ctx.privdata; 21 | 22 | RSFunctionEvalCtx_Free(pc->ctx.fctx); 23 | RSExpr_Free(pc->exp); 24 | free(pc); 25 | free(p); 26 | } 27 | 28 | int Filter_Next(ResultProcessorCtx *ctx, SearchResult *res) { 29 | FilterCtx *pc = ctx->privdata; 30 | 31 | char *err; 32 | do { // read while we either get EOF or the filter expr evaluates to true 33 | RESULTPROCESSOR_MAYBE_RET_EOF(ctx->upstream, res, 1); 34 | pc->ctx.r = res; 35 | pc->ctx.fctx->res = res; 36 | int rc = RSExpr_Eval(&pc->ctx, pc->exp, &pc->val, &err); 37 | if (rc == EXPR_EVAL_OK) { 38 | if 
(RSValue_BoolTest(&pc->val)) { 39 | return RS_RESULT_OK; 40 | } 41 | } 42 | } while (1); 43 | return RS_RESULT_EOF; 44 | } 45 | 46 | /* Create a result processor that passes through only the upstream results for which `expr` evaluates to true. Returns NULL when `expr` fails to parse; `err` is handed to RSExpr_Parse. */ 47 | ResultProcessor *NewFilter(RedisSearchCtx *sctx, ResultProcessor *upstream, const char *expr, 48 | size_t len, char **err) { 49 | 50 | FilterCtx *ctx = NewFilterCtx(); 51 | ctx->ctx.sctx = sctx; 52 | ctx->ctx.sortables = sctx && sctx->spec ? sctx->spec->sortables : NULL; 53 | ctx->ctx.fctx = RS_NewFunctionEvalCtx(); 54 | ctx->exp = RSExpr_Parse(expr, len, err); 55 | if (!ctx->exp) { 56 | /* Parse failed: release the function eval ctx allocated above, otherwise it leaks */ 57 | RSFunctionEvalCtx_Free(ctx->ctx.fctx); 58 | free(ctx); 59 | return NULL; 60 | } 61 | ResultProcessor *proc = NewResultProcessor(upstream, ctx); 62 | proc->Next = Filter_Next; 63 | proc->Free = Filter_Free; 64 | return proc; 65 | } 66 | -------------------------------------------------------------------------------- /src/aggregate/functions/function.c: -------------------------------------------------------------------------------- 1 | #define RS_FUNCTION_C_ 2 | #include "function.h" 3 | 4 | /* Allocate some memory for a function that can be freed automatically when the execution is done */ 5 | inline void *RSFunction_Alloc(RSFunctionEvalCtx *ctx, size_t sz) { 6 | return BlkAlloc_Alloc(&ctx->alloc, sz, MAX(sz, 1024)); 7 | } 8 | 9 | char *RSFunction_Strndup(RSFunctionEvalCtx *ctx, const char *str, size_t len) { 10 | char *ret = RSFunction_Alloc(ctx, len + 1); 11 | memcpy(ret, str, len); 12 | ret[len] = '\0'; 13 | return ret; 14 | } 15 | 16 | void RSFunctionEvalCtx_Free(RSFunctionEvalCtx *ctx) { 17 | BlkAlloc_FreeAll(&ctx->alloc, NULL, NULL, 0); 18 | free(ctx); 19 | } 20 | RSFunctionEvalCtx *RS_NewFunctionEvalCtx() { 21 | RSFunctionEvalCtx *ret = malloc(sizeof(*ret)); 22 | BlkAlloc_Init(&ret->alloc); 23 | return ret; 24 | } 25 | 26 | static RSFunctionRegistry functions_g = {0}; 27 | 28 | RSFunction RSFunctionRegistry_Get(const char *name, size_t len) { 29 | 30 | for (size_t i = 0; i < functions_g.len; i++) { 31 | if (len == strlen(functions_g.funcs[i].name) && 32 |
!strncasecmp(functions_g.funcs[i].name, name, len)) { 33 | return functions_g.funcs[i].f; 34 | } 35 | } 36 | return NULL; 37 | } 38 | 39 | RSValueType RSFunctionRegistry_GetType(const char *name, size_t len) { 40 | for (size_t i = 0; i < functions_g.len; i++) { 41 | if (len == strlen(functions_g.funcs[i].name) && 42 | !strncasecmp(functions_g.funcs[i].name, name, len)) { 43 | return functions_g.funcs[i].retType; 44 | } 45 | } 46 | return RSValue_Null; 47 | } 48 | 49 | int RSFunctionRegistry_RegisterFunction(const char *name, RSFunction f, RSValueType retType) { 50 | if (functions_g.len + 1 >= functions_g.cap) { 51 | functions_g.cap += functions_g.cap ? functions_g.cap : 2; 52 | functions_g.funcs = realloc(functions_g.funcs, functions_g.cap * sizeof(*functions_g.funcs)); 53 | } 54 | functions_g.funcs[functions_g.len].f = f; 55 | functions_g.funcs[functions_g.len].name = name; 56 | functions_g.funcs[functions_g.len].retType = retType; 57 | functions_g.len++; 58 | return 1; 59 | } 60 | -------------------------------------------------------------------------------- /src/aggregate/project.h: -------------------------------------------------------------------------------- 1 | #ifndef PROJECT_H__ 2 | #define PROJECT_H__ 3 | 4 | #include 5 | #include 6 | 7 | ResultProcessor *NewProjector(RedisSearchCtx *sctx, ResultProcessor *upstream, const char *alias, 8 | const char *expr, size_t len, char **err); 9 | #endif 10 | -------------------------------------------------------------------------------- /src/aggregate/projector.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "project.h" 8 | #include 9 | #include 10 | typedef struct { 11 | RSExpr *exp; 12 | const char *alias; 13 | RSSortingTable *sortables; 14 | RSExprEvalCtx ctx; 15 | RSValue val; 16 | } ProjectorCtx; 17 | 18 | static ProjectorCtx *NewProjectorCtx(const char *alias) { 19 | ProjectorCtx 
*ret = malloc(sizeof(*ret)); 20 | ret->alias = alias; 21 | return ret; 22 | } 23 | 24 | void Projector_Free(ResultProcessor *p) { 25 | ProjectorCtx *pc = p->ctx.privdata; 26 | 27 | RSFunctionEvalCtx_Free(pc->ctx.fctx); 28 | RSExpr_Free(pc->exp); 29 | free(pc); 30 | free(p); 31 | } 32 | 33 | int Projector_Next(ResultProcessorCtx *ctx, SearchResult *res) { 34 | RESULTPROCESSOR_MAYBE_RET_EOF(ctx->upstream, res, 1); 35 | ProjectorCtx *pc = ctx->privdata; 36 | pc->ctx.r = res; 37 | pc->ctx.fctx->res = res; 38 | char *err; 39 | int rc = RSExpr_Eval(&pc->ctx, pc->exp, &pc->val, &err); 40 | if (rc == EXPR_EVAL_OK) { 41 | RSValue *a = RS_NewValue(RSValue_Null); 42 | *a = pc->val; 43 | a->allocated = 1; 44 | a->refcount = 0; 45 | 46 | RSFieldMap_Set(&res->fields, pc->alias, a); 47 | } else { 48 | RSFieldMap_Set(&res->fields, pc->alias, RS_NullVal()); 49 | } 50 | return RS_RESULT_OK; 51 | } 52 | 53 | ResultProcessor *NewProjector(RedisSearchCtx *sctx, ResultProcessor *upstream, const char *alias, 54 | const char *expr, size_t len, char **err) { 55 | 56 | ProjectorCtx *ctx = NewProjectorCtx(alias); 57 | ctx->ctx.sctx = sctx; 58 | ctx->ctx.sortables = sctx && sctx->spec ? 
sctx->spec->sortables : NULL; 59 | ctx->ctx.fctx = RS_NewFunctionEvalCtx(); 60 | ctx->exp = RSExpr_Parse(expr, len, err); 61 | if (!ctx->exp) { 62 | free(ctx); 63 | return NULL; 64 | } 65 | ResultProcessor *proc = NewResultProcessor(upstream, ctx); 66 | proc->Next = Projector_Next; 67 | proc->Free = Projector_Free; 68 | return proc; 69 | } 70 | -------------------------------------------------------------------------------- /src/aggregate/reducers/count.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | struct counter { 5 | size_t count; 6 | }; 7 | 8 | void *counter_NewInstance(ReducerCtx *ctx) { 9 | BlkAlloc *ba = ctx->privdata; 10 | struct counter *ctr = 11 | ReducerCtx_Alloc(ctx, sizeof(*ctr), 1024 * sizeof(*ctr)); // malloc(sizeof(*ctr)); 12 | ctr->count = 0; 13 | return ctr; 14 | } 15 | 16 | int counter_Add(void *ctx, SearchResult *res) { 17 | struct counter *ctr = ctx; 18 | ctr->count++; 19 | return 1; 20 | } 21 | 22 | int counter_Finalize(void *ctx, const char *key, SearchResult *res) { 23 | struct counter *ctr = ctx; 24 | // printf("Counter finalize! 
count %zd\n", ctr->count); 25 | RSFieldMap_SetNumber(&res->fields, key, ctr->count); 26 | return 1; 27 | } 28 | 29 | Reducer *NewCount(RedisSearchCtx *ctx, const char *alias) { 30 | Reducer *r = NewReducer(ctx, NULL); 31 | 32 | r->Add = counter_Add; 33 | r->Finalize = counter_Finalize; 34 | r->Free = Reducer_GenericFree; 35 | r->FreeInstance = NULL; 36 | r->NewInstance = counter_NewInstance; 37 | r->alias = FormatAggAlias(alias, "count", ""); 38 | return r; 39 | } -------------------------------------------------------------------------------- /src/benchmark/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -g -O3 -std=gnu99 -I/usr/local/include -Wall -Wno-unused-function 2 | LDFLAGS= -L/usr/local/lib -lhiredis -lev -lc -lm -static 3 | CC=gcc 4 | 5 | # `all` names a command, not a file; without this a stray file called `all` would silence it. 6 | .PHONY: all 7 | 8 | # Link the benchmark binary from its object file (still the default goal: it is the first real target). 9 | benchmark: benchmark.o 10 | $(CC) -o $@ $^ $(LDFLAGS) 11 | 12 | all: benchmark 13 | -------------------------------------------------------------------------------- /src/benchmark/shakespeare.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from redisearch import Client 3 | 4 | 5 | def index(): 6 | client = Client('sh') 7 | # client.drop_index() 8 | client.create_index(txt=1.0) 9 | chapters = {} 10 | with open('will_play_text.csv') as fp: 11 | 12 | r = csv.reader(fp, delimiter=';') 13 | for line in r: 14 | #['62816', 'Merchant of Venice', '9', '3.2.74', 'PORTIA', "I'll begin it,--Ding, dong, bell."] 15 | 16 | play, chapter, character, text = line[1], line[2], line[4], line[5] 17 | 18 | d = chapters.setdefault('{}:{}'.format(play, chapter), {}) 19 | d['play'] = play 20 | d['text'] = d.get('text', '') + ' ' + text 21 | 22 | for chapter, doc in chapters.iteritems(): 23 | print chapter, doc 24 | client.add_document(chapter, nosave=True, txt=doc['text']) 25 | 26 | if __name__ == '__main__': 27 | index() 28 | --------------------------------------------------------------------------------
/src/benchmark/time_sample.h: -------------------------------------------------------------------------------- 1 | #ifndef __RL_TIME_SAMPLE__ 2 | #define __RL_TIME_SAMPLE__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | typedef struct { 10 | struct timespec startTime; 11 | struct timespec endTime; 12 | long long durationNS; 13 | int num; 14 | } TimeSample; 15 | 16 | static void TimeSampler_Start(TimeSample *ts) { 17 | clock_gettime(CLOCK_REALTIME, &ts->startTime); 18 | ts->num = 0; 19 | } 20 | 21 | static void TimeSampler_Tick(TimeSample *ts) { 22 | ++ts->num; 23 | } 24 | static void TimeSampler_End(TimeSample *ts) { 25 | 26 | 27 | clock_gettime(CLOCK_REALTIME, &ts->endTime); 28 | 29 | ts->durationNS = ((long long)1000000000 * ts->endTime.tv_sec + ts->endTime.tv_nsec) - 30 | ((long long)1000000000 * ts->startTime.tv_sec + ts->startTime.tv_nsec); 31 | 32 | } 33 | 34 | static long long TimeSampler_DurationNS(TimeSample *ts) { 35 | 36 | return ts->durationNS; 37 | } 38 | 39 | static long long TimeSampler_DurationMS(TimeSample *ts) { 40 | /* Whole milliseconds of the sampled interval: 1 ms = 1,000,000 ns (dividing by 1000 would yield microseconds) */ 41 | return ts->durationNS / 1000000; 42 | } 43 | 44 | static double TimeSampler_DurationSec(TimeSample *ts) { 45 | 46 | return (double)ts->durationNS / 1000000000.0; 47 | } 48 | 49 | static double TimeSampler_IterationSec(TimeSample *ts) { 50 | 51 | return ((double)ts->durationNS / 1000000000.0)/(double)(ts->num ?
ts->num : 1.0); 52 | } 53 | 54 | 55 | 56 | #define TIME_SAMPLE_RUN(blk) { TimeSample ts; TimeSampler_Start(&ts); { blk; } ; TimeSampler_End(&ts); printf("Execution time for " #blk ": %f seconds\n", TimeSampler_DurationSec(&ts)); } 57 | 58 | #define TIME_SAMPLE_RUN_LOOP(N, blk) { TimeSample ts; TimeSampler_Start(&ts); for (int __ts_loop = 0; __ts_loop < N; __ts_loop++) { blk; } ; TimeSampler_End(&ts); printf("Execution time for " #blk ": %f seconds/iteration\n", TimeSampler_IterationSec(&ts)); } 59 | 60 | 61 | 62 | #endif -------------------------------------------------------------------------------- /src/cndict_loader.h: -------------------------------------------------------------------------------- 1 | #ifndef CNDICT_LOADER_H 2 | #define CNDICT_LOADER_H 3 | 4 | #include "dep/friso/friso.h" 5 | 6 | // Defined in cndict_loader.c 7 | // Loads the built-in dictionary into the provided dictionary object 8 | int ChineseDictLoad(friso_dic_t); 9 | 10 | // Defined in generated/cndict_data.c 11 | // Configures the friso config object based on built-in settings. 
12 | void ChineseDictConfigure(friso_t, friso_config_t); 13 | #endif -------------------------------------------------------------------------------- /src/commands.h: -------------------------------------------------------------------------------- 1 | #ifndef RS_COMMANDS_H_ 2 | #define RS_COMMANDS_H_ 3 | 4 | /** RS_CMD_PREFIX can be defined with -D from the Makefile */ 5 | #ifndef RS_CMD_PREFIX 6 | #define RS_CMD_PREFIX "FT" 7 | #endif 8 | 9 | #define RS_CREATE_CMD RS_CMD_PREFIX ".CREATE" 10 | #define RS_ADD_CMD RS_CMD_PREFIX ".ADD" 11 | #define RS_SAFEADD_CMD RS_CMD_PREFIX ".SAFEADD" 12 | #define RS_SETPAYLOAD_CMD RS_CMD_PREFIX ".SETPAYLOAD" 13 | #define RS_ADDHASH_CMD RS_CMD_PREFIX ".ADDHASH" 14 | #define RS_SAFEADDHASH_CMD RS_CMD_PREFIX ".SAFEADDHASH" 15 | #define RS_INFO_CMD RS_CMD_PREFIX ".INFO" 16 | #define RS_SEARCH_CMD RS_CMD_PREFIX ".SEARCH" 17 | #define RS_AGGREGATE_CMD RS_CMD_PREFIX ".AGGREGATE" 18 | 19 | #define RS_EXPLAIN_CMD RS_CMD_PREFIX ".EXPLAIN" 20 | #define RS_DEL_CMD RS_CMD_PREFIX ".DEL" 21 | #define RS_DROP_CMD RS_CMD_PREFIX ".DROP" 22 | #define RS_GET_CMD RS_CMD_PREFIX ".GET" 23 | #define RS_MGET_CMD RS_CMD_PREFIX ".MGET" 24 | #define RS_TAGVALS_CMD RS_CMD_PREFIX ".TAGVALS" 25 | 26 | #define RS_SUGADD_CMD RS_CMD_PREFIX ".SUGADD" 27 | #define RS_SUGGET_CMD RS_CMD_PREFIX ".SUGGET" 28 | #define RS_SUGDEL_CMD RS_CMD_PREFIX ".SUGDEL" 29 | #define RS_SUGLEN_CMD RS_CMD_PREFIX ".SUGLEN" 30 | 31 | #define RS_CURSOR_CMD RS_CMD_PREFIX ".CURSOR" 32 | 33 | #define RS_SYNADD_CMD RS_CMD_PREFIX ".SYNADD" 34 | #define RS_SYNUPDATE_CMD RS_CMD_PREFIX ".SYNUPDATE" 35 | #define RS_SYNFORCEUPDATE_CMD RS_CMD_PREFIX ".SYNFORCEUPDATE" 36 | #define RS_SYNDUMP_CMD RS_CMD_PREFIX ".SYNDUMP" 37 | 38 | #define RS_ALTER_CMD RS_CMD_PREFIX ".ALTER" 39 | 40 | #define RS_DEBUG RS_CMD_PREFIX ".DEBUG" 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/debug_commads.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * debug_commads.h 3 | * 4 | * Created on: Jun 27, 2018 5 | * Author: meir 6 | */ 7 | 8 | #ifndef SRC_DEBUG_COMMADS_H_ 9 | #define SRC_DEBUG_COMMADS_H_ 10 | 11 | #include "redismodule.h" 12 | #include "index_iterator.h" 13 | #include 14 | 15 | #define DUMP_INVIDX_COMMAND "DUMP_INVIDX" 16 | #define DUMP_NUMIDX_COMMAND "DUMP_NUMIDX" 17 | #define DUMP_TAGIDX_COMMAND "DUMP_TAGIDX" 18 | #define IDTODOCID_COMMAND "IDTODOCID" 19 | #define DOCIDTOID_COMMAND "DOCIDTOID" 20 | 21 | /** 22 | * debug command implementation 23 | * Currently three sub-commands available 24 | * 1. DUMP_INVIDX - which dump all doc ids in an inverted index 25 | * 2. DUMP_NUMIDX - which dump all doc ids in a numeric index 26 | * 3. DUMP_TAGIDX - which dump all doc ids in a tag index 27 | * 28 | */ 29 | int DebugCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); 30 | 31 | #endif /* SRC_DEBUG_COMMADS_H_ */ 32 | -------------------------------------------------------------------------------- /src/dep/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2013-2014 RAD Game Tools and Valve Software 2 | Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC 3 | 4 | All Rights Reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /src/dep/bloom/Makefile: -------------------------------------------------------------------------------- 1 | all: libbloom.a 2 | 3 | libbloom.a: sb.o contrib/MurmurHash2.o 4 | $(AR) -rcs $@ $^ 5 | -------------------------------------------------------------------------------- /src/dep/bloom/README.md: -------------------------------------------------------------------------------- 1 | This code is adopted from the [Rebloom](https://github.com/goodform/rebloom) 2 | module 3 | -------------------------------------------------------------------------------- /src/dep/bloom/contrib/MurmurHash2.c: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash2, by Austin Appleby 3 | 4 | // Note - This code makes a few assumptions about how your machine behaves - 5 | 6 | // 1. We can read a 4-byte value from any address without crashing 7 | // 2. sizeof(int) == 4 8 | 9 | // And it has a few limitations - 10 | 11 | // 1. It will not work incrementally. 12 | // 2. It will not produce the same results on little-endian and big-endian 13 | // machines. 14 | 15 | unsigned int murmurhash2(const void * key, int len, const unsigned int seed) 16 | { 17 | // 'm' and 'r' are mixing constants generated offline. 18 | // They're not really 'magic', they just happen to work well. 
19 | 20 | const unsigned int m = 0x5bd1e995; 21 | const int r = 24; 22 | 23 | // Initialize the hash to a 'random' value 24 | 25 | unsigned int h = seed ^ len; 26 | 27 | // Mix 4 bytes at a time into the hash 28 | 29 | const unsigned char * data = (const unsigned char *)key; 30 | 31 | while(len >= 4) 32 | { 33 | unsigned int k = *(unsigned int *)data; 34 | 35 | k *= m; 36 | k ^= k >> r; 37 | k *= m; 38 | 39 | h *= m; 40 | h ^= k; 41 | 42 | data += 4; 43 | len -= 4; 44 | } 45 | 46 | // Handle the last few bytes of the input array 47 | 48 | switch(len) 49 | { 50 | case 3: h ^= data[2] << 16; 51 | case 2: h ^= data[1] << 8; 52 | case 1: h ^= data[0]; 53 | h *= m; 54 | }; 55 | 56 | // Do a few final mixes of the hash to ensure the last few 57 | // bytes are well-incorporated. 58 | 59 | h ^= h >> 13; 60 | h *= m; 61 | h ^= h >> 15; 62 | 63 | return h; 64 | } 65 | -------------------------------------------------------------------------------- /src/dep/bloom/contrib/murmurhash2.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _BLOOM_MURMURHASH2 3 | #define _BLOOM_MURMURHASH2 4 | 5 | unsigned int murmurhash2(const void * key, int len, const unsigned int seed); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/dep/cndict/.gitignore: -------------------------------------------------------------------------------- 1 | !**/*.ini 2 | !**/*.lex 3 | !**/*.json 4 | -------------------------------------------------------------------------------- /src/dep/cndict/Makefile: -------------------------------------------------------------------------------- 1 | FRISO_INI := friso.ini 2 | FRISO_LEXDIR := lex 3 | PYTHON := python 4 | 5 | cndict_data.c: bundle_friso.py 6 | $(PYTHON) bundle_friso.py -i $(FRISO_INI) -d $(FRISO_LEXDIR) -o . 
7 | 8 | clean: 9 | rm -rf cndict_data.c 10 | -------------------------------------------------------------------------------- /src/dep/cndict/gen_simp_trad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This script takes a JSON dictionary containing traditional chinese characters 5 | as keys, and the simplified equivalents as values. It then outputs a header file 6 | appropriate for inclusion. The header output file contains an array, 7 | `Cn_T2S` which can be used as 8 | 9 | ``` 10 | simpChr = Cn_T2S[tradChr]; 11 | ``` 12 | 13 | the variable Cn_T2S_MinChr contains the smallest key in the dictionary, whereas 14 | Cn_T2S_MaxChr contains the largest key in the dictionary. 15 | """ 16 | 17 | import json 18 | import datetime 19 | import sys 20 | from argparse import ArgumentParser 21 | 22 | ap = ArgumentParser() 23 | ap.add_argument('-f', '--file', help='Chinese map file', required=True) 24 | ap.add_argument('-o', '--output', help='Where to place the output C source') 25 | 26 | options = ap.parse_args() 27 | 28 | with open(options.file, 'r') as fp: 29 | txt = json.load(fp) 30 | 31 | if options.output is None or ap.output == '-': 32 | ofp = sys.stdout 33 | else: 34 | ofp = open(ap.output, 'w') 35 | 36 | CP_MIN = 0xffffffff 37 | CP_MAX = 0x00 38 | 39 | for k in txt: 40 | v = ord(k) 41 | if v > CP_MAX: 42 | CP_MAX = v 43 | if v < CP_MIN: 44 | CP_MIN = v 45 | 46 | ofp.write(''' 47 | /** 48 | * Generated by {script} on {date} 49 | * 50 | */ 51 | #include 52 | 53 | static const uint16_t Cn_T2S_MinChr = {cp_min}; 54 | static const uint16_t Cn_T2S_MaxChr = {cp_max}; 55 | 56 | static uint16_t Cn_T2S[{cap}]={{ 57 | '''.format( 58 | script=' '.join(sys.argv), 59 | date=datetime.datetime.now(), 60 | cp_min=CP_MIN, 61 | cp_max=CP_MAX, 62 | cap=CP_MAX+1)) 63 | 64 | 65 | 66 | 67 | num_items = 0 68 | ITEMS_PER_LINE = 5 69 | 70 | for trad, simp in txt.items(): 71 | ix = ord(trad) 72 | val = ord(simp) 73 | 
ofp.write(' [0x{:X}]=0x{:X},'.format(ix, val)) 74 | num_items += 1 75 | if num_items >= ITEMS_PER_LINE: 76 | ofp.write('\n') 77 | num_items = 0 78 | 79 | ofp.write('};\n') 80 | ofp.flush() -------------------------------------------------------------------------------- /src/dep/cndict/lex/friso.lex.ini: -------------------------------------------------------------------------------- 1 | #friso lexicon configure file. 2 | # @email chenxin619315@gmail.com 3 | # @date 2012-12-19 4 | #main lexion 5 | __LEX_CJK_WORDS__ :[ 6 | lex-main.lex; 7 | lex-admin.lex; 8 | lex-chars.lex; 9 | lex-cn-mz.lex; 10 | lex-cn-place.lex; 11 | lex-company.lex; 12 | lex-festival.lex; 13 | lex-flname.lex; 14 | lex-food.lex; 15 | lex-lang.lex; 16 | lex-nation.lex; 17 | lex-net.lex; 18 | lex-org.lex; 19 | lex-touris.lex; 20 | #add more here 21 | ] 22 | #single chinese unit lexicon 23 | __LEX_CJK_UNITS__ :[ 24 | lex-units.lex; 25 | ] 26 | #chinese and english mixed word lexicon like "b超". 27 | __LEX_ECM_WORDS__:[ 28 | lex-ecmixed.lex; 29 | ] 30 | #english and chinese mixed word lexicon like "卡拉ok". 31 | __LEX_CEM_WORDS__:[ 32 | lex-cemixed.lex; 33 | ] 34 | #chinese last name lexicon. 35 | __LEX_CN_LNAME__:[ 36 | lex-lname.lex; 37 | ] 38 | #single name words lexicon. 39 | __LEX_CN_SNAME__:[ 40 | lex-sname.lex; 41 | ] 42 | #first word of a double chinese name. 43 | __LEX_CN_DNAME1__:[ 44 | lex-dname-1.lex; 45 | ] 46 | #second word of a double chinese name. 47 | __LEX_CN_DNAME2__:[ 48 | lex-dname-2.lex; 49 | ] 50 | #chinese last name decorate word. 51 | __LEX_CN_LNA__:[ 52 | lex-ln-adorn.lex; 53 | ] 54 | #stopwords lexicon 55 | __LEX_STOPWORDS__:[ 56 | lex-stopword.lex; 57 | ] 58 | #english and punctuation mixed words lexicon. 
59 | __LEX_ENPUN_WORDS__:[ 60 | lex-en-pun.lex; 61 | ] 62 | #english words(for synonyms words) 63 | __LEX_EN_WORDS__:[ 64 | lex-en.lex; 65 | ] 66 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-admin.lex: -------------------------------------------------------------------------------- 1 | 人事部/人事管理部门,人事管理部 2 | 人事管理部/人事管理部门,人事部 3 | 信息产业部/null 4 | 农业部/null 5 | 医管局/医疗管理部门,医疗管理部 6 | 医疗管理部/医疗管理部门,医管局 7 | 医疗管理部门/医管局,医疗管理部 8 | 发改委/null 9 | 国土资源部/null 10 | 国防部/人民武装力量部,军事部,防卫厅 11 | 军事部/人民武装力量部,防卫厅 12 | 外交部/国务院,政治部,对外关系部,外务省 13 | 外交部长/null 14 | 教育部/null 15 | 文化部/null 16 | 民政部/null 17 | 能源部/null 18 | 财政部/null 19 | 铁道部/null 20 | 防卫厅/null 21 | 防卫省/null 22 | 革命委员会/null 23 | 交通运输部/null 24 | 对外经济贸易部/null 25 | 技术部/null 26 | 总装备部/null 27 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-cemixed.lex: -------------------------------------------------------------------------------- 1 | #中文英文混合词词库 2 | 卡拉ok/null 3 | 漂亮mm/null 4 | 拳皇ova/拳皇动漫 5 | 奇都ktv/null 6 | 哆啦a梦/null 7 | 高3/高三 8 | 高2/高二 9 | 高1/高一 10 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-company.lex: -------------------------------------------------------------------------------- 1 | 央视/null 2 | 电信/null 3 | 移动/null 4 | 网通/null 5 | 联通/null 6 | 铁通/null 7 | 百度/null 8 | 环球网/null 9 | 长城网/null 10 | 新浪/null 11 | 腾讯/null 12 | 搜搜/soso 13 | 谷歌/null 14 | 雅虎/null 15 | 微软/null 16 | 中关村/null 17 | 搜狐/null 18 | 网易/null 19 | 硅谷/null 20 | 维基百科/null 21 | 巨人网络/null 22 | 阿里巴巴/null 23 | 阿里旺旺/旺旺 24 | 旺旺/null 25 | 淘宝/null 26 | 赶集网/null 27 | 猪八戒网/null 28 | 唯你英语/null 29 | 拉手网/null 30 | 百贯福泰/null 31 | 汇划算/null 32 | 汇划算网/null 33 | 聚划算/null 34 | 天猫/null 35 | 天猫网/null 36 | 亚马逊/null 37 | 亚马逊网/null 38 | 拍拍/null 39 | 拍拍网/null 40 | 京东/null 41 | 京东商城/null 42 | 返利网/null 43 | 支付宝/null 44 | 支付宝担保/null 45 | 支付宝及时到帐/null 46 | 支付宝双工能/null 47 | 财付通/null 48 | 财付通及时到帐/null 49 | 网银在线/null 50 | 
苏宁易购/null 51 | 苏宁电器/null 52 | 仙童公司/null 53 | 开源中国/null 54 | 畅想网络/null 55 | 快乐大本营/null 56 | 越策越开心/null 57 | 超级男声/null 58 | 超男/null 59 | 超级女声/超女 60 | 超女/超级女声 61 | 好声音/null 62 | 快乐男声/快男 63 | 快男/快乐男声 64 | 快乐女声/null 65 | 快女/null 66 | 德克士/null 67 | 肯德基/null 68 | 奥利奥/null 69 | 回头客/null 70 | 苏波尔/null 71 | 苏宁/null 72 | 苏宁电器/null 73 | 苏宁易购/null 74 | 中央银行/null 75 | 人民银行/null 76 | 工商银行/null 77 | 农业银行/null 78 | 中国银行/null 79 | 建设银行/null 80 | 交通银行/null 81 | 华夏银行/null 82 | 光大银行/null 83 | 招商银行/null 84 | 中信银行/null 85 | 兴业银行/null 86 | 民生银行/null 87 | 深圳发展银行/null 88 | 广东发展银行/null 89 | 上海浦东发展银行/null 90 | 恒丰银行/null 91 | 农业发展银行/null 92 | 国家进出口信贷银行/null 93 | 国家开发银行/null 94 | 北京商业银行/null 95 | 上海银行/null 96 | 济南商业银行/null 97 | 信用社/null 98 | 农村信用社/null 99 | 邮政局/null 100 | 邮政储蓄银行/null 101 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-dname-1.lex: -------------------------------------------------------------------------------- 1 | #双姓名首字词库 2 | 建 3 | 小 4 | 晓 5 | 文 6 | 志 7 | 国 8 | 玉 9 | 丽 10 | 永 11 | 海 12 | 春 13 | 金 14 | 明 15 | 新 16 | 德 17 | 秀 18 | 红 19 | 亚 20 | 伟 21 | 雪 22 | 俊 23 | 桂 24 | 爱 25 | 美 26 | 世 27 | 正 28 | 庆 29 | 学 30 | 家 31 | 立 32 | 淑 33 | 振 34 | 云 35 | 华 36 | 光 37 | 惠 38 | 兴 39 | 天 40 | 长 41 | 艳 42 | 慧 43 | 利 44 | 宏 45 | 佳 46 | 瑞 47 | 凤 48 | 荣 49 | 秋 50 | 继 51 | 嘉 52 | 卫 53 | 燕 54 | 思 55 | 维 56 | 少 57 | 福 58 | 忠 59 | 宝 60 | 子 61 | 成 62 | 月 63 | 洪 64 | 东 65 | 一 66 | 泽 67 | 林 68 | 大 69 | 素 70 | 旭 71 | 宇 72 | 智 73 | 锦 74 | 冬 75 | 玲 76 | 雅 77 | 伯 78 | 翠 79 | 传 80 | 启 81 | 剑 82 | 安 83 | 树 84 | 良 85 | 中 86 | 梦 87 | 广 88 | 昌 89 | 元 90 | 万 91 | 清 92 | 静 93 | 友 94 | 宗 95 | 兆 96 | 丹 97 | 克 98 | 彩 99 | 绍 100 | 喜 101 | 远 102 | 朝 103 | 敏 104 | 培 105 | 胜 106 | 祖 107 | 先 108 | 菊 109 | 士 110 | 向 111 | 有 112 | 连 113 | 军 114 | 健 115 | 巧 116 | 耀 117 | 莉 118 | 英 119 | 方 120 | 和 121 | 仁 122 | 孝 123 | 梅 124 | 汉 125 | 兰 126 | 松 127 | 水 128 | 江 129 | 益 130 | 开 131 | 景 132 | 运 133 | 贵 134 | 祥 135 | 青 136 | 芳 137 | 碧 138 | 婷 139 | 龙 140 | 鹏 141 | 自 
142 | 顺 143 | 双 144 | 书 145 | 生 146 | 义 147 | 跃 148 | 银 149 | 佩 150 | 雨 151 | 保 152 | 贤 153 | 仲 154 | 鸿 155 | 浩 156 | 加 157 | 定 158 | 炳 159 | 飞 160 | 锡 161 | 柏 162 | 发 163 | 超 164 | 道 165 | 怀 166 | 进 167 | 其 168 | 富 169 | 平 170 | 全 171 | 阳 172 | 吉 173 | 茂 174 | 彦 175 | 诗 176 | 洁 177 | 润 178 | 承 179 | 治 180 | 焕 181 | 如 182 | 君 183 | 增 184 | 善 185 | 希 186 | 根 187 | 应 188 | 勇 189 | 宜 190 | 守 191 | 会 192 | 凯 193 | 育 194 | 湘 195 | 凌 196 | 本 197 | 敬 198 | 博 199 | 延 200 | 乐 201 | 三 202 | 高 203 | 熙 204 | 逸 205 | 幸 206 | 灵 207 | 宣 208 | 才 209 | 述 210 | 化 211 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-dname-2.lex: -------------------------------------------------------------------------------- 1 | #双姓名尾字词库 2 | 华 3 | 平 4 | 明 5 | 英 6 | 军 7 | 林 8 | 萍 9 | 芳 10 | 玲 11 | 红 12 | 生 13 | 霞 14 | 梅 15 | 文 16 | 荣 17 | 珍 18 | 兰 19 | 娟 20 | 峰 21 | 琴 22 | 云 23 | 辉 24 | 东 25 | 龙 26 | 敏 27 | 伟 28 | 强 29 | 丽 30 | 春 31 | 杰 32 | 燕 33 | 民 34 | 君 35 | 波 36 | 国 37 | 芬 38 | 清 39 | 祥 40 | 斌 41 | 婷 42 | 飞 43 | 良 44 | 忠 45 | 新 46 | 凤 47 | 锋 48 | 成 49 | 勇 50 | 刚 51 | 玉 52 | 元 53 | 宇 54 | 海 55 | 兵 56 | 安 57 | 庆 58 | 涛 59 | 鹏 60 | 亮 61 | 青 62 | 阳 63 | 艳 64 | 松 65 | 江 66 | 莲 67 | 娜 68 | 兴 69 | 光 70 | 德 71 | 武 72 | 香 73 | 俊 74 | 秀 75 | 慧 76 | 雄 77 | 才 78 | 宏 79 | 群 80 | 琼 81 | 胜 82 | 超 83 | 彬 84 | 莉 85 | 中 86 | 山 87 | 富 88 | 花 89 | 宁 90 | 利 91 | 贵 92 | 福 93 | 发 94 | 义 95 | 蓉 96 | 喜 97 | 娥 98 | 昌 99 | 仁 100 | 志 101 | 全 102 | 宝 103 | 权 104 | 美 105 | 琳 106 | 建 107 | 金 108 | 贤 109 | 星 110 | 丹 111 | 根 112 | 和 113 | 珠 114 | 康 115 | 菊 116 | 琪 117 | 坤 118 | 泉 119 | 秋 120 | 静 121 | 佳 122 | 顺 123 | 源 124 | 珊 125 | 达 126 | 欣 127 | 如 128 | 莹 129 | 章 130 | 浩 131 | 勤 132 | 芹 133 | 容 134 | 友 135 | 芝 136 | 豪 137 | 洁 138 | 鑫 139 | 惠 140 | 洪 141 | 旺 142 | 虎 143 | 远 144 | 妮 145 | 森 146 | 妹 147 | 南 148 | 雯 149 | 奇 150 | 健 151 | 卿 152 | 虹 153 | 娇 154 | 媛 155 | 怡 156 | 铭 157 | 川 158 | 进 159 | 博 160 | 智 161 | 来 162 | 琦 163 | 学 164 | 聪 165 | 洋 166 | 乐 167 | 年 168 | 翔 
169 | 然 170 | 栋 171 | 凯 172 | 颖 173 | 鸣 174 | 丰 175 | 瑞 176 | 奎 177 | 立 178 | 堂 179 | 威 180 | 雪 181 | 鸿 182 | 晶 183 | 桂 184 | 凡 185 | 娣 186 | 先 187 | 洲 188 | 毅 189 | 雅 190 | 月 191 | 旭 192 | 田 193 | 晖 194 | 方 195 | 恒 196 | 亚 197 | 泽 198 | 风 199 | 银 200 | 高 201 | 贞 202 | 九 203 | 薇 204 | 钰 205 | 城 206 | 宜 207 | 厚 208 | 耐 209 | 声 210 | 腾 211 | 宸 212 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-ecmixed.lex: -------------------------------------------------------------------------------- 1 | #英文中文混合字, 注意英文字符均为小写 2 | a咖/主角 3 | a片/毛片,av 4 | a座/null 5 | a股/股票 6 | a型/null 7 | a杯/a罩杯 8 | a罩杯/a杯 9 | a计划/null 10 | aa制/null 11 | ab型/null 12 | ab档案/null 13 | a美a/null 14 | a梦/null 15 | x-射线/null 16 | # 17 | b座/null 18 | b股/null 19 | b型/null 20 | b树/null 21 | b计划/null 22 | b超/null 23 | b杯/b罩杯 24 | b罩杯/b杯 25 | bb机/call机 26 | bb仔/null 27 | bp机/null 28 | # 29 | c盘/null 30 | c座/null 31 | c语言/null 32 | c杯/c罩杯 33 | c罩杯/c杯 34 | cd盒/null 35 | cd机/null 36 | call机/bb机 37 | # 38 | d盘/null 39 | d座/null 40 | d版/null 41 | d杯/d罩杯 42 | d罩杯/d杯 43 | dna鉴定/null 44 | # 45 | e盘/null 46 | e座/null 47 | e化/null 48 | e通/null 49 | e仔/null 50 | e语言/易语言 51 | e杯/e罩杯 52 | e罩杯/e杯 53 | # 54 | f盘/null 55 | f座/null 56 | f杯/f罩杯 57 | f罩杯/f杯 58 | # 59 | g盘/null 60 | g点/null 61 | g杯/g罩杯 62 | g罩杯/g杯 63 | # 64 | h盘/null 65 | h股/null 66 | h杯/h罩杯 67 | h罩杯/h杯 68 | # 69 | i盘/null 70 | ic卡/null 71 | ip卡/null 72 | ip段/null 73 | ip电话/null 74 | ip地址/null 75 | it行业/null 76 | it民工/码农 77 | it男/null 78 | # 79 | j盘/null 80 | # 81 | k仔/null 82 | k盘/null 83 | k党/null 84 | k书/看书,搞学习 85 | k粉/氯胺酮 86 | k歌/唱歌,嗨歌 87 | k他命/null 88 | k歌之王/null 89 | # 90 | n年/很久 91 | # 92 | o型/null 93 | # 94 | pc机/null 95 | ph值/null 96 | # 97 | sim卡/null 98 | # 99 | u盘/null 100 | u形/null 101 | usb手指/null 102 | usb接口/null 103 | usb插口/null 104 | usb记忆棒/null 105 | # 106 | visa卡/null 107 | v沟/null 108 | # 109 | z盘/null 110 | # 111 | q版/null 112 | qq号/null 113 | q立方/null 114 | # 115 | rss订阅/null 116 | # 
117 | t盘/null 118 | # 119 | x光/null 120 | x光线/x射线 121 | x射线/x光线 122 | γ射线/null 123 | # 124 | t恤衫/t恤 125 | t恤/t恤衫 126 | t字帐/null 127 | t型台/null 128 | # 129 | 250g硬盘/null 130 | 160g硬盘/null 131 | 500g硬盘/null 132 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-en-pun.lex: -------------------------------------------------------------------------------- 1 | #英文和标点组合成的词,英文字母统一使用小写。 2 | c++ 3 | g++ 4 | c# 5 | i++ 6 | x- 7 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-en.lex: -------------------------------------------------------------------------------- 1 | #英文词条, 做英文词语同义词追加用 2 | decimal/decimals,fraction 3 | spirit/mind 4 | admire/appreciate,like,love,enjoy 5 | chenxin12/chenxin,lionsoul 6 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-flname.lex: -------------------------------------------------------------------------------- 1 | #西方姓氏词库 2 | 亚历山大/null 3 | 克林顿/null 4 | 克里斯汀/null 5 | 布什/null 6 | 布莱尔/null 7 | 科特勒/null 8 | 约翰/null 9 | 约翰逊/null 10 | 蒂娜/null 11 | 安妮/null 12 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-food.lex: -------------------------------------------------------------------------------- 1 | 雪碧/null 2 | 可口可乐/null 3 | 冰红茶/null 4 | 奶茶/null 5 | 花生奶/null 6 | 芬达/null 7 | 珍珠奶茶/null 8 | 达利源/null 9 | 肯德鸡/null 10 | 炸薯条/null 11 | 麻辣烫/null 12 | 麻辣干锅/null 13 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-lang.lex: -------------------------------------------------------------------------------- 1 | 中文/国语 2 | 国语/null 3 | 台湾话/台语 4 | 台语/台湾话 5 | 客家话/null 6 | 汉字/null 7 | 汉语/国语,中文 8 | 法文/法文 9 | 法语/法语 10 | 福建话/null 11 | 粤语/广东话 12 | 美语/英语,英文 13 | 英文/英语 14 | 英语/英文 15 | 西班牙语/null 16 | 闽南语/null 17 | 泰语/null 18 | 西班牙语/null 19 | 俄罗斯语/null 20 | 拉丁语/null 21 | 
-------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-ln-adorn.lex: -------------------------------------------------------------------------------- 1 | #姓氏修饰,例如:老陈,小陈,中的老,小 2 | #如果他已经是姓氏(lex-lname.lex中的词),则无须放在这里。 3 | 老 4 | 小 5 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-nation.lex: -------------------------------------------------------------------------------- 1 | 东非/null 2 | 中华/null 3 | 中华/null 4 | 中华人民共和国/null 5 | 中华民国/null 6 | 中国/null 7 | 中國/null 8 | 中非/null 9 | 乌克兰/null 10 | 也门/null 11 | 以色列/null 12 | 伊拉克/null 13 | 伊朗/null 14 | 俄罗斯/null 15 | 分类/null 16 | 加拿大/null 17 | 南非/null 18 | 古巴/null 19 | 台湾/null 20 | 埃及/null 21 | 塞尔维亚/null 22 | 墨西哥/null 23 | 威尔士/null 24 | 尼日利亚/null 25 | 巴比伦/null 26 | 希腊/null 27 | 德国/null 28 | 德意志/null 29 | 意大利/null 30 | 捷克/null 31 | 日本/null 32 | 朝鲜/null 33 | 比利时/null 34 | 法兰西/null 35 | 法国/null 36 | 波兰/null 37 | 波黑/null 38 | 瑞典/null 39 | 瑞士/null 40 | 白俄罗斯/null 41 | 缅甸/null 42 | 美利坚/null 43 | 美利坚合众国/null 44 | 美国/null 45 | 老挝/null 46 | 苏格兰/null 47 | 苏联/null 48 | 英国/null 49 | 英格兰/null 50 | 葡萄牙/null 51 | 蒙古/null 52 | 西班牙/null 53 | 越南/null 54 | 韩国/null 55 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-net.lex: -------------------------------------------------------------------------------- 1 | 油条哥/null 2 | 活雷锋/null 3 | 夕阳红/null 4 | 帮扶村/null 5 | 后援会/null 6 | 复炸油/null 7 | 献血哥/null 8 | 放心姐/null 9 | 啃老族/null 10 | 特训班/null 11 | 平头男/null 12 | 爆头哥/null 13 | 楼主/null 14 | 有两把刷子/null 15 | 非典/null 16 | 微信/null 17 | 微博/null 18 | 吊丝/null 19 | 高富帅/null 20 | 矮穷挫/null 21 | 白富美/null 22 | 狮子的魂/null 23 | 仓老师/仓井空 24 | 郭德纲/null 25 | 单田芳/null 26 | 李笑笑/null 27 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-org.lex: -------------------------------------------------------------------------------- 1 | 上海合作组织/null 2 | 世卫/null 3 | 
世界卫生组织/null 4 | 世界银行/null 5 | 东盟/null 6 | 亚太经合组织/null 7 | 人权理事会/null 8 | 六方会谈/null 9 | 北约/null 10 | 哈马斯/null 11 | 安全理事会/null 12 | 安理会/null 13 | 欧佩克/null 14 | 红十字会/null 15 | 联合国/null 16 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-sname.lex: -------------------------------------------------------------------------------- 1 | #中文单名词库 2 | 敏 3 | 伟 4 | 勇 5 | 军 6 | 斌 7 | 静 8 | 丽 9 | 涛 10 | 芳 11 | 杰 12 | 萍 13 | 强 14 | 俊 15 | 明 16 | 燕 17 | 磊 18 | 玲 19 | 华 20 | 平 21 | 鹏 22 | 健 23 | 波 24 | 红 25 | 丹 26 | 辉 27 | 超 28 | 艳 29 | 莉 30 | 刚 31 | 娟 32 | 峰 33 | 婷 34 | 亮 35 | 洁 36 | 颖 37 | 琳 38 | 英 39 | 慧 40 | 飞 41 | 霞 42 | 浩 43 | 凯 44 | 宇 45 | 毅 46 | 林 47 | 佳 48 | 云 49 | 莹 50 | 娜 51 | 晶 52 | 洋 53 | 文 54 | 鑫 55 | 欣 56 | 琴 57 | 宁 58 | 琼 59 | 兵 60 | 青 61 | 琦 62 | 翔 63 | 彬 64 | 锋 65 | 阳 66 | 璐 67 | 旭 68 | 蕾 69 | 剑 70 | 虹 71 | 蓉 72 | 建 73 | 倩 74 | 梅 75 | 宏 76 | 威 77 | 博 78 | 君 79 | 力 80 | 龙 81 | 晨 82 | 薇 83 | 雪 84 | 琪 85 | 欢 86 | 荣 87 | 江 88 | 炜 89 | 成 90 | 庆 91 | 冰 92 | 东 93 | 帆 94 | 雷 95 | 楠 96 | 锐 97 | 进 98 | 海 99 | 凡 100 | 巍 101 | 维 102 | 迪 103 | 媛 104 | 玮 105 | 杨 106 | 群 107 | 瑛 108 | 悦 109 | 春 110 | 瑶 111 | 婧 112 | 兰 113 | 茜 114 | 松 115 | 爽 116 | 立 117 | 瑜 118 | 睿 119 | 晖 120 | 聪 121 | 帅 122 | 瑾 123 | 骏 124 | 雯 125 | 晓 126 | 昊 127 | 勤 128 | 新 129 | 瑞 130 | 岩 131 | 星 132 | 忠 133 | 志 134 | 怡 135 | 坤 136 | 康 137 | 航 138 | 利 139 | 畅 140 | 坚 141 | 雄 142 | 智 143 | 萌 144 | 哲 145 | 岚 146 | 洪 147 | 捷 148 | 珊 149 | 恒 150 | 靖 151 | 清 152 | 扬 153 | 昕 154 | 乐 155 | 武 156 | 玉 157 | 诚 158 | 菲 159 | 锦 160 | 凤 161 | 珍 162 | 晔 163 | 妍 164 | 璇 165 | 胜 166 | 菁 167 | 科 168 | 芬 169 | 露 170 | 越 171 | 彤 172 | 曦 173 | 义 174 | 良 175 | 鸣 176 | 芸 177 | 方 178 | 月 179 | 铭 180 | 光 181 | 震 182 | 冬 183 | 源 184 | 政 185 | 虎 186 | 莎 187 | 彪 188 | 蓓 189 | 钢 190 | 凌 191 | 奇 192 | 卫 193 | 彦 194 | 烨 195 | 可 196 | 黎 197 | 川 198 | 淼 199 | 惠 200 | 祥 201 | 然 202 | 三 203 | 逗 204 | 高 205 | 潇 206 | 正 207 | 硕 208 | 
-------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-touris.lex: -------------------------------------------------------------------------------- 1 | 世博园/null 2 | 世博会/null 3 | 长城/null 4 | 黄山/null 5 | 衡山/null 6 | 华山/null 7 | 泰山/null 8 | -------------------------------------------------------------------------------- /src/dep/cndict/lex/lex-units.lex: -------------------------------------------------------------------------------- 1 | #中文单字单位词库 2 | #长度 3 | 米 4 | 寸 5 | 尺 6 | 丈 7 | 里 8 | #时间 9 | 年 10 | 月 11 | 日 12 | 时 13 | #分 14 | 秒 15 | #币 16 | 元 17 | 角 18 | #容量 19 | 升 20 | 斗 21 | 石 22 | 瓶 23 | 袋 24 | 盒 25 | #重量 26 | 吨 27 | 克 28 | 斤 29 | 两 30 | 担 31 | #地积 32 | 亩 33 | 顷 34 | #其他 35 | 折 36 | 件 37 | 番 38 | ℃ 39 | ℉ 40 | -------------------------------------------------------------------------------- /src/dep/cndict/read_friso.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import zlib 3 | import struct 4 | from argparse import ArgumentParser 5 | from cStringIO import StringIO 6 | 7 | ap = ArgumentParser() 8 | ap.add_argument('-f', '--file', default='CNDICT.out') 9 | 10 | opts = ap.parse_args() 11 | fp = open(opts.file) 12 | 13 | # Read the header/version 14 | version = struct.unpack('!I', fp.read(4))[0] 15 | print "VERSION", version 16 | 17 | TYPE_MASK = 0x1F 18 | F_SYNS = 0x01 << 5 19 | F_FREQS = 0x02 << 5 20 | 21 | 22 | def print_header(hdrbyte): 23 | print "Type: {0}. 
Has Syns={1}, Has Freqs={2}".format( 24 | hdrbyte & TYPE_MASK, 25 | bool(hdrbyte & F_SYNS), 26 | bool(hdrbyte & F_FREQS) 27 | ) 28 | 29 | 30 | def read_zstr(fp): 31 | ret = bytearray() 32 | while True: 33 | s = fp.read(1) 34 | if len(s) == 0 or ord(s) == 0: 35 | return ret.decode('utf-8') 36 | ret += s 37 | 38 | 39 | def read_entry(fp): 40 | firstbyte = fp.read(1) 41 | if len(firstbyte) == 0: 42 | raise EOFError() 43 | 44 | hdrinfo = ord(firstbyte) 45 | print_header(hdrinfo) 46 | # Read up to the first buf 47 | term = read_zstr(fp) 48 | syns = [] 49 | freqs = 0 50 | if hdrinfo & F_SYNS: 51 | # Check the number of syns we're to read 52 | syncount = struct.unpack("!h", fp.read(2))[0] 53 | for _ in range(syncount): 54 | syns.append(read_zstr(fp)) 55 | if hdrinfo & F_FREQS: 56 | freqs = struct.unpack("!I", fp.read(4))[0] 57 | 58 | return term, syns, freqs 59 | 60 | sio = StringIO(zlib.decompress(fp.read())) 61 | while True: 62 | term, syns, freqs = read_entry(sio) 63 | print term, freqs -------------------------------------------------------------------------------- /src/dep/friso/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | IF ("${CMAKE_C_COMPILER_ID}" MATCHES "Clang") 2 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-tautological-compare") 3 | ENDIF() 4 | 5 | ADD_LIBRARY(friso OBJECT 6 | friso.c 7 | friso_array.c 8 | friso_hash.c 9 | friso_lexicon.c 10 | friso_link.c 11 | friso_string.c 12 | friso_ctype.c 13 | friso_UTF8.c 14 | friso_GBK.c) -------------------------------------------------------------------------------- /src/dep/friso/Makefile.RediSearch: -------------------------------------------------------------------------------- 1 | SOURCEDIR = . 
# Directory holding the friso sources; defaults to the current directory
# unless it has already been set.
SOURCEDIR ?= .

# Expand the source and object lists once, at parse time.
CC_SOURCES := $(wildcard $(SOURCEDIR)/*.c)
CC_OBJECTS := $(sort $(patsubst $(SOURCEDIR)/%.c,$(SOURCEDIR)/%.o,$(CC_SOURCES)))

.SUFFIXES: .c .cc .o

# `all` and `clean` are commands, not files.
.PHONY: all clean

# Do not leave a half-written archive behind when a recipe fails.
.DELETE_ON_ERROR:

all: libfriso.a

# Static library built from every C source in SOURCEDIR.  The `s` modifier
# writes the archive symbol index so no separate ranlib step is needed.
libfriso.a: $(CC_OBJECTS)
	$(AR) rcs $@ $^

clean:
	$(RM) -r *.xo *.so *.o *.a
#ifndef AVZ_HLL_H
#define AVZ_HLL_H

#include <stddef.h> /* size_t */
#include <stdint.h> /* uint8_t, uint32_t */

/*
 * State of one HLL counter.
 * NOTE(review): field meanings below are inferred from the names only --
 * confirm against hll.c.
 */
struct HLL {
  uint8_t bits; /* presumably the precision (register index width) */

  size_t size;        /* presumably the number of entries in `registers` */
  uint8_t *registers; /* register array */
};

/* Initialise `hll` for the given `bits`; returns an int status code. */
extern int hll_init(struct HLL *hll, uint8_t bits);
/* Initialise `hll` from `size` bytes of previously stored `registers`. */
extern int hll_load(struct HLL *hll, const void *registers, size_t size);
/* Release whatever `hll` owns. */
extern void hll_destroy(struct HLL *hll);
/* Merge `src` into `dst`; returns an int status code. */
extern int hll_merge(struct HLL *dst, const struct HLL *src);
/* Add the `size`-byte buffer `buf` to the counter. */
extern void hll_add(struct HLL *hll, const void *buf, size_t size);
/* Add an already computed 32-bit hash `h` to the counter. */
extern void hll_add_hash(struct HLL *hll, uint32_t h);
/* Return the current estimate as a double. */
extern double hll_count(const struct HLL *hll);

/* Internal helper exposed by the implementation. */
extern uint32_t _hll_hash(const struct HLL *hll);

#endif /* AVZ_HLL_H */
# find the OS
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')

# Compile flags for non-osx / osx.
# Both variables are only defaults (?=) and can be overridden from the
# command line or the environment.
ifneq ($(uname_S),Darwin)
  CFLAGS ?= -W -Wall -fno-common -g -ggdb -fPIC -std=c99 -O2
  CPPFLAGS ?= -W -Wall -fno-common -g -ggdb
else
  CFLAGS ?= -W -Wall -dynamic -fno-common -g -fPIC -ggdb -std=c99 -O2
  CPPFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -O2
endif

# Expand the source and object lists once, at parse time.
SOURCEDIR := .
CC_SOURCES := $(wildcard $(SOURCEDIR)/*.c)
CC_OBJECTS := $(sort $(patsubst $(SOURCEDIR)/%.c,$(SOURCEDIR)/%.o,$(CC_SOURCES)))

.SUFFIXES: .c .cc .o

# `all` and `clean` are commands, not files.
.PHONY: all clean

# Do not leave a half-written archive behind when a recipe fails.
.DELETE_ON_ERROR:

all: libnu.a

# Static library built from every C source in SOURCEDIR; the objects are
# produced by make's built-in %.o: %.c rule using CFLAGS/CPPFLAGS above.
libnu.a: $(CC_OBJECTS)
	$(AR) rcs $@ $^

clean:
	$(RM) -r *.xo *.so *.o *.a
/** Validate one CESU-8 sequence at the start of @p encoded.
 *
 * @param encoded buffer to inspect
 * @param max_len number of readable bytes in @p encoded
 * @return 6 for a well-formed surrogate pair, 0 when the buffer is empty or
 * the pair is truncated/malformed, otherwise whatever
 * utf8_validread_basic() reports for the sequence
 */
int nu_cesu8_validread(const char *encoded, size_t max_len) {
	const unsigned char *up = (const unsigned char *)(encoded);

	/* nothing readable: do not touch the buffer at all */
	if (max_len == 0) {
		return 0;
	}

	/* there seems to be no way to detect a misplaced CESU-8 trail
	 * surrogate on its own: it would form a valid UTF-8 sequence
	 * greater than U+10000 */

	if (*(up) == 0xED) {
		/* 0xED always needs continuation bytes; with a single readable
		 * byte the sequence is truncated, and *(up + 1) must not be read */
		if (max_len < 2) {
			return 0;
		}

		/* 6-bytes sequence
		 *
		 * 11101101 followed by 1010xxxx should be
		 * then followed by xxxxxxxx 11101101 1011xxxx xxxxxxxx */
		if ((*(up + 1) & 0xF0) == 0xA0) {
			if (max_len < 6) {
				return 0;
			}

			if (*(up + 3) != 0xED || (*(up + 4) & 0xF0) != 0xB0) {
				return 0;
			}

			return 6;
		}
	}

	return utf8_validread_basic(encoded, max_len);
}
#ifndef NU_DEFINES_H
#define NU_DEFINES_H

/** @file
 * Common preprocessor definitions: the NU_EXPORT decoration, the
 * implemented Unicode version and the NU_UNLIMITED marker.
 */

/** @defgroup defines Defines
 */

/* NU_EXPORT is only defined here when the build has not supplied its own. */
#ifndef NU_EXPORT

/* Windows: decorate with dllexport. */
# ifdef _WIN32
# define NU_EXPORT __declspec(dllexport)

/* GCC 4 or newer: use symbol visibility attributes -- hidden when
 * NU_BUILD_STATIC is defined, default otherwise. */
# elif __GNUC__ >= 4
# ifdef NU_BUILD_STATIC
# define NU_EXPORT __attribute__ ((visibility ("hidden")))
# else
# define NU_EXPORT __attribute__ ((visibility ("default")))
# endif

/* Any other compiler: no decoration. */
# else
# define NU_EXPORT
# endif

#endif /* NU_EXPORT */

/** Integer version of Unicode specification implemented. 900 == 9.0.0
 *
 * @ingroup defines
 */
#define NU_UNICODE_VERSION 900
/** Special limit value to unset limit on string. Used internally by nunicode.
 *
 * @ingroup defines
 */
#define NU_UNLIMITED ((const void *)(-1))

/* Under MSVC, ssize_t is mapped onto ptrdiff_t.
 * NOTE(review): presumably because the MSVC headers do not provide
 * ssize_t -- confirm. */
#ifdef _MSC_VER
#define ssize_t ptrdiff_t
#endif

#endif /* NU_DEFINES_H */
/* must stay identical to the value used when the MPH tables were generated */
#define PRIME 0x01000193

/** Mix a codepoint into a seed
 *
 * A zero seed is replaced by PRIME before the XOR.
 */
static inline
uint32_t _nu_hash(uint32_t hash, uint32_t codepoint) {
	const uint32_t seed = (hash != 0 ? hash : PRIME);

	return seed ^ codepoint;
}

/** Get hash value of Unicode codepoint
 *
 * First-level hash selects an entry of G. A negative entry encodes the
 * final slot directly as (-entry - 1); a non-negative entry is used as
 * the seed of a second hash, reduced modulo G_SIZE.
 */
static inline
uint32_t nu_mph_hash(const int16_t *G, size_t G_SIZE,
	uint32_t codepoint) {

	const size_t slot = _nu_hash(0, codepoint) % G_SIZE;
	const int16_t displacement = G[slot];

	if (displacement >= 0) {
		return (uint32_t)(_nu_hash((uint32_t)(displacement), codepoint) % G_SIZE);
	}

	return (uint32_t)(-displacement - 1);
}

/** Lookup value in MPH
 *
 * Returns V_I[hash] when V_C[hash] holds exactly this codepoint, 0 otherwise.
 */
static inline
uint32_t nu_mph_lookup(const uint32_t *V_C, const uint16_t *V_I,
	uint32_t codepoint, uint32_t hash) {

	/* a minimal perfect hash always maps codepoints outside of its
	 * original set onto some slot too, so the stored codepoint is
	 * compared to detect such collisions */
	if (V_C[hash] == codepoint) {
		return V_I[hash];
	}

	return 0;
}
char *__nu_final_sigma = "ς"; 12 | 13 | const char* nu_tolower(uint32_t codepoint) { 14 | return _nu_to_something(codepoint, NU_TOLOWER_G, NU_TOLOWER_G_SIZE, 15 | NU_TOLOWER_VALUES_C, NU_TOLOWER_VALUES_I, NU_TOLOWER_COMBINED); 16 | } 17 | 18 | const char* _nu_tolower(const char *encoded, const char *limit, nu_read_iterator_t read, 19 | uint32_t *u, const char **transform, 20 | void *context) { 21 | 22 | (void)(context); 23 | 24 | uint32_t _u = 0; 25 | const char *np = read(encoded, &_u); 26 | 27 | if (u != 0) { 28 | *u = _u; 29 | } 30 | 31 | /* handling of 0x03A3 ('Σ') 32 | * 33 | * this is the only language-independent exception described in 34 | * SpecialCasing.txt (Unicode 7.0) */ 35 | 36 | assert(nu_casemap_read == nu_utf8_read); 37 | 38 | if (_u == 0x03A3) { 39 | if (np >= limit) { 40 | *transform = __nu_final_sigma; 41 | return np; 42 | } 43 | 44 | uint32_t nu = 0; 45 | read(np, &nu); 46 | 47 | if (nu == 0) { 48 | *transform = __nu_final_sigma; 49 | return np; 50 | } 51 | } 52 | 53 | *transform = nu_tolower(_u); 54 | 55 | return np; 56 | } 57 | 58 | #endif /* NU_WITH_TOLOWER */ 59 | -------------------------------------------------------------------------------- /src/dep/libnu/toupper.c: -------------------------------------------------------------------------------- 1 | #include "casemap.h" 2 | 3 | #ifdef NU_WITH_TOUPPER 4 | 5 | #include "casemap_internal.h" 6 | #include "gen/_toupper.c" 7 | 8 | const char* nu_toupper(uint32_t codepoint) { 9 | return _nu_to_something(codepoint, NU_TOUPPER_G, NU_TOUPPER_G_SIZE, 10 | NU_TOUPPER_VALUES_C, NU_TOUPPER_VALUES_I, NU_TOUPPER_COMBINED); 11 | } 12 | 13 | const char* _nu_toupper(const char *encoded, const char *limit, nu_read_iterator_t read, 14 | uint32_t *u, const char **transform, 15 | void *context) { 16 | 17 | (void)(limit); 18 | (void)(context); 19 | 20 | uint32_t _u = 0; 21 | const char *np = read(encoded, &_u); 22 | 23 | *transform = nu_toupper(_u); 24 | 25 | if (u != 0) { 26 | *u = _u; 27 | } 28 | 29 | return 
np; 30 | } 31 | 32 | #endif /* NU_WITH_TOUPPER */ 33 | -------------------------------------------------------------------------------- /src/dep/libnu/utf16.c: -------------------------------------------------------------------------------- 1 | #include "utf16.h" 2 | #include "utf16be.h" 3 | #include "utf16le.h" 4 | 5 | #ifdef NU_WITH_UTF16_READER 6 | 7 | const char* nu_utf16_read_bom(const char *encoded, nu_utf16_bom_t *bom) { 8 | unsigned char bom0 = *(unsigned char *)(encoded); 9 | unsigned char bom1 = *(unsigned char *)(encoded + 1); 10 | 11 | if (bom0 == 0xFF && bom1 == 0xFE) { 12 | if (bom != 0) { 13 | #ifdef NU_WITH_UTF16_WRITER 14 | bom->write_bom = nu_utf16le_write_bom; 15 | #endif 16 | bom->read = nu_utf16le_read; 17 | bom->write = nu_utf16le_write; 18 | #ifdef NU_WITH_REVERSE_READ 19 | bom->revread = nu_utf16le_revread; 20 | #endif 21 | #ifdef NU_WITH_VALIDATION 22 | bom->validread = nu_utf16le_validread; 23 | #endif 24 | } 25 | } 26 | else { 27 | if (bom != 0) { 28 | #ifdef NU_WITH_UTF16_WRITER 29 | bom->write_bom = nu_utf16be_write_bom; 30 | #endif 31 | bom->read = nu_utf16be_read; 32 | bom->write = nu_utf16be_write; 33 | #ifdef NU_WITH_REVERSE_READ 34 | bom->revread = nu_utf16be_revread; 35 | #endif 36 | #ifdef NU_WITH_VALIDATION 37 | bom->validread = nu_utf16be_validread; 38 | #endif 39 | } 40 | 41 | if (bom0 == 0xFE && bom1 == 0xFF) { 42 | return encoded + 2; 43 | } 44 | else { 45 | return encoded; 46 | } 47 | } 48 | 49 | return encoded + 2; 50 | } 51 | 52 | #endif /* NU_WITH_UTF16_READER */ 53 | 54 | #ifdef NU_WITH_UTF16_WRITER 55 | 56 | char* nu_utf16le_write_bom(char *encoded) { 57 | unsigned char *p = (unsigned char *)(encoded); 58 | 59 | *(p) = 0xFF; 60 | *(p + 1) = 0xFE; 61 | 62 | return encoded + 2; 63 | } 64 | 65 | char* nu_utf16be_write_bom(char *encoded) { 66 | unsigned char *p = (unsigned char *)(encoded); 67 | 68 | *(p) = 0xFE; 69 | *(p + 1) = 0xFF; 70 | 71 | return encoded + 2; 72 | } 73 | 74 | #endif /* NU_WITH_UTF16_WRITER */ 75 | 
-------------------------------------------------------------------------------- /src/dep/libnu/utf16be.c: -------------------------------------------------------------------------------- 1 | #include "utf16be.h" 2 | 3 | #ifdef NU_WITH_UTF16BE_READER 4 | #ifdef NU_WITH_VALIDATION 5 | 6 | int nu_utf16be_validread(const char *encoded, size_t max_len) { 7 | if (max_len < 2) { 8 | return 0; 9 | } 10 | 11 | return utf16_validread(encoded, max_len); 12 | } 13 | 14 | #endif /* NU_WITH_VALIDATION */ 15 | #endif /* NU_WITH_UTF16BE_READER */ 16 | 17 | #ifdef NU_WITH_UTF16BE_WRITER 18 | 19 | char* nu_utf16be_write(uint32_t unicode, char *utf16) { 20 | unsigned codepoint_len = utf16_codepoint_length(unicode); 21 | 22 | if (utf16 != 0) { 23 | switch (codepoint_len) { 24 | case 2: nu_htobes((uint16_t)(unicode), utf16); break; 25 | default: { /* len == 4 */ 26 | uint16_t c0 = 0, c1 = 0; 27 | b4_utf16(unicode, &c0, &c1); 28 | nu_htobes(c0, utf16); 29 | nu_htobes(c1, utf16 + 2); 30 | break; 31 | } 32 | } 33 | } 34 | 35 | return utf16 + codepoint_len; 36 | } 37 | 38 | #endif /* NU_WITH_UTF16BE_WRITER */ 39 | -------------------------------------------------------------------------------- /src/dep/libnu/utf16be.h: -------------------------------------------------------------------------------- 1 | #ifndef NU_UTF16BE_H 2 | #define NU_UTF16BE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "config.h" 8 | #include "defines.h" 9 | #include "utf16_internal.h" 10 | 11 | #if defined (__cplusplus) || defined (c_plusplus) 12 | extern "C" { 13 | #endif 14 | 15 | #ifdef NU_WITH_UTF16BE_READER 16 | 17 | /** 18 | * @ingroup utf16 19 | * @see nu_utf16le_read 20 | */ 21 | static inline 22 | const char* nu_utf16be_read(const char *utf16, uint32_t *unicode) { 23 | uint32_t c = nu_betohs(utf16); 24 | 25 | if (c >= 0xD800 && c <= 0xDBFF) { 26 | if (unicode != 0) { 27 | *unicode = ((c & 0x03FF) << 10 | (nu_betohs(utf16 + 2) & 0x03FF)) + 0x10000; 28 | } 29 | return utf16 + 4; 30 | } 31 | else if 
(unicode != 0) { 32 | *unicode = c; 33 | } 34 | 35 | return utf16 + 2; 36 | } 37 | 38 | #ifdef NU_WITH_REVERSE_READ 39 | 40 | /** 41 | * @ingroup utf16 42 | * @see nu_utf16le_revread 43 | */ 44 | static inline 45 | const char* nu_utf16be_revread(uint32_t *unicode, const char *utf16) { 46 | /* valid UTF-16 sequences are either 2 or 4 bytes long 47 | * trail sequences are between 0xDC00 .. 0xDFFF */ 48 | const char *p = utf16 - 2; 49 | uint16_t ec = nu_betohs(p); 50 | 51 | if (ec >= 0xDC00 && ec <= 0xDFFF) { /* trail surrogate */ 52 | p -= 2; 53 | } 54 | 55 | if (unicode != 0) { 56 | nu_utf16be_read(p, unicode); 57 | } 58 | 59 | return p; 60 | } 61 | 62 | #endif /* NU_WITH_REVERSE_READ */ 63 | 64 | #ifdef NU_WITH_VALIDATION 65 | 66 | /** 67 | * @ingroup utf16 68 | * @see nu_utf16le_validread 69 | */ 70 | NU_EXPORT 71 | int nu_utf16be_validread(const char *encoded, size_t max_len); 72 | 73 | #endif /* NU_WITH_VALIDATION */ 74 | #endif /* NU_WITH_UTF16BE_READER */ 75 | 76 | #ifdef NU_WITH_UTF16BE_WRITER 77 | 78 | /** 79 | * @ingroup utf16 80 | * @see nu_utf16le_write 81 | */ 82 | NU_EXPORT 83 | char* nu_utf16be_write(uint32_t unicode, char *utf16); 84 | 85 | #endif /* NU_WITH_UTF16BE_WRITER */ 86 | 87 | #if defined (__cplusplus) || defined (c_plusplus) 88 | } 89 | #endif 90 | 91 | #endif /* NU_UTF16BE_H */ 92 | -------------------------------------------------------------------------------- /src/dep/libnu/utf16he.c: -------------------------------------------------------------------------------- 1 | #include "utf16he.h" 2 | #include "utf16_internal.h" 3 | 4 | #ifdef NU_WITH_UTF16HE_READER 5 | #ifdef NU_WITH_VALIDATION 6 | 7 | int nu_utf16he_validread(const char *encoded, size_t max_len) { 8 | if (max_len < 2) { 9 | return 0; 10 | } 11 | 12 | char lead = (*(uint16_t *)(encoded) & 0xFF00) >> 8; 13 | 14 | if (utf16_valid_lead(lead) != 0) { 15 | if (max_len < 4) { 16 | return 0; 17 | } 18 | 19 | char trail = (*(uint16_t *)(encoded + 2) & 0xFF00) >> 8; 20 | 21 | if 
(utf16_valid_trail(trail) == 0) { 22 | return 0; 23 | } 24 | 25 | return 4; 26 | } 27 | 28 | if (utf16_valid_trail(lead) != 0) { 29 | return 0; 30 | } 31 | 32 | return 2; 33 | } 34 | 35 | #endif /* NU_WITH_VALIDATION */ 36 | #endif /* NU_WITH_UTF16HE_READER */ 37 | 38 | #ifdef NU_WITH_UTF16HE_WRITER 39 | 40 | char* nu_utf16he_write(uint32_t unicode, char *utf16) { 41 | unsigned codepoint_len = utf16_codepoint_length(unicode); 42 | 43 | if (utf16 != 0) { 44 | switch (codepoint_len) { 45 | case 2: *(uint16_t *)(utf16) = (uint16_t)(unicode); break; 46 | default: { /* len == 4 */ 47 | uint16_t c0 = 0, c1 = 0; 48 | b4_utf16(unicode, &c0, &c1); 49 | *(uint16_t *)(utf16) = c0; 50 | *(uint16_t *)(utf16 + 2) = c1; 51 | break; 52 | } 53 | } 54 | } 55 | 56 | return utf16 + codepoint_len; 57 | } 58 | 59 | #endif /* NU_WITH_UTF16HE_WRITER */ 60 | -------------------------------------------------------------------------------- /src/dep/libnu/utf16le.c: -------------------------------------------------------------------------------- 1 | #include "utf16le.h" 2 | 3 | #ifdef NU_WITH_UTF16LE_READER 4 | #ifdef NU_WITH_VALIDATION 5 | 6 | int nu_utf16le_validread(const char *encoded, size_t max_len) { 7 | if (max_len < 2) { 8 | return 0; 9 | } 10 | 11 | return utf16_validread(encoded + 1, max_len); 12 | } 13 | 14 | #endif /* NU_WITH_VALIDATION */ 15 | #endif /* NU_WITH_UTF16LE_READER */ 16 | 17 | #ifdef NU_WITH_UTF16LE_WRITER 18 | 19 | char* nu_utf16le_write(uint32_t unicode, char *utf16) { 20 | unsigned codepoint_len = utf16_codepoint_length(unicode); 21 | 22 | if (utf16 != 0) { 23 | switch (codepoint_len) { 24 | case 2: nu_htoles((uint16_t)(unicode), utf16); break; 25 | default: { /* len == 4 */ 26 | uint16_t c0 = 0, c1 = 0; 27 | b4_utf16(unicode, &c0, &c1); 28 | nu_htoles(c0, utf16); 29 | nu_htoles(c1, utf16 + 2); 30 | break; 31 | } 32 | } 33 | } 34 | 35 | return utf16 + codepoint_len; 36 | } 37 | 38 | #endif /* NU_WITH_UTF16LE_WRITER */ 39 | 
-------------------------------------------------------------------------------- /src/dep/libnu/utf32.c: -------------------------------------------------------------------------------- 1 | #include "utf32.h" 2 | #include "utf32be.h" 3 | #include "utf32le.h" 4 | 5 | #ifdef NU_WITH_UTF32_READER 6 | 7 | const char* nu_utf32_read_bom(const char *encoded, nu_utf32_bom_t *bom) { 8 | unsigned char bom0 = *(unsigned char *)(encoded); 9 | unsigned char bom1 = *(unsigned char *)(encoded + 1); 10 | unsigned char bom2 = *(unsigned char *)(encoded + 2); 11 | unsigned char bom3 = *(unsigned char *)(encoded + 3); 12 | 13 | if (bom0 == 0xFF && bom1 == 0xFE 14 | && bom2 == 0 && bom3 == 0) { 15 | if (bom != 0) { 16 | #ifdef NU_WITH_UTF32_WRITER 17 | bom->write_bom = nu_utf32le_write_bom; 18 | #endif 19 | bom->read = nu_utf32le_read; 20 | bom->write = nu_utf32le_write; 21 | #ifdef NU_WITH_REVERSE_READ 22 | bom->revread = nu_utf32le_revread; 23 | #endif 24 | #ifdef NU_WITH_VALIDATION 25 | bom->validread = nu_utf32le_validread; 26 | #endif 27 | } 28 | } 29 | else { 30 | if (bom != 0) { 31 | #ifdef NU_WITH_UTF32_WRITER 32 | bom->write_bom = nu_utf32be_write_bom; 33 | #endif 34 | bom->read = nu_utf32be_read; 35 | bom->write = nu_utf32be_write; 36 | #ifdef NU_WITH_REVERSE_READ 37 | bom->revread = nu_utf32be_revread; 38 | #endif 39 | #ifdef NU_WITH_VALIDATION 40 | bom->validread = nu_utf32be_validread; 41 | #endif 42 | } 43 | 44 | if (bom0 == 0 && bom1 == 0 45 | && bom2 == 0xFE && bom3 == 0xFF) { 46 | return encoded + 4; 47 | } 48 | else { 49 | return encoded; 50 | } 51 | } 52 | 53 | return encoded + 4; 54 | } 55 | 56 | #endif /* NU_WITH_UTF32_READER */ 57 | 58 | #ifdef NU_WITH_UTF32_WRITER 59 | 60 | char* nu_utf32le_write_bom(char *encoded) { 61 | unsigned char *p = (unsigned char *)(encoded); 62 | 63 | *(p) = 0xFF; 64 | *(p + 1) = 0xFE; 65 | *(p + 2) = 0; 66 | *(p + 3) = 0; 67 | 68 | return encoded + 4; 69 | } 70 | 71 | char* nu_utf32be_write_bom(char *encoded) { 72 | unsigned char *p = 
(unsigned char *)(encoded); 73 | 74 | *(p) = 0; 75 | *(p + 1) = 0; 76 | *(p + 2) = 0xFE; 77 | *(p + 3) = 0xFF; 78 | 79 | return encoded + 4; 80 | } 81 | 82 | #endif /* NU_WITH_UTF32_WRITER */ 83 | -------------------------------------------------------------------------------- /src/dep/libnu/utf32.h: -------------------------------------------------------------------------------- 1 | #ifndef NU_UTF32_H 2 | #define NU_UTF32_H 3 | 4 | #include 5 | 6 | #include "config.h" 7 | #include "defines.h" 8 | #include "strings.h" 9 | #include "validate.h" 10 | 11 | /** @defgroup utf32 UTF-32 support 12 | */ 13 | 14 | #if defined (__cplusplus) || defined (c_plusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #if (defined NU_WITH_UTF32_READER) || (defined NU_WITH_UTF32_WRITER) 19 | /** For sizeof() only 20 | * 21 | * @ingroup utf32 22 | */ 23 | static const uint32_t NU_UTF32_BOM = 0; 24 | #endif 25 | 26 | /** Endianess-specific UTF-32 write BOM function */ 27 | typedef char* (*nu_utf32_write_bom_t)(char *); 28 | 29 | #ifdef NU_WITH_UTF32_READER 30 | 31 | /** Holder for endianess-specific UTF-32 functions 32 | * 33 | * @ingroup utf32 34 | * @see nu_utf32_write_bom 35 | */ 36 | typedef struct { 37 | /** Read (decode) function 38 | */ 39 | nu_read_iterator_t read; 40 | /** Write (encode) function 41 | */ 42 | nu_write_iterator_t write; 43 | /** Reverse-read (decode) function 44 | */ 45 | nu_revread_iterator_t revread; 46 | /** Validation function 47 | */ 48 | nu_validread_iterator_t validread; 49 | /** BOM writing function 50 | */ 51 | nu_utf32_write_bom_t write_bom; 52 | } nu_utf32_bom_t; 53 | 54 | /** 55 | * @ingroup utf32 56 | * @see nu_utf16_read_bom 57 | */ 58 | NU_EXPORT 59 | const char* nu_utf32_read_bom(const char *encoded, nu_utf32_bom_t *bom); 60 | 61 | #endif /* NU_WITH_UTF32_READER */ 62 | 63 | #ifdef NU_WITH_UTF32_WRITER 64 | 65 | /** 66 | * @ingroup utf32 67 | * @see nu_utf16le_write_bom 68 | */ 69 | NU_EXPORT 70 | char* nu_utf32le_write_bom(char *encoded); 71 | 72 | 
/** 73 | * @ingroup utf32 74 | * @see nu_utf16be_write_bom 75 | */ 76 | NU_EXPORT 77 | char* nu_utf32be_write_bom(char *encoded); 78 | 79 | #endif /* NU_WITH_UTF32_WRITER */ 80 | 81 | #if defined (__cplusplus) || defined (c_plusplus) 82 | } 83 | #endif 84 | 85 | #endif /* NU_UTF32_H */ 86 | -------------------------------------------------------------------------------- /src/dep/libnu/utf32_internal.h: -------------------------------------------------------------------------------- 1 | #ifndef NU_UTF32_INTERNAL_H 2 | #define NU_UTF32_INTERNAL_H 3 | 4 | #include 5 | 6 | #define NU_UTF32_MAX_CODEPOINT (0x0010FFFF) 7 | 8 | static inline 9 | uint32_t nu_letohl(const char *p) { 10 | const unsigned char *up = (const unsigned char *)(p); 11 | return ((uint32_t)(*(up + 3)) << 24 | (uint32_t)(*(up + 2)) << 16 | (uint32_t)(*(up + 1)) << 8 | (uint32_t)(*(up))); /* widen each byte first: a promoted int shifted left by 24 overflows for bytes >= 0x80 */ 12 | } 13 | 14 | static inline 15 | void nu_htolel(uint32_t s, char *p) { 16 | unsigned char *up = (unsigned char *)(p); 17 | *(up) = (s & 0xFF); 18 | *(up + 1) = ((s & 0xFF00) >> 8); 19 | *(up + 2) = ((s & 0xFF0000) >> 16); 20 | *(up + 3) = ((s & 0xFF000000) >> 24); 21 | } 22 | 23 | static inline 24 | uint32_t nu_betohl(const char *p) { 25 | const unsigned char *up = (const unsigned char *)(p); 26 | return ((uint32_t)(*(up)) << 24 | (uint32_t)(*(up + 1)) << 16 | (uint32_t)(*(up + 2)) << 8 | (uint32_t)(*(up + 3))); /* widen each byte first: a promoted int shifted left by 24 overflows for bytes >= 0x80 */ 27 | } 28 | 29 | static inline 30 | void nu_htobel(uint32_t s, char *p) { 31 | unsigned char *up = (unsigned char *)(p); 32 | *(up + 3) = (s & 0xFF); 33 | *(up + 2) = (s & 0xFF00) >> 8; 34 | *(up + 1) = (s & 0xFF0000) >> 16; 35 | *(up) = (s & 0xFF000000) >> 24; 36 | } 37 | 38 | static inline 39 | int utf32_validread_basic(const char *p, size_t max_len) { 40 | (void)(p); 41 | return (max_len >= 4 ? 
4 : 0); /* UTF-32 is ok with any 4-byte sequence */ 42 | } 43 | 44 | #endif /* NU_UTF32_INTERNAL_H */ 45 | -------------------------------------------------------------------------------- /src/dep/libnu/utf32be.c: -------------------------------------------------------------------------------- 1 | #include "utf32be.h" 2 | 3 | #ifdef NU_WITH_UTF32BE_READER 4 | #ifdef NU_WITH_VALIDATION 5 | 6 | int nu_utf32be_validread(const char *p, size_t max_len) { 7 | if (utf32_validread_basic(p, max_len) == 0) { 8 | return 0; 9 | } 10 | 11 | uint32_t u = 0; 12 | nu_utf32be_read(p, &u); 13 | 14 | if (u > NU_UTF32_MAX_CODEPOINT) { 15 | return 0; 16 | } 17 | 18 | return (u >= 0xD800 && u <= 0xDFFF ? 0 : 4); 19 | } 20 | 21 | #endif /* NU_WITH_VALIDATION */ 22 | #endif /* NU_WITH_UTF32BE_READER */ 23 | 24 | #ifdef NU_WITH_UTF32BE_WRITER 25 | 26 | char* nu_utf32be_write(uint32_t unicode, char *utf32) { 27 | if (utf32 != 0) { 28 | nu_htobel(unicode, utf32); 29 | } 30 | 31 | return utf32 + 4; 32 | } 33 | 34 | #endif /* NU_WITH_UTF32BE_WRITER */ 35 | -------------------------------------------------------------------------------- /src/dep/libnu/utf32be.h: -------------------------------------------------------------------------------- 1 | #ifndef NU_UTF32BE_H 2 | #define NU_UTF32BE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "config.h" 8 | #include "defines.h" 9 | #include "utf32_internal.h" 10 | 11 | #if defined (__cplusplus) || defined (c_plusplus) 12 | extern "C" { 13 | #endif 14 | 15 | #ifdef NU_WITH_UTF32BE_READER 16 | 17 | /** 18 | * @ingroup utf32 19 | * @see nu_utf16be_read 20 | */ 21 | static inline 22 | const char* nu_utf32be_read(const char *utf32, uint32_t *unicode) { 23 | if (unicode != 0) { 24 | *unicode = nu_betohl(utf32); 25 | } 26 | 27 | return utf32 + 4; 28 | } 29 | 30 | #ifdef NU_WITH_REVERSE_READ 31 | 32 | /* 33 | * @ingroup utf32 34 | * @see nu_utf16be_revread 35 | */ 36 | static inline 37 | const char* nu_utf32be_revread(uint32_t *unicode, const char *utf32) { 
38 | const char *p = utf32 - 4; 39 | 40 | if (unicode != 0) { 41 | nu_utf32be_read(p, unicode); 42 | } 43 | 44 | return p; 45 | } 46 | 47 | #endif /* NU_WITH_REVERSE_READ */ 48 | 49 | #ifdef NU_WITH_VALIDATION 50 | 51 | /** 52 | * @ingroup utf32 53 | * @see nu_utf16be_validread 54 | */ 55 | NU_EXPORT 56 | int nu_utf32be_validread(const char *p, size_t max_len); 57 | 58 | #endif /* NU_WITH_VALIDATION */ 59 | #endif /* NU_WITH_UTF32BE_READER */ 60 | 61 | #ifdef NU_WITH_UTF32BE_WRITER 62 | 63 | /** 64 | * @ingroup utf32 65 | * @see nu_utf16be_write 66 | */ 67 | NU_EXPORT 68 | char* nu_utf32be_write(uint32_t unicode, char *utf32); 69 | 70 | #endif /* NU_WITH_UTF32BE_WRITER */ 71 | 72 | #if defined (__cplusplus) || defined (c_plusplus) 73 | } 74 | #endif 75 | 76 | #endif /* NU_UTF32BE_H */ 77 | -------------------------------------------------------------------------------- /src/dep/libnu/utf32he.c: -------------------------------------------------------------------------------- 1 | #include "utf32he.h" 2 | 3 | #ifdef NU_WITH_UTF32HE_READER 4 | #ifdef NU_WITH_REVERSE_READ 5 | 6 | #endif /* NU_WITH_REVERSE_READ */ 7 | 8 | #ifdef NU_WITH_VALIDATION 9 | 10 | int nu_utf32he_validread(const char *p, size_t max_len) { 11 | if (utf32_validread_basic(p, max_len) == 0) { 12 | return 0; 13 | } 14 | 15 | uint32_t u = 0; 16 | nu_utf32he_read(p, &u); 17 | 18 | if (u > NU_UTF32_MAX_CODEPOINT) { 19 | return 0; 20 | } 21 | 22 | return (u >= 0xD800 && u <= 0xDFFF ? 
0 : 4); 23 | } 24 | 25 | #endif /* NU_WITH_VALIDATION */ 26 | #endif /* NU_WITH_UTF32HE_READER */ 27 | 28 | #ifdef NU_WITH_UTF32HE_WRITER 29 | 30 | char* nu_utf32he_write(uint32_t unicode, char *utf32) { 31 | if (utf32 != 0) { /* null buffer: skip the store and only report the encoded length, as nu_utf32le_write/nu_utf32be_write do */ 32 | *(uint32_t *)(utf32) = unicode; 33 | } 34 | 35 | return utf32 + 4; 36 | } 37 | 38 | #endif /* NU_WITH_UTF32HE_WRITER */ 39 | -------------------------------------------------------------------------------- /src/dep/libnu/utf32he.h: -------------------------------------------------------------------------------- 1 | #ifndef NU_UTF32HE_H 2 | #define NU_UTF32HE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "config.h" 8 | #include "defines.h" 9 | #include "utf32_internal.h" 10 | 11 | #if defined (__cplusplus) || defined (c_plusplus) 12 | extern "C" { 13 | #endif 14 | 15 | #ifdef NU_WITH_UTF32HE_READER 16 | 17 | /** 18 | * @ingroup utf32 19 | * @see nu_utf16le_read 20 | */ 21 | static inline 22 | const char* nu_utf32he_read(const char *utf32, uint32_t *unicode) { 23 | if (unicode != 0) { 24 | *unicode = *(uint32_t *)(utf32); 25 | } 26 | 27 | return utf32 + 4; 28 | } 29 | 30 | #ifdef NU_WITH_REVERSE_READ 31 | 32 | /* 33 | * @ingroup utf32 34 | * @see nu_utf16le_revread 35 | */ 36 | static inline 37 | const char* nu_utf32he_revread(uint32_t *unicode, const char *utf32) { 38 | const char *p = utf32 - 4; 39 | 40 | if (unicode != 0) { 41 | nu_utf32he_read(p, unicode); 42 | } 43 | 44 | return p; 45 | } 46 | 47 | #endif /* NU_WITH_REVERSE_READ */ 48 | 49 | #ifdef NU_WITH_VALIDATION 50 | 51 | /** 52 | * @ingroup utf32 53 | * @see nu_utf16le_validread 54 | */ 55 | NU_EXPORT 56 | int nu_utf32he_validread(const char *p, size_t max_len); 57 | 58 | #endif /* NU_WITH_VALIDATION */ 59 | #endif /* NU_WITH_UTF32HE_READER */ 60 | 61 | #ifdef NU_WITH_UTF32HE_WRITER 62 | 63 | /** 64 | * @ingroup utf32 65 | * @see nu_utf16le_write 66 | */ 67 | NU_EXPORT 68 | char* nu_utf32he_write(uint32_t unicode, char *utf32); 69 | 70 | #endif /* NU_WITH_UTF32HE_WRITER */ 71 | 72 | #if defined 
(__cplusplus) || defined (c_plusplus) 73 | } 74 | #endif 75 | 76 | #endif /* NU_UTF32HE_H */ 77 | -------------------------------------------------------------------------------- /src/dep/libnu/utf32le.c: -------------------------------------------------------------------------------- 1 | #include "utf32le.h" 2 | 3 | #ifdef NU_WITH_UTF32LE_READER 4 | #ifdef NU_WITH_VALIDATION 5 | 6 | int nu_utf32le_validread(const char *p, size_t max_len) { 7 | if (utf32_validread_basic(p, max_len) == 0) { 8 | return 0; 9 | } 10 | 11 | uint32_t u = 0; 12 | nu_utf32le_read(p, &u); 13 | 14 | if (u > NU_UTF32_MAX_CODEPOINT) { 15 | return 0; 16 | } 17 | 18 | return (u >= 0xD800 && u <= 0xDFFF ? 0 : 4); 19 | } 20 | 21 | #endif /* NU_WITH_VALIDATION */ 22 | #endif /* NU_WITH_UTF32LE_READER */ 23 | 24 | #ifdef NU_WITH_UTF32LE_WRITER 25 | 26 | char* nu_utf32le_write(uint32_t unicode, char *utf32) { 27 | if (utf32 != 0) { 28 | nu_htolel(unicode, utf32); 29 | } 30 | 31 | return utf32 + 4; 32 | } 33 | 34 | #endif /* NU_WITH_UTF32LE_WRITER */ 35 | -------------------------------------------------------------------------------- /src/dep/libnu/utf32le.h: -------------------------------------------------------------------------------- 1 | #ifndef NU_UTF32LE_H 2 | #define NU_UTF32LE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "config.h" 8 | #include "defines.h" 9 | #include "utf32_internal.h" 10 | 11 | #if defined (__cplusplus) || defined (c_plusplus) 12 | extern "C" { 13 | #endif 14 | 15 | #ifdef NU_WITH_UTF32LE_READER 16 | 17 | /** 18 | * @ingroup utf32 19 | * @see nu_utf16le_read 20 | */ 21 | static inline 22 | const char* nu_utf32le_read(const char *utf32, uint32_t *unicode) { 23 | if (unicode != 0) { 24 | *unicode = nu_letohl(utf32); 25 | } 26 | 27 | return utf32 + 4; 28 | } 29 | 30 | #ifdef NU_WITH_REVERSE_READ 31 | 32 | /* 33 | * @ingroup utf32 34 | * @see nu_utf16le_revread 35 | */ 36 | static inline 37 | const char* nu_utf32le_revread(uint32_t *unicode, const char *utf32) { 38 | 
const char *p = utf32 - 4; 39 | 40 | if (unicode != 0) { 41 | nu_utf32le_read(p, unicode); 42 | } 43 | 44 | return p; 45 | } 46 | 47 | #endif /* NU_WITH_REVERSE_READ */ 48 | 49 | #ifdef NU_WITH_VALIDATION 50 | 51 | /** 52 | * @ingroup utf32 53 | * @see nu_utf16le_validread 54 | */ 55 | NU_EXPORT 56 | int nu_utf32le_validread(const char *p, size_t max_len); 57 | 58 | #endif /* NU_WITH_VALIDATION */ 59 | #endif /* NU_WITH_UTF32LE_READER */ 60 | 61 | #ifdef NU_WITH_UTF32LE_WRITER 62 | 63 | /** 64 | * @ingroup utf32 65 | * @see nu_utf16le_write 66 | */ 67 | NU_EXPORT 68 | char* nu_utf32le_write(uint32_t unicode, char *utf32); 69 | 70 | #endif /* NU_WITH_UTF32LE_WRITER */ 71 | 72 | #if defined (__cplusplus) || defined (c_plusplus) 73 | } 74 | #endif 75 | 76 | #endif /* NU_UTF32LE_H */ 77 | -------------------------------------------------------------------------------- /src/dep/libnu/validate.c: -------------------------------------------------------------------------------- 1 | #include "validate.h" 2 | 3 | #ifdef NU_WITH_VALIDATION 4 | 5 | const char* nu_validate(const char *encoded, size_t max_len, nu_validread_iterator_t it) { 6 | const char *p = encoded; 7 | 8 | while (p < encoded + max_len) { 9 | /* max_len should be tested inside of it() call */ 10 | int byte_len = it(p, max_len - (p - encoded)); 11 | 12 | if (byte_len <= 0) { 13 | return p; 14 | } 15 | 16 | p += byte_len; 17 | } 18 | 19 | return 0; 20 | } 21 | 22 | #endif /* NU_WITH_VALIDATION */ 23 | -------------------------------------------------------------------------------- /src/dep/libnu/validate.h: -------------------------------------------------------------------------------- 1 | #ifndef NU_VALIDATE_H 2 | #define NU_VALIDATE_H 3 | 4 | /** @defgroup validation Encoding validation 5 | */ 6 | 7 | #include 8 | #include 9 | 10 | #include "config.h" 11 | #include "defines.h" 12 | 13 | #if defined (__cplusplus) || defined (c_plusplus) 14 | extern "C" { 15 | #endif 16 | 17 | /** Validation function 18 | * 19 
| * @ingroup iterators 20 | * @see nu_utf8_validread 21 | */ 22 | typedef int (*nu_validread_iterator_t)(const char *p, size_t max_len); 23 | 24 | #ifdef NU_WITH_VALIDATION 25 | 26 | /** Validate string encoding 27 | * 28 | * If this check fails then none of the nunicode functions is applicable to 29 | * 'encoded'. Calling any function on such string will lead to undefined 30 | * behavior. 31 | * 32 | * @ingroup validation 33 | * @param encoded encoded string 34 | * @param max_len length of the buffer, nu_validate() won't go further 35 | * than this 36 | * @param it validating iterator (e.g. nu_utf8_validread) 37 | * @return 0 on valid string, pointer to invalid segment in string on 38 | * validation error 39 | * 40 | * @see nu_utf8_validread 41 | */ 42 | NU_EXPORT 43 | const char* nu_validate(const char *encoded, size_t max_len, 44 | nu_validread_iterator_t it); 45 | 46 | #endif /* NU_WITH_VALIDATION */ 47 | 48 | #if defined (__cplusplus) || defined (c_plusplus) 49 | } 50 | #endif 51 | 52 | #endif /* NU_VALIDATE_H */ 53 | -------------------------------------------------------------------------------- /src/dep/libnu/version.c: -------------------------------------------------------------------------------- 1 | #include "version.h" 2 | 3 | static const char *__nu_version_string = NU_VERSION; 4 | 5 | const char* nu_version(void) { 6 | return __nu_version_string; 7 | } 8 | -------------------------------------------------------------------------------- /src/dep/libnu/version.h: -------------------------------------------------------------------------------- 1 | #ifndef NU_VERSION_H 2 | #define NU_VERSION_H 3 | 4 | #include "defines.h" 5 | 6 | /** @defgroup other Other 7 | */ 8 | 9 | #if defined (__cplusplus) || defined (c_plusplus) 10 | extern "C" { 11 | #endif 12 | 13 | /** This define holds human-readable version of nunicode 14 | * 15 | * @ingroup defines 16 | */ 17 | #define NU_VERSION "custom" 18 | 19 | /** Human-readable version of nunicode 20 | * 21 | * 
@ingroup other 22 | * @return version string 23 | */ 24 | NU_EXPORT 25 | const char* nu_version(void); 26 | 27 | #if defined (__cplusplus) || defined (c_plusplus) 28 | } 29 | #endif 30 | 31 | #endif /* NU_VERSION_H */ 32 | -------------------------------------------------------------------------------- /src/dep/miniz/Makefile: -------------------------------------------------------------------------------- 1 | # 'all' names a command, not a file: keep it out of date even if a file called 'all' appears 2 | .PHONY: all 3 | all: libminiz.a 4 | 5 | libminiz.a: miniz.o 6 | $(AR) rcs $@ $^ 7 | -------------------------------------------------------------------------------- /src/dep/snowball/AUTHORS: -------------------------------------------------------------------------------- 1 | Authors 2 | ======= 3 | 4 | Martin Porter 5 | ------------- 6 | 7 | - Designed the snowball language. 8 | - Implemented the snowball to C compiler. 9 | - Implemented the stemming algorithms in C. 10 | - Wrote the documentation. 11 | 12 | Richard Boulton 13 | --------------- 14 | 15 | - Implemented Java backend of the snowball compiler. 16 | - Developed build system. 17 | - Assisted with website maintenance. 18 | 19 | 20 | Assistance from 21 | --------------- 22 | 23 | Olivier Bornet - fixes to java packaging and build system. 24 | Andreas Jung - useful bug reports on the libstemmer library. 25 | Olly Betts - several patches, bug reports, and performance improvements. 26 | Sebastiano Vigna and Oerd Cukalla - patches for the Java stemming algorithms. 27 | Ralf Junker - fix a potential memory leak in sb_stemmer_new(). 
28 | -------------------------------------------------------------------------------- /src/dep/snowball/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | FILE(GLOB SNOWBALL_SRC 2 | "src_c/*.c" 3 | "libstemmer/libstemmer.c" 4 | "runtime/*.c") 5 | INCLUDE_DIRECTORIES(include) 6 | ADD_LIBRARY(snowball OBJECT ${SNOWBALL_SRC}) 7 | -------------------------------------------------------------------------------- /src/dep/snowball/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2001, Dr Martin Porter 2 | Copyright (c) 2004,2005, Richard Boulton 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, 10 | this list of conditions and the following disclaimer. 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 3. Neither the name of the Snowball project nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /src/dep/snowball/Makefile: -------------------------------------------------------------------------------- 1 | include mkinc.mak 2 | CFLAGS+=-Iinclude 3 | # 'all' and 'clean' name commands, not files: a stray file with either name must not satisfy them 4 | .PHONY: all clean 5 | all: libstemmer.o stemwords 6 | libstemmer.o: $(snowball_sources:.c=.o) 7 | $(AR) -cru $@ $^ 8 | stemwords: examples/stemwords.o libstemmer.o 9 | $(CC) -o $@ $^ 10 | clean: 11 | rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o 12 | -------------------------------------------------------------------------------- /src/dep/snowball/mkinc_utf8.mak: -------------------------------------------------------------------------------- 1 | # libstemmer/mkinc_utf8.mak: List of stemming module source files 2 | # 3 | # This file is generated by mkmodules.pl from a list of module names. 4 | # Do not edit manually. 
5 | # 6 | # Modules included by this file are: arabic, danish, dutch, english, finnish, 7 | # french, german, hungarian, italian, norwegian, porter, portuguese, 8 | # romanian, russian, spanish, swedish, tamil, turkish 9 | 10 | snowball_sources= \ 11 | src_c/stem_UTF_8_arabic.c \ 12 | src_c/stem_UTF_8_danish.c \ 13 | src_c/stem_UTF_8_dutch.c \ 14 | src_c/stem_UTF_8_english.c \ 15 | src_c/stem_UTF_8_finnish.c \ 16 | src_c/stem_UTF_8_french.c \ 17 | src_c/stem_UTF_8_german.c \ 18 | src_c/stem_UTF_8_hungarian.c \ 19 | src_c/stem_UTF_8_italian.c \ 20 | src_c/stem_UTF_8_norwegian.c \ 21 | src_c/stem_UTF_8_porter.c \ 22 | src_c/stem_UTF_8_portuguese.c \ 23 | src_c/stem_UTF_8_romanian.c \ 24 | src_c/stem_UTF_8_russian.c \ 25 | src_c/stem_UTF_8_spanish.c \ 26 | src_c/stem_UTF_8_swedish.c \ 27 | src_c/stem_UTF_8_tamil.c \ 28 | src_c/stem_UTF_8_turkish.c \ 29 | runtime/api.c \ 30 | runtime/utilities.c \ 31 | libstemmer/libstemmer_utf8.c 32 | 33 | snowball_headers= \ 34 | src_c/stem_UTF_8_arabic.h \ 35 | src_c/stem_UTF_8_danish.h \ 36 | src_c/stem_UTF_8_dutch.h \ 37 | src_c/stem_UTF_8_english.h \ 38 | src_c/stem_UTF_8_finnish.h \ 39 | src_c/stem_UTF_8_french.h \ 40 | src_c/stem_UTF_8_german.h \ 41 | src_c/stem_UTF_8_hungarian.h \ 42 | src_c/stem_UTF_8_italian.h \ 43 | src_c/stem_UTF_8_norwegian.h \ 44 | src_c/stem_UTF_8_porter.h \ 45 | src_c/stem_UTF_8_portuguese.h \ 46 | src_c/stem_UTF_8_romanian.h \ 47 | src_c/stem_UTF_8_russian.h \ 48 | src_c/stem_UTF_8_spanish.h \ 49 | src_c/stem_UTF_8_swedish.h \ 50 | src_c/stem_UTF_8_tamil.h \ 51 | src_c/stem_UTF_8_turkish.h \ 52 | include/libstemmer.h \ 53 | libstemmer/modules_utf8.h \ 54 | runtime/api.h \ 55 | runtime/header.h 56 | 57 | -------------------------------------------------------------------------------- /src/dep/snowball/runtime/api.c: -------------------------------------------------------------------------------- 1 | 2 | #include /* for calloc, free */ 3 | #include "header.h" 4 | 5 | extern struct SN_env * 
SN_create_env(int S_size, int I_size, int B_size) 6 | { 7 | struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); 8 | if (z == NULL) return NULL; 9 | z->p = create_s(); 10 | if (z->p == NULL) goto error; 11 | if (S_size) 12 | { 13 | int i; 14 | z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); 15 | if (z->S == NULL) goto error; 16 | 17 | for (i = 0; i < S_size; i++) 18 | { 19 | z->S[i] = create_s(); 20 | if (z->S[i] == NULL) goto error; 21 | } 22 | } 23 | 24 | if (I_size) 25 | { 26 | z->I = (int *) calloc(I_size, sizeof(int)); 27 | if (z->I == NULL) goto error; 28 | } 29 | 30 | if (B_size) 31 | { 32 | z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char)); 33 | if (z->B == NULL) goto error; 34 | } 35 | 36 | return z; 37 | error: 38 | SN_close_env(z, S_size); 39 | return NULL; 40 | } 41 | 42 | extern void SN_close_env(struct SN_env * z, int S_size) 43 | { 44 | if (z == NULL) return; 45 | if (S_size) 46 | { 47 | int i; 48 | for (i = 0; i < S_size; i++) 49 | { 50 | lose_s(z->S[i]); 51 | } 52 | free(z->S); 53 | } 54 | free(z->I); 55 | free(z->B); 56 | if (z->p) lose_s(z->p); 57 | free(z); 58 | } 59 | 60 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s) 61 | { 62 | int err = replace_s(z, 0, z->l, size, s, NULL); 63 | z->c = 0; 64 | return err; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /src/dep/snowball/runtime/api.h: -------------------------------------------------------------------------------- 1 | 2 | typedef unsigned char symbol; 3 | 4 | /* Or replace 'char' above with 'short' for 16 bit characters. 5 | 6 | More precisely, replace 'char' with whatever type guarantees the 7 | character width you need. Note however that sizeof(symbol) should divide 8 | HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise 9 | there is an alignment problem. In the unlikely event of a problem here, 10 | consult Martin Porter. 
11 | 12 | */ 13 | 14 | struct SN_env { 15 | symbol * p; 16 | int c; int l; int lb; int bra; int ket; 17 | symbol * * S; 18 | int * I; 19 | unsigned char * B; 20 | }; 21 | 22 | extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size); 23 | extern void SN_close_env(struct SN_env * z, int S_size); 24 | 25 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s); 26 | 27 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_danish.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * danish_ISO_8859_1_create_env(void); 9 | extern void danish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int danish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_dutch.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * dutch_ISO_8859_1_create_env(void); 9 | extern void dutch_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int dutch_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_english.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* 
http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * english_ISO_8859_1_create_env(void); 9 | extern void english_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int english_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_finnish.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * finnish_ISO_8859_1_create_env(void); 9 | extern void finnish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int finnish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_french.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * french_ISO_8859_1_create_env(void); 9 | extern void french_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int french_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_german.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | 
#endif 7 | 8 | extern struct SN_env * german_ISO_8859_1_create_env(void); 9 | extern void german_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int german_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_italian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * italian_ISO_8859_1_create_env(void); 9 | extern void italian_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int italian_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_norwegian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void); 9 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_porter.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * 
porter_ISO_8859_1_create_env(void); 9 | extern void porter_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int porter_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_portuguese.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * portuguese_ISO_8859_1_create_env(void); 9 | extern void portuguese_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int portuguese_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_spanish.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * spanish_ISO_8859_1_create_env(void); 9 | extern void spanish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int spanish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_1_swedish.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * swedish_ISO_8859_1_create_env(void); 9 | extern void 
swedish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int swedish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_2_hungarian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * hungarian_ISO_8859_2_create_env(void); 9 | extern void hungarian_ISO_8859_2_close_env(struct SN_env * z); 10 | 11 | extern int hungarian_ISO_8859_2_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_ISO_8859_2_romanian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * romanian_ISO_8859_2_create_env(void); 9 | extern void romanian_ISO_8859_2_close_env(struct SN_env * z); 10 | 11 | extern int romanian_ISO_8859_2_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_KOI8_R_russian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * russian_KOI8_R_create_env(void); 9 | extern void russian_KOI8_R_close_env(struct SN_env * z); 10 | 11 | extern int 
russian_KOI8_R_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_arabic.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * arabic_UTF_8_create_env(void); 9 | extern void arabic_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int arabic_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_danish.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * danish_UTF_8_create_env(void); 9 | extern void danish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int danish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_dutch.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * dutch_UTF_8_create_env(void); 9 | extern void dutch_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int dutch_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | 
-------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_english.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * english_UTF_8_create_env(void); 9 | extern void english_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int english_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_finnish.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * finnish_UTF_8_create_env(void); 9 | extern void finnish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int finnish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_french.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * french_UTF_8_create_env(void); 9 | extern void french_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int french_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_german.h: 
-------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * german_UTF_8_create_env(void); 9 | extern void german_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int german_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_hungarian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * hungarian_UTF_8_create_env(void); 9 | extern void hungarian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int hungarian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_italian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * italian_UTF_8_create_env(void); 9 | extern void italian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int italian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_norwegian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by 
the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * norwegian_UTF_8_create_env(void); 9 | extern void norwegian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int norwegian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_porter.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * porter_UTF_8_create_env(void); 9 | extern void porter_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int porter_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_portuguese.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * portuguese_UTF_8_create_env(void); 9 | extern void portuguese_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int portuguese_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_romanian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | 
#endif 7 | 8 | extern struct SN_env * romanian_UTF_8_create_env(void); 9 | extern void romanian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int romanian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_russian.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * russian_UTF_8_create_env(void); 9 | extern void russian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int russian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_spanish.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * spanish_UTF_8_create_env(void); 9 | extern void spanish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int spanish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_swedish.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * swedish_UTF_8_create_env(void); 9 | extern void swedish_UTF_8_close_env(struct SN_env * z); 10 
| 11 | extern int swedish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_tamil.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * tamil_UTF_8_create_env(void); 9 | extern void tamil_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int tamil_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/snowball/src_c/stem_UTF_8_turkish.h: -------------------------------------------------------------------------------- 1 | /* This file was generated automatically by the Snowball to ISO C compiler */ 2 | /* http://snowballstem.org/ */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * turkish_UTF_8_create_env(void); 9 | extern void turkish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int turkish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/dep/triemap/.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. 
Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | *.su 34 | 35 | .vscode 36 | test/test 37 | benchmark 38 | -------------------------------------------------------------------------------- /src/dep/triemap/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, Redis Labs 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Builds libtriemap.a, a static library made from every C source in this
# directory.

# Remove a half-written target when its recipe fails, so a truncated archive
# or object is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Host OS name; not referenced by any rule in this file.
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')

# Compile flags for linux / osx (override with `make CFLAGS=...`)
CFLAGS ?= -W -fno-common -g -ggdb -fPIC -std=gnu99 -O3

# Simple (:=) assignment: the wildcard is expanded once at parse time instead
# of on every reference.
SOURCEDIR := .
CC_SOURCES := $(wildcard $(SOURCEDIR)/*.c)
CC_OBJECTS := $(sort $(patsubst $(SOURCEDIR)/%.c,$(SOURCEDIR)/%.o,$(CC_SOURCES)))

.SUFFIXES: .c .cc .o

# `all` and `clean` are commands, not files: keep them working even if a file
# with one of these names appears in the directory.
.PHONY: all clean

all: libtriemap.a

# Objects are produced by make's built-in .c -> .o rule using $(CC) $(CFLAGS).
libtriemap.a: $(CC_OBJECTS)
	$(AR) rcs $@ $^

clean:
	$(RM) -r *.o *.a
# Builds and runs the triemap unit tests (optionally under valgrind) and the
# benchmark binary.

# Host OS name; not referenced by any rule in this file.
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')

# Compile flags for linux / osx (override with `make CFLAGS=...`)
CFLAGS ?= -fno-common -g -ggdb -fPIC -std=gnu99 -O3
# Libraries stay in LDFLAGS, after the sources on the link line, so existing
# `make LDFLAGS=...` overrides keep working.
LDFLAGS ?= -lm
SOURCEDIR := .
CC_SOURCES := $(SOURCEDIR)/crc16.c
CC_OBJECTS := $(sort $(patsubst $(SOURCEDIR)/%.c,$(SOURCEDIR)/%.o,$(CC_SOURCES)))

.SUFFIXES: .c .cc .o

# None of these goals names the file its recipe produces (build_test writes
# ./test, and `test` runs it), so all of them are declared phony.
# The original `.PHONY = benchmark` was a variable assignment and declared
# nothing. `benchmark` is made phony here to match that evident intent; it
# therefore relinks on every invocation.
.PHONY: all benchmark build_test test test_valgrind clean

all: test test_valgrind

benchmark: $(CC_SOURCES) benchmark.c ../triemap.c
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

# Links the test runner to ./test (the name `test` itself is the phony goal
# that executes it).
build_test: $(CC_SOURCES) test.c ../triemap.c
	$(CC) $(CFLAGS) -o test $^ $(LDFLAGS)

test: build_test
	./test

# Fails (exit code 1) when valgrind reports an error or a definite leak.
test_valgrind: build_test
	valgrind --error-exitcode=1 --leak-resolution=low --quiet \
		--leak-check=full --show-possibly-lost=no ./test

clean:
	$(RM) -r *.o *.a benchmark test
# Compiles the default extension object (default.o) through make's built-in
# .c -> .o rule.

# Compiler-only flags (override with `make CFLAGS=...`).
CFLAGS ?= -g -fPIC -O2 -std=gnu99
# Libraries belong on the link line, not in CFLAGS: they are ignored (or
# warned about) when compiling with -c. They are kept here for any link step
# that uses the conventional LDLIBS variable.
LDLIBS ?= -lc -lm

.SUFFIXES: .c .so .xo .o

# `all` is a command name, not a file.
.PHONY: all

all: default.o
RSQueryTokenExpander exp; 19 | RSFreeFunction ff; 20 | void *privdata; 21 | } ExtQueryExpanderCtx; 22 | 23 | /* Get a scoring function by name. Returns NULL if no such scoring function exists */ 24 | ExtScoringFunctionCtx *Extensions_GetScoringFunction(RSScoringFunctionCtx *ctx, const char *name); 25 | 26 | /* Get a query expander function by name. Returns NULL if no such function exists */ 27 | ExtQueryExpanderCtx *Extensions_GetQueryExpander(RSQueryExpanderCtx *ctx, const char *name); 28 | 29 | /* Load an extension explicitly with its name and an init function */ 30 | int Extension_Load(const char *name, RSExtensionInitFunc func); 31 | 32 | /* Dynamically load a RediSearch extension by .so file path. Returns REDISMODULE_OK or ERR. errMsg 33 | * is set to NULL on success or an error message on failure */ 34 | int Extension_LoadDynamic(const char *path, char **errMsg); 35 | 36 | #endif -------------------------------------------------------------------------------- /src/gc.h: -------------------------------------------------------------------------------- 1 | #ifndef RS_GARBAGE_COLLECTOR_H_ 2 | #define RS_GARBAGE_COLLECTOR_H_ 3 | 4 | #include "redismodule.h" 5 | #include "rmutil/periodic.h" 6 | 7 | // the maximum frequency we are allowed to run in 8 | #define GC_MAX_HZ 100 9 | #define GC_MIN_HZ 1 10 | #define GC_DEFAULT_HZ 10 11 | 12 | #define NUM_CYCLES_HISTORY 10 13 | 14 | typedef struct { 15 | // total bytes collected by the GC 16 | size_t totalCollected; 17 | // number of cycle ran 18 | size_t numCycles; 19 | // the number of cycles that collected anything 20 | size_t effectiveCycles; 21 | 22 | // the collection result of the last N cycles. 
23 | // this is a cyclical buffer 24 | size_t history[NUM_CYCLES_HISTORY]; 25 | // the offset in the history cyclical buffer 26 | int historyOffset; 27 | } GCStats; 28 | 29 | #ifndef RS_GC_C_ 30 | typedef struct GarbageCollectorCtx GarbageCollectorCtx; 31 | 32 | /* Create a new garbage collector, with a string for the index name, and initial frequency */ 33 | GarbageCollectorCtx *NewGarbageCollector(const RedisModuleString *k, float initial_hz, 34 | uint64_t spec_unique_id); 35 | 36 | // Start the collector thread 37 | int GC_Start(GarbageCollectorCtx *ctx); 38 | 39 | /* Stop the garbage collector, and call its termination function asynchronously when its thread is 40 | * finished. This also frees the resources allocated for the GC context */ 41 | int GC_Stop(GarbageCollectorCtx *ctx); 42 | 43 | // get the current stats from the collector 44 | const struct GCStats *GC_GetStats(GarbageCollectorCtx *ctx); 45 | 46 | // called externally when the user deletes a document to hint at increasing the HZ 47 | void GC_OnDelete(GarbageCollectorCtx *ctx); 48 | 49 | /* Render the GC stats to a redis connection, used by FT.INFO */ 50 | void GC_RenderStats(RedisModuleCtx *ctx, GarbageCollectorCtx *gc); 51 | 52 | #endif // RS_GC_C_ 53 | #endif 54 | -------------------------------------------------------------------------------- /src/geo_index.h: -------------------------------------------------------------------------------- 1 | #ifndef __GEO_INDEX_H__ 2 | #define __GEO_INDEX_H__ 3 | 4 | #include "redisearch.h" 5 | #include "redismodule.h" 6 | #include "index_result.h" 7 | #include "index_iterator.h" 8 | #include "search_ctx.h" 9 | 10 | typedef struct geoIndex { 11 | RedisSearchCtx *ctx; 12 | const FieldSpec *sp; 13 | } GeoIndex; 14 | 15 | #define GEOINDEX_KEY_FMT "geo:%s/%s" 16 | 17 | int GeoIndex_AddStrings(GeoIndex *gi, t_docId docId, char *slon, char *slat); 18 | 19 | typedef struct geoFilter { 20 | 21 | const char *property; 22 | double lat; 23 | double lon; 24 | double 
radius; 25 | const char *unit; 26 | } GeoFilter; 27 | 28 | /* Create a geo filter from parsed strings and numbers */ 29 | GeoFilter *NewGeoFilter(double lon, double lat, double radius, const char *unit); 30 | 31 | /* Make sure that the parameters of the filter make sense - i.e. coordinates are in range, radius is 32 | * sane, unit is valid. Return 1 if valid, 0 if not, and set the error string into err */ 33 | int GeoFilter_IsValid(GeoFilter *f, char **err); 34 | 35 | /* Parse a geo filter from redis arguments. We assume the filter args start at argv[0] */ 36 | int GeoFilter_Parse(GeoFilter *gf, RedisModuleString **argv, int argc); 37 | void GeoFilter_Free(GeoFilter *gf); 38 | IndexIterator *NewGeoRangeIterator(GeoIndex *gi, GeoFilter *gf, double weight); 39 | 40 | #endif -------------------------------------------------------------------------------- /src/highlight.h: -------------------------------------------------------------------------------- 1 | #ifndef HIGHLIGHT_H_ 2 | #define HIGHLIGHT_H_ 3 | 4 | #include "result_processor.h" 5 | #include "search_request.h" 6 | 7 | ResultProcessor *NewHighlightProcessor(ResultProcessor *upstream, RSSearchRequest *req); 8 | 9 | #endif -------------------------------------------------------------------------------- /src/id_filter.c: -------------------------------------------------------------------------------- 1 | #include "id_filter.h" 2 | #include "doc_table.h" 3 | #include "rmalloc.h" 4 | #include "id_list.h" 5 | 6 | /* Create a new IdFilter from a list of redis strings. 
count is the number of strings, guaranteed to 7 | * be less than or equal to the length of args */ 8 | IdFilter *NewIdFilter(RedisModuleString **args, int count, DocTable *dt) { 9 | 10 | IdFilter *ret = malloc(sizeof(*ret)); 11 | *ret = (IdFilter){.ids = NULL, .keys = args, .size = 0}; 12 | if (count <= 0) { 13 | return ret; 14 | } 15 | ret->ids = calloc(count, sizeof(t_docId)); 16 | for (int i = 0; i < count; i++) { 17 | 18 | t_docId did = DocTable_GetId(dt, MakeDocKeyR(args[i])); 19 | if (did) { 20 | ret->ids[ret->size++] = did; 21 | } 22 | } 23 | return ret; 24 | } 25 | 26 | void IdFilter_Free(IdFilter *f) { 27 | if (f->ids) { 28 | free(f->ids); 29 | f->ids = NULL; 30 | } 31 | free(f); 32 | } 33 | 34 | IndexIterator *NewIdFilterIterator(IdFilter *f) { 35 | 36 | if (f->ids == NULL || f->size == 0) { 37 | return NULL; 38 | } 39 | 40 | return NewIdListIterator(f->ids, f->size, 1); 41 | } -------------------------------------------------------------------------------- /src/id_filter.h: -------------------------------------------------------------------------------- 1 | #ifndef __ID_FILTER_H__ 2 | #define __ID_FILTER_H__ 3 | 4 | #include "redismodule.h" 5 | #include "index_iterator.h" 6 | #include "doc_table.h" 7 | 8 | /* An IdFilter is a generic filter that limits the results of a query to a given set of ids. It is 9 | * created from a list of keys in the index */ 10 | typedef struct idFilter { 11 | t_docId *ids; 12 | RedisModuleString **keys; 13 | t_offset size; 14 | } IdFilter; 15 | 16 | /* Create a new IdFilter from a list of redis strings. count is the number of strings, guaranteed to 17 | * be less than or equal to the length of args */ 18 | IdFilter *NewIdFilter(RedisModuleString **args, int count, DocTable *dt); 19 | 20 | /* Free the filter's internal data, but not the filter itself, that is allocated on the stack */ 21 | void IdFilter_Free(IdFilter *f); 22 | 23 | /** Return a new id filter iterator from a filter. 
If no ids are in the filter, we return NULL */ 24 | IndexIterator *NewIdFilterIterator(IdFilter *f); 25 | #endif 26 | -------------------------------------------------------------------------------- /src/id_list.h: -------------------------------------------------------------------------------- 1 | #ifndef __ID_LIST_H__ 2 | #define __ID_LIST_H__ 3 | 4 | #include "index_iterator.h" 5 | 6 | /* A generic iterator over a pre-sorted list of document ids. This is used by the geo index and the 7 | * id filter. */ 8 | typedef struct { 9 | t_docId *docIds; 10 | t_docId lastDocId; 11 | t_offset size; 12 | t_offset offset; 13 | int atEOF; 14 | RSIndexResult *res; 15 | } IdListIterator; 16 | 17 | /* Create a new IdListIterator from a pre populated list of document ids of size num. The doc ids 18 | * are sorted in this function, so there is no need to sort them. They are automatically freed in 19 | * the end and assumed to be allocated using rm_malloc */ 20 | IndexIterator *NewIdListIterator(t_docId *ids, t_offset num, double weight); 21 | 22 | #endif -------------------------------------------------------------------------------- /src/index_iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef __INDEX_ITERATOR_H__ 2 | #define __INDEX_ITERATOR_H__ 3 | 4 | #include 5 | #include "redisearch.h" 6 | #include "index_result.h" 7 | 8 | #define INDEXREAD_EOF 0 9 | #define INDEXREAD_OK 1 10 | #define INDEXREAD_NOTFOUND 2 11 | 12 | /* An abstract interface used by readers / intersectors / unioners etc. 13 | Basically query execution creates a tree of iterators that activate each other 14 | recursively */ 15 | typedef struct indexIterator { 16 | void *ctx; 17 | 18 | RSIndexResult *(*Current)(void *ctx); 19 | 20 | /* Read the next entry from the iterator, into hit *e. 
21 | * Returns INDEXREAD_EOF if at the end */ 22 | int (*Read)(void *ctx, RSIndexResult **e); 23 | 24 | /* Skip to a docid, potentially reading the entry into hit, if the docId 25 | * matches */ 26 | int (*SkipTo)(void *ctx, t_docId docId, RSIndexResult **hit); 27 | 28 | /* the last docId read */ 29 | t_docId (*LastDocId)(void *ctx); 30 | 31 | /* can we continue iteration? */ 32 | int (*HasNext)(void *ctx); 33 | 34 | /* release the iterator's context and free everything needed */ 35 | void (*Free)(struct indexIterator *self); 36 | 37 | /* Return the number of results in this iterator. Used by the query execution 38 | * on the top iterator */ 39 | size_t (*Len)(void *ctx); 40 | 41 | /* Abort the execution of the iterator and mark it as EOF. This is used for early aborting in case 42 | * of data consistency issues due to multi threading */ 43 | void (*Abort)(void *ctx); 44 | 45 | /* Rewinde the iterator to the beginning and reset its state */ 46 | void (*Rewind)(void *ctx); 47 | } IndexIterator; 48 | 49 | #endif -------------------------------------------------------------------------------- /src/module-init/module-init.c: -------------------------------------------------------------------------------- 1 | #include "module.h" 2 | #include "version.h" 3 | 4 | #ifndef RS_STATIC 5 | /* This stub is compiled in (by the build system) if it's an end-target module */ 6 | int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { 7 | 8 | if (RedisModule_Init(ctx, "ft", REDISEARCH_MODULE_VERSION, REDISMODULE_APIVER_1) == 9 | REDISMODULE_ERR) 10 | return REDISMODULE_ERR; 11 | return RediSearch_InitModuleInternal(ctx, argv, argc); 12 | } 13 | #endif -------------------------------------------------------------------------------- /src/module.h: -------------------------------------------------------------------------------- 1 | #ifndef RS_MODULE_H_ 2 | #define RS_MODULE_H_ 3 | 4 | #include "redismodule.h" 5 | 6 | int 
RediSearch_InitModuleInternal(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); 7 | 8 | #endif -------------------------------------------------------------------------------- /src/numeric_filter.h: -------------------------------------------------------------------------------- 1 | #ifndef __NUMERIC_FILTER_H__ 2 | #define __NUMERIC_FILTER_H__ 3 | #include "redisearch.h" 4 | #include "search_ctx.h" 5 | #include "rmutil/vector.h" 6 | 7 | #define NF_INFINITY (1.0 / 0.0) 8 | #define NF_NEGATIVE_INFINITY (-1.0 / 0.0) 9 | 10 | typedef struct numericFilter { 11 | const char *fieldName; 12 | double min; 13 | double max; 14 | int inclusiveMin; 15 | int inclusiveMax; 16 | 17 | } NumericFilter; 18 | 19 | NumericFilter *NewNumericFilter(double min, double max, int inclusiveMin, int inclusiveMax); 20 | void NumericFilter_Free(NumericFilter *nf); 21 | NumericFilter *ParseNumericFilter(RedisSearchCtx *ctx, RedisModuleString **argv, int argc); 22 | Vector *ParseMultipleFilters(RedisSearchCtx *ctx, RedisModuleString **argv, int argc); 23 | 24 | /* 25 | A numeric index allows indexing of documents by numeric ranges, and intersection 26 | of them with fulltext indexes. 27 | */ 28 | static inline int NumericFilter_Match(NumericFilter *f, double score) { 29 | 30 | int rc = 0; 31 | // match min - -inf or x >/>= score 32 | int matchMin = (f->inclusiveMin ? score >= f->min : score > f->min); 33 | 34 | if (matchMin) { 35 | // match max - +inf or x inclusiveMax ? 
score <= f->max : score < f->max); 37 | } 38 | return rc; 39 | } 40 | 41 | #endif -------------------------------------------------------------------------------- /src/print_version.c: -------------------------------------------------------------------------------- 1 | #ifdef PRINT_VERSION_TARGET 2 | #include 3 | #include "version.h" 4 | 5 | /* This is a utility that prints the current semantic version string, to be used in make files */ 6 | 7 | int main(int argc, char **argv) { 8 | printf("%d.%d.%d\n", REDISEARCH_VERSION_MAJOR, REDISEARCH_VERSION_MINOR, 9 | REDISEARCH_VERSION_PATCH); 10 | return 0; 11 | } 12 | #endif -------------------------------------------------------------------------------- /src/pytest/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Find all the Python files with test_* in them 2 | FILE(GLOB PY_TEST_FILES "test_*.py") 3 | 4 | FOREACH(n ${PY_TEST_FILES}) 5 | GET_FILENAME_COMPONENT(test_name ${n} NAME_WE) 6 | ADD_TEST(NAME "PY_${test_name}" 7 | COMMAND "python" "-m" "unittest" "${test_name}" 8 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") 9 | SET_TESTS_PROPERTIES("PY_${test_name}" PROPERTIES ENVIRONMENT 10 | "REDIS_MODULE_PATH=$") 11 | ENDFOREACH() -------------------------------------------------------------------------------- /src/pytest/Makefile: -------------------------------------------------------------------------------- 1 | PYTHON:=python3 2 | .PHONY: test # 'test' is a command, not a file: run it even if a file named test exists 3 | test: 4 | $(PYTHON) -m unittest discover -v 5 | -------------------------------------------------------------------------------- /src/pytest/base_case.py: -------------------------------------------------------------------------------- 1 | from rmtest import ModuleTestCase 2 | 3 | 4 | class BaseSearchTestCase(ModuleTestCase('../src/module-oss.so')): 5 | 6 | def setUp(self): 7 | self.flushdb() 8 | 9 | def search(self, *args): 10 | return self.cmd('ft.search', *args) 11 | 12 | def flushdb(self): 13 | self.cmd('flushdb') 14 | 
-------------------------------------------------------------------------------- /src/pytest/games.json.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goodform/RediSearch/c1d54f8195734394a2ec8b14d18b8feffd0bb081/src/pytest/games.json.bz2 -------------------------------------------------------------------------------- /src/pytest/rmtest.config: -------------------------------------------------------------------------------- 1 | [server] 2 | module = ../redisearch.so 3 | -------------------------------------------------------------------------------- /src/pytest/test_doctable.py: -------------------------------------------------------------------------------- 1 | from rmtest import BaseModuleTestCase 2 | import redis 3 | import unittest 4 | from hotels import hotels 5 | import random 6 | import time 7 | 8 | 9 | class SearchTestCase(BaseModuleTestCase): 10 | 11 | @property 12 | def module_args(self): 13 | return super(SearchTestCase, self).module_args + ['MAXDOCTABLESIZE', '100'] 14 | # mainly this test adding and removing docs while the doc table size is 100 15 | # and make sure we are not crashing and not leaking memory (when runs with valgrind). 
16 | def testDocTable(self): 17 | with self.redis() as r: 18 | r.flushdb() 19 | self.assertOk(r.execute_command( 20 | 'ft.create', 'idx', 'schema', 'title', 'text', 'body', 'text')) 21 | # doc table size is 100 so insearting 1000 docs should gives us 10 docs in each bucket 22 | for i in range(1000): 23 | self.assertOk(r.execute_command('ft.add', 'idx', 'doc%d' % i, 1.0, 'fields', 24 | 'title', 'hello world %d' % (i % 100), 25 | 'body', 'lorem ist ipsum')) 26 | 27 | for i in range(100): 28 | res = r.execute_command('ft.search', 'idx', 'hello world %d' % i) 29 | self.assertEqual(res[0], 10) 30 | 31 | # deleting the first 100 docs 32 | for i in range(100): 33 | self.assertEqual(r.execute_command('ft.del', 'idx', 'doc%d' % i), 1) 34 | 35 | for i in range(100): 36 | res = r.execute_command('ft.search', 'idx', 'hello world %d' % i) 37 | self.assertEqual(res[0], 9) 38 | 39 | self.assertOk(r.execute_command('ft.drop', 'idx')) 40 | -------------------------------------------------------------------------------- /src/pytest/test_ext.py: -------------------------------------------------------------------------------- 1 | from rmtest import BaseModuleTestCase 2 | import redis 3 | import unittest 4 | from hotels import hotels 5 | import random 6 | import time 7 | import subprocess 8 | import os 9 | import os.path 10 | 11 | # this file is 'pytest' 12 | 13 | SELF_DIR = os.path.abspath(os.path.dirname(__file__)) 14 | TEST_MODULE = SELF_DIR + '/../tests/ext-example/example.so' 15 | class ExtensionTestCase(BaseModuleTestCase): 16 | @property 17 | def module_args(self): 18 | return super(ExtensionTestCase, self).module_args + ['EXTLOAD', TEST_MODULE] 19 | 20 | def testExt(self): 21 | if not os.path.exists(TEST_MODULE): 22 | subprocess.call(['make', '-C', os.path.dirname(TEST_MODULE)]) 23 | with self.redis() as r: 24 | 25 | r.flushdb() 26 | N = 100 27 | self.assertOk(r.execute_command( 28 | 'ft.create', 'idx', 'schema', 'f', 'text')) 29 | for i in range(N): 30 | 31 | 
self.assertOk(r.execute_command('ft.add', 'idx', 'doc%d' % i, 1.0, 'fields', 32 | 'f', 'hello world')) 33 | res = r.execute_command('ft.search', 'idx', 'hello world') 34 | self.assertEqual(N, res[0]) 35 | res = r.execute_command('ft.search', 'idx', 'hello world', 'scorer', 'filterout_scorer') 36 | self.assertEqual(0, res[0]) 37 | 38 | if __name__ == '__main__': 39 | 40 | unittest.main() 41 | -------------------------------------------------------------------------------- /src/pytest/test_safemode.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from test import SearchTestCase 3 | import os 4 | 5 | class SafemodeTestCase(SearchTestCase): 6 | # TODO: Implement a proper API in rmtest and expose this correctly 7 | _loadmodule_args = ( 8 | os.environ.get('REDIS_MODULE_PATH', '../redisearch.so'), 'SAFEMODE',) -------------------------------------------------------------------------------- /src/pytest/test_wideschema.py: -------------------------------------------------------------------------------- 1 | from rmtest import BaseModuleTestCase 2 | import redis 3 | import unittest 4 | import platform 5 | 6 | 7 | class SearchTestCase(BaseModuleTestCase): 8 | 9 | def search(self, r, *args): 10 | return r.execute_command('ft.search', *args) 11 | 12 | def testWideSchema(self): 13 | with self.redis() as r: 14 | r.flushdb() 15 | schema = [] 16 | FIELDS = 64 17 | for i in range(FIELDS): 18 | schema.extend(('field_%d' % i, 'TEXT')) 19 | self.assertOk(r.execute_command( 20 | 'ft.create', 'idx', 'schema', *schema)) 21 | N = 10 22 | for n in range(N): 23 | fields = [] 24 | for i in range(FIELDS): 25 | fields.extend(('field_%d' % i, 'hello token_%d' % i)) 26 | self.assertOk(r.execute_command('ft.add', 'idx', 27 | 'doc%d' % n, 1.0, 'fields', *fields)) 28 | for _ in r.retry_with_rdb_reload(): 29 | for i in range(FIELDS): 30 | 31 | res = self.search( 32 | r, 'idx', '@field_%d:token_%d' % (i, i), 'NOCONTENT') 33 | 
self.assertEqual(res[0], N) 34 | 35 | res = r.execute_command( 36 | 'ft.explain', 'idx', '@field_%d:token_%d' % (i, i), 'VERBATIM').strip() 37 | self.assertEqual('@field_%d:token_%d' % (i, i), res) 38 | 39 | res = self.search( 40 | r, 'idx', 'hello @field_%d:token_%d' % (i, i), 'NOCONTENT') 41 | self.assertEqual(res[0], N) 42 | 43 | res = self.search(r, 'idx', ' '.join( 44 | ('@field_%d:token_%d' % (i, i) for i in range(FIELDS)))) 45 | self.assertEqual(res[0], N) 46 | 47 | res = self.search(r, 'idx', ' '.join( 48 | ('token_%d' % (i) for i in range(FIELDS)))) 49 | self.assertEqual(res[0], N) 50 | 51 | if __name__ == '__main__': 52 | 53 | unittest.main() 54 | -------------------------------------------------------------------------------- /src/query_parser/Makefile: -------------------------------------------------------------------------------- 1 | SRCUTIL := ../../srcutil 2 | PARSER_SYMBOL_PREFIX := RSQuery 3 | include $(SRCUTIL)/make-parser.mk 4 | -------------------------------------------------------------------------------- /src/query_parser/parse.h: -------------------------------------------------------------------------------- 1 | #ifndef __QUERY_PARSER_PARSE_H__ 2 | #define __QUERY_PARSER_PARSE_H__ 3 | 4 | #include "tokenizer.h" 5 | #include "../query.h" 6 | //#include "../rmutil/alloc.h" 7 | 8 | #endif // !__QUERY_PARSER_PARSE_H__ -------------------------------------------------------------------------------- /src/query_parser/parser-toplevel.c: -------------------------------------------------------------------------------- 1 | #define Parse RSQuery_Parse 2 | #define ParseTrace RSQuery_ParseTrace 3 | #define ParseAlloc RSQuery_ParseAlloc 4 | #define ParseFree RSQuery_ParseFree 5 | #define ParseInit RSQuery_ParseInit 6 | #define ParseFinalize RSQuery_ParseFinalize 7 | #define ParseStackPeack RSQuery_ParseStackPeack 8 | #include "parser.c.inc" 9 | -------------------------------------------------------------------------------- /src/query_parser/parser.h: 
-------------------------------------------------------------------------------- 1 | #define LOWEST 1 2 | #define TILDE 2 3 | #define TAGLIST 3 4 | #define QUOTE 4 5 | #define COLON 5 6 | #define MINUS 6 7 | #define NUMBER 7 8 | #define STOPWORD 8 9 | #define TERMLIST 9 10 | #define TERM 10 11 | #define PREFIX 11 12 | #define PERCENT 12 13 | #define ATTRIBUTE 13 14 | #define LP 14 15 | #define RP 15 16 | #define MODIFIER 16 17 | #define AND 17 18 | #define OR 18 19 | #define ORX 19 20 | #define ARROW 20 21 | #define STAR 21 22 | #define SEMICOLON 22 23 | #define LB 23 24 | #define RB 24 25 | #define LSQB 25 26 | #define RSQB 26 27 | -------------------------------------------------------------------------------- /src/query_parser/tokenizer.h: -------------------------------------------------------------------------------- 1 | #ifndef __QUERY_TOKENIZER_H__ 2 | #define __QUERY_TOKENIZER_H__ 3 | 4 | #include 5 | #include "../tokenize.h" 6 | 7 | /* A query-specific tokenizer, that reads symbols like quots, pipes, etc */ 8 | typedef struct { 9 | const char *text; 10 | size_t len; 11 | char *pos; 12 | const char *separators; 13 | NormalizeFunc normalize; 14 | const char **stopwords; 15 | 16 | } QueryTokenizer; 17 | 18 | /* Quer tokenizer token type */ 19 | // typedef enum { T_WORD, T_QUOTE, T_AND, T_OR, T_END, T_STOPWORD } 20 | // QueryTokenType; 21 | 22 | /* A token in the process of parsing a query. Unlike the document tokenizer, it 23 | works iteratively and is not callback based. 
*/ 24 | typedef struct { 25 | const char *s; 26 | int len; 27 | int pos; 28 | char *field; 29 | double numval; 30 | // QueryTokenType ; 31 | } QueryToken; 32 | 33 | typedef struct { 34 | double num; 35 | int inclusive; 36 | } RangeNumber; 37 | 38 | #define QUERY_STOPWORDS DEFAULT_STOPWORDS; 39 | 40 | #endif -------------------------------------------------------------------------------- /src/redis.conf: -------------------------------------------------------------------------------- 1 | loadmodule ./redisearch.so 2 | -------------------------------------------------------------------------------- /src/rmalloc.h: -------------------------------------------------------------------------------- 1 | #ifndef __REDISEARCH_ALLOC__ 2 | #define __REDISEARCH_ALLOC__ 3 | 4 | #include 5 | #include 6 | #include "redismodule.h" 7 | 8 | #ifdef REDIS_MODULE_TARGET /* Set this when compiling your code as a module */ 9 | 10 | static inline void *rm_malloc(size_t n) { 11 | return RedisModule_Alloc(n); 12 | } 13 | static inline void *rm_calloc(size_t nelem, size_t elemsz) { 14 | return RedisModule_Calloc(nelem, elemsz); 15 | } 16 | static inline void *rm_realloc(void *p, size_t n) { 17 | return RedisModule_Realloc(p, n); 18 | } 19 | static inline void rm_free(void *p) { 20 | RedisModule_Free(p); 21 | } 22 | static inline char *rm_strdup(const char *s) { 23 | return RedisModule_Strdup(s); 24 | } 25 | /* Module-target strndup: copies at most n bytes of s, stopping at the first NUL (as the strndup used for non-module targets does), and always NUL-terminates the copy */ 26 | static inline char *rm_strndup(const char *s, size_t n) { 27 | size_t len = 0; 28 | while (len < n && s[len] != '\0') len++; /* never read past the terminator of s */ 29 | char *ret = rm_malloc(len + 1); 30 | if (ret) { 31 | memcpy(ret, s, len); 32 | ret[len] = '\0'; 33 | } 34 | return ret; 35 | } 36 | #endif 37 | #ifndef REDIS_MODULE_TARGET 38 | /* for non redis module targets */ 39 | #define rm_malloc malloc 40 | #define rm_free free 41 | #define rm_calloc calloc 42 | #define rm_realloc realloc 43 | #define rm_strdup strdup 44 | #define rm_strndup strndup 45 | #endif 46 | 47 | #define rm_new(x) rm_malloc(sizeof(x)) 48 | 49 | #endif /* __REDISEARCH_ALLOC__ */ 50 | 
-------------------------------------------------------------------------------- /src/rmutil/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ADD_LIBRARY(rmutil OBJECT 2 | alloc.c 3 | cmdparse.c 4 | heap.c 5 | periodic.c 6 | priority_queue.c 7 | sds.c 8 | strings.c 9 | util.c 10 | vector.c) -------------------------------------------------------------------------------- /src/rmutil/Makefile: -------------------------------------------------------------------------------- 1 | # set environment variable RM_INCLUDE_DIR to the location of redismodule.h 2 | ifndef RM_INCLUDE_DIR 3 | RM_INCLUDE_DIR=../ 4 | endif 5 | 6 | CFLAGS ?= -g -fPIC -O3 -std=gnu99 -Wall -Wno-unused-function 7 | CFLAGS += -I$(RM_INCLUDE_DIR) 8 | CC=gcc 9 | 10 | OBJS=util.o strings.o sds.o vector.o alloc.o periodic.o cmdparse.o 11 | .PHONY: all clean 12 | all: librmutil.a 13 | 14 | clean: 15 | rm -rf *.o *.a 16 | 17 | librmutil.a: $(OBJS) 18 | ar rcs $@ $^ 19 | 20 | test_vector: test_vector.o vector.o 21 | $(CC) -Wall -o $@ $^ -lc -lpthread -O0 22 | @(sh -c ./$@) 23 | .PHONY: test_vector 24 | 25 | test_periodic: test_periodic.o periodic.o 26 | $(CC) -Wall -o $@ $^ -lc -lpthread -O0 27 | @(sh -c ./$@) 28 | .PHONY: test_periodic 29 | 30 | test_cmdparse: test_cmdparse.o cmdparse.o 31 | $(CC) -Wall -o $@ $^ -lc -lpthread -O0 32 | @(sh -c ./$@) 33 | 34 | .PHONY: test_cmdparse 35 | test: test_periodic test_vector 36 | .PHONY: test 37 | -------------------------------------------------------------------------------- /src/rmutil/alloc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "alloc.h" 5 | 6 | /* A patched implementation of strndup that will use our patched calloc. It copies at most n bytes of s and stops at the first NUL, so s is never read past its terminator */ 7 | char *rmalloc_strndup(const char *s, size_t n) { 8 | size_t len = 0; 9 | while (len < n && s[len] != '\0') len++; 10 | char *ret = calloc(len + 1, sizeof(char)); /* zero-filled, so the copy is already terminated */ 11 | if (ret) memcpy(ret, s, len); 12 | return ret; 13 | } 14 | /* 15 | * Re-patching RedisModule_Alloc and friends
to the original malloc functions 16 | * 17 | * This function should be called if you are working with malloc-patched code 18 | * outside of redis, usually for unit tests. Call it once when entering your unit 19 | * tests' main(). 20 | * 21 | * Since including "alloc.h" while defining REDIS_MODULE_TARGET 22 | * replaces all malloc functions in redis with the RM_Alloc family of functions, 23 | * when running that code outside of redis, your app will crash. This function 24 | * patches the RM_Alloc functions back to the original mallocs. */ 25 | void RMUTil_InitAlloc() { 26 | 27 | RedisModule_Alloc = malloc; 28 | RedisModule_Realloc = realloc; 29 | RedisModule_Calloc = calloc; 30 | RedisModule_Free = free; 31 | RedisModule_Strdup = strdup; 32 | } 33 | -------------------------------------------------------------------------------- /src/rmutil/alloc.h: -------------------------------------------------------------------------------- 1 | #ifndef __RMUTIL_ALLOC__ 2 | #define __RMUTIL_ALLOC__ 3 | 4 | /* Automatic Redis Module Allocation functions monkey-patching. 5 | * 6 | * Including this file while REDIS_MODULE_TARGET is defined, will explicitly 7 | * override malloc, calloc, realloc & free with RedisModule_Alloc, 8 | * RedisModule_Calloc, etc implementations, that allow Redis better control and 9 | * reporting over allocations per module. 10 | * 11 | * You should include this file in all c files AS THE LAST INCLUDED FILE 12 | * 13 | * This only has effect when compiling with the macro REDIS_MODULE_TARGET 14 | * defined. The idea is that for unit tests it will not be defined, but for the 15 | * module build target it will be.
16 | * 17 | */ 18 | 19 | #include 20 | #include 21 | 22 | char *rmalloc_strndup(const char *s, size_t n); 23 | 24 | #ifdef REDIS_MODULE_TARGET /* Set this when compiling your code as a module */ 25 | 26 | #define malloc(size) RedisModule_Alloc(size) 27 | #define calloc(count, size) RedisModule_Calloc(count, size) 28 | #define realloc(ptr, size) RedisModule_Realloc(ptr, size) 29 | #define free(ptr) RedisModule_Free(ptr) 30 | 31 | #ifdef strdup 32 | #undef strdup 33 | #endif 34 | #define strdup(ptr) RedisModule_Strdup(ptr) 35 | 36 | /* More overriding */ 37 | // needed to avoid calling strndup->malloc 38 | #ifdef strndup 39 | #undef strndup 40 | #endif 41 | #define strndup(s, n) rmalloc_strndup(s, n) 42 | 43 | #else 44 | 45 | #endif /* REDIS_MODULE_TARGET */ 46 | /* This function should be called if you are working with malloc-patched code 47 | * outside of redis, usually for unit tests. Call it once when entering your unit 48 | * tests' main() */ 49 | void RMUTil_InitAlloc(); 50 | 51 | #endif /* __RMUTIL_ALLOC__ */ 52 | -------------------------------------------------------------------------------- /src/rmutil/logging.h: -------------------------------------------------------------------------------- 1 | #ifndef __RMUTIL_LOGGING_H__ 2 | #define __RMUTIL_LOGGING_H__ 3 | 4 | /* Convenience macros for redis logging */ 5 | 6 | #define RM_LOG_DEBUG(ctx, ...) RedisModule_Log(ctx, "debug", __VA_ARGS__) 7 | #define RM_LOG_VERBOSE(ctx, ...) RedisModule_Log(ctx, "verbose", __VA_ARGS__) 8 | #define RM_LOG_NOTICE(ctx, ...) RedisModule_Log(ctx, "notice", __VA_ARGS__) 9 | #define RM_LOG_WARNING(ctx, ...)
RedisModule_Log(ctx, "warning", __VA_ARGS__) 10 | 11 | #endif -------------------------------------------------------------------------------- /src/rmutil/priority_queue.c: -------------------------------------------------------------------------------- 1 | #include "priority_queue.h" 2 | #include "heap.h" 3 | 4 | PriorityQueue *__newPriorityQueueSize(size_t elemSize, size_t cap, int (*cmp)(void *, void *)) { 5 | PriorityQueue *pq = malloc(sizeof(PriorityQueue)); 6 | pq->v = __newVectorSize(elemSize, cap); 7 | pq->cmp = cmp; 8 | return pq; 9 | } 10 | 11 | inline size_t Priority_Queue_Size(PriorityQueue *pq) { 12 | return Vector_Size(pq->v); 13 | } 14 | 15 | inline int Priority_Queue_Top(PriorityQueue *pq, void *ptr) { 16 | return Vector_Get(pq->v, 0, ptr); 17 | } 18 | 19 | inline size_t __priority_Queue_PushPtr(PriorityQueue *pq, void *elem) { 20 | size_t top = __vector_PushPtr(pq->v, elem); 21 | Heap_Push(pq->v, 0, top, pq->cmp); 22 | return top; 23 | } 24 | 25 | inline void Priority_Queue_Pop(PriorityQueue *pq) { 26 | if (pq->v->top == 0) { 27 | return; 28 | } 29 | Heap_Pop(pq->v, 0, pq->v->top, pq->cmp); 30 | pq->v->top--; 31 | } 32 | 33 | void Priority_Queue_Free(PriorityQueue *pq) { 34 | Vector_Free(pq->v); 35 | free(pq); 36 | } 37 | -------------------------------------------------------------------------------- /src/rmutil/strings.h: -------------------------------------------------------------------------------- 1 | #ifndef __RMUTIL_STRINGS_H__ 2 | #define __RMUTIL_STRINGS_H__ 3 | 4 | #include 5 | 6 | /* 7 | * Create a new RedisModuleString object from a printf-style format and arguments. 8 | * Note that RedisModuleString objects CANNOT be used as formatting arguments. 9 | */ 10 | // DEPRECATED since it was added to the RedisModule API. 
Replaced with a macro below 11 | // RedisModuleString *RMUtil_CreateFormattedString(RedisModuleCtx *ctx, const char *fmt, ...); 12 | #define RMUtil_CreateFormattedString RedisModule_CreateStringPrintf 13 | 14 | /* Return 1 if the two strings are equal. Case *sensitive* */ 15 | int RMUtil_StringEquals(RedisModuleString *s1, RedisModuleString *s2); 16 | 17 | /* Return 1 if the string is equal to a C NULL terminated string. Case *sensitive* */ 18 | int RMUtil_StringEqualsC(RedisModuleString *s1, const char *s2); 19 | 20 | /* Return 1 if the string is equal to a C NULL terminated string. Case *insensitive* */ 21 | int RMUtil_StringEqualsCaseC(RedisModuleString *s1, const char *s2); 22 | 23 | /* Converts a redis string to lowercase in place without reallocating anything */ 24 | void RMUtil_StringToLower(RedisModuleString *s); 25 | 26 | /* Converts a redis string to uppercase in place without reallocating anything */ 27 | void RMUtil_StringToUpper(RedisModuleString *s); 28 | 29 | // If set, copy the strings using strdup rather than simply storing pointers. 30 | #define RMUTIL_STRINGCONVERT_COPY 1 31 | 32 | /** 33 | * Convert one or more RedisModuleString objects into `const char*`. 34 | * Both rs and ss are arrays, and should be of length. 
35 | * Options may be 0 or `RMUTIL_STRINGCONVERT_COPY` 36 | */ 37 | void RMUtil_StringConvert(RedisModuleString **rs, const char **ss, size_t n, int options); 38 | #endif 39 | -------------------------------------------------------------------------------- /src/rmutil/test_heap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "heap.h" 3 | #include "assert.h" 4 | 5 | int cmp(void *a, void *b) { 6 | int *__a = (int *) a; 7 | int *__b = (int *) b; 8 | return *__a - *__b; 9 | } 10 | 11 | int main(int argc, char **argv) { 12 | int myints[] = {10, 20, 30, 5, 15}; 13 | Vector *v = NewVector(int, 5); 14 | for (int i = 0; i < 5; i++) { 15 | Vector_Push(v, myints[i]); 16 | } 17 | 18 | Make_Heap(v, 0, v->top, cmp); 19 | 20 | int n; 21 | Vector_Get(v, 0, &n); 22 | assert(30 == n); 23 | 24 | Heap_Pop(v, 0, v->top, cmp); 25 | v->top = 4; 26 | Vector_Get(v, 0, &n); 27 | assert(20 == n); 28 | 29 | Vector_Push(v, 99); 30 | Heap_Push(v, 0, v->top, cmp); 31 | Vector_Get(v, 0, &n); 32 | assert(99 == n); 33 | 34 | Vector_Free(v); 35 | printf("PASS!\n"); 36 | return 0; 37 | } 38 | 39 | -------------------------------------------------------------------------------- /src/rmutil/test_periodic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "periodic.h" 5 | #include "assert.h" 6 | #include "test.h" 7 | 8 | void timerCb(RedisModuleCtx *ctx, void *p) { 9 | int *x = p; 10 | (*x)++; 11 | } 12 | 13 | int testPeriodic() { 14 | int x = 0; 15 | struct RMUtilTimer *tm = 16 | RMUtil_NewPeriodicTimer(timerCb, &x, (struct timespec){.tv_sec = 0, .tv_nsec = 10000000}); 17 | 18 | sleep(1); 19 | 20 | ASSERT_EQUAL(0, RMUtilTimer_Stop(tm)); 21 | ASSERT(x > 0); 22 | ASSERT(x <= 100); 23 | RMUtilTimer_Free(tm); 24 | return 0; 25 | } 26 | 27 | TEST_MAIN({ TESTFUNC(testPeriodic); }); 28 | 
-------------------------------------------------------------------------------- /src/rmutil/test_priority_queue.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "assert.h" 3 | #include "priority_queue.h" 4 | 5 | int cmp(void* i1, void* i2) { 6 | int *__i1 = (int*) i1; 7 | int *__i2 = (int*) i2; 8 | return *__i1 - *__i2; 9 | } 10 | 11 | int main(int argc, char **argv) { 12 | PriorityQueue *pq = NewPriorityQueue(int, 10, cmp); 13 | assert(0 == Priority_Queue_Size(pq)); 14 | 15 | for (int i = 0; i < 5; i++) { 16 | Priority_Queue_Push(pq, i); 17 | } 18 | assert(5 == Priority_Queue_Size(pq)); 19 | 20 | Priority_Queue_Pop(pq); 21 | assert(4 == Priority_Queue_Size(pq)); 22 | 23 | Priority_Queue_Push(pq, 10); 24 | Priority_Queue_Push(pq, 20); 25 | Priority_Queue_Push(pq, 15); 26 | int n; 27 | Priority_Queue_Top(pq, &n); 28 | assert(20 == n); 29 | 30 | Priority_Queue_Pop(pq); 31 | Priority_Queue_Top(pq, &n); 32 | assert(15 == n); 33 | 34 | Priority_Queue_Free(pq); 35 | printf("PASS!\n"); 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /src/rmutil/test_vector.c: -------------------------------------------------------------------------------- 1 | #include "vector.h" 2 | #include 3 | #include "test.h" 4 | 5 | int testVector() { 6 | 7 | Vector *v = NewVector(int, 1); 8 | ASSERT(v != NULL); 9 | // Vector_Put(v, 0, 1); 10 | // Vector_Put(v, 1, 3); 11 | for (int i = 0; i < 10; i++) { 12 | Vector_Push(v, i); 13 | } 14 | ASSERT_EQUAL(10, Vector_Size(v)); 15 | ASSERT_EQUAL(16, Vector_Cap(v)); 16 | 17 | for (int i = 0; i < Vector_Size(v); i++) { 18 | int n; 19 | int rc = Vector_Get(v, i, &n); 20 | ASSERT_EQUAL(1, rc); 21 | // printf("%d %d\n", rc, n); 22 | 23 | ASSERT_EQUAL(n, i); 24 | } 25 | 26 | Vector_Free(v); 27 | 28 | v = NewVector(char *, 0); 29 | int N = 4; 30 | char *strings[4] = {"hello", "world", "foo", "bar"}; 31 | 32 | for (int i = 0; i < N; i++) { 33 | 
Vector_Push(v, strings[i]); 34 | } 35 | ASSERT_EQUAL(N, Vector_Size(v)); 36 | ASSERT(Vector_Cap(v) >= N); 37 | 38 | for (int i = 0; i < Vector_Size(v); i++) { 39 | char *x; 40 | int rc = Vector_Get(v, i, &x); 41 | ASSERT_EQUAL(1, rc); 42 | ASSERT_STRING_EQ(x, strings[i]); 43 | } 44 | 45 | int rc = Vector_Get(v, 100, NULL); 46 | ASSERT_EQUAL(0, rc); 47 | 48 | Vector_Free(v); 49 | 50 | return 0; 51 | // Vector_Push(v, "hello"); 52 | // Vector_Push(v, "world"); 53 | // char *x = NULL; 54 | // int rc = Vector_Getx(v, 0, &x); 55 | // printf("rc: %d got %s\n", rc, x); 56 | } 57 | 58 | TEST_MAIN({ TESTFUNC(testVector); }); 59 | -------------------------------------------------------------------------------- /src/run_valgrind.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ -z "$REDIS_PATH" ]; then 3 | REDIS_PATH="redis-server" 4 | fi 5 | 6 | valgrind --tool=memcheck --leak-check=full --show-leak-kinds=definite --suppressions=leakcheck.supp $REDIS_PATH redis.conf 7 | -------------------------------------------------------------------------------- /src/search_ctx.h: -------------------------------------------------------------------------------- 1 | #ifndef __SEARCH_CTX_H 2 | #define __SEARCH_CTX_H 3 | 4 | #include 5 | 6 | #include "redismodule.h" 7 | #include "spec.h" 8 | #include "trie/trie_type.h" 9 | #include 10 | 11 | /** Context passed to all redis related search handling functions. */ 12 | typedef struct { 13 | RedisModuleCtx *redisCtx; 14 | RedisModuleKey *key; 15 | RedisModuleString *keyName; 16 | IndexSpec *spec; 17 | } RedisSearchCtx; 18 | 19 | #define SEARCH_CTX_STATIC(ctx, sp) \ 20 | (RedisSearchCtx) { \ 21 | .redisCtx = ctx, .spec = sp \ 22 | } 23 | 24 | #define SEARCH_CTX_SORTABLES(ctx) ((ctx && ctx->spec) ? 
ctx->spec->sortables : NULL) 25 | // Create a string context on the heap 26 | RedisSearchCtx *NewSearchCtx(RedisModuleCtx *ctx, RedisModuleString *indexName); 27 | RedisSearchCtx *NewSearchCtxDefault(RedisModuleCtx *ctx); 28 | 29 | RedisSearchCtx *SearchCtx_Refresh(RedisSearchCtx *sctx, RedisModuleString *keyName); 30 | 31 | // Same as above, only from c string (null terminated) 32 | RedisSearchCtx *NewSearchCtxC(RedisModuleCtx *ctx, const char *indexName); 33 | 34 | void SearchCtx_Free(RedisSearchCtx *sctx); 35 | #endif 36 | -------------------------------------------------------------------------------- /src/search_request.h: -------------------------------------------------------------------------------- 1 | #ifndef RS_SEARCH_REQUEST_H__ 2 | #define RS_SEARCH_REQUEST_H__ 3 | 4 | #include 5 | #include "redisearch.h" 6 | #include "numeric_filter.h" 7 | #include "geo_index.h" 8 | #include "id_filter.h" 9 | #include "sortable.h" 10 | #include "search_options.h" 11 | #include "query_plan.h" 12 | 13 | typedef struct { 14 | 15 | char *rawQuery; 16 | size_t qlen; 17 | 18 | RSSearchOptions opts; 19 | 20 | /* Numeric Filters */ 21 | Vector *numericFilters; 22 | 23 | /* Geo Filter */ 24 | GeoFilter *geoFilter; 25 | 26 | /* InKeys */ 27 | IdFilter *idFilter; 28 | 29 | RSPayload payload; 30 | 31 | } RSSearchRequest; 32 | 33 | RSSearchRequest *ParseRequest(RedisSearchCtx *ctx, RedisModuleString **argv, int argc, 34 | char **errStr); 35 | 36 | void RSSearchRequest_Free(RSSearchRequest *req); 37 | QueryParseCtx *SearchRequest_ParseQuery(RedisSearchCtx *sctx, RSSearchRequest *req, char **err); 38 | QueryPlan *SearchRequest_BuildPlan(RedisSearchCtx *sctx, RSSearchRequest *req, QueryParseCtx *q, 39 | char **err); 40 | 41 | // Remove any fields not explicitly requested by `RETURN`, iff any explicit 42 | // fields actually exist. 
43 | void FieldList_RestrictReturn(FieldList *fields); 44 | 45 | #endif -------------------------------------------------------------------------------- /src/stemmer.h: -------------------------------------------------------------------------------- 1 | #ifndef __RS_STEMMER_H__ 2 | #define __RS_STEMMER_H__ 3 | #include 4 | 5 | typedef enum { SnowballStemmer } StemmerType; 6 | 7 | #define DEFAULT_LANGUAGE "english" 8 | #define STEM_PREFIX '+' 9 | #define STEMMER_EXPANDER_NAME "stem" 10 | 11 | /* Abstract "interface" for a pluggable stemmer, ensuring we can use multiple 12 | * stemmer libs */ 13 | typedef struct stemmer { 14 | void *ctx; 15 | const char *(*Stem)(void *ctx, const char *word, size_t len, size_t *outlen); 16 | void (*Free)(struct stemmer *); 17 | 18 | // Attempts to reset the stemmer using the given language and type. Returns 0 19 | // if this stemmer cannot be reused. 20 | int (*Reset)(struct stemmer *, StemmerType type, const char *language); 21 | 22 | const char *language; 23 | StemmerType type; // Type of stemmer 24 | } Stemmer; 25 | 26 | Stemmer *NewStemmer(StemmerType type, const char *language); 27 | 28 | int ResetStemmer(Stemmer *stemmer, StemmerType type, const char *language); 29 | 30 | /* check if a language is supported by our stemmers */ 31 | int IsSupportedLanguage(const char *language, size_t len); 32 | 33 | /* Get a stemmer expander instance for registering it */ 34 | void RegisterStemmerExpander(); 35 | 36 | /* Snoball Stemmer wrapper implementation */ 37 | const char *__sbstemmer_Stem(void *ctx, const char *word, size_t len, size_t *outlen); 38 | void __sbstemmer_Free(Stemmer *s); 39 | Stemmer *__newSnowballStemmer(const char *language); 40 | 41 | #endif -------------------------------------------------------------------------------- /src/stopwords.h: -------------------------------------------------------------------------------- 1 | #ifndef __REDISEARCH_STOPWORDS_H___ 2 | #define __REDISEARCH_STOPWORDS_H___ 3 | 4 | #include 5 | 
#include "redismodule.h" 6 | 7 | static const char *DEFAULT_STOPWORDS[] = { 8 | "a", "is", "the", "an", "and", "are", "as", "at", "be", "but", "by", "for", 9 | "if", "in", "into", "it", "no", "not", "of", "on", "or", "such", "that", "their", 10 | "then", "there", "these", "they", "this", "to", "was", "will", "with", NULL}; 11 | 12 | #ifndef __REDISEARCH_STOPORWORDS_C__ 13 | typedef struct StopWordList StopWordList; 14 | #else 15 | struct StopWordList; 16 | #endif 17 | 18 | /* Check if a stopword list contains a term. The term must be already lowercased */ 19 | int StopWordList_Contains(struct StopWordList *sl, const char *term, size_t len); 20 | 21 | struct StopWordList *DefaultStopWordList(); 22 | struct StopWordList *EmptyStopWordList(); 23 | 24 | /* Create a new stopword list from a list of redis strings */ 25 | struct StopWordList *NewStopWordList(RedisModuleString **strs, size_t len); 26 | 27 | /* Create a new stopword list from a list of NULL-terminated C strings */ 28 | struct StopWordList *NewStopWordListCStr(const char **strs, size_t len); 29 | 30 | /* Free a stopword list's memory */ 31 | void StopWordList_Unref(struct StopWordList *sl); 32 | 33 | #define StopWordList_Free StopWordList_Unref 34 | 35 | /* Load a stopword list from RDB */ 36 | struct StopWordList *StopWordList_RdbLoad(RedisModuleIO *rdb, int encver); 37 | 38 | /* Save a stopword list to RDB */ 39 | void StopWordList_RdbSave(RedisModuleIO *rdb, struct StopWordList *sl); 40 | 41 | void StopWordList_Ref(struct StopWordList *sl); 42 | 43 | #endif -------------------------------------------------------------------------------- /src/summarize_spec.h: -------------------------------------------------------------------------------- 1 | #ifndef SUMMARIZE_SPEC_H 2 | #define SUMMARIZE_SPEC_H 3 | 4 | #include 5 | #include "redismodule.h" 6 | #include "search_request.h" 7 | 8 | int ParseSummarize(RedisModuleString **argv, int argc, size_t *offset, FieldList *fields); 9 | int 
ParseHighlight(RedisModuleString **argv, int argc, size_t *offset, FieldList *fields); 10 | 11 | #endif -------------------------------------------------------------------------------- /src/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | INCLUDE (CTest) 2 | 3 | FUNCTION(RSTEST name) 4 | ADD_EXECUTABLE("${name}" "${name}.c") 5 | TARGET_LINK_LIBRARIES("${name}" "redisearchS") 6 | ADD_TEST(NAME "${name}" COMMAND "${name}" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") 7 | ENDFUNCTION() 8 | 9 | FILE(GLOB TEST_SOURCES "test_*.c") 10 | 11 | FOREACH(n ${TEST_SOURCES}) 12 | GET_FILENAME_COMPONENT(test_name ${n} NAME_WE) 13 | MESSAGE("${n} => ${test_name}") 14 | RSTEST("${test_name}") 15 | ENDFOREACH() 16 | 17 | ADD_LIBRARY(example_extension SHARED "ext-example/example.c") 18 | ADD_DEPENDENCIES(test_extensions example_extension) 19 | SET_TESTS_PROPERTIES(test_extensions PROPERTIES ENVIRONMENT "EXT_TEST_PATH=$<TARGET_FILE:example_extension>") -------------------------------------------------------------------------------- /src/tests/bench-decoder.c: -------------------------------------------------------------------------------- 1 | #include "redisearch.h" 2 | #include "index.h" 3 | #include "inverted_index.h" 4 | #include "spec.h" 5 | #include "rmutil/alloc.h" 6 | #include "time_sample.h" 7 | 8 | #define NUM_ENTRIES 5000000 9 | #define MY_FLAGS Index_StoreFreqs | Index_StoreFieldFlags 10 | 11 | static void writeEntry(InvertedIndex *idx, size_t id) { 12 | ForwardIndexEntry ent = {0}; 13 | ent.docId = id; 14 | ent.docScore = 1.0; 15 | ent.fieldMask = RS_FIELDMASK_ALL; 16 | ent.freq = 3; 17 | ent.term = "foo"; 18 | ent.vw = NULL; 19 | ent.len = 3; 20 | InvertedIndex_WriteEntry(idx, &ent); 21 | } 22 | 23 | int main(int argc, char **argv) { 24 | RMUTil_InitAlloc(); 25 | InvertedIndex *idx = NewInvertedIndex(MY_FLAGS, 1); 26 | for (size_t ii = 0; ii < NUM_ENTRIES; ++ii) { 27 | writeEntry(idx, ii); 28 | } 29 | 30 | for (size_t ii = 0; ii < 100; ++ii) 
{ 31 | IndexReader *r = NewIndexReader(idx, NULL, RS_FIELDMASK_ALL, MY_FLAGS, NULL, 0); 32 | IndexIterator *it = NewReadIterator(r); 33 | TimeSample ts; 34 | TimeSampler_Start(&ts); 35 | RSIndexResult *res; 36 | while (INDEXREAD_EOF != it->Read(it->ctx, &res)) { 37 | TimeSampler_Tick(&ts); 38 | } 39 | TimeSampler_End(&ts); 40 | printf("%d iterations in %lldms, %fns/iter\n", ts.num, TimeSampler_DurationMS(&ts), 41 | TimeSampler_IterationMS(&ts) * 1000000); 42 | ReadIterator_Free(it); 43 | } 44 | return 0; 45 | } -------------------------------------------------------------------------------- /src/tests/ext-example/Makefile: -------------------------------------------------------------------------------- 1 | #set environment variable RS_INCLUDE_DIR to the location of redismodule.h 2 | ifndef RS_INCLUDE_DIR 3 | RS_INCLUDE_DIR=../../ 4 | endif 5 | 6 | # find the OS 7 | uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') 8 | CFLAGS = -I$(RS_INCLUDE_DIR) -Wall -g -fPIC -O0 -std=gnu99 9 | CC:=$(shell sh -c 'type $(CC) >/dev/null 2>/dev/null && echo $(CC) || echo gcc') 10 | 11 | # Compile flags for non-osx / osx 12 | ifneq ($(uname_S),Darwin) 13 | SHOBJ_CFLAGS ?= -fno-common -g -ggdb 14 | SHOBJ_LDFLAGS ?= -shared -Bsymbolic 15 | else 16 | CFLAGS += -mmacosx-version-min=10.6 17 | SHOBJ_CFLAGS ?= -dynamic -fno-common -g -ggdb 18 | SHOBJ_LDFLAGS ?= -dylib -exported_symbol _RS_ExtensionInit -macosx_version_min 10.6 19 | endif 20 | 21 | # all and clean are commands, not files: keep them working even if a file of that name exists 22 | .PHONY: all clean 23 | 24 | all: example.so 25 | 26 | # Link the shared object from its prerequisites ($^) rather than repeating the file name 27 | example.so: example.o 28 | $(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lc 29 | 30 | clean: 31 | $(RM) *.xo *.so *.o 32 | 33 | FORCE: -------------------------------------------------------------------------------- /src/tests/ext-example/example.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "example.h" 6 | 7 | struct privdata { 8 | int freed; 9 | }; 10 | 11 | /* Calculate sum(TF-IDF)*document score for 
each result */ 12 | double myScorer(RSScoringFunctionCtx *ctx, RSIndexResult *h, RSDocumentMetadata *dmd, 13 | double minScore) { 14 | return 3.141; 15 | } 16 | 17 | double filterOutScorer(RSScoringFunctionCtx *ctx, RSIndexResult *h, RSDocumentMetadata *dmd, 18 | double minScore) { 19 | return RS_SCORE_FILTEROUT; 20 | } 21 | 22 | void myExpander(RSQueryExpanderCtx *ctx, RSToken *token) { 23 | ctx->ExpandToken(ctx, strdup("foo"), 3, 0x00ff); 24 | } 25 | 26 | int numFreed = 0; 27 | void myFreeFunc(void *p) { 28 | // printf("Freeing %p\n", p); 29 | numFreed++; 30 | free(p); 31 | } 32 | 33 | /* Register the default extension */ 34 | int RS_ExtensionInit(RSExtensionCtx *ctx) { 35 | 36 | struct privdata *spd = malloc(sizeof(struct privdata)); 37 | spd->freed = 0; 38 | if (ctx->RegisterScoringFunction("example_scorer", myScorer, myFreeFunc, spd) == REDISEARCH_ERR) { 39 | return REDISEARCH_ERR; 40 | } 41 | 42 | if (ctx->RegisterScoringFunction("filterout_scorer", filterOutScorer, myFreeFunc, spd) == 43 | REDISEARCH_ERR) { 44 | return REDISEARCH_ERR; 45 | } 46 | 47 | spd = malloc(sizeof(struct privdata)); 48 | spd->freed = 0; 49 | /* Snowball Stemmer is the default expander */ 50 | if (ctx->RegisterQueryExpander("example_expander", myExpander, myFreeFunc, spd) == 51 | REDISEARCH_ERR) { 52 | return REDISEARCH_ERR; 53 | } 54 | 55 | return REDISEARCH_OK; 56 | } -------------------------------------------------------------------------------- /src/tests/ext-example/example.h: -------------------------------------------------------------------------------- 1 | #ifndef EXT_EXAMPLE_H__ 2 | #define EXT_EXAMPLE_H__ 3 | 4 | #include "redisearch.h" 5 | 6 | #define EXPANDER_NAME "EXAMPLE_EXPANDER" 7 | #define SCORER_NAME "EXAMPLE_SCORER" 8 | 9 | const char *extentionName = "EXAMPLE_EXTENSION"; 10 | 11 | int RS_ExtensionInit(RSExtensionCtx *ctx); 12 | 13 | #endif -------------------------------------------------------------------------------- /src/tests/test_array.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "util/array.h" 4 | #include "rmutil/alloc.h" 5 | #include "test_util.h" 6 | 7 | int testArray() { 8 | Array arr; 9 | Array_Init(&arr); 10 | ASSERT_EQUAL(0, arr.capacity); 11 | ASSERT_EQUAL(0, arr.len); 12 | ASSERT(arr.data == NULL); 13 | 14 | void *p = Array_Add(&arr, 2); 15 | ASSERT_EQUAL(16, arr.capacity); 16 | ASSERT_EQUAL(2, arr.len); 17 | ASSERT(p == arr.data); 18 | 19 | p = Array_Add(&arr, 20); 20 | ASSERT_EQUAL(32, arr.capacity); 21 | ASSERT_EQUAL(22, arr.len); 22 | ASSERT((char *)p == arr.data + 2); 23 | 24 | Array_ShrinkToSize(&arr); 25 | ASSERT_EQUAL(22, arr.capacity); 26 | 27 | Array_Free(&arr); 28 | return 0; 29 | } 30 | 31 | TEST_MAIN({ 32 | RMUTil_InitAlloc(); 33 | TESTFUNC(testArray); 34 | }) -------------------------------------------------------------------------------- /src/tests/test_cntokenize.c: -------------------------------------------------------------------------------- 1 | // TODO: We might not need all these includes 2 | #include "../buffer.h" 3 | #include "../index.h" 4 | #include "../inverted_index.h" 5 | #include "../index_result.h" 6 | #include "../query_parser/tokenizer.h" 7 | #include "../rmutil/alloc.h" 8 | #include "../spec.h" 9 | #include "../tokenize.h" 10 | #include "../varint.h" 11 | #include "test_util.h" 12 | #include "time_sample.h" 13 | #include "../rmutil/alloc.h" 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | static char *getFile(const char *name) { 21 | FILE *fp = fopen(name, "rb"); 22 | if (fp == NULL) { 23 | perror(name); 24 | abort(); 25 | } 26 | 27 | if (fseek(fp, 0, SEEK_END) != 0) { 28 | perror(name); 29 | abort(); 30 | } 31 | size_t nbuf = ftell(fp); 32 | if (fseek(fp, 0, SEEK_SET) != 0) { 33 | perror(name); 34 | abort(); 35 | } 36 | 37 | if (nbuf == 0) { 38 | fprintf(stderr, "File is empty!\n"); 39 | abort(); 40 | } 41 | 42 | char *buf = malloc(nbuf + 1); 43 | 
buf[nbuf] = '\0'; 44 | 45 | size_t nr, offset = 0; 46 | do { 47 | nr = fread(buf + offset, 1, nbuf - offset, fp); 48 | offset += nr; 49 | } while (nr > 0); 50 | 51 | if (strlen(buf) == 0) { 52 | perror(name); 53 | abort(); 54 | } 55 | fclose(fp); 56 | 57 | return buf; 58 | } 59 | 60 | static int testCnTokenize(void) { 61 | char *cnTxt = getFile("cn_sample.txt"); 62 | RSTokenizer *cnTok = NewChineseTokenizer(NULL, NULL, 0); 63 | ASSERT(cnTok != NULL); 64 | cnTok->Start(cnTok, cnTxt, strlen(cnTxt), 0); 65 | Token t; 66 | uint32_t pos; 67 | while ((pos = cnTok->Next(cnTok, &t)) != 0) { 68 | printf("Token: %.*s. Raw: %.*s. Pos=%u\n", (int)t.tokLen, t.tok, (int)t.rawLen, t.raw, t.pos); 69 | ASSERT(pos == t.pos); 70 | } 71 | cnTok->Free(cnTok); 72 | free(cnTxt); 73 | return 0; 74 | } 75 | 76 | TEST_MAIN({ 77 | // LOGGING_INIT(L_INFO); 78 | RMUTil_InitAlloc(); 79 | TESTFUNC(testCnTokenize); 80 | }); -------------------------------------------------------------------------------- /src/tests/test_khtable.c: -------------------------------------------------------------------------------- 1 | #include "test_util.h" 2 | #include "../util/khtable.h" 3 | #include "../util/fnv.h" 4 | #include 5 | #include 6 | 7 | typedef struct { 8 | KHTableEntry base; 9 | char *key; 10 | uint32_t hash; 11 | uint32_t value; 12 | } MyEntry; 13 | 14 | static int myEntryCompare(const KHTableEntry *e, const void *k, size_t n, uint32_t h) { 15 | const MyEntry *ent = (const MyEntry *)e; 16 | return !(ent->hash == h && strcmp(k, ent->key) == 0); 17 | } 18 | 19 | static uint32_t myHash(const KHTableEntry *e) { 20 | return ((const MyEntry *)e)->hash; 21 | } 22 | 23 | static KHTableEntry *myAlloc() { 24 | return calloc(1, sizeof(MyEntry)); 25 | } 26 | 27 | static uint32_t calcHash(const char *s) { 28 | return rs_fnv_32a_buf((char *)s, strlen(s), 0); 29 | } 30 | 31 | static KHTableProcs myProcs = {.Alloc = myAlloc, .Hash = myHash, .Compare = myEntryCompare}; 32 | 33 | static void *pCtx = (void *)0x01; 34 | 
static void *pArg = (void *)0x02; 35 | 36 | static void freeFn(KHTableEntry *ent, void *ctx, void *arg) { 37 | free(ent); 38 | assert(ctx == pCtx); 39 | assert(arg == pArg); 40 | } 41 | 42 | int testKhTable() { 43 | KHTable kht; 44 | KHTable_Init(&kht, &myProcs, pCtx, 4); 45 | 46 | MyEntry *ent = NULL; 47 | ent = (void *)KHTable_GetEntry(&kht, "key", 0, calcHash("key"), NULL); 48 | ASSERT(ent == NULL); // Not found, and no isNew pointer 49 | 50 | int isNew = 0; 51 | ent = (void *)KHTable_GetEntry(&kht, "key", 0, calcHash("key"), &isNew); 52 | ASSERT(ent != NULL); 53 | ASSERT(isNew != 0); 54 | ent->key = "key"; 55 | ent->hash = calcHash("key"); 56 | ent->value = 42; 57 | 58 | isNew = 0; 59 | MyEntry *ent2 = (void *)KHTable_GetEntry(&kht, "key", 0, calcHash("key"), NULL); 60 | ASSERT(ent2 == ent); 61 | 62 | // Try it again, but with isNew 63 | ent2 = (void *)KHTable_GetEntry(&kht, "key", 0, calcHash("key"), &isNew); 64 | ASSERT(ent2 == ent); 65 | ASSERT(isNew == 0); 66 | 67 | KHTable_FreeEx(&kht, pArg, freeFn); 68 | return 0; 69 | } 70 | 71 | TEST_MAIN({ TESTFUNC(testKhTable); }) -------------------------------------------------------------------------------- /src/tests/test_qint.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "qint.h" 4 | #include "rmutil/alloc.h" 5 | 6 | int main(int argc, char **argv) { 7 | RMUTil_InitAlloc(); 8 | Buffer *b = NewBuffer(1024); 9 | BufferWriter w = NewBufferWriter(b); 10 | qint_encode4(&w, 123, 456, 789, 101112); 11 | 12 | uint32_t arr[4]; 13 | BufferReader r = NewBufferReader(b); 14 | qint_decode(&r, arr, 4); 15 | assert(arr[0] == 123); 16 | assert(arr[1] == 456); 17 | assert(arr[2] == 789); 18 | assert(arr[3] == 101112); 19 | 20 | memset(arr, 0, sizeof arr); 21 | r = NewBufferReader(b); 22 | qint_decode4(&r, &arr[0], &arr[1], &arr[2], &arr[3]); 23 | assert(arr[0] == 123); 24 | assert(arr[1] == 456); 25 | assert(arr[2] == 789); 26 | assert(arr[3] == 101112); 
27 | 28 | memset(arr, 0, sizeof arr); 29 | r = NewBufferReader(b); 30 | qint_decode3(&r, &arr[0], &arr[1], &arr[2]); 31 | assert(arr[0] == 123); 32 | assert(arr[1] == 456); 33 | assert(arr[2] == 789); 34 | 35 | return 0; 36 | } -------------------------------------------------------------------------------- /src/tests/test_quantile.c: -------------------------------------------------------------------------------- 1 | #include "../util/quantile.h" 2 | #include "../buffer.h" 3 | #include "../rmutil/alloc.h" 4 | #include "test_util.h" 5 | #include 6 | #include 7 | #include 8 | 9 | static FILE *fp; 10 | static Buffer buf; 11 | static double *input; 12 | static size_t numInput; 13 | 14 | static int testBasic() { 15 | double quantiles[] = {0.50, 0.90, 0.99}; 16 | QuantStream *stream = NewQuantileStream(quantiles, 3, 500); 17 | for (size_t ii = 0; ii < numInput; ++ii) { 18 | QS_Insert(stream, input[ii]); 19 | } 20 | double res50 = QS_Query(stream, 0.50); 21 | double res90 = QS_Query(stream, 0.90); 22 | double res99 = QS_Query(stream, 0.99); 23 | size_t count = QS_GetCount(stream); 24 | printf("50: %lf, 90: %lf, 99: %lf\n", res50, res90, res99); 25 | printf("Count: %zu\n", count); // count is a size_t, so %zu rather than %lu 26 | // QS_Dump(stream, stdout); 27 | QS_Free(stream); 28 | return 0; 29 | } 30 | 31 | TEST_MAIN({ 32 | RMUTil_InitAlloc(); 33 | 34 | fp = fopen("./quantile_data.txt", "rb"); 35 | assert(fp); 36 | Buffer_Init(&buf, 4096); 37 | BufferWriter bw = NewBufferWriter(&buf); 38 | 39 | double d; 40 | while (fscanf(fp, "%lf", &d) == 1) { // stop at EOF *and* at a non-numeric token; "!= EOF" would spin forever on the latter 41 | Buffer_Write(&bw, &d, sizeof d); 42 | numInput++; 43 | } 44 | fclose(fp); 45 | printf("Have %zu items\n", numInput); 46 | input = (double *)buf.data; 47 | 48 | TESTFUNC(testBasic); 49 | 50 | Buffer_Free(&buf); 51 | }) -------------------------------------------------------------------------------- /src/tests/test_stopwords.c: -------------------------------------------------------------------------------- 1 | #include "test_util.h" 2 | #include 3 | #include 4 | 5 | 
void RMUTil_InitAlloc(); 6 | 7 | int testStopwordList() { 8 | 9 | char *terms[] = {strdup("foo"), strdup("bar"), strdup("שלום"), strdup("Hello"), strdup("WORLD")}; 10 | const char *test_terms[] = {"foo", "bar", "שלום", "hello", "world"}; 11 | 12 | StopWordList *sl = NewStopWordListCStr((const char **)terms, sizeof(terms) / sizeof(char *)); 13 | ASSERT(sl != NULL); 14 | 15 | for (int i = 0; i < sizeof(test_terms) / sizeof(const char *); i++) { 16 | ASSERT(StopWordList_Contains(sl, test_terms[i], strlen(test_terms[i]))); 17 | } 18 | 19 | ASSERT(!StopWordList_Contains(sl, "asdfasdf", strlen("asdfasdf"))); 20 | ASSERT(!StopWordList_Contains(sl, NULL, 0)); 21 | ASSERT(!StopWordList_Contains(NULL, NULL, 0)); 22 | 23 | StopWordList_Free(sl); 24 | for (int i = 0; i < sizeof(terms) / sizeof(const char *); i++) { 25 | free(terms[i]); 26 | } 27 | return 0; 28 | } 29 | 30 | int testDefaultStopwords() { 31 | 32 | StopWordList *sl = DefaultStopWordList(); 33 | for (int i = 0; DEFAULT_STOPWORDS[i] != NULL; i++) { 34 | ASSERT(StopWordList_Contains(sl, DEFAULT_STOPWORDS[i], strlen(DEFAULT_STOPWORDS[i]))); 35 | } 36 | const char *test_terms[] = {"foo", "bar", "שלום", "hello", "world", "x", "i", "t"}; 37 | for (int i = 0; i < sizeof(test_terms) / sizeof(const char *); i++) { 38 | // printf("checking %s\n", test_terms[i]); 39 | ASSERT(!StopWordList_Contains(sl, test_terms[i], strlen(test_terms[i]))); 40 | } 41 | 42 | StopWordList_Free(sl); 43 | return 0; 44 | } 45 | 46 | TEST_MAIN({ 47 | RMUTil_InitAlloc(); 48 | TESTFUNC(testStopwordList); 49 | TESTFUNC(testDefaultStopwords); 50 | }); -------------------------------------------------------------------------------- /src/tests/test_tag_index.c: -------------------------------------------------------------------------------- 1 | #include "test_util.h" 2 | #include "../tag_index.h" 3 | #include "../rmutil/alloc.h" 4 | #include "time_sample.h" 5 | #include "../util/arr.h" 6 | int testTagIndexCreate() { 7 | TagIndex *idx = NewTagIndex(); 8 
| ASSERT(idx); 9 | // ASSERT_STRING_EQ(idx->) 10 | int N = 100000; 11 | char **v = array_newlen(char *, 3); 12 | v[0] = strdup("hello"); 13 | v[1] = strdup("world"); 14 | v[2] = strdup("foo"); 15 | size_t totalSZ = 0; 16 | for (t_docId d = 1; d <= N; d++) { 17 | size_t sz = TagIndex_Index(idx, v, d); 18 | ASSERT(sz > 0); 19 | totalSZ += sz; 20 | // make sure repeating push of the same vector doesn't get indexed 21 | sz = TagIndex_Index(idx, v, d); 22 | ASSERT(sz == 0); 23 | } 24 | 25 | ASSERT_EQUAL(idx->values->cardinality, array_len(v)); 26 | ASSERT_EQUAL(300000, totalSZ); 27 | 28 | IndexIterator *it = TagIndex_OpenReader(idx, NULL, "hello", 5, NULL, NULL, NULL, 1); 29 | ASSERT(it != NULL); 30 | RSIndexResult *r; 31 | t_docId n = 1; 32 | 33 | TimeSample ts; 34 | TimeSampler_Start(&ts); 35 | while (INDEXREAD_EOF != it->Read(it->ctx, &r)) { 36 | // printf("DocId: %d\n", r->docId); 37 | ASSERT_EQUAL(n++, r->docId); 38 | TimeSampler_Tick(&ts); 39 | } 40 | 41 | TimeSampler_End(&ts); 42 | printf("%d iterations in %lldns, rate %fns/iter\n", N, ts.durationNS, 43 | TimeSampler_IterationMS(&ts) * 1000000); 44 | ASSERT_EQUAL(N + 1, n); 45 | it->Free(it); 46 | array_free(v); 47 | return 0; 48 | } 49 | 50 | TEST_MAIN({ 51 | RMUTil_InitAlloc(); 52 | TESTFUNC(testTagIndexCreate); 53 | }); -------------------------------------------------------------------------------- /src/tests/test_tokenize.c: -------------------------------------------------------------------------------- 1 | #include "test_util.h" 2 | #include "../tokenize.h" 3 | #include "../stemmer.h" 4 | #include "../rmutil/alloc.h" 5 | 6 | int testTokenize() { 7 | Stemmer *st = NewStemmer(SnowballStemmer, "english"); 8 | 9 | RSTokenizer *tk = GetSimpleTokenizer(st, DefaultStopWordList()); 10 | char *txt = strdup("hello worlds - - -,,, . . . 
-=- hello\\-world to be שלום עולם"); 11 | const char *expected[] = {"hello", "worlds", "hello-world", "שלום", "עולם"}; 12 | const char *stems[] = {NULL, "+world", NULL, NULL, NULL, NULL}; 13 | tk->Start(tk, txt, strlen(txt), TOKENIZE_DEFAULT_OPTIONS); 14 | Token tok; 15 | int i = 0; 16 | while (tk->Next(tk, &tok)) { 17 | ; 18 | 19 | ASSERT(tok.tokLen == strlen(expected[i])); 20 | ASSERT(!strncmp(tok.tok, expected[i], tok.tokLen)); 21 | if (!stems[i]) { 22 | ASSERT(tok.stem == NULL); 23 | } else { 24 | ASSERT(!strncmp(tok.stem, stems[i], tok.stemLen)); 25 | } 26 | i++; 27 | } 28 | free(txt); 29 | 30 | RETURN_TEST_SUCCESS; 31 | } 32 | 33 | TEST_MAIN({ 34 | RMUTil_InitAlloc(); 35 | TESTFUNC(testTokenize); 36 | }) -------------------------------------------------------------------------------- /src/toksep.h: -------------------------------------------------------------------------------- 1 | #ifndef TOKSEP_H 2 | #define TOKSEP_H 3 | 4 | #include 5 | #include 6 | //! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ ` { | } ~ 7 | static const char ToksepMap_g[256] = { 8 | [' '] = 1, ['\t'] = 1, [','] = 1, ['.'] = 1, ['/'] = 1, ['('] = 1, [')'] = 1, ['{'] = 1, 9 | ['}'] = 1, ['['] = 1, [']'] = 1, [':'] = 1, [';'] = 1, ['~'] = 1, ['!'] = 1, ['@'] = 1, 10 | ['#'] = 1, ['$'] = 1, ['%'] = 1, ['^'] = 1, ['&'] = 1, ['*'] = 1, ['-'] = 1, ['='] = 1, 11 | ['+'] = 1, ['|'] = 1, ['\''] = 1, ['`'] = 1, ['"'] = 1, ['<'] = 1, ['>'] = 1, ['?'] = 1, 12 | }; 13 | 14 | /** 15 | * Function reads string pointed to by `s` and indicates the length of the next 16 | * token in `tokLen`. `s` is set to NULL if this is the last token. 
17 | */ 18 | static inline char *toksep(char **s, size_t *tokLen) { 19 | uint8_t *pos = (uint8_t *)*s; 20 | char *orig = *s; 21 | for (; *pos; ++pos) { 22 | if (ToksepMap_g[*pos] && ((char *)pos == orig || *(pos - 1) != '\\')) { 23 | *s = (char *)++pos; 24 | *tokLen = ((char *)pos - orig) - 1; 25 | if (!*pos) { 26 | *s = NULL; 27 | } 28 | return orig; 29 | } 30 | } 31 | 32 | // Didn't find a terminating token. Use a simpler length calculation 33 | *s = NULL; 34 | *tokLen = (char *)pos - orig; 35 | return orig; 36 | } 37 | 38 | static inline int istoksep(int c) { 39 | return ToksepMap_g[(uint8_t)c] != 0; 40 | } 41 | 42 | #endif -------------------------------------------------------------------------------- /src/trie/Makefile: -------------------------------------------------------------------------------- 1 | # set environment variable RM_INCLUDE_DIR to the location of redismodule.h 2 | ifndef RM_INCLUDE_DIR 3 | RM_INCLUDE_DIR=../ 4 | endif 5 | 6 | CFLAGS ?= -g -fPIC -O3 -std=gnu99 -Wall -Wno-unused-function -Wno-unused-variable 7 | CFLAGS += -I$(RM_INCLUDE_DIR) 8 | CC=gcc 9 | OBJS=levenshtein.o rune_util.o sparse_vector.o trie.o trie_type.o 10 | 11 | # all and clean are commands, not files: keep them working even if a file of that name exists 12 | .PHONY: all clean 13 | 14 | all: libtrie.a 15 | 16 | clean: 17 | $(RM) *.o *.a 18 | 19 | # Archive every object into the static library; $(AR) lets callers substitute a cross toolchain 20 | libtrie.a: $(OBJS) 21 | $(AR) rcs $@ $^ 22 | 23 | # test_trie: test.o $(OBJS) 24 | # $(CC) -Wall -o test test.o $(OBJS) -lc -O0 25 | # @(sh -c ./test_vector) 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/trie/rune_util.h: -------------------------------------------------------------------------------- 1 | #ifndef __RUNE_UTIL_H__ 2 | #define __RUNE_UTIL_H__ 3 | 4 | #include "../dep/libnu/libnu.h" 5 | 6 | /* Internally, the trie works with 16/32 bit "Runes", i.e. fixed width unicode 7 | * characters. 
16 bit should be fine for most use cases */ 8 | #ifdef TRIE_32BIT_RUNES 9 | typedef uint32_t rune; 10 | #else // default - 16 bit runes 11 | typedef uint16_t rune; 12 | #endif 13 | 14 | /* fold rune: assumes rune is of the correct size */ 15 | rune runeFold(rune r); 16 | 17 | /* Convert a rune string to utf-8 characters */ 18 | char *runesToStr(rune *in, size_t len, size_t *utflen); 19 | 20 | rune *strToFoldedRunes(char *str, size_t *len); 21 | 22 | /* Convert a utf-8 string to constant width runes */ 23 | rune *strToRunes(const char *str, size_t *len); 24 | 25 | /* Decode a string to a rune in-place */ 26 | size_t strToRunesN(const char *s, size_t slen, rune *outbuf); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/trie/sparse_vector.c: -------------------------------------------------------------------------------- 1 | #include "sparse_vector.h" 2 | #include 3 | 4 | inline size_t __sv_sizeof(size_t cap) { 5 | return sizeof(sparseVector) + cap * sizeof(sparseVectorEntry); 6 | } 7 | 8 | inline sparseVector *__sv_resize(sparseVector *v, size_t cap) { 9 | v = realloc(v, __sv_sizeof(cap)); 10 | v->cap = cap; 11 | return v; 12 | } 13 | 14 | inline sparseVector *newSparseVectorCap(size_t cap) { 15 | sparseVector *v = malloc(__sv_sizeof(cap)); 16 | 17 | v->cap = cap; 18 | v->len = 0; 19 | return v; 20 | } 21 | // newSparseVector creates a new sparse vector with the initial values of the 22 | // dense int slice given to it 23 | sparseVector *newSparseVector(int *values, int len) { 24 | sparseVector *v = newSparseVectorCap(len * 2); 25 | v->len = len; 26 | 27 | for (int i = 0; i < len; i++) { 28 | v->entries[i] = (sparseVectorEntry){i, values[i]}; 29 | } 30 | 31 | return v; 32 | } 33 | 34 | // append appends another sparse vector entry with the given index and value. 
35 | // NOTE: We do not check 36 | // that an entry with the same index is present in the vector 37 | void sparseVector_append(sparseVector **vp, int index, int value) { 38 | sparseVector *v = *vp; 39 | if (v->len == v->cap) { 40 | v->cap = v->cap ? v->cap * 2 : 1; 41 | v = __sv_resize(v, v->cap); 42 | } 43 | 44 | v->entries[v->len++] = (sparseVectorEntry){index, value}; 45 | *vp = v; 46 | } 47 | 48 | void sparseVector_free(sparseVector *v) { free(v); } 49 | -------------------------------------------------------------------------------- /src/trie/sparse_vector.h: -------------------------------------------------------------------------------- 1 | #ifndef __SPARSEVECTOR_H__ 2 | #define __SPARSEVECTOR_H__ 3 | 4 | #include 5 | 6 | typedef struct { 7 | int idx, val; 8 | } sparseVectorEntry; 9 | 10 | // sparseVector is a crude implementation of a sparse vector for our needs 11 | typedef struct { 12 | size_t len; 13 | size_t cap; 14 | sparseVectorEntry entries[]; 15 | } sparseVector; 16 | 17 | size_t __sv_sizeof(size_t cap); 18 | 19 | sparseVector *__sv_resize(sparseVector *v, size_t cap); 20 | sparseVector *newSparseVectorCap(size_t cap); 21 | 22 | // append appends another sparse vector entry with the given index and value. 
23 | // NOTE: We do not check 24 | // that an entry with the same index is present in the vector 25 | void sparseVector_append(sparseVector **v, int index, int value); 26 | 27 | // newSparseVector creates a new sparse vector with the initial values of the 28 | // dense int slice given to it 29 | sparseVector *newSparseVector(int *values, int len); 30 | 31 | void sparseVector_free(sparseVector *v); 32 | #endif -------------------------------------------------------------------------------- /src/util/Makefile: -------------------------------------------------------------------------------- 1 | # Compile-only flags. Libraries live in LDLIBS so they are not handed to `$(CC) -c`. 2 | CFLAGS ?= -g -ggdb -fPIC -O2 -std=gnu99 3 | LDLIBS ?= -lc -lm 4 | 5 | CC=gcc 6 | .SUFFIXES: .c .so .xo .o 7 | 8 | # all is a command, not a file 9 | .PHONY: all 10 | all: heap.o logging.o fnv.o block_alloc.o 11 | -------------------------------------------------------------------------------- /src/util/arr_rm_alloc.h: -------------------------------------------------------------------------------- 1 | #ifndef ARR_RM_ALLOC_H_ 2 | #define ARR_RM_ALLOC_H_ 3 | 4 | /* A wrapper for arr.h that sets the allocation functions to those of the RedisModule_Alloc & 5 | * friends. 
This file should not be included alongside arr.h, and should not be included from .h 6 | * files in general */ 7 | 8 | #include 9 | 10 | /* Define the allocation functions before including arr.h */ 11 | #define array_alloc_fn RedisModule_Alloc 12 | #define array_realloc_fn RedisModule_Realloc 13 | #define array_free_fn RedisModule_Free 14 | 15 | #include "arr.h" 16 | 17 | #endif -------------------------------------------------------------------------------- /src/util/array.c: -------------------------------------------------------------------------------- 1 | #include "array.h" 2 | #include "rmalloc.h" 3 | #include "util/minmax.h" 4 | 5 | static ArrayAllocProcs libcAllocProcs_g = {.Alloc = malloc, .Realloc = realloc, .Free = free}; 6 | static ArrayAllocProcs rmAllocProcs_g = { 7 | .Alloc = rm_malloc, .Realloc = rm_realloc, .Free = rm_free}; 8 | 9 | void Array_InitEx(Array *array, ArrayAllocatorType allocType) { 10 | array->capacity = 0; 11 | array->len = 0; 12 | array->data = NULL; 13 | if (allocType == ArrayAlloc_LibC) { 14 | array->procs = &libcAllocProcs_g; 15 | } else { 16 | array->procs = &rmAllocProcs_g; 17 | } 18 | } 19 | 20 | void Array_Free(Array *array) { 21 | array->procs->Free(array->data); 22 | array->capacity = 0; 23 | array->len = 0; 24 | array->data = NULL; 25 | } 26 | 27 | int Array_Resize(Array *array, uint32_t newSize) { 28 | uint32_t newCapacity = array->capacity ? 
array->capacity : 16; 29 | while (newCapacity - array->len < newSize) { 30 | newCapacity *= 2; 31 | if (newCapacity < array->capacity) { 32 | return -1; 33 | } 34 | } 35 | newCapacity = Max(newCapacity, 16); 36 | // Realloc into a temporary: if it fails the array must still own (and later free) the old buffer. 37 | void *newData = array->procs->Realloc(array->data, newCapacity); 38 | if (newData == NULL) { 39 | return -1; 40 | } 41 | array->data = newData; 42 | array->capacity = newCapacity; 43 | array->len = newSize; 44 | return 0; 45 | } 46 | 47 | void *Array_Add(Array *array, uint32_t toAdd) { 48 | uint32_t oldLen = array->len; 49 | if (array->capacity - array->len < toAdd) { 50 | if (Array_Resize(array, array->len + toAdd) != 0) { 51 | return NULL; 52 | } 53 | } else { 54 | array->len += toAdd; 55 | } 56 | 57 | return array->data + oldLen; 58 | } 59 | 60 | void Array_Write(Array *arr, const void *data, size_t len) { 61 | void *ptr = Array_Add(arr, len); 62 | memcpy(ptr, data, len); 63 | } 64 | 65 | void Array_ShrinkToSize(Array *array) { 66 | if (array->capacity <= array->len) { 67 | return; 68 | } 69 | if (array->len == 0) { 70 | // Nothing to keep: release the buffer instead of relying on Realloc(ptr, 0). 71 | array->procs->Free(array->data); 72 | array->data = NULL; 73 | array->capacity = 0; 74 | return; 75 | } 76 | void *shrunk = array->procs->Realloc(array->data, array->len); 77 | if (shrunk != NULL) { 78 | // Commit only on success; on failure the larger buffer stays valid. 79 | array->data = shrunk; 80 | array->capacity = array->len; 81 | } 82 | } -------------------------------------------------------------------------------- /src/util/array.h: -------------------------------------------------------------------------------- 1 | #ifndef ARRAY_H 2 | #define ARRAY_H 3 | 4 | #include 5 | #include 6 | 7 | typedef struct { 8 | void *(*Alloc)(size_t); 9 | void *(*Realloc)(void *, size_t); 10 | void (*Free)(void *); 11 | } ArrayAllocProcs; 12 | 13 | /** Array datatype. Simple wrapper around a C array, with capacity and length. 
*/ 14 | typedef struct Array { 15 | char *data; 16 | uint32_t len; 17 | uint32_t capacity; 18 | const ArrayAllocProcs *procs; 19 | } Array; 20 | 21 | typedef enum { 22 | ArrayAlloc_LibC, 23 | ArrayAlloc_RM, 24 | ArrayAlloc_Default = ArrayAlloc_RM 25 | } ArrayAllocatorType; 26 | 27 | void Array_InitEx(Array *array, ArrayAllocatorType allocType); 28 | 29 | static inline void Array_Init(Array *array) { 30 | Array_InitEx(array, ArrayAlloc_Default); 31 | } 32 | 33 | /** 34 | * Free any memory allocated by this array. 35 | */ 36 | void Array_Free(Array *array); 37 | 38 | /** 39 | * "Steal" the contents of the array. The caller now owns its contents. 40 | */ 41 | static inline char *Array_Steal(Array *array, size_t *len) { 42 | *len = array->len; 43 | char *ret = array->data; 44 | array->data = NULL; 45 | array->len = 0; 46 | array->capacity = 0; 47 | return ret; 48 | } 49 | 50 | /** 51 | * Add item to the array 52 | * elemSize is the size of the new item. 53 | * Returns a pointer to the newly added item. The memory is allocated but uninitialized 54 | */ 55 | void *Array_Add(Array *array, uint32_t elemSize); 56 | void Array_Write(Array *array, const void *data, size_t len); 57 | int Array_Resize(Array *array, uint32_t newSize); 58 | 59 | /** 60 | * Shrink the array down to size, so that any preemptive allocations are removed. 61 | * This should be used when no more elements will be added to the array. 
62 | */ 63 | void Array_ShrinkToSize(Array *array); 64 | 65 | #define ARRAY_GETSIZE_AS(arr, T) ((arr)->len / (sizeof(T))) 66 | #define ARRAY_GETARRAY_AS(arr, T) ((T)((arr)->data)) 67 | #define ARRAY_ADD_AS(arr, T) Array_Add(arr, sizeof(T)) 68 | #define ARRAY_GETITEM_AS(arr, ix, T) (ARRAY_GETARRAY_AS(arr, T) + ix) 69 | #endif -------------------------------------------------------------------------------- /src/util/block_alloc.h: -------------------------------------------------------------------------------- 1 | #ifndef BLOCK_ALLOC_H 2 | #define BLOCK_ALLOC_H 3 | 4 | #include 5 | 6 | typedef struct BlkAllocBlock { 7 | struct BlkAllocBlock *next; 8 | size_t numUsed; 9 | size_t capacity; 10 | char data[0] __attribute__((aligned(16))); 11 | } BlkAllocBlock; 12 | 13 | typedef struct BlkAlloc { 14 | BlkAllocBlock *root; 15 | BlkAllocBlock *last; 16 | 17 | // Available blocks - used when recycling the allocator 18 | BlkAllocBlock *avail; 19 | } BlkAlloc; 20 | 21 | // Initialize a block allocator 22 | static inline void BlkAlloc_Init(BlkAlloc *alloc) { 23 | alloc->root = NULL; 24 | alloc->last = NULL; 25 | alloc->avail = NULL; 26 | } 27 | 28 | /** 29 | * Allocate a new element from the block allocator. A pointer of size elemSize 30 | * will be returned. blockSize is the size of the new block to be created 31 | * (if the current block has no more room for elemSize). blockSize should be 32 | * greater than elemSize, and should likely be a multiple thereof. 33 | * 34 | * The returned pointer remains valid until FreeAll is called. 35 | */ 36 | void *BlkAlloc_Alloc(BlkAlloc *alloc, size_t elemSize, size_t blockSize); 37 | 38 | typedef void (*BlkAllocCleaner)(void *ptr, void *arg); 39 | 40 | /** 41 | * Free all memory allocated by the allocator. 42 | * If a cleaner function is called, it will be called for each element. Elements 43 | * are assumed to be elemSize spaces apart from each other. 
44 | */ 45 | void BlkAlloc_FreeAll(BlkAlloc *alloc, BlkAllocCleaner cleaner, void *arg, size_t elemSize); 46 | 47 | /** 48 | * Like FreeAll, except the blocks are recycled and placed inside the 'avail' 49 | * pool instead. 50 | */ 51 | void BlkAlloc_Clear(BlkAlloc *alloc, BlkAllocCleaner cleaner, void *arg, size_t elemSize); 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /src/util/fnv.h: -------------------------------------------------------------------------------- 1 | #ifndef __FT_FNV_H__ 2 | #define __FT_FNV_H__ 3 | 4 | #include 5 | #include 6 | // NOTE(review): Fnv32_t is not declared in this header - confirm it is visible wherever FNV_32_PRIME is expanded 7 | #define FNV_32_PRIME ((Fnv32_t)0x01000193) 8 | // Presumably both hash len bytes at buf, with hval as the seed / running value - implementation not visible here, confirm 9 | uint32_t rs_fnv_32a_buf(void *buf, size_t len, uint32_t hval); 10 | 11 | uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/util/logging.c: -------------------------------------------------------------------------------- 1 | #include "logging.h" 2 | 3 | int LOGGING_LEVEL = 0; 4 | // Bitmask of the L_* flags from logging.h (e.g. L_DEBUG | L_INFO); starts at 0, so the level-guarded LG_* macros print nothing 5 | 6 | // Replace the active level mask 7 | void LOGGING_INIT(int level) { 8 | LOGGING_LEVEL = level; 9 | } 10 | -------------------------------------------------------------------------------- /src/util/logging.h: -------------------------------------------------------------------------------- 1 | #ifndef __MDMA_LOGGING__ 2 | #define __MDMA_LOGGING__ 3 | 4 | #define L_DEBUG 1 5 | #define L_INFO 2 6 | #define L_WARN 4 7 | #define L_ERROR 8 8 | #define L_TRACE 16 9 | 10 | 11 | extern int LOGGING_LEVEL; 12 | // Bitmask of the L_* flags above, e.g. L_DEBUG | L_INFO 13 | 14 | 15 | extern void LOGGING_INIT(int level); 16 | // LG_MSG prints unconditionally to stdout and its expansion already ends with ';'. Only LG_DEBUG appends a newline. 17 | #define LG_MSG(...) fprintf(stdout, __VA_ARGS__); 18 | #define LG_DEBUG(...) if (LOGGING_LEVEL & L_DEBUG) { LG_MSG("[DEBUG %s:%d@%s] ", __FILE__ , __LINE__, __FUNCTION__); LG_MSG(__VA_ARGS__); LG_MSG("\n"); } 19 | #define LG_INFO(...)
if (LOGGING_LEVEL & L_INFO) { LG_MSG("[INFO %s:%d] ", __FILE__ , __LINE__); LG_MSG(__VA_ARGS__); } 20 | #define LG_WARN(...) if (LOGGING_LEVEL & L_WARN) { LG_MSG("[WARNING %s:%d] ", __FILE__ , __LINE__); LG_MSG(__VA_ARGS__); } 21 | #define LG_ERROR(...) if (LOGGING_LEVEL & L_ERROR) { LG_MSG("[ERROR %s:%d] ", __FILE__ , __LINE__); LG_MSG(__VA_ARGS__); } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/util/mempool.c: -------------------------------------------------------------------------------- 1 | #define _RS_MEMPOOL_C_ 2 | #include "mempool.h" 3 | #include 4 | #include 5 | #include 6 | 7 | typedef struct mempool_t { 8 | void **entries; 9 | size_t top; 10 | size_t cap; 11 | size_t max; // max size for pool 12 | mempool_alloc_fn alloc; 13 | mempool_free_fn free; 14 | pthread_mutex_t lock; 15 | } mempool_t; 16 | 17 | mempool_t *mempool_new(size_t cap, mempool_alloc_fn alloc, mempool_free_fn freefn) { 18 | return mempool_new_limited(cap, 0, alloc, freefn); 19 | } 20 | 21 | mempool_t *mempool_new_limited(size_t cap, size_t max, mempool_alloc_fn alloc, 22 | mempool_free_fn free) { 23 | mempool_t *p = malloc(sizeof(mempool_t)); 24 | p->entries = calloc(cap, sizeof(void *)); 25 | p->alloc = alloc; 26 | p->free = free; 27 | p->cap = cap; 28 | p->max = max; 29 | p->top = 0; 30 | return p; 31 | } 32 | 33 | void *mempool_get(mempool_t *p) { 34 | void *ret = NULL; 35 | if (p->top > 0) { 36 | ret = p->entries[--p->top]; 37 | 38 | } else { 39 | ret = p->alloc(); 40 | } 41 | return ret; 42 | } 43 | 44 | inline void mempool_release(mempool_t *p, void *ptr) { 45 | 46 | if (p->top == p->cap) { 47 | // This is a limited pool, and we can't outgrow ourselves now, just free the ptr immediately 48 | if (p->max && p->max == p->top) { 49 | p->free(ptr); 50 | return; 51 | } 52 | // grow the pool 53 | p->cap += p->cap ? 
MIN(p->cap, 1024) : 1; 54 | p->entries = realloc(p->entries, p->cap * sizeof(void *)); 55 | } 56 | p->entries[p->top++] = ptr; 57 | } 58 | // Free every cached entry with the pool's free callback, then the entries array and the pool struct itself 59 | void mempool_destroy(mempool_t *p) { 60 | for (size_t i = 0; i < p->top; i++) { 61 | p->free(p->entries[i]); 62 | } 63 | free(p->entries); 64 | free(p); // the struct was malloc'ed by mempool_new_limited; without this it leaks 65 | } 66 | -------------------------------------------------------------------------------- /src/util/mempool.h: -------------------------------------------------------------------------------- 1 | #ifndef __RS_MEMPOOL_H__ 2 | #define __RS_MEMPOOL_H__ 3 | 4 | /* Mempool - an uber simple, thread-unsafe, memory pool */ 5 | #include 6 | #include 7 | 8 | /* stateless allocation function for the pool */ 9 | typedef void *(*mempool_alloc_fn)(); 10 | /* free function for the pool */ 11 | typedef void (*mempool_free_fn)(void *); 12 | 13 | /* mempool - the struct holding the memory pool */ 14 | #ifndef _RS_MEMPOOL_C_ 15 | typedef struct mempool_t mempool_t; 16 | #else 17 | struct mempool_t; 18 | #endif 19 | 20 | #define MEMPOOOL_STATIC_ALLOCATOR(name, sz) \ 21 | void *name() { \ 22 | return malloc(sz); \ 23 | } 24 | /* Create a new memory pool */ 25 | struct mempool_t *mempool_new(size_t cap, mempool_alloc_fn alloc, mempool_free_fn free); 26 | struct mempool_t *mempool_new_limited(size_t cap, size_t max_cap, mempool_alloc_fn alloc, 27 | mempool_free_fn free); 28 | 29 | /* Get an entry from the pool, allocating a new instance if unavailable */ 30 | void *mempool_get(struct mempool_t *p); 31 | 32 | /* Release an allocated instance to the pool */ 33 | void mempool_release(struct mempool_t *p, void *ptr); 34 | 35 | /* destroy the pool, releasing all entries in it, its internal array and the pool itself; p must not be used afterwards */ 36 | void mempool_destroy(struct mempool_t *p); 37 | #endif -------------------------------------------------------------------------------- /src/util/minmax.h: -------------------------------------------------------------------------------- 1 | #ifndef MINMAX_H 2 | #define MINMAX_H 3 | 4 | #define
Min(a, b) (a) < (b) ? (a) : (b) 5 | #define Max(a, b) (a) > (b) ? (a) : (b) 6 | 7 | #endif -------------------------------------------------------------------------------- /src/util/minmax_heap.h: -------------------------------------------------------------------------------- 1 | #ifndef MINMAX_HEAP_H_ 2 | #define MINMAX_HEAP_H_ 3 | 4 | #include 5 | 6 | typedef int (*mmh_cmp_func)(const void*, const void*, const void*); 7 | typedef void (*mmh_free_func)(void*); 8 | typedef struct heap { 9 | 10 | size_t count; 11 | size_t size; 12 | mmh_cmp_func cmp; 13 | void* cmp_ctx; 14 | void** data; 15 | mmh_free_func free_func; 16 | } heap_t; 17 | 18 | heap_t* mmh_init(mmh_cmp_func cmp, void* cmp_ctx, mmh_free_func free_func); 19 | heap_t* mmh_init_with_size(size_t size, mmh_cmp_func cmp, void* cmp_ctx, mmh_free_func free_func); 20 | void mmh_free(heap_t* h); 21 | 22 | void mmh_dump(heap_t* h); 23 | void mmh_insert(heap_t* h, void* value); 24 | void* mmh_pop_min(heap_t* h); 25 | void* mmh_pop_max(heap_t* h); 26 | void* mmh_peek_min(const heap_t* h); 27 | void* mmh_peek_max(const heap_t* h); 28 | 29 | #endif // MINMAX_HEAP_H_ 30 | -------------------------------------------------------------------------------- /src/util/misc.c: -------------------------------------------------------------------------------- 1 | #include "misc.h" 2 | #include 3 | 4 | void GenericAofRewrite_DisabledHandler(RedisModuleIO *aof, RedisModuleString *key, void *value) { 5 | RedisModule_Log(RedisModule_GetContextFromIO(aof), "error", 6 | "Requested AOF, but this is unsupported for this module"); 7 | abort(); 8 | } 9 | -------------------------------------------------------------------------------- /src/util/misc.h: -------------------------------------------------------------------------------- 1 | #ifndef RS_MISC_H 2 | #define RS_MISC_H 3 | 4 | #include "redismodule.h" 5 | 6 | /** 7 | * This handler crashes 8 | */ 9 | void GenericAofRewrite_DisabledHandler(RedisModuleIO *aof, RedisModuleString *key, 
void *value); 10 | 11 | #endif -------------------------------------------------------------------------------- /src/util/quantile.h: -------------------------------------------------------------------------------- 1 | #ifndef QUANTILE_H 2 | #define QUANTILE_H 3 | 4 | #include 5 | #include 6 | 7 | typedef struct QuantStream QuantStream; 8 | 9 | QuantStream *NewQuantileStream(const double *quantiles, size_t numQuantiles, size_t bufferLength); 10 | void QS_Insert(QuantStream *qs, double val); 11 | double QS_Query(QuantStream *qs, double val); 12 | void QS_Free(QuantStream *qs); 13 | void QS_Dump(const QuantStream *stream, FILE *fp); 14 | size_t QS_GetCount(const QuantStream *stream); 15 | 16 | #endif -------------------------------------------------------------------------------- /src/util/strconv.h: -------------------------------------------------------------------------------- 1 | #ifndef RS_STRCONV_H_ 2 | #define RS_STRCONV_H_ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | /* Strconv - common simple string conversion utils */ 9 | 10 | // Case insensitive string equal 11 | #define STR_EQCASE(str, len, other) (len == strlen(other) && !strncasecmp(str, other, len)) 12 | 13 | // Case sensitive string equal 14 | #define STR_EQ(str, len, other) (len == strlen(other) && !strncmp(str, other, len)) 15 | 16 | /* Parse string into int, returning 1 on success, 0 otherwise */ 17 | static int ParseInteger(const char *arg, long long *val) { 18 | 19 | char *e = NULL; 20 | *val = strtoll(arg, &e, 10); 21 | errno = 0; 22 | if ((errno == ERANGE && (*val == LONG_MAX || *val == LONG_MIN)) || (errno != 0 && *val == 0) || 23 | *e != '\0') { 24 | *val = -1; 25 | return 0; 26 | } 27 | 28 | return 1; 29 | } 30 | 31 | /* Parse string into double, returning 1 on success, 0 otherwise */ 32 | static int ParseDouble(const char *arg, double *d) { 33 | char *e; 34 | *d = strtod(arg, &e); 35 | errno = 0; 36 | 37 | if ((errno == ERANGE && (*d == HUGE_VAL || *d == -HUGE_VAL)) 
|| (errno != 0 && *d == 0) || 38 | *e != '\0') { 39 | return 0; 40 | } 41 | 42 | return 1; 43 | } 44 | /* Parse "true"/"1" or "false"/"0" (case-insensitive) into *res, returning 1 on success; returns 0 and leaves *res untouched otherwise */ 45 | static int ParseBoolean(const char *arg, int *res) { 46 | if (STR_EQCASE(arg, strlen(arg), "true") || STR_EQCASE(arg, strlen(arg), "1")) { 47 | *res = 1; 48 | return 1; 49 | } 50 | 51 | if (STR_EQCASE(arg, strlen(arg), "false") || STR_EQCASE(arg, strlen(arg), "0")) { 52 | *res = 0; 53 | return 1; 54 | } 55 | 56 | return 0; 57 | } 58 | 59 | #endif -------------------------------------------------------------------------------- /src/varint.h: -------------------------------------------------------------------------------- 1 | #ifndef __VARINT_H__ 2 | #define __VARINT_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "buffer.h" 8 | #include "redisearch.h" 9 | 10 | /* Read an encoded integer from the buffer. It is assumed that the buffer will not overflow */ 11 | static inline uint32_t ReadVarint(BufferReader *b) { 12 | // The low 7 bits of each byte carry payload; a set high bit means another byte follows 13 | unsigned char c = BUFFER_READ_BYTE(b); 14 | 15 | uint32_t val = c & 127; 16 | while (c >> 7) { 17 | ++val; // NOTE(review): presumably undoes a matching decrement in WriteVarint - confirm against the writer 18 | c = BUFFER_READ_BYTE(b); 19 | val = (val << 7) | (c & 127); 20 | } 21 | 22 | return val; 23 | } 24 | /* Same decoding loop as ReadVarint, accumulating into a t_fieldMask instead of a uint32_t */ 25 | static inline t_fieldMask ReadVarintFieldMask(BufferReader *b) { 26 | 27 | unsigned char c = BUFFER_READ_BYTE(b); 28 | 29 | t_fieldMask val = c & 127; 30 | while (c >> 7) { 31 | ++val; 32 | c = BUFFER_READ_BYTE(b); 33 | val = (val << 7) | (c & 127); 34 | } 35 | 36 | return val; 37 | } 38 | 39 | size_t WriteVarint(uint32_t value, BufferWriter *w); 40 | 41 | size_t WriteVarintFieldMask(t_fieldMask value, BufferWriter *w); 42 | 43 | typedef struct { 44 | Buffer buf; 45 | // how many members we've put in 46 | size_t nmemb; 47 | uint32_t lastValue; 48 | } VarintVectorWriter; 49 | 50 | #define MAX_VARINT_LEN 5 51 | 52 | VarintVectorWriter *NewVarintVectorWriter(size_t cap); 53 | size_t VVW_Write(VarintVectorWriter *w, uint32_t i); 54 | size_t VVW_Truncate(VarintVectorWriter *w); 55 | void VVW_Free(VarintVectorWriter *w); 56
| void VVW_Init(VarintVectorWriter *w, size_t cap); 57 | /* Free the underlying buffer and zero the Buffer struct; nmemb and lastValue are left untouched */ 58 | static inline void VVW_Cleanup(VarintVectorWriter *w) { 59 | Buffer_Free(&w->buf); 60 | memset(&w->buf, 0, sizeof w->buf); 61 | } 62 | /* Rewind the writer (counters and write offset back to 0) without freeing the buffer */ 63 | static inline void VVW_Reset(VarintVectorWriter *w) { 64 | w->lastValue = 0; 65 | w->nmemb = 0; 66 | w->buf.offset = 0; 67 | } 68 | /* NULL-tolerant accessors: a NULL writer yields 0 / NULL */ 69 | #define VVW_GetCount(vvw) ((vvw) ? (vvw)->nmemb : 0) 70 | #define VVW_GetByteLength(vvw) ((vvw) ? (vvw)->buf.offset : 0) 71 | #define VVW_GetByteData(vvw) ((vvw) ? (vvw)->buf.data : NULL) 72 | #define VVW_OFFSETVECTOR_INIT(vvw) \ 73 | { .data = VVW_GetByteData(vvw), .len = VVW_GetByteLength(vvw) } 74 | #endif 75 | -------------------------------------------------------------------------------- /src/version.h: -------------------------------------------------------------------------------- 1 | // This is where the module's build/version is declared. 2 | // If declared with -D in compile time, this file is ignored 3 | #ifndef REDISEARCH_MODULE_VERSION 4 | 5 | #define REDISEARCH_VERSION_MAJOR 1 6 | #define REDISEARCH_VERSION_MINOR 2 7 | #define REDISEARCH_VERSION_PATCH 0 8 | // Encoded as MAJOR * 10000 + MINOR * 100 + PATCH, so 1.2.0 becomes 10200 9 | #define REDISEARCH_MODULE_VERSION \ 10 | (REDISEARCH_VERSION_MAJOR * 10000 + REDISEARCH_VERSION_MINOR * 100 + REDISEARCH_VERSION_PATCH) 11 | // Extra version string: RS_GIT_VERSION when it is defined at compile time, otherwise "" 12 | #ifdef RS_GIT_VERSION 13 | static inline const char* RS_GetExtraVersion() { 14 | return RS_GIT_VERSION; 15 | } 16 | #else 17 | static inline const char* RS_GetExtraVersion() { 18 | return ""; 19 | } 20 | #endif 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /srcutil/gen_parser_toplevel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import sys 4 | 5 | """ 6 | This script generates a source file suitable for compilation. Because our 7 | parser generator (Lemon) always outputs the same symbols, we need a way to 8 | namespace them so that they don't crash.
The approach we use will leave the 9 | file as-is, but generate an include wrapper, so that the symbols are changed 10 | before the actual source file is included, and then compiled with the macro 11 | definition instead. 12 | 13 | This script writes to stdout; the output may be captured and redirected to 14 | another file. 15 | """ 16 | 17 | ap = argparse.ArgumentParser() 18 | ap.add_argument('-p', '--prefix', help='Prefix for function names', required=True) 19 | ap.add_argument('-i', '--include', help='Next-include for actual parser code', 20 | default='parser.c.inc') 21 | options = ap.parse_args() 22 | 23 | fp = sys.stdout 24 | NAMES = ( 25 | 'Parse', 'ParseTrace', 'ParseAlloc', 'ParseFree', 'ParseInit', 26 | 'ParseFinalize', 27 | 'ParseStackPeack') 28 | 29 | 30 | for name in NAMES: 31 | fp.write('#define {name} {prefix}_{name}\n'.format(name=name, prefix=options.prefix)) 32 | fp.flush() 33 | 34 | fp.write('#include "{}"\n'.format(options.include)) 35 | fp.flush() -------------------------------------------------------------------------------- /srcutil/make-parser.mk: -------------------------------------------------------------------------------- 1 | LEMON := $(SRCUTIL)/lemon 2 | TEMPLATE := $(SRCUTIL)/lempar.c 3 | RAGEL := ragel 4 | 5 | # all and clean are commands, not files; without .PHONY a stray file with either name would disable them 6 | .PHONY: all clean 7 | 8 | all: lexer.c parser-toplevel.c parser.c.inc 9 | 10 | lexer.c: lexer.rl 11 | $(RAGEL) -s $< -o $@ 12 | 13 | # lemon emits parser.c; rename it to the .inc file that parser-toplevel.c includes 14 | parser.c.inc: parser.y 15 | $(LEMON) -s -T$(TEMPLATE) $< 16 | mv parser.c $@ 17 | 18 | parser-toplevel.c: $(SRCUTIL)/gen_parser_toplevel.py 19 | $< -p $(PARSER_SYMBOL_PREFIX) -i parser.c.inc > $@ 20 | 21 | clean: 22 | rm -f lexer.c parser.c parser.c.inc parser-toplevel.c 23 | --------------------------------------------------------------------------------