├── .drone.yml ├── .gitignore ├── .travis.yml ├── Dockerfile ├── Makefile ├── PROPOSAL.md ├── README.md ├── cmd ├── cli │ ├── .travis.yml │ ├── README.md │ ├── arch.txt │ ├── batch.go │ ├── cli.go │ ├── data.json │ ├── docs │ │ └── commands.md │ ├── index_data.ns │ ├── main.go │ └── parser │ │ ├── parser.go │ │ └── parser_test.go └── import │ ├── Dockerfile │ ├── LICENSE │ ├── README.md │ ├── main.go │ ├── nsimport │ ├── profile.png │ ├── profile0.png │ ├── profile0.prof │ ├── profile1.prof │ ├── profile2.prof │ └── samples │ └── operating_systems.json ├── docs ├── Dockerfile ├── README.md ├── architecture.md ├── dump-restore.md ├── img │ └── NeoSearch.png ├── motivation.md └── rest │ └── api.yml ├── hack ├── check.sh ├── deps.sh ├── docs.sh ├── gendeps.sh ├── make.sh ├── make │ ├── cli │ ├── import │ ├── library │ └── server └── stress-test.sh ├── lib └── neosearch │ ├── allocations.log │ ├── cache │ ├── cache.go │ ├── cache_test.go │ └── lru.go │ ├── config │ └── config.go │ ├── engine │ ├── command.go │ ├── command_test.go │ ├── config.go │ ├── engine.go │ └── engine_test.go │ ├── examples_test.go │ ├── index │ ├── filter.go │ ├── index.go │ ├── index_build_test.go │ ├── index_metadata_test.go │ ├── index_object_test.go │ ├── index_test.go │ └── metadata.go │ ├── neosearch.go │ ├── neosearch_test.go │ ├── search │ └── search.go │ ├── store │ ├── goleveldb │ │ ├── iterator.go │ │ ├── reader.go │ │ ├── store.go │ │ ├── store_test.go │ │ ├── util.go │ │ └── writer.go │ ├── leveldb │ │ ├── iterator.go │ │ ├── reader.go │ │ ├── store.go │ │ ├── store_test.go │ │ ├── util.go │ │ └── writer.go │ ├── registry.go │ ├── store.go │ ├── test │ │ └── store.go │ └── utils.go │ ├── utils │ ├── arrays.go │ ├── byte.go │ ├── conversions.go │ └── fields.go │ └── version │ └── version.go ├── mkdocs.yml └── service └── neosearch ├── Dockerfile ├── README.md ├── config.yml ├── handler └── default.go ├── home ├── home.go └── home_test.go ├── index ├── add.go ├── add_test.go ├── 
create.go ├── create_test.go ├── delete.go ├── delete_test.go ├── get.go ├── get_analyze.go ├── get_analyze_test.go ├── get_test.go ├── index.go ├── index_test.go ├── search.go └── search_test.go ├── main.go └── server ├── server.go └── server_test.go /.drone.yml: -------------------------------------------------------------------------------- 1 | notify: 2 | webhook: 3 | urls: 4 | - "https://webhooks.gitter.im/e/46c4a637df6b05959110" 5 | on_success: true 6 | on_failure: true 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /neosearch/neosearch 2 | bundles/ 3 | *.test 4 | profile.cov 5 | coverage.txt 6 | /site 7 | /hack/deps.txt 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - tip 5 | install: 6 | - cd $HOME/gopath/src/github.com/NeowayLabs/neosearch 7 | - go get -d -v ./... 8 | - go build -v ./... 
9 | script: 10 | - go get golang.org/x/tools/cmd/vet 11 | - go get github.com/axw/gocov/gocov 12 | - go get github.com/mattn/goveralls 13 | - go get -u github.com/golang/lint 14 | - go get github.com/tools/godep 15 | - go get golang.org/x/tools/cover 16 | - hack/check.sh 17 | # - goveralls -coverprofile=coverage.txt -service=travis-ci 18 | before_install: 19 | sudo pip install codecov 20 | after_success: 21 | codecov 22 | notifications: 23 | email: 24 | - tiago.natel@neoway.com.br 25 | - tiagokatcipis@gmail.com 26 | - paulo.pizarro@gmail.com 27 | webhooks: 28 | urls: 29 | - "https://webhooks.gitter.im/e/5c49f66645e9c101199e" 30 | on_success: change 31 | on_failure: always 32 | on_start: false 33 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # This file describes the standard way to build neosearch, using docker 2 | 3 | FROM ubuntu:14.04 4 | MAINTAINER Tiago Katcipis (@tiagokatcipis) 5 | 6 | # Packaged dependencies 7 | RUN apt-get update && apt-get install -y \ 8 | ca-certificates \ 9 | build-essential \ 10 | curl \ 11 | git \ 12 | bzr \ 13 | mercurial \ 14 | --no-install-recommends 15 | 16 | # Install Go 17 | ENV GO_VERSION 1.4.2 18 | RUN curl -sSL https://golang.org/dl/go${GO_VERSION}.src.tar.gz | tar -v -C /usr/local -xz \ 19 | && mkdir -p /go/bin 20 | ENV PATH /go/bin:/usr/local/go/bin:$PATH 21 | ENV GOPATH /go 22 | RUN cd /usr/local/go/src && ./make.bash --no-clean 2>&1 23 | 24 | # Grab Go test coverage tools 25 | RUN go get golang.org/x/tools/cmd/cover && \ 26 | go get github.com/tools/godep && \ 27 | go get github.com/axw/gocov/gocov && \ 28 | go get golang.org/x/tools/cmd/cover && \ 29 | go get github.com/golang/lint/golint && \ 30 | go get golang.org/x/tools/cmd/goimports && \ 31 | go get golang.org/x/tools/cmd/godoc && \ 32 | go get golang.org/x/tools/cmd/vet 33 | 34 | # Install package dependencies 35 | RUN go get -d 
github.com/extemporalgenome/slug && \ 36 | go get -d golang.org/x/text && \ 37 | go get -d github.com/syndtr/goleveldb/leveldb && \ 38 | go get -d github.com/golang/snappy && \ 39 | go get -d github.com/iNamik/go_lexer && \ 40 | go get -d github.com/iNamik/go_container && \ 41 | go get -d github.com/iNamik/go_pkg && \ 42 | go get -d gopkg.in/yaml.v2 && \ 43 | go get -d github.com/jteeuwen/go-pkg-optarg && \ 44 | go get -d launchpad.net/gommap && \ 45 | go get -d github.com/julienschmidt/httprouter && \ 46 | go get -d github.com/peterh/liner 47 | 48 | ENV STORAGE_ENGINE goleveldb 49 | 50 | WORKDIR /go/src/github.com/NeowayLabs/neosearch 51 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all build server cli check shell docs docs-view docs-shell 2 | 3 | DOCKER_DEVIMAGE = neosearch-dev 4 | DOCKER_DOCSIMAGE = neosearch-docs 5 | DEV_WORKDIR = /go/src/github.com/NeowayLabs/neosearch 6 | CURRENT_PATH = $(shell pwd) 7 | MOUNT_DEV_VOLUME = -v $(CURRENT_PATH):$(DEV_WORKDIR) 8 | TEST_DIRECTORY ?= . 9 | SHELL_EXPORT := $(foreach v,$(MAKE_ENV),$(v)='$($(v))') 10 | 11 | ifeq ($(STORAGE_ENGINE),) 12 | export STORAGE_ENGINE=goleveldb 13 | else 14 | export STORAGE_ENGINE 15 | endif 16 | 17 | ifeq ($(TEST_DIRECTORY),) 18 | export TEST_DIRECTORY=. 
19 | else 20 | export TEST_DIRECTORY 21 | endif 22 | 23 | all: build 24 | @-docker rm -vf neosearch-ctn 25 | docker run --name neosearch-ctn -e STORAGE_ENGINE=$(STORAGE_ENGINE) -v `pwd`:$(DEV_WORKDIR) -i -t $(DOCKER_DEVIMAGE) hack/make.sh 26 | 27 | server: build 28 | @-docker rm -vf neosearch-ctn 29 | docker run --name neosearch-ctn -e STORAGE_ENGINE=$(STORAGE_ENGINE) -v $(CURRENT_PATH):$(DEV_WORKDIR) -i -t $(DOCKER_DEVIMAGE) hack/make.sh server 30 | 31 | cli: build 32 | @-docker rm -vf neosearch-ctn 33 | docker run --name neosearch-ctn -e STORAGE_ENGINE=$(STORAGE_ENGINE) -v `pwd`:$(DEV_WORKDIR) -i -t $(DOCKER_DEVIMAGE) hack/make.sh cli 34 | 35 | library: build 36 | @-docker rm -vf neosearch-ctn 37 | docker run --name neosearch-ctn -e STORAGE_ENGINE=$(STORAGE_ENGINE) -v `pwd`:$(DEV_WORKDIR) -i -t $(DOCKER_DEVIMAGE) hack/make.sh library 38 | 39 | check: build 40 | @-docker rm -vf neosearch-ctn 41 | docker run --name neosearch-ctn -e TEST_DIRECTORY=$(TEST_DIRECTORY) -e STORAGE_ENGINE=$(STORAGE_ENGINE) -v `pwd`:$(DEV_WORKDIR) -i $(DOCKER_DEVIMAGE) hack/check.sh 42 | 43 | shell: build 44 | docker run --rm -e STORAGE_ENGINE=$(STORAGE_ENGINE) -v `pwd`:$(DEV_WORKDIR) --privileged -i -t $(DOCKER_DEVIMAGE) bash 45 | 46 | docs: build-docs 47 | docker run --rm $(MOUNT_DEV_VOLUME) --privileged $(DOCKER_DOCSIMAGE) hack/docs.sh 48 | 49 | docs-view: docs 50 | xdg-open ./site/index.html 51 | 52 | docs-shell: build-docs 53 | docker run --rm $(MOUNT_DEV_VOLUME) --privileged -t -i $(DOCKER_DOCSIMAGE) bash 54 | 55 | hack/deps.txt: 56 | ./hack/gendeps.sh "$(STORAGE_ENGINE)" 57 | 58 | build: hack/deps.txt 59 | docker build -t $(DOCKER_DEVIMAGE) . 60 | 61 | build-docs: build 62 | docker build -t $(DOCKER_DOCSIMAGE) -f ./docs/Dockerfile . 
63 | -------------------------------------------------------------------------------- /PROPOSAL.md: -------------------------------------------------------------------------------- 1 | # Original project proposal 2 | 3 | Proposal of a new buzzwords-complete solution to big data! 4 | 5 | Features 6 | ========== 7 | 8 | * Schemaless 9 | * Sharding 10 | * Replicas 11 | * Rest API 12 | * Spatial Index 13 | 14 | Indexed Documents 15 | ================== 16 | 17 | Indexed with key/value data storage. 18 | ``` 19 | Example document 1: 20 | { 21 | id: 1, 22 | name: "Hello NeoSearch", 23 | title: "NeoSearch - High Performance Distributed Search Index" 24 | } 25 | ``` 26 | # Using the REST to create the document 1 27 | POST /index/example.idx/ 28 | 29 | Will result for name.idx index (the following operations will be executed at lower level): 30 | ``` 31 | neosearch> using name.idx MERGESET "hello" 1 32 | hello: [1] 33 | neosearch> using name.idx MERGESET "neosearch" 1 34 | neosearch: [1] 35 | ``` 36 | Will result for title.idx index: 37 | ``` 38 | neosearch> using title.idx MERGESET "neosearch" 1 39 | neosearch: [1] 40 | neosearch> using title.idx MERGESET "high" 41 | high: [1] 42 | neosearch> using title.idx MERGESET "performance" 1 43 | performance: [1] 44 | neosearch: using title.idx MERGESET "distributed" 1 45 | distributed: [1] 46 | neosearch> using title.idx MERGESET "search" 1 47 | search: [1] 48 | neosearch> using title.idx MERGESET "index" 1 49 | index: [1] 50 | ``` 51 | Lets index a new document: 52 | 53 | Example document 2: 54 | ``` 55 | { 56 | id: 2, 57 | name: "Do we need the Paxos algorithm for master-slave election consensus on NeoSearch?", 58 | title: "NeoSearch - Buzz-words complete index solution" 59 | } 60 | ``` 61 | 62 | # Save the document 2 63 | POST /index/example.idx 64 | 65 | Will result for name.idx index (example for a English analyser): 66 | ``` 67 | neosearch> using name.idx MERGESET "need" 2 68 | need: [2] 69 | neosearch> using name.idx 
MERGESET "paxos" 2 70 | paxos: [2] 71 | neosearch> using name.idx MERGESET "algorithm" 2 72 | algorithm: [2] 73 | neosearch> using name.idx MERGESET "master-slave" 2 74 | master-slave: [2] 75 | neosearch> using name.idx MERGESET "election" 2 76 | election: [2] 77 | neosearch> using name.idx MERGESET "consensus" 2 78 | consensus: [2] 79 | neosearch> using name.idx MERGESET "neosearch" 2 80 | neosearch: [1, 2] // HEY, LOOK THE MERGESET here!! 81 | ``` 82 | Then, store the document: 83 | ``` 84 | neosearch> using document.db SET 2 "{ 85 | id: 2, 86 | name: "Do we need the Paxos algorithm for master-slave election consensus on NeoSearch?", 87 | title: "NeoSearch - Buzz-words complete index solution" 88 | }" 89 | ``` 90 | 91 | After the two documents are indexed we have (ignoring the "id" field for now): 92 | ```bash 93 | $ ls /neosearch_data/indexes/ 94 | example.idx 95 | ``` 96 | ```bash 97 | $ ls /neosearch_data/indexes/example.idx/ 98 | metadata.json name.idx title.idx document.db 99 | ``` 100 | # Retrieving documents by shard-id 101 | ``` 102 | neosearch> using document.db GET 1 103 | { 104 | document: { 105 | id: 1, 106 | name: "Hello NeoSearch", 107 | title: "NeoSearch - High Performance Distributed Search Index" 108 | } 109 | } 110 | ``` 111 | # NeoSearch Key/Value datastore interface 112 | Low-level searching documents in index using the key/value interface. This is much closer to the pure key-value interface. 
113 | ``` 114 | neosearch> using index id.idx GET 1 115 | { 116 | results: 1 117 | documents: [1] 118 | } 119 | ``` 120 | ``` 121 | neosearch> using index name.idx GET "test" 122 | { 123 | results: 0 124 | } 125 | ``` 126 | ``` 127 | neosearch> using index name.idx GET "hello" 128 | { 129 | results: 1, 130 | documents: [1], 131 | highlight: { 132 | "1": { 133 | start: 0, 134 | end: 4 135 | } 136 | } 137 | } 138 | ``` 139 | ``` 140 | neosearch> using index name.idx GET "NeoSearch" 141 | { 142 | results: 2, 143 | documents: [1,2], 144 | highlight: { 145 | "1": { 146 | start: 6, 147 | end: 15 148 | }, 149 | "2": { 150 | start: 70, 151 | end: 79 152 | } 153 | } 154 | ``` 155 | ``` 156 | neosearch> using index title.idx GET "buzz-words" 157 | { 158 | results: 1, 159 | documents: [2], 160 | highlight: { 161 | "2": { 162 | start: 12, 163 | end: 22 164 | } 165 | } 166 | ``` 167 | # Algorithm for query operations 168 | 169 | ### AND algorithm 170 | 171 | High-level query DSL search: 172 | ``` 173 | GET /search/example.idx 174 | { 175 | query: { 176 | group: { 177 | operator: "AND" 178 | filters: [ 179 | {name: "hello"}, 180 | {title: "buzz-words"} 181 | ] 182 | }, 183 | operator: "OR", 184 | filters: [ 185 | {name: "NeoSearch"} 186 | ] 187 | } 188 | } 189 | ``` 190 | This query DSL will result in the query below: 191 | ``` 192 | (name: "hello" AND title: "buzz-words") OR name: "NeoSearch" 193 | ``` 194 | Each "group" field is a "parenthesized" subquery that need be searched in the specified order. 
195 | 196 | 1- First the subquery get the documents that have "hello" from name.idx; 197 | ``` 198 | neosearch> using index name.idx GET "hello" 199 | { 200 | results: 1, 201 | documents: [1], 202 | highlight: { 203 | "1": { 204 | start: 0, 205 | end: 4 206 | } 207 | } 208 | } 209 | ``` 210 | 2- Then get documents that have "buzz-words" from title.idx; 211 | ``` 212 | neosearch> using index title.idx GET "buzz-words" 213 | { 214 | results: 1, 215 | documents: [2], 216 | highlight: { 217 | "2": { 218 | start: 12, 219 | end: 22 220 | } 221 | } 222 | ``` 223 | After that, use the algorithms for union/merge/intersect the arrays to provide AND, OR, XOR, NOT, etc, in the results. 224 | ## Basic Algorithm for AND-merge result sets; 225 | ``` 226 | name_docs = name_index.get("hello") // returns [1] 227 | title_docs = title_index.get("buzz-words") // returns [2] 228 | result_docs = array_intersects(name_docs, title_docs) // [] 229 | ``` 230 | ## Basic algorithm for OR-merge result-sets 231 | ``` 232 | name_docs = name_index.get("hello") // returns [1] 233 | title_docs = title_index.get("buzz-words") // returns [2] 234 | result_docs = array_union_uniques(name_docs, title_docs) // [1, 2] 235 | ``` 236 | Then for the query below: 237 | ``` 238 | (name: "hello" AND title: "buzz-words") OR name: "NeoSearch" 239 | ``` 240 | We have this algorithm: 241 | ``` 242 | name_docs = name_index.get("hello") // returns [1] 243 | title_docs = title_index.get("buzz-words") // returns [2] 244 | name2_docs = name_index.get("NeoSearch") // returns [1,2] 245 | # first apply the AND operation 246 | result_docs = array_intersects(name_docs, title_docs) // returns [] 247 | # Then, apply the OR in the results 248 | result_docs = array_union_unique(result_docs, name2_docs) // returns [1, 2] 249 | ``` 250 | 251 | Sharding / Partitioning (TO BE DEFINED) 252 | ================== 253 | 254 | The sharding should support *Routing* for single key-field, combined key-fields, document hash or timestamp. 
255 | 256 | Warning: The wrong architecture can fuck up everything... 257 | Eg.: Data migration/realloc when shards entering the cluster? Problems.... 258 | 259 | Replication (TO BE DEFINED) 260 | ============ 261 | 262 | Every master can have one or more replicas. Each replica contains all of the indexed documents at the master node. The master node can be a shard, then the replicas will contains only the sharded data. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/NeowayLabs/neosearch.svg?branch=master)](https://travis-ci.org/NeowayLabs/neosearch) [![Build Status](https://drone.io/github.com/NeowayLabs/neosearch/status.png)](https://drone.io/github.com/NeowayLabs/neosearch/latest) [![GoDoc](https://godoc.org/github.com/NeowayLabs/neosearch/lib/neosearch?status.svg)](https://godoc.org/github.com/NeowayLabs/neosearch/lib/neosearch) [![codecov.io](http://codecov.io/github/NeowayLabs/neosearch/coverage.svg?branch=master)](http://codecov.io/github/NeowayLabs/neosearch?branch=master) 2 | 3 | NeoSearch - Neoway Full Text Search Index 4 | ========================================== 5 | 6 | [![Join the chat at https://gitter.im/NeowayLabs/neosearch](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/NeowayLabs/neosearch?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 7 | 8 | NeoSearch is a feature-limited full-text-search library with focus on indices relationships. Its main goal is to provide very fast JOIN operations between information stored on different indices. 9 | 10 | It's not a complete FTS (Full Text Search) engine in the common sense, instead, it aims to solve very specific problems of FTS. At the moment, NeoSearch is a laboratory for research, not recommended for production usage. Here we will test various technologies for fast storage and search algorithms. 
In the future, maybe, we can be proud of a very nice tech for solving search problems in big data companies. 11 | 12 | NeoSearch is like a Lucene library but without all of the complexities of a complete FTS engine, written in Go, and focused on high performance search with data relationships. 13 | 14 | It's not complete yet, still in active development, so stay tuned for updates. 15 | 16 | ## Why another text search engine ? 17 | 18 | We believe we have good reasons for building a new text search engine. 19 | 20 | Take a look at [our motives here](./docs/motivation.md) 21 | 22 | 23 | ## Install 24 | 25 | Install dependencies: 26 | 27 | * snappy (optional, only required for compressed data) 28 | * Go 1.4 29 | 30 | and get the code: 31 | 32 | ```bash 33 | go get -v -u -t github.com/NeowayLabs/neosearch/... 34 | 35 | cd $GOPATH/src/github.com/NeowayLabs/neosearch 36 | go test -v ./... 37 | ``` 38 | 39 | ### Storage LevelDB - Optional 40 | 41 | Install dependencies: 42 | 43 | * leveldb >= 1.15 (optional) 44 | 45 | ```bash 46 | export CGO_CFLAGS='-I/path/to/leveldb/include' 47 | export CGO_LDFLAGS='-L/path/to/leveldb/lib' 48 | 49 | go get -tags leveldb -v -u -t github.com/NeowayLabs/neosearch/... 50 | 51 | cd $GOPATH/src/github.com/NeowayLabs/neosearch 52 | go test -tags leveldb -v ./lib/neosearch/store/leveldb ./... 53 | ``` 54 | 55 | ## Contributing 56 | 57 | Looking for some fun ? Starting to develop on NeoSearch is as easy as installing docker :D 58 | 59 | First of all install [Docker](https://docs.docker.com/installation/). 60 | 61 | After you get docker installed, just get the code: 62 | 63 | git clone git@github.com:NeowayLabs/neosearch.git 64 | 65 | And build it: 66 | 67 | make build 68 | 69 | If you get no errors, you are good to go :D. Just start messing around with the code on your preferred editor/IDE. 
70 | 71 | Compiling the code: 72 | 73 | make 74 | 75 | Running the tests: 76 | 77 | make check 78 | 79 | Yeah, simple like that :D 80 | -------------------------------------------------------------------------------- /cmd/cli/.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.3 5 | - tip 6 | install: 7 | - cd /tmp && git clone https://github.com/google/leveldb.git && cd /tmp/leveldb && make 8 | - cd $HOME/gopath/src/github.com/NeowayLabs/neosearch-cli 9 | - go get -v github.com/jmhodges/levigo 10 | - go get -d -v ./... 11 | - go build -tags leveldb -v 12 | script: 13 | - go get golang.org/x/tools/cmd/vet 14 | - go get github.com/axw/gocov/gocov 15 | - go get github.com/mattn/goveralls 16 | - if ! go get code.google.com/p/go.tools/cmd/cover; then go get golang.org/x/tools/cmd/cover; fi 17 | - go test -tags leveldb -v ./... 18 | 19 | notifications: 20 | email: 21 | - tiago.natel@neoway.com.br 22 | - tiagokatcipis@gmail.com -------------------------------------------------------------------------------- /cmd/cli/README.md: -------------------------------------------------------------------------------- 1 | # neosearch-cli 2 | 3 | Command-line tool for executing low-level commands at the core of NeoSearch. 4 | 5 | # Installation 6 | 7 | ``` 8 | go get -tags leveldb github.com/NeowayLabs/neosearch 9 | export CGO_LDFLAGS="-L/usr/lib -lleveldb -lsnappy -lstdc++" 10 | export GO_LDFLAGS="-extld g++ -linkmode external -extldflags -static" 11 | go get -tags leveldb -x -a -ldflags "$GO_LDFLAGS" -v github.com/NeowayLabs/neosearch 12 | $GOPATH/bin/neosearch-cli -d /data 13 | ``` 14 | 15 | # NeoSearch Key/Value Syntax 16 | 17 | NeoSearch has its own key/value syntax for commands. 
It's ridiculously simple: 18 | 19 | ``` 20 | USING 21 | ``` 22 | Examples: 23 | ``` 24 | USING title.idx SET neosearch "fast searching with document/indexes joins, spatial index and more" 25 | ``` 26 | -------------------------------------------------------------------------------- /cmd/cli/arch.txt: -------------------------------------------------------------------------------- 1 | NeoSearch - Neoway Full Text Search Index 2 | ========================================== 3 | 4 | Proposal of a new buzzwords-complete solution to big data! 5 | 6 | Features 7 | ========== 8 | 9 | * Schemaless 10 | * Sharding 11 | * Replicas 12 | * Rest API 13 | * Spatial Index 14 | 15 | Indexed Documents 16 | ================== 17 | 18 | Indexed with key/value data storage. 19 | ``` 20 | Example document 1: 21 | { 22 | id: 1, 23 | name: "Hello NeoSearch", 24 | title: "NeoSearch - High Performance Distributed Search Index" 25 | } 26 | ``` 27 | # Using the REST to create the document 1 28 | POST /index/example.idx/ 29 | 30 | Will result for name.idx index: 31 | ``` 32 | neosearch> using name.idx MERGESET "hello" 1 33 | hello: [1] 34 | neosearch> using name.idx MERGESET "neosearch" 1 35 | neosearch: [1] 36 | ``` 37 | Will result for title.idx index: 38 | ``` 39 | neosearch> using title.idx MERGESET "neosearch" 1 40 | neosearch: [1] 41 | neosearch> using title.idx MERGESET "high" 1 42 | high: [1] 43 | neosearch> using title.idx MERGESET "performance" 1 44 | performance: [1] 45 | neosearch> using title.idx MERGESET "distributed" 1 46 | distributed: [1] 47 | neosearch> using title.idx MERGESET "search" 1 48 | search: [1] 49 | neosearch> using title.idx MERGESET "index" 1 50 | index: [1] 51 | ``` 52 | Let's index a new document: 53 | 54 | Example document 2: 55 | ``` 56 | { 57 | id: 2, 58 | name: "Do we need the Paxos algorithm for master-slave election consensus on NeoSearch?", 59 | title: "NeoSearch - Buzz-words complete index solution" 60 | } 61 | ``` 62 | 63 | # Save the document 2 64 | POST 
/index/example.idx 65 | 66 | Will result for name.idx index (example for a English analyser): 67 | ``` 68 | neosearch> using name.idx MERGESET "need" 2 69 | need: [2] 70 | neosearch> using name.idx MERGESET "paxos" 2 71 | paxos: [2] 72 | neosearch> using name.idx MERGESET "algorithm" 2 73 | paxos: [2] 74 | neosearch> using name.idx MERGESET "master-slave" 2 75 | master-slave: [2] 76 | neosearch> using name.idx MERGESET "election" 2 77 | election: [2] 78 | neosearch> using name.idx MERGESET "consensus" 2 79 | consensus: [2] 80 | neosearch> using name.idx MERGESET "neosearch" 2 81 | neosearch: [1, 2] // HEY, LOOK THE MERGESET here!! 82 | ``` 83 | Then, store the document: 84 | ``` 85 | neosearch> using document.db SET 2 "{ 86 | id: 2, 87 | name: "Do we need the Paxos algorithm for master-slave election consensus on NeoSearch?", 88 | title: "NeoSearch - Buzz-words complete index solution" 89 | }" 90 | ``` 91 | 92 | After the two documents indexed we have (ignoring the "id" field fow now): 93 | ```bash 94 | $ ls /neosearch_data/indexes/ 95 | example.idx 96 | ``` 97 | ```bash 98 | $ ls /neosearch_data/indexes/example.idx/ 99 | metadata.json name.idx title.idx document.db 100 | ``` 101 | # Retrieving documents by shard-id 102 | ``` 103 | neosearch> using document.db GET 1 104 | { 105 | document: { 106 | id: 1, 107 | name: "Hello NeoSearch", 108 | title: "NeoSearch - High Performance Distributed Search Index" 109 | } 110 | } 111 | ``` 112 | # NeoSearch Key/Value datastore interface 113 | Low-level searching documents in index using the key/value interface. This is much closer to the pure key-value interface. 
114 | ``` 115 | neosearch> using index id.idx GET 1 116 | { 117 | results: 1 118 | documents: [1] 119 | } 120 | ``` 121 | ``` 122 | neosearch> using index name.idx GET "test" 123 | { 124 | results: 0 125 | } 126 | ``` 127 | ``` 128 | neosearch> using index name.idx GET "hello" 129 | { 130 | results: 1, 131 | documents: [1], 132 | highlight: { 133 | "1": { 134 | start: 0, 135 | end: 4 136 | } 137 | } 138 | } 139 | ``` 140 | ``` 141 | neosearch> using index name.idx GET "NeoSearch" 142 | { 143 | results: 2, 144 | documents: [1,2], 145 | highlight: { 146 | "1": { 147 | start: 6, 148 | end: 15 149 | }, 150 | "2": { 151 | start: 70, 152 | end: 79 153 | } 154 | } 155 | ``` 156 | ``` 157 | neosearch> using index title.idx GET "buzz-words" 158 | { 159 | results: 1, 160 | documents: [2], 161 | highlight: { 162 | "2": { 163 | start: 12, 164 | end: 22 165 | } 166 | } 167 | ``` 168 | # Algorithm for query operations 169 | 170 | ### AND algorithm 171 | 172 | High-level query DSL search: 173 | ``` 174 | GET /search/example.idx 175 | { 176 | query: { 177 | group: { 178 | operator: "AND" 179 | filters: [ 180 | {name: "hello"}, 181 | {title: "buzz-words"} 182 | ] 183 | }, 184 | operator: "OR", 185 | filters: [ 186 | {name: "NeoSearch"} 187 | ] 188 | } 189 | } 190 | ``` 191 | This query DSL will result in the query below: 192 | ``` 193 | (name: "hello" AND title: "buzz-words") OR name: "NeoSearch" 194 | ``` 195 | Each "group" field is a "parenthesized" subquery that need be searched in the specified order. 
196 | 197 | 1- First the subquery get the documents that have "hello" from name.idx; 198 | ``` 199 | neosearch> using index name.idx GET "hello" 200 | { 201 | results: 1, 202 | documents: [1], 203 | highlight: { 204 | "1": { 205 | start: 0, 206 | end: 4 207 | } 208 | } 209 | } 210 | ``` 211 | 2- Then get documents that have "buzz-words" from title.idx; 212 | ``` 213 | neosearch> using index title.idx GET "buzz-words" 214 | { 215 | results: 1, 216 | documents: [2], 217 | highlight: { 218 | "2": { 219 | start: 12, 220 | end: 22 221 | } 222 | } 223 | ``` 224 | After that, use the algorithms for union/merge/intersect the arrays to provide AND, OR, XOR, NOT, etc, in the results. 225 | ## Basic Algorithm for AND-merge result sets; 226 | ``` 227 | name_docs = name_index.get("hello") // returns [1] 228 | title_docs = title_index.get("buzz-words") // returns [2] 229 | result_docs = array_intersects(name_docs, title_docs) // [] 230 | ``` 231 | ## Basic algorithm for OR-merge result-sets 232 | ``` 233 | name_docs = name_index.get("hello") // returns [1] 234 | title_docs = title_index.get("buzz-words") // returns [2] 235 | result_docs = array_union_uniques(name_docs, title_docs) // [1, 2] 236 | ``` 237 | Then for the query below: 238 | ``` 239 | (name: "hello" AND title: "buzz-words") OR name: "NeoSearch" 240 | ``` 241 | We have this algorithm: 242 | ``` 243 | name_docs = name_index.get("hello") // returns [1] 244 | title_docs = title_index.get("buzz-words") // returns [2] 245 | name2_docs = name_index.get("NeoSearch") // returns [1,2] 246 | # first apply the AND operation 247 | result_docs = array_intersects(name_docs, title_docs) // returns [] 248 | # Then, apply the OR in the results 249 | result_docs = array_union_unique(result_docs, name2_docs) // returns [1, 2] 250 | ``` 251 | 252 | Sharding / Partitioning (TO BE DEFINED) 253 | ================== 254 | 255 | The sharding should support *Routing* for single key-field, combined key-fields, document hash or timestamp. 
256 | 257 | Warning: The wrong architecture can fuck up everything... 258 | Eg.: Data migration/realloc when shards entering the cluster? Problems.... 259 | 260 | Replication (TO BE DEFINED) 261 | ============ 262 | 263 | Every master can have one or more replicas. Each replica contains all of the indexed documents at the master node. The master node can be a shard, then the replicas will contains only the sharded data. 264 | 265 | -------------------------------------------------------------------------------- /cmd/cli/batch.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/NeowayLabs/neosearch/cmd/cli/parser" 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 9 | ) 10 | 11 | func batch(ng *engine.Engine, filePath string) error { 12 | file, err := os.Open(filePath) 13 | 14 | if err != nil { 15 | panic(err) 16 | } 17 | 18 | commands := []engine.Command{} 19 | 20 | err = parser.FromReader(file, &commands) 21 | 22 | for _, cmd := range commands { 23 | d, err := ng.Execute(cmd) 24 | if err != nil { 25 | fmt.Println(err) 26 | } else { 27 | fmt.Println("Data: ", d) 28 | } 29 | } 30 | 31 | return nil 32 | } 33 | -------------------------------------------------------------------------------- /cmd/cli/cli.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "strings" 8 | 9 | "github.com/NeowayLabs/neosearch/cmd/cli/parser" 10 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 11 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 12 | "github.com/peterh/liner" 13 | ) 14 | 15 | var ( 16 | historyFile = "cli.history.txt" 17 | keywords = []string{"using", "set", "get", "mergeset", "delete"} 18 | ) 19 | 20 | func setupNeosearchDir(homePath string) error { 21 | return os.Mkdir(homePath+"/.neosearch/", 0755) 22 | } 23 | 24 | func cli(ng *engine.Engine, 
homePath string) error { 25 | var cmdline string 26 | var err error 27 | var enableHistory bool 28 | 29 | if homePath != "" { 30 | enableHistory = true 31 | setupNeosearchDir(homePath) 32 | } else { 33 | fmt.Printf("No user home provided... \n") 34 | fmt.Printf("Provide --home to enable command history.\n") 35 | } 36 | 37 | line := liner.NewLiner() 38 | defer line.Close() 39 | 40 | line.SetCompleter(func(line string) (c []string) { 41 | for _, n := range keywords { 42 | if strings.HasPrefix(n, strings.ToLower(line)) { 43 | c = append(c, n) 44 | } 45 | } 46 | return 47 | }) 48 | 49 | if enableHistory { 50 | if f, err := os.Open(homePath + "/.neosearch/" + historyFile); err == nil { 51 | line.ReadHistory(f) 52 | f.Close() 53 | } 54 | } 55 | 56 | // command-line here 57 | for { 58 | if cmdline, err = line.Prompt("neosearch>"); err != nil { 59 | if err.Error() == "EOF" { 60 | break 61 | } 62 | 63 | continue 64 | } 65 | 66 | line.AppendHistory(cmdline) 67 | 68 | command := []engine.Command{} 69 | 70 | if strings.ToLower(cmdline) == "quit" || 71 | strings.ToLower(cmdline) == "quit;" { 72 | break 73 | } 74 | 75 | err = parser.FromString(cmdline, &command) 76 | if err != nil { 77 | fmt.Println(err) 78 | } else { 79 | for _, cmd := range command { 80 | data, err := ng.Execute(cmd) 81 | if err != nil { 82 | fmt.Println("ERROR: ", err) 83 | } else { 84 | fmt.Printf("%s: Success\n", cmd.Command) 85 | 86 | if data != nil { 87 | ext := cmd.Index[len(cmd.Index)-3 : len(cmd.Index)] 88 | if ext == "idx" { 89 | uints := utils.GetUint64Array(data) 90 | fmt.Printf("Result[%s]: %v\n", ext, uints) 91 | } else { 92 | fmt.Printf("Result: %s\n", string(data)) 93 | } 94 | } 95 | } 96 | 97 | } 98 | } 99 | } 100 | 101 | if enableHistory { 102 | if f, err := os.Create(homePath + "/.neosearch/" + historyFile); err != nil { 103 | log.Print("Error writing history file: ", err) 104 | } else { 105 | line.WriteHistory(f) 106 | f.Close() 107 | } 108 | } 109 | 110 | fmt.Println("Exiting...") 111 | 
return nil 112 | } 113 | -------------------------------------------------------------------------------- /cmd/cli/docs/commands.md: -------------------------------------------------------------------------------- 1 | What if the neosearch commands can be piped between each other? 2 | 3 | (USING name.idx GET "neoway") INTERSECTS (USING uf.idx GET "sc") 4 | 5 | (define names (index "name.idx")) 6 | (define states (index "state.idx")) 7 | 8 | (and (get names "neoway") (get states "sc")) 9 | 10 | (put names ('neoway '(1 2 3 4 5))) 11 | OK 12 | (get names "neoway") 13 | '(1 2 3 4 5) 14 | -------------------------------------------------------------------------------- /cmd/cli/index_data.ns: -------------------------------------------------------------------------------- 1 | using title.idx set "hello" 1; 2 | using id.idx set 1 "{test: \"1\"}"; -------------------------------------------------------------------------------- /cmd/cli/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 7 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 8 | "github.com/jteeuwen/go-pkg-optarg" 9 | ) 10 | 11 | func main() { 12 | var fileOpt, dataDirOpt, homeOpt string 13 | var helpOpt, debugOpt bool 14 | 15 | optarg.Add("f", "from-file", "Read NeoSearch low-level instructions from file", "") 16 | optarg.Add("d", "data-dir", "Data directory", "") 17 | optarg.Add("t", "trace-debug", "Enable trace for debug", false) 18 | optarg.Add("h", "help", "Display this help", false) 19 | optarg.Add("m", "home", "User home for store command history", "") 20 | 21 | for opt := range optarg.Parse() { 22 | switch opt.ShortName { 23 | case "f": 24 | fileOpt = opt.String() 25 | break 26 | case "d": 27 | dataDirOpt = opt.String() 28 | break 29 | case "m": 30 | homeOpt = opt.String() 31 | break 32 | case "t": 33 | debugOpt = true 34 | break 35 | case "h": 36 | 
helpOpt = true 37 | break 38 | } 39 | } 40 | 41 | if helpOpt { 42 | optarg.Usage() 43 | os.Exit(0) 44 | } 45 | 46 | if homeOpt == "" { 47 | if homeEnv := os.Getenv("HOME"); homeEnv != "" { 48 | homeOpt = homeEnv 49 | } 50 | } 51 | 52 | if dataDirOpt == "" { 53 | dataDirOpt, _ = os.Getwd() 54 | } 55 | 56 | ng := engine.New(&engine.Config{ 57 | KVConfig: store.KVConfig{ 58 | "dataDir": dataDirOpt, 59 | "debug": debugOpt, 60 | }, 61 | }) 62 | 63 | defer ng.Close() 64 | 65 | if fileOpt != "" { 66 | batch(ng, fileOpt) 67 | } else { 68 | cli(ng, homeOpt) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /cmd/cli/parser/parser_test.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 10 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 11 | ) 12 | 13 | func TestCliParserFromReader(t *testing.T) { 14 | commands := []engine.Command{} 15 | error := FromReader(strings.NewReader("using sample.TEST mergeset a 1;"), &commands) 16 | 17 | if error != nil { 18 | t.Error(error) 19 | } 20 | 21 | compareCommand(commands[0], engine.Command{ 22 | Index: "sample", 23 | Database: "TEST", 24 | Command: "mergeset", 25 | Key: []byte("a"), 26 | KeyType: engine.TypeString, 27 | Value: utils.Uint64ToBytes(1), 28 | ValueType: engine.TypeUint, 29 | }, t) 30 | 31 | compareArray(`using sample.test.idx mergeset a 2; 32 | using sample.document.db set 1 "{id: 1, name: \"teste\"}"; 33 | using sample.lalala set hello "world"; 34 | using sample.mimimi get hello; 35 | using sample.lelele delete "teste"; 36 | using sample.bleh.idx get uint(1); 37 | using sample.aaaa.bbb set uint(10000) int(10); 38 | using sample.bbbb.ccc mergeset "hellooooooooooooooooo" uint(102999299112211223); 39 | using sample.aaa delete "bbb" 40 | `, []engine.Command{ 41 | engine.Command{ 42 | Index: 
"sample", 43 | Database: "test.idx", 44 | Command: "mergeset", 45 | Key: []byte("a"), 46 | KeyType: engine.TypeString, 47 | Value: utils.Uint64ToBytes(2), 48 | ValueType: engine.TypeUint, 49 | }, 50 | engine.Command{ 51 | Index: "sample", 52 | Database: "document.db", 53 | Command: "set", 54 | Key: utils.Int64ToBytes(1), 55 | KeyType: engine.TypeInt, 56 | Value: []byte("{id: 1, name: \"teste\"}"), 57 | ValueType: engine.TypeString, 58 | }, 59 | engine.Command{ 60 | Index: "sample", 61 | Database: "lalala", 62 | Command: "set", 63 | Key: []byte("hello"), 64 | KeyType: engine.TypeString, 65 | Value: []byte("world"), 66 | ValueType: engine.TypeString, 67 | }, 68 | engine.Command{ 69 | Index: "sample", 70 | Database: "mimimi", 71 | Command: "get", 72 | Key: []byte("hello"), 73 | KeyType: engine.TypeString, 74 | }, 75 | engine.Command{ 76 | Index: "sample", 77 | Database: "lelele", 78 | Command: "delete", 79 | Key: []byte("teste"), 80 | KeyType: engine.TypeString, 81 | }, 82 | engine.Command{ 83 | Index: "sample", 84 | Database: "bleh.idx", 85 | Command: "get", 86 | Key: utils.Uint64ToBytes(1), 87 | KeyType: engine.TypeUint, 88 | }, 89 | engine.Command{ 90 | Index: "sample", 91 | Database: "aaaa.bbb", 92 | Command: "set", 93 | Key: utils.Uint64ToBytes(10000), 94 | KeyType: engine.TypeUint, 95 | Value: utils.Int64ToBytes(10), 96 | ValueType: engine.TypeInt, 97 | }, 98 | engine.Command{ 99 | Index: "sample", 100 | Database: "bbbb.ccc", 101 | Command: "mergeset", 102 | Key: []byte("hellooooooooooooooooo"), 103 | KeyType: engine.TypeString, 104 | Value: utils.Uint64ToBytes(102999299112211223), 105 | ValueType: engine.TypeUint, 106 | }, 107 | engine.Command{ 108 | Index: "sample", 109 | Database: "aaa", 110 | Command: "delete", 111 | Key: []byte("bbb"), 112 | KeyType: engine.TypeString, 113 | }, 114 | }, t) 115 | 116 | // underscore in the index name should pass 117 | commands = []engine.Command{} 118 | error = FromReader(strings.NewReader(`using sample.user_password set 
admin "s3cr3t"`), 119 | &commands) 120 | 121 | if error != nil { 122 | t.Error(error) 123 | } 124 | 125 | compareCommand(commands[0], engine.Command{ 126 | Index: "sample", 127 | Database: "user_password", 128 | Command: "set", 129 | Key: []byte("admin"), 130 | KeyType: engine.TypeString, 131 | Value: []byte("s3cr3t"), 132 | ValueType: engine.TypeString, 133 | }, t) 134 | 135 | // invalid keyword "usinga" 136 | shouldThrowError(` 137 | usinga sample.test.idx set "hello" "world"; 138 | `, t) 139 | 140 | } 141 | 142 | func compareCommand(cmd engine.Command, expected engine.Command, t *testing.T) { 143 | if !reflect.DeepEqual(cmd, expected) { 144 | t.Errorf("Unexpected parsed command: %v !== %v", cmd.Reverse(), expected.Reverse()) 145 | fmt.Printf("%v !== %v\n", cmd, expected) 146 | } 147 | } 148 | 149 | func shouldThrowError(bufferCommands string, t *testing.T) { 150 | resultCommands := []engine.Command{} 151 | 152 | error := FromReader(strings.NewReader(bufferCommands), &resultCommands) 153 | 154 | if error == nil { 155 | t.Errorf("Test SHOULD fail: %v", resultCommands) 156 | return 157 | } 158 | } 159 | 160 | func compareArray(bufferCommands string, expectedCommands []engine.Command, t *testing.T) { 161 | resultCommands := []engine.Command{} 162 | 163 | error := FromReader(strings.NewReader(bufferCommands), &resultCommands) 164 | 165 | if error != nil { 166 | t.Error(error) 167 | return 168 | } 169 | 170 | if len(resultCommands) != len(expectedCommands) { 171 | t.Errorf("Failed to parse all of the cmdline tests:\n\t %v !== \n\t %v", resultCommands, expectedCommands) 172 | } 173 | 174 | for i := 0; i < len(resultCommands); i++ { 175 | compareCommand(resultCommands[i], expectedCommands[i], t) 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /cmd/import/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM scratch 2 | 3 | ADD ./neosearch-import /neosearch-import 4 | 5 
| VOLUME ["/data"] 6 | 7 | CMD ["./neosearch-import"] -------------------------------------------------------------------------------- /cmd/import/README.md: -------------------------------------------------------------------------------- 1 | # neosearch-import 2 | 3 | # Build 4 | 5 | ``` 6 | go get -u github.com/NeowayLabs/neosearch 7 | go build -v -tags leveldb 8 | ``` 9 | 10 | # usage 11 | 12 | ``` 13 | $ ./neosearch-import 14 | [General options] 15 | --file, -f: Read NeoSearch JSON database from file. (Required) 16 | --create, -c: Create new index database 17 | --name, -n: Name of index database 18 | --data-dir, -d: Data directory 19 | --help, -h: Display this help 20 | ``` 21 | 22 | Indexing the sample file: 23 | ``` 24 | $ mkdir /tmp/data 25 | $ ./neosearch-import -f samples/operating_systems.json -c -d /tmp/data -n operating-systems 26 | ``` 27 | 28 | # How to verify the indexed data? 29 | 30 | Use the [neosearch-cli](https://github.com/NeowayLabs/neosearch-cli) tool: 31 | 32 | ``` 33 | $ go get -v -tags leveldb github.com/NeowayLabs/neosearch-cli 34 | $ neosearch-cli -d /tmp/data 35 | 36 | neosearch>using document.db get 0 37 | get: Success 38 | Result: {"_id":0,"authors":["Ken Thompson","Dennis Ritchie","Brian Kernighan","Douglas McIlroy","Joe Ossanna"],"family":"unix","id":1,"kernel":"unix","kernelType":"monolithic","name":"Unix","year":1971} 39 | neosearch>using document.db get 1 40 | get: Success 41 | Result: {"_id":1,"authors":["Ken Thompson","Dennis Ritchie","Rob Pike","Russ Cox","Dave Presotto","Phil Winterbottom"],"family":"unix","id":2,"kernel":"plan9","kernelType":"Hybrid","name":"Plan9 From Outer Space","year":1992} 42 | neosearch>using document.db get 2 43 | get: Success 44 | Result: {"_id":2,"authors":["Judd Vinet"],"family":"unix","id":3,"kernel":"Linux","kernelType":"monolithic","name":"ArchLinux","year":2002} 45 | neosearch>using document.db get 3 46 | get: Success 47 | Result: {"_id":3,"authors":["Patrick 
Volkerding"],"family":"unix","id":4,"kernel":"Linux","kernelType":"monolithic","name":"Slackware","year":1993} 48 | neosearch>using document.db get 4 49 | get: Success 50 | Result: {"_id":4,"authors":["Dave Cutler","Others"],"family":"windows","id":5,"kernel":"Windows NT","kernelType":"hybrid","name":"Windows NT","year":1993} 51 | neosearch> 52 | neosearch> 53 | neosearch>using name.idx get "plan9" 54 | get: Success 55 | Result[idx]: [1] 56 | neosearch> 57 | ``` 58 | -------------------------------------------------------------------------------- /cmd/import/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "log" 7 | "os" 8 | "os/signal" 9 | "runtime" 10 | "runtime/pprof" 11 | "time" 12 | 13 | "launchpad.net/gommap" 14 | 15 | "github.com/NeowayLabs/neosearch/lib/neosearch" 16 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 17 | "github.com/NeowayLabs/neosearch/lib/neosearch/index" 18 | "github.com/jteeuwen/go-pkg-optarg" 19 | ) 20 | 21 | func main() { 22 | var ( 23 | fileOpt, 24 | dataDirOpt, 25 | databaseName, 26 | profileFile, 27 | metadataStr string 28 | metadata = index.Metadata{} 29 | helpOpt, newIndex, debugOpt bool 30 | err error 31 | index *index.Index 32 | batchSize int 33 | ) 34 | 35 | optarg.Header("General options") 36 | optarg.Add("f", "file", "Read NeoSearch JSON database from file. 
(Required)", "") 37 | optarg.Add("c", "create", "Create new index database", false) 38 | optarg.Add("b", "batch-size", "Batch size", 1000) 39 | optarg.Add("n", "name", "Name of index database", "") 40 | optarg.Add("d", "data-dir", "Data directory", "") 41 | optarg.Add("t", "trace-debug", "Enable trace for debug", false) 42 | optarg.Add("h", "help", "Display this help", false) 43 | optarg.Add("p", "cpuprofile", "write cpu profile to file", "") 44 | optarg.Add("m", "metadata", "metadata of documents", "") 45 | 46 | for opt := range optarg.Parse() { 47 | switch opt.ShortName { 48 | case "f": 49 | fileOpt = opt.String() 50 | case "b": 51 | batchSize = opt.Int() 52 | case "d": 53 | dataDirOpt = opt.String() 54 | case "n": 55 | databaseName = opt.String() 56 | case "c": 57 | newIndex = true 58 | case "t": 59 | debugOpt = true 60 | case "p": 61 | profileFile = opt.String() 62 | case "m": 63 | metadataStr = opt.String() 64 | case "h": 65 | helpOpt = true 66 | } 67 | } 68 | 69 | if helpOpt { 70 | optarg.Usage() 71 | os.Exit(0) 72 | } 73 | 74 | if dataDirOpt == "" { 75 | dataDirOpt, _ = os.Getwd() 76 | } 77 | 78 | if fileOpt == "" { 79 | optarg.Usage() 80 | os.Exit(1) 81 | } 82 | 83 | if profileFile != "" { 84 | f, err := os.Create(profileFile) 85 | if err != nil { 86 | log.Fatal(err) 87 | } 88 | 89 | fmt.Println("Profiling to file: ", profileFile) 90 | pprof.StartCPUProfile(f) 91 | defer pprof.StopCPUProfile() 92 | } 93 | 94 | if metadataStr != "" { 95 | err = json.Unmarshal([]byte(metadataStr), &metadata) 96 | 97 | if err != nil { 98 | log.Fatal(err) 99 | } 100 | } 101 | 102 | cfg := config.NewConfig() 103 | 104 | cfg.Option(config.DataDir(dataDirOpt)) 105 | cfg.Option(config.Debug(debugOpt)) 106 | 107 | neo := neosearch.New(cfg) 108 | 109 | if newIndex { 110 | log.Printf("Creating index %s\n", databaseName) 111 | index, err = neo.CreateIndex(databaseName) 112 | } else { 113 | log.Printf("Opening index %s ...\n", databaseName) 114 | index, err = neo.OpenIndex(databaseName) 
115 | } 116 | 117 | if err != nil { 118 | log.Fatalf("Failed to open database '%s': %v", err) 119 | return 120 | } 121 | 122 | file, err := os.OpenFile(fileOpt, os.O_RDONLY, 0) 123 | 124 | if err != nil { 125 | log.Fatalf("Unable to open file: %s", fileOpt) 126 | return 127 | } 128 | 129 | jsonBytes, err := gommap.Map(file.Fd(), gommap.PROT_READ, 130 | gommap.MAP_PRIVATE) 131 | 132 | if err != nil { 133 | panic(err) 134 | } 135 | 136 | data := make([]map[string]interface{}, 0) 137 | 138 | err = json.Unmarshal(jsonBytes, &data) 139 | 140 | if err != nil { 141 | panic(err) 142 | } 143 | 144 | jsonBytes = nil 145 | 146 | startTime := time.Now() 147 | 148 | index.Batch() 149 | var count int 150 | totalResults := len(data) 151 | 152 | runtime.GC() 153 | 154 | cleanup := func() { 155 | neo.Close() 156 | file.Close() 157 | if profileFile != "" { 158 | fmt.Println("stopping profile: ", profileFile) 159 | pprof.StopCPUProfile() 160 | } 161 | } 162 | 163 | c := make(chan os.Signal, 1) 164 | signal.Notify(c, os.Interrupt) 165 | go func() { 166 | <-c 167 | cleanup() 168 | os.Exit(1) 169 | }() 170 | 171 | defer func() { 172 | if r := recover(); r != nil { 173 | fmt.Println("Recovered from panic", r) 174 | cleanup() 175 | os.Exit(1) 176 | } 177 | 178 | cleanup() 179 | }() 180 | 181 | fmt.Println("Importing ", len(data), " records") 182 | 183 | for idx := range data { 184 | dataEntry := data[idx] 185 | 186 | if dataEntry["_id"] == nil { 187 | dataEntry["_id"] = idx 188 | } 189 | 190 | entryJSON, err := json.Marshal(&dataEntry) 191 | if err != nil { 192 | log.Println(err) 193 | return 194 | } 195 | 196 | err = index.Add(uint64(idx), entryJSON, metadata) 197 | if err != nil { 198 | panic(err) 199 | } 200 | 201 | if count == batchSize { 202 | count = 0 203 | 204 | fmt.Println("Flushing batch: ", idx, " from ", totalResults) 205 | index.FlushBatch() 206 | if idx != (totalResults - 1) { 207 | index.Batch() 208 | } 209 | 210 | runtime.GC() 211 | } else { 212 | count = count + 1 213 | } 
214 | 215 | data[idx] = nil 216 | } 217 | 218 | index.FlushBatch() 219 | index.Close() 220 | neo.Close() 221 | 222 | elapsed := time.Since(startTime) 223 | 224 | log.Printf("Database indexed in %v\n", elapsed) 225 | } 226 | -------------------------------------------------------------------------------- /cmd/import/nsimport: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeowayLabs/neosearch/bae6caffbf140bd195cc4dea90cc6c643e38f5e2/cmd/import/nsimport -------------------------------------------------------------------------------- /cmd/import/profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeowayLabs/neosearch/bae6caffbf140bd195cc4dea90cc6c643e38f5e2/cmd/import/profile.png -------------------------------------------------------------------------------- /cmd/import/profile0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeowayLabs/neosearch/bae6caffbf140bd195cc4dea90cc6c643e38f5e2/cmd/import/profile0.png -------------------------------------------------------------------------------- /cmd/import/profile0.prof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeowayLabs/neosearch/bae6caffbf140bd195cc4dea90cc6c643e38f5e2/cmd/import/profile0.prof -------------------------------------------------------------------------------- /cmd/import/profile1.prof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeowayLabs/neosearch/bae6caffbf140bd195cc4dea90cc6c643e38f5e2/cmd/import/profile1.prof -------------------------------------------------------------------------------- /cmd/import/profile2.prof: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NeowayLabs/neosearch/bae6caffbf140bd195cc4dea90cc6c643e38f5e2/cmd/import/profile2.prof -------------------------------------------------------------------------------- /cmd/import/samples/operating_systems.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1, 4 | "name": "Unix", 5 | "family": "unix", 6 | "year": 1971, 7 | "kernel": "unix", 8 | "kernelType": "monolithic", 9 | "authors": [ 10 | "Ken Thompson", 11 | "Dennis Ritchie", 12 | "Brian Kernighan", 13 | "Douglas McIlroy", 14 | "Joe Ossanna" 15 | ] 16 | }, 17 | { 18 | "id": 2, 19 | "name": "Plan9 From Outer Space", 20 | "family": "unix", 21 | "kernel": "plan9", 22 | "kernelType": "Hybrid", 23 | "year": 1992, 24 | "authors": [ 25 | "Ken Thompson", 26 | "Dennis Ritchie", 27 | "Rob Pike", 28 | "Russ Cox", 29 | "Dave Presotto", 30 | "Phil Winterbottom" 31 | ] 32 | }, 33 | { 34 | "id": 3, 35 | "name": "ArchLinux", 36 | "family": "unix", 37 | "year": 2002, 38 | "kernel": "Linux", 39 | "kernelType": "monolithic", 40 | "authors": [ 41 | "Judd Vinet" 42 | ] 43 | }, 44 | { 45 | "id": 4, 46 | "name": "Slackware", 47 | "family": "unix", 48 | "kernel": "Linux", 49 | "kernelType": "monolithic", 50 | "year": 1993, 51 | "authors": [ 52 | "Patrick Volkerding" 53 | ] 54 | }, 55 | { 56 | "id": 5, 57 | "name": "Windows NT", 58 | "family": "windows", 59 | "kernel": "Windows NT", 60 | "kernelType": "hybrid", 61 | "year": 1993, 62 | "authors": [ 63 | "Dave Cutler", 64 | "Others" 65 | ] 66 | } 67 | ] 68 | -------------------------------------------------------------------------------- /docs/Dockerfile: -------------------------------------------------------------------------------- 1 | # This file describes the standard way to build neosearch, using docker 2 | 3 | FROM neosearch-dev 4 | MAINTAINER Tiago Katcipis (@tiagokatcipis) 5 | 6 | # Packaged dependencies 7 | RUN apt-get update && apt-get install -y \ 8 | python-pip \ 9 | openjdk-7-jdk \ 
10 | maven \ 11 | --no-install-recommends 12 | 13 | # Install Mkdocs 14 | RUN pip install mkdocs 15 | 16 | WORKDIR /swagger-codegen 17 | 18 | RUN git clone https://github.com/swagger-api/swagger-codegen.git . 19 | 20 | RUN mvn package 21 | 22 | WORKDIR /go/src/github.com/NeowayLabs/neosearch 23 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | * [Motivation](https://github.com/NeowayLabs/neosearch/blob/master/docs/motivation.md) 4 | * [Architecture](https://github.com/NeowayLabs/neosearch/blob/master/docs/architecture.md) 5 | 6 | ## Proposals 7 | 8 | * [Dump and Restore](https://github.com/NeowayLabs/neosearch/blob/master/docs/dump-restore.md) 9 | 10 | -------------------------------------------------------------------------------- /docs/dump-restore.md: -------------------------------------------------------------------------------- 1 | # Proposal for dump/restore 2 | 3 | We need a way to dump and restore index databases. Today we have a low-level interface using `engine.Command` to communicate with storage. This is the interface used by [neosearch-cli](https://github.com/NeowayLabs/neosearch-cli) to access the indices. With `neosearch-cli` we can process commands stored in a text file with neosearch command syntax like below: 4 | 5 | ``` 6 | using title.idx set "hello" 1; 7 | using id.idx set 1 "{test: \"1\"}"; 8 | using title.idx mergeset "hello" 2; 9 | using title.idx mergeset "hello" 10; 10 | using document.db set 1 "{\"title\": \"hello\", \"id\": 1}"; 11 | ``` 12 | 13 | As described [here](https://github.com/NeowayLabs/neosearch/wiki/Internal-Concepts#indexing-steps), we can simulate the internal process of indexing a document in neosearch with commands stored in a file and processed with `neosearch-cli`. We can generate a huge `index.ns` file with all of the commands needed to re-index the database.
14 | 15 | To implement the `dump` feature, we only need to get an [Iterator](https://github.com/NeowayLabs/neosearch/blob/master/store/store.go#L15) in each index database and create `engine.Command` entries in a file with the `neosearch-cli` syntax (like [this](https://github.com/NeowayLabs/neosearch-cli/blob/master/index_data.ns)). 16 | 17 | To implement the `restore` feature we only need to add parallelism to the `neosearch-cli` tool to process the dumped file. As both the neosearch library and the neosearch-cli tool use `engine.Command` to interact with storage, we can guarantee that this will work as expected. 18 | -------------------------------------------------------------------------------- /docs/img/NeoSearch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NeowayLabs/neosearch/bae6caffbf140bd195cc4dea90cc6c643e38f5e2/docs/img/NeoSearch.png -------------------------------------------------------------------------------- /docs/motivation.md: -------------------------------------------------------------------------------- 1 | # Motivation 2 | 3 | To understand the motivation behind NeoSearch's creation, we need a bit of 4 | background about the project [Lucene](http://lucene.apache.org/) and the type 5 | of problems for which it doesn't work. 6 | 7 | ## Data Join 8 | 9 | Lucene and SOLR were used internally at [Neoway](http://www.neoway.com.br) 10 | for over five years, and during this time, it was the only mature tech for 11 | full-text search that we could find. When we had only one main index, 12 | which stored all the search information, SOLR solved our problem very well. 13 | 14 | But as the company grew, and the information captured by our robots became 15 | more structured, the flat characteristic of Lucene / SOLR began to show its 16 | cracks. In short, Lucene was not designed to JOIN between different indices.
17 | All current solutions to this problem, both in SOLR and ElasticSearch, are 18 | workarounds to solve a problem in an architecture that is not designed to 19 | solve that problem. 20 | 21 | At first we tried to arrange the information in separate 22 | indices and use the "Join" syntax available in SOLR-4 to search relations 23 | between them. But in this way we completely 24 | [lost the ability to scale horizontally](https://wiki.apache.org/solr/DistributedSearch#line-38). 25 | 26 | The current solution presented for this by SOLR and ElasticSearch is the 27 | parent-child relationship between documents. This technique is a better 28 | approach, but, in the same way, the index doesn't scale correctly across 29 | shards and requires a special way to index documents that have relationships. 30 | Some problems are: 31 | 32 | * The child documents are always stored in the same shard as their parent documents; 33 | * The whole map of parent and child IDs is stored in memory; 34 | * A child document is limited to only one parent; 35 | 36 | To explain these limitations, consider the following example of indexing the USA 37 | population and companies: 38 | 39 | Imagine we have an index called "people" that has 310 million entries and we 40 | have another index called "company" that has 31.6 million entries. The company 41 | index has a relationship with the people index by the "partner" field and 42 | "employ" field. 43 | 44 | * Each company has one or more partners in the people index; 45 | * Each company has zero or more employees in the people index; 46 | 47 | Using the solution available in Lucene indices, we have to first index the 48 | parent documents, in this case `company` documents, and then index two other 49 | indices for people. The first to index the partners and the other to index 50 | the employees. For each partner, we will index in the `people_partner` index 51 | specifying the correct company parent.
And, for each employee, we will 52 | index in the "people_employee" index specifying the parent company document. 53 | 54 | Some problems that arise: 55 | 56 | * We will end up with irregular shards. ElasticSearch puts the parent and children 57 | documents in the same shard, so the size (MEM, CPU, Disk, etc.) 58 | needed by the shard machines isn't predictable 59 | because each company has a different number of employees and partners. 60 | 61 | * Another problem is that for each relationship with the people index, you will 62 | need to replicate the information in another index (like `partner_people`, 63 | `employee_people`, `former_employee_people`, etc.). 64 | 65 | * If the information of one entry in the people index changes, we will need 66 | to update this information in every `people like` index. **Critical** in our 67 | data model. 68 | 69 | * For each parent-child relationship, ElasticSearch will maintain the parent 70 | IDs (in string format) and child IDs (8 bytes per ID) for each relation in 71 | memory. This implementation can be a serious problem if the parent index has 72 | a lot of relations. In the case above, it is ~ **4.75 GB** only for the memory map 73 | if we consider an average of three partners and three employees per company. 74 | 75 | The item above is only one example that shows that relationships are a big 76 | problem in the current search solutions. In the business intelligence field, 77 | we need to cross a lot of information to find patterns, trends, frauds, etc., 78 | and duplicating all of that information in the indices isn't an option. We know 79 | that search engines aren't relational databases, but managing relationships 80 | in a reverse index is crucial today, and for this reason ElasticSearch and 81 | SOLR support workarounds for this. 82 | 83 | We seek a reverse index solution that manages relationships and index updates efficiently.
84 | 85 | 86 | ## To be or not to be schemaless 87 | 88 | Another problem is the fact that Lucene requires a fixed schema for documents. 89 | Why is that a problem? Well, currently, if you use ElasticSearch, [it will say that it is 90 | schemaless](https://www.elastic.co/guide/en/elasticsearch/reference/1.6/mapping-object-type.html), 91 | but it is not, at least not in the sense of a normal document-based database. 92 | 93 | If you want to add a few documents with a field *id* mapping to a string, and then have a few more with 94 | this same name but mapping to an integer ... you are going to have a bad time. 95 | 96 | By bad time, we mean fully re-indexing your whole database. Since our business depends on being agile and adapting 97 | fast to changes, NeoSearch strives to be truly schemaless and avoid re-indexing at all costs. 98 | -------------------------------------------------------------------------------- /docs/rest/api.yml: -------------------------------------------------------------------------------- 1 | --- 2 | swagger: "2.0" 3 | info: 4 | description: "NeoSearch REST API Documentation.
More information on http://github.com/NeowayLabs/neosearch" 5 | version: "1.0.0" 6 | title: "NeoSearch" 7 | termsOfService: "" 8 | contact: 9 | email: "tiago.natel@neoway.com.br" 10 | license: 11 | name: "Apache 2.0" 12 | url: "http://www.apache.org/licenses/LICENSE-2.0.html" 13 | host: "neosearch.github.io" 14 | basePath: "/v1" 15 | schemes: 16 | - "http" 17 | paths: 18 | /{index}: 19 | get: 20 | tags: 21 | - "index" 22 | - "info" 23 | summary: "Get information about the index" 24 | description: "" 25 | operationId: "infoIndex" 26 | produces: 27 | - "application/json" 28 | parameters: 29 | - 30 | in: "path" 31 | name: "index" 32 | type: "string" 33 | description: "Name of the index" 34 | required: true 35 | responses: 36 | 200: 37 | description: "Index informations" 38 | schema: 39 | type: "object" 40 | items: 41 | "$ref": "#/definitions/index" 42 | 400: 43 | description: "There is no index with that name" 44 | put: 45 | tags: 46 | - "index" 47 | - "create" 48 | summary: "Create new index" 49 | description: "" 50 | operationId: "createIndex" 51 | produces: 52 | - "application/json" 53 | parameters: 54 | - 55 | in: "path" 56 | name: "index" 57 | type: "string" 58 | description: "Name of the index" 59 | required: true 60 | responses: 61 | 200: 62 | description: "Index created successfully" 63 | schema: 64 | type: "object" 65 | items: 66 | "$ref": "#/definitions/status" 67 | 400: 68 | description: "Invalid index name" 69 | schema: 70 | items: 71 | "$ref": "#/definitions/status" 72 | delete: 73 | tags: 74 | - "index" 75 | - "delete" 76 | summary: "Delete index" 77 | description: "" 78 | operationId: "deleteIndex" 79 | produces: 80 | - "application/json" 81 | parameters: 82 | - name: index 83 | in: path 84 | description: Name of the index 85 | type: string 86 | required: true 87 | responses: 88 | 200: 89 | description: "deleted successfully" 90 | schema: 91 | items: 92 | $ref: "#/definitions/status" 93 | /{index}/{id}: 94 | get: 95 | tags: 96 | - "get" 97 | - 
"document" 98 | summary: "Get document" 99 | produces: 100 | - "application/json" 101 | parameters: 102 | - name: "index" 103 | in: path 104 | description: "Name of the index" 105 | type: string 106 | required: true 107 | - name: "id" 108 | in: path 109 | description: "id of document" 110 | type: integer 111 | format: uint64 112 | responses: 113 | 200: 114 | description: "Success" 115 | schema: 116 | items: 117 | type: "object" 118 | post: 119 | tags: 120 | - "add" 121 | - "document" 122 | summary: "Add document to index" 123 | operationId: "addDocument" 124 | consumes: 125 | - "application/json" 126 | produces: 127 | - "application/json" 128 | parameters: 129 | - name: "index" 130 | in: path 131 | description: "Name of the index" 132 | type: string 133 | required: true 134 | - name: id 135 | in: path 136 | description: ID of document 137 | type: string 138 | responses: 139 | 200: 140 | description: "Document indexed" 141 | schema: 142 | $ref: "#/definitions/status" 143 | definitions: 144 | status: 145 | properties: 146 | error: 147 | type: "string" 148 | index: 149 | properties: 150 | name: 151 | type: "string" 152 | xml: 153 | name: "index" 154 | -------------------------------------------------------------------------------- /hack/check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # The script does automatic checking on a Go package and its sub-packages, including: 3 | # 1. gofmt (http://golang.org/cmd/gofmt/) 4 | # 2. goimports (https://github.com/bradfitz/goimports) 5 | # 3. golint (https://github.com/golang/lint) 6 | # 4. go vet (http://golang.org/cmd/vet) 7 | # 5. race detector (http://blog.golang.org/race-detector) 8 | # 6. test coverage (http://blog.golang.org/cover) 9 | 10 | set -e 11 | 12 | GO="go" 13 | TEST_FLAGS="-tags $STORAGE_ENGINE" 14 | 15 | # Automatic checks 16 | test -z "$(gofmt -l -w . | tee /dev/stderr)" 17 | #test -z "$(goimports -l -w . | tee /dev/stderr)" 18 | test -z "$(golint . 
| tee /dev/stderr)" 19 | #$GO vet ./... 20 | #$GO test -tags goleveldb -race ./... 21 | 22 | # Run test coverage on each subdirectories and merge the coverage profile. 23 | 24 | echo "mode: count" > coverage.txt 25 | 26 | if [ "x${TEST_DIRECTORY:0:1}" != "x." ]; then 27 | TEST_DIRECTORY="./$TEST_DIRECTORY" 28 | fi 29 | 30 | # Standard $GO tooling behavior is to ignore dirs with leading underscore 31 | for dir in $(find "$TEST_DIRECTORY" -maxdepth 10 -not -path './.git*' -not -path './Godeps/*' -not -path './lib/neosearch/store/leveldb' -type d); 32 | do 33 | if ls $dir/*.go &> /dev/null; then 34 | $GO test $TEST_FLAGS -v -race -covermode=count -coverprofile="$dir/profile.tmp" "$dir" 35 | if [ -f $dir/profile.tmp ] 36 | then 37 | cat $dir/profile.tmp | tail -n +2 >> coverage.txt 38 | rm $dir/profile.tmp 39 | fi 40 | 41 | # Stress 42 | # hack/stress-test.sh 43 | fi 44 | done 45 | 46 | $GO tool cover -func coverage.txt 47 | 48 | # To submit the test coverage result to coveralls.io, 49 | # use goveralls (https://github.com/mattn/goveralls) 50 | # goveralls -coverprofile=profile.cov -service=travis-ci 51 | -------------------------------------------------------------------------------- /hack/deps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DEPS_FILE="$1" 4 | 5 | if [[ -z "$DEPS_FILE" ]]; then 6 | DEPS_FILE="/deps.txt" 7 | fi 8 | 9 | cat "$DEPS_FILE" | xargs go get -tags $STORAGE_ENGINE -v -d 2>/dev/null 10 | 11 | exit 0 12 | -------------------------------------------------------------------------------- /hack/docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "Generating mkdocs" 4 | mkdocs build --clean 5 | 6 | echo "Generating Go code docs" 7 | mkdir -p site/code 8 | godoc -html=true . 
> site/code/index.html 9 | 10 | echo "Generating REST API docs" 11 | swagger="/swagger-codegen/modules/swagger-codegen-cli/target/swagger-codegen-cli.jar" 12 | java -jar $swagger generate -i ./docs/rest/api.yml -l html -o site/rest 13 | -------------------------------------------------------------------------------- /hack/gendeps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | STORAGE_ENGINE="$1" 4 | 5 | if [[ -z "$STORAGE_ENGINE" ]]; then 6 | STORAGE_ENGINE="goleveldb" 7 | fi 8 | 9 | echo "Generating dependencies file for storage engine ($STORAGE_ENGINE): hack/deps.txt" 10 | go get -insecure -v -u -tags "$STORAGE_ENGINE" ./... 2>&1 | grep download | grep -v neosearch | sed 's/ (download)//g' > hack/deps.txt 11 | 12 | -------------------------------------------------------------------------------- /hack/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | VERSION=0.1.0 4 | 5 | DEFAULT_BUNDLES=( 6 | library 7 | server 8 | cli 9 | import 10 | ) 11 | 12 | bundle() { 13 | bundlescript=$1 14 | bundle=$(basename $bundlescript) 15 | echo "---> Making bundle: $bundle (in bundles/$VERSION/$bundle)" 16 | mkdir -p bundles/$VERSION/$bundle 17 | source "$bundlescript" "$(pwd)/bundles/$VERSION/$bundle" 18 | } 19 | 20 | main() { 21 | # We want this to fail if the bundles already exist and cannot be removed. 22 | # This is to avoid mixing bundles from different versions of the code. 23 | mkdir -p bundles 24 | if [ -e "bundles/$VERSION" ]; then 25 | echo "bundles/$VERSION already exists. Removing." 
26 | rm -fr bundles/$VERSION && mkdir bundles/$VERSION || exit 1 27 | echo 28 | fi 29 | SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 30 | 31 | if [ $# -lt 1 ]; then 32 | bundles=(${DEFAULT_BUNDLES[@]}) 33 | else 34 | bundles=($@) 35 | fi 36 | 37 | 38 | for bundle in ${bundles[@]}; do 39 | bundle $SCRIPTDIR/make/$bundle 40 | echo 41 | done 42 | } 43 | 44 | main "$@" 45 | -------------------------------------------------------------------------------- /hack/make/cli: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | DEST=$1 6 | BINARY_NAME="nscli-$VERSION" 7 | 8 | go build \ 9 | -v \ 10 | -tags "$STORAGE_ENGINE" \ 11 | -o "$DEST/$BINARY_NAME" \ 12 | ./cmd/cli 13 | 14 | echo "Created binary: $DEST/$BINARY_FULLNAME" 15 | ln -sf "$BINARY_NAME" "$DEST/nscli" 16 | 17 | -------------------------------------------------------------------------------- /hack/make/import: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | DEST=$1 6 | BINARY_NAME="nsimport-$VERSION" 7 | 8 | go build \ 9 | -v \ 10 | -tags "$STORAGE_ENGINE" \ 11 | -o "$DEST/$BINARY_NAME" \ 12 | ./cmd/import 13 | 14 | echo "Created binary: $DEST/$BINARY_FULLNAME" 15 | ln -sf "$BINARY_NAME" "$DEST/nsimport" 16 | 17 | -------------------------------------------------------------------------------- /hack/make/library: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | DEST=$1 6 | 7 | go build -tags "$STORAGE_ENGINE" -v -o "$DEST/neosearch.a" ./lib/neosearch/ 8 | 9 | 10 | -------------------------------------------------------------------------------- /hack/make/server: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | DEST=$1 6 | BINARY_NAME="neosearch-$VERSION" 7 | 8 | echo "Building with -tags $STORAGE_ENGINE" 9 | 10 | go 
build \ 11 | -v \ 12 | -tags "$STORAGE_ENGINE" \ 13 | -o "$DEST/$BINARY_NAME" \ 14 | ./service/neosearch 15 | 16 | echo "Created binary: $DEST/$BINARY_FULLNAME" 17 | ln -sf "$BINARY_NAME" "$DEST/neosearch" 18 | 19 | -------------------------------------------------------------------------------- /hack/stress-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # go test -c 4 | # comment above and uncomment below to enable the race builder 5 | go test -c -race 6 | PKG=$(basename $(pwd)) 7 | 8 | while true ; do 9 | export GOMAXPROCS=$[ 1 + $[ RANDOM % 128 ]] 10 | ./$PKG.test $@ 2>&1 11 | done 12 | -------------------------------------------------------------------------------- /lib/neosearch/cache/cache.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | type OnRemoveCb func(key string, value interface{}) 4 | 5 | type Cache interface { 6 | // Add new entry to cache 7 | Add(key string, value interface{}) 8 | 9 | // Get the entry with `key` 10 | Get(key string) (interface{}, bool) 11 | 12 | // Remove `key` from cache 13 | Remove(key string) bool 14 | 15 | // OnRemove set a callback to be executed when entries are removed 16 | // from cache 17 | OnRemove(cb OnRemoveCb) 18 | 19 | // MaxEntries set or update the max entries of cache 20 | MaxEntries(max int) 21 | 22 | Clean() 23 | 24 | Len() int 25 | } 26 | -------------------------------------------------------------------------------- /lib/neosearch/cache/cache_test.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import "testing" 4 | 5 | func TestLRUCreate(t *testing.T) { 6 | lru := NewLRUCache(1) 7 | 8 | if lru == nil { 9 | t.Error("Failed to create LRUCache") 10 | } 11 | 12 | lru = NewLRUCache(0) 13 | 14 | if lru != nil { 15 | t.Error("LRUCache should fails for zero length") 16 | } 17 | 18 | lru = NewLRUCache(-1) 19 | 20 | if lru != nil 
{ 21 | t.Error("LRUCache should fails for zero length") 22 | } 23 | } 24 | 25 | func TestLRUAllowAddInfiniValues(t *testing.T) { 26 | var ( 27 | vi interface{} 28 | value int 29 | ok bool 30 | ) 31 | 32 | lru := NewLRUCache(1) 33 | 34 | if lru.Len() != 0 { 35 | t.Error("OMG... Very piece of buggy code") 36 | } 37 | 38 | if vi, ok = lru.Get("some thing"); ok == true || vi != nil { 39 | t.Error("... i dont beliece that ...") 40 | } 41 | 42 | lru.Add("a", 10) 43 | 44 | if vi, ok = lru.Get("a"); !ok || vi == nil { 45 | t.Error("Failed to get entry") 46 | } else { 47 | value = vi.(int) 48 | 49 | if value != 10 { 50 | t.Error("Failed to get entry") 51 | } 52 | } 53 | 54 | if lru.Len() != 1 { 55 | t.Error("Invalid length") 56 | } 57 | 58 | lru.Add("b", 11) 59 | 60 | if vi, ok = lru.Get("b"); !ok || vi == nil { 61 | t.Error("Failed to get entry") 62 | } else { 63 | value = vi.(int) 64 | 65 | if value != 11 { 66 | t.Error("Failed to get entry") 67 | } 68 | } 69 | 70 | if vi, ok = lru.Get("a"); ok || vi != nil { 71 | t.Error("entry shouldnt exists") 72 | } 73 | 74 | if lru.Len() != 1 { 75 | t.Error("Invalid Length") 76 | } 77 | 78 | lru.Add("c", 12) 79 | 80 | if vi, ok = lru.Get("c"); !ok || vi == nil { 81 | t.Error("Failed to get entry") 82 | } else { 83 | value = vi.(int) 84 | 85 | if value != 12 { 86 | t.Error("Failed to get entry") 87 | } 88 | } 89 | 90 | if vi, ok = lru.Get("a"); ok || vi != nil { 91 | t.Error("entry shouldnt exists") 92 | } 93 | 94 | if vi, ok = lru.Get("b"); ok || vi != nil { 95 | t.Error("entry shouldnt exists") 96 | } 97 | 98 | if lru.Len() != 1 { 99 | t.Error("Invalid Length") 100 | } 101 | 102 | lru = nil 103 | 104 | lru = NewLRUCache(2) 105 | 106 | lru.Add("a", 1) 107 | 108 | if lru.Len() != 1 { 109 | t.Error("Invalid Length") 110 | } 111 | 112 | lru.Add("b", 2) 113 | 114 | if lru.Len() != 2 { 115 | t.Error("Invalid Length") 116 | } 117 | 118 | lru.Add("c", 3) 119 | 120 | if lru.Len() != 2 { 121 | t.Error("Invalid Length") 122 | } 123 | } 124 | 
125 | func TestLRUOnRemoveCallback(t *testing.T) { 126 | lru := NewLRUCache(2) 127 | works := false 128 | removedKey := "" 129 | removedVal := 0 130 | 131 | lru.OnRemove(func(key string, value interface{}) { 132 | v, _ := value.(int) 133 | 134 | removedKey = key 135 | removedVal = v 136 | works = true 137 | }) 138 | 139 | lru.Add("teste", 1) 140 | lru.Add("teste2", 2) 141 | lru.Add("teste3", 3) 142 | 143 | if works != true || removedKey != "teste" || removedVal != 1 { 144 | t.Error("OnRemove callback not invoked OR called concurrently") 145 | } 146 | 147 | works = false 148 | lru.Add("teste4", 4) 149 | 150 | if works != true || removedKey != "teste2" || removedVal != 2 { 151 | t.Error("OnRemove callback not invoked OR called concurrently") 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /lib/neosearch/cache/lru.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import "container/list" 4 | 5 | type LRUCache struct { 6 | max int 7 | 8 | onRemove OnRemoveCb 9 | 10 | ll *list.List 11 | cache map[string]*list.Element 12 | } 13 | 14 | type entry struct { 15 | key string 16 | value interface{} 17 | } 18 | 19 | // NewCache returns a new LRU cache for interface{} entries 20 | func NewLRUCache(max int) *LRUCache { 21 | if max <= 0 { 22 | return nil 23 | } 24 | 25 | return &LRUCache{ 26 | max: max, 27 | ll: list.New(), 28 | cache: make(map[string]*list.Element), 29 | } 30 | } 31 | 32 | func (lru *LRUCache) OnRemove(cb OnRemoveCb) { 33 | lru.onRemove = cb 34 | } 35 | 36 | // MaxEntries update the max allowed entries in cache 37 | func (lru *LRUCache) MaxEntries(max int) { 38 | lru.max = max 39 | } 40 | 41 | func (lru *LRUCache) Len() int { 42 | return lru.ll.Len() 43 | } 44 | 45 | // Add new interface{} value to LRUCache. 
46 | func (lru *LRUCache) Add(key string, value interface{}) { 47 | var ( 48 | elem *list.Element 49 | ok bool 50 | ) 51 | 52 | if elem, ok = lru.cache[key]; ok { 53 | lru.removeElement(elem) 54 | } 55 | 56 | elem = lru.ll.PushFront(&entry{key, value}) 57 | lru.cache[key] = elem 58 | 59 | if lru.ll.Len() > lru.max { 60 | lru.removeOldest() 61 | } 62 | } 63 | 64 | // Get the given `key` from cache. If the key exists, it will be ranked 65 | // to top of the cache. 66 | func (lru *LRUCache) Get(key string) (interface{}, bool) { 67 | var ( 68 | elem *list.Element 69 | ok bool 70 | ) 71 | 72 | if lru.cache == nil || len(lru.cache) == 0 { 73 | return nil, false 74 | } 75 | 76 | elem, ok = lru.cache[key] 77 | 78 | if ok { 79 | lru.ll.MoveToFront(elem) 80 | return elem.Value.(*entry).value, true 81 | } 82 | 83 | return nil, false 84 | } 85 | 86 | func (lru *LRUCache) Remove(key string) bool { 87 | var ( 88 | elem *list.Element 89 | ok bool 90 | ) 91 | 92 | if lru.cache == nil || len(lru.cache) == 0 { 93 | return false 94 | } 95 | 96 | elem, ok = lru.cache[key] 97 | 98 | if ok { 99 | lru.removeElement(elem) 100 | return true 101 | } 102 | 103 | return false 104 | } 105 | 106 | func (lru *LRUCache) removeOldest() { 107 | var elem *list.Element 108 | 109 | if lru.cache == nil || len(lru.cache) == 0 { 110 | return 111 | } 112 | 113 | elem = lru.ll.Back() 114 | if elem != nil { 115 | lru.removeElement(elem) 116 | } 117 | } 118 | 119 | func (lru *LRUCache) removeElement(elem *list.Element) { 120 | lru.ll.Remove(elem) 121 | 122 | kv := elem.Value.(*entry) 123 | delete(lru.cache, kv.key) 124 | 125 | if lru.onRemove != nil { 126 | lru.onRemove(kv.key, kv.value) 127 | } 128 | } 129 | 130 | // Clean remove all elements of cache calling the OnRemove callback 131 | // when needed! 
132 | func (lru *LRUCache) Clean() { 133 | var ( 134 | cacheLen int = len(lru.cache) 135 | elem *list.Element 136 | key string 137 | value interface{} 138 | ) 139 | 140 | if lru.cache == nil || cacheLen == 0 { 141 | return 142 | } 143 | 144 | for elem = lru.ll.Front(); elem != nil; elem = elem.Next() { 145 | ee := elem.Value.(*entry) 146 | key = ee.key 147 | value = ee.value 148 | 149 | if lru.onRemove != nil { 150 | lru.onRemove(key, value) 151 | } 152 | 153 | lru.removeElement(elem) 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /lib/neosearch/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "io/ioutil" 5 | "log" 6 | "os" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 10 | "gopkg.in/yaml.v2" 11 | ) 12 | 13 | const ( 14 | // DefaultDataDir is the default root directory 15 | // Config.DataDir. 16 | DefaultDataDir string = "/data" 17 | 18 | // Config.Debug. 19 | DefaultDebug bool = false 20 | 21 | // DefaultMaxIndicesOpen is the default max number of indices maintained open 22 | // Config.MaxIndicesOpen. 23 | DefaultMaxIndicesOpen int = 50 24 | ) 25 | 26 | // Option is a internal type used for setting config options. 27 | // This need be exported only because the usage of the 28 | // self-referential functions in the option assignments. 29 | // More details on the Rob Pike blog post below: 30 | // http://commandcenter.blogspot.com.br/2014/01/self-referential-functions-and-design.html 31 | type Option func(c *Config) Option 32 | 33 | // Config stores NeoSearch configurations 34 | type Config struct { 35 | // Root directory where all of the indices will be written. 
36 | DataDir string `yaml:"dataDir"` 37 | 38 | // Enables debug in every sub-module 39 | Debug bool `yaml:"debug"` 40 | 41 | // MaxIndicesOpen is the max number of indices maintained open 42 | MaxIndicesOpen int `yaml:"maxIndicesOpen"` 43 | 44 | // Engine engine configurations 45 | Engine *engine.Config `yaml:"engine"` 46 | } 47 | 48 | // NewConfig creates new config 49 | func NewConfig() *Config { 50 | return &Config{ 51 | DefaultDataDir, 52 | DefaultDebug, 53 | DefaultMaxIndicesOpen, 54 | engine.NewConfig(), 55 | } 56 | } 57 | 58 | // Option configures the config struct 59 | func (c *Config) Option(opts ...Option) (previous Option) { 60 | for _, opt := range opts { 61 | previous = opt(c) 62 | } 63 | return previous 64 | } 65 | 66 | // Debug enables or disable debug 67 | func Debug(t bool) Option { 68 | return func(c *Config) Option { 69 | previous := c.Debug 70 | c.Debug = t 71 | return Debug(previous) 72 | } 73 | } 74 | 75 | // DataDir set the data directory for neosearch database and indices 76 | func DataDir(path string) Option { 77 | return func(c *Config) Option { 78 | previous := c.DataDir 79 | c.DataDir = path 80 | return DataDir(previous) 81 | } 82 | } 83 | 84 | // KVStore set the default kvstore 85 | func KVStore(kvstore string) Option { 86 | return func(c *Config) Option { 87 | previous := c.Engine.KVStore 88 | c.Engine.KVStore = kvstore 89 | return KVStore(previous) 90 | } 91 | } 92 | 93 | // MaxIndicesOpen set the maximum number of open indices 94 | func MaxIndicesOpen(size int) Option { 95 | return func(c *Config) Option { 96 | previous := c.MaxIndicesOpen 97 | c.MaxIndicesOpen = size 98 | return MaxIndicesOpen(previous) 99 | } 100 | } 101 | 102 | // Specific configurations to store 103 | func KVConfig(kvconfig store.KVConfig) Option { 104 | return func(c *Config) Option { 105 | previous := c.Engine.KVConfig 106 | c.Engine.KVConfig = kvconfig 107 | return KVConfig(previous) 108 | } 109 | } 110 | 111 | // ConfigFromFile loads configuration from YAML 
file 112 | func ConfigFromFile(filename string) (*Config, error) { 113 | // Load config from file 114 | file, err := os.Open(filename) 115 | 116 | if err != nil { 117 | log.Fatalf("Failed to open config file: %s", err.Error()) 118 | } 119 | 120 | fileContent, err := ioutil.ReadAll(file) 121 | 122 | if err != nil { 123 | log.Fatalf("Failed to read config file: %s", err.Error()) 124 | } 125 | 126 | cfg := NewConfig() 127 | 128 | err = yaml.Unmarshal(fileContent, &cfg) 129 | return cfg, err 130 | } 131 | -------------------------------------------------------------------------------- /lib/neosearch/engine/command.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 9 | ) 10 | 11 | // Command defines a NeoSearch internal command. 12 | // This command describes a single operation in the index storage and is 13 | // decomposed in the following parts: 14 | // - Index 15 | // - Database 16 | // - Key 17 | // - KeyType 18 | // - Value 19 | // - ValueType 20 | // - Batch 21 | type Command struct { 22 | Index string 23 | Database string 24 | Command string 25 | Key []byte 26 | KeyType uint8 27 | Value []byte 28 | ValueType uint8 29 | 30 | Batch bool 31 | } 32 | 33 | func (c Command) Println() { 34 | line := c.Reverse() 35 | fmt.Println(line) 36 | } 37 | 38 | func (c Command) Reverse() string { 39 | var ( 40 | keyStr string 41 | valStr string 42 | line string 43 | ) 44 | 45 | if c.Key != nil { 46 | if c.KeyType == TypeString { 47 | keyStr = `'` + string(c.Key) + `'` 48 | } else if c.KeyType == TypeUint { 49 | keyStr = `uint(` + strconv.Itoa(int(utils.BytesToUint64(c.Key))) + `)` 50 | } else if c.KeyType == TypeInt { 51 | keyStr = `int(` + strconv.Itoa(int(utils.BytesToInt64(c.Key))) + `)` 52 | } else if c.KeyType == TypeFloat { 53 | keyStr = `float(` + strconv.FormatFloat(utils.BytesToFloat64(c.Key), 'f', -1, 
64) + `)` 54 | } else if c.KeyType == TypeBool { 55 | keyStr = `bool(` + string(c.Key) + `)` 56 | } else { 57 | fmt.Printf("Command error: %+v", c) 58 | panic(fmt.Errorf("Invalid command key type: %d - %+v", c.KeyType, string(c.Key))) 59 | } 60 | } 61 | 62 | if c.Value != nil { 63 | if c.ValueType == TypeString { 64 | valStr = `'` + string(c.Value) + `'` 65 | } else if c.ValueType == TypeUint { 66 | valStr = `uint(` + strconv.Itoa(int(utils.BytesToUint64(c.Value))) + `)` 67 | } else if c.ValueType == TypeInt { 68 | valStr = `int(` + strconv.Itoa(int(utils.BytesToInt64(c.Value))) + `)` 69 | } else if c.ValueType == TypeFloat { 70 | valStr = `float(` + strconv.FormatFloat(utils.BytesToFloat64(c.Value), 'f', -1, 64) + `)` 71 | } else if c.ValueType == TypeBool { 72 | valStr = `bool(` + string(c.Value) + `)` 73 | } else { 74 | panic(fmt.Errorf("Invalid command key type: %d", c.ValueType)) 75 | } 76 | } 77 | 78 | switch strings.ToUpper(c.Command) { 79 | case "SET", "MERGESET": 80 | line = fmt.Sprintf("USING %s.%s %s %s %s;", c.Index, c.Database, strings.ToUpper(c.Command), keyStr, valStr) 81 | case "BATCH", "flushbatch": 82 | line = fmt.Sprintf("USING %s.%s %s;", c.Index, c.Database, strings.ToUpper(c.Command)) 83 | case "GET", "DELETE": 84 | line = fmt.Sprintf("USING %s.%s %s %s;", c.Index, c.Database, strings.ToUpper(c.Command), keyStr) 85 | default: 86 | panic(fmt.Errorf("Invalid command: %s: %v", strings.ToUpper(c.Command), c)) 87 | } 88 | 89 | return line 90 | } 91 | -------------------------------------------------------------------------------- /lib/neosearch/engine/command_test.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 7 | ) 8 | 9 | func TestCommand(t *testing.T) { 10 | for _, testTable := range []struct { 11 | cmd Command 12 | expected string 13 | }{ 14 | { 15 | cmd: Command{ 16 | Database: "name.idx", 17 | 
Index: "empresas", 18 | Command: "mergeset", 19 | Key: []byte("teste"), 20 | KeyType: TypeString, 21 | Value: utils.Uint64ToBytes(1000), 22 | ValueType: TypeUint, 23 | }, 24 | expected: `USING empresas.name.idx MERGESET 'teste' uint(1000);`, 25 | }, 26 | { 27 | cmd: Command{ 28 | Database: "name.idx", 29 | Index: "empresas", 30 | Command: "batch", 31 | Key: nil, 32 | KeyType: TypeNil, 33 | Value: nil, 34 | ValueType: TypeNil, 35 | }, 36 | expected: `USING empresas.name.idx BATCH;`, 37 | }, 38 | { 39 | cmd: Command{ 40 | Database: "name.idx", 41 | Index: "empresas", 42 | Command: "get", 43 | Key: []byte("teste"), 44 | KeyType: TypeString, 45 | Value: nil, 46 | ValueType: TypeNil, 47 | }, 48 | expected: `USING empresas.name.idx GET 'teste';`, 49 | }, 50 | } { 51 | cmdRev := testTable.cmd.Reverse() 52 | 53 | if cmdRev != testTable.expected { 54 | t.Errorf("Differs: '%s' !== '%s'", cmdRev, testTable.expected) 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /lib/neosearch/engine/config.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 5 | "github.com/NeowayLabs/neosearch/lib/neosearch/store/goleveldb" 6 | ) 7 | 8 | const ( 9 | // OpenCacheSize is the default value for the maximum number of 10 | // open database files. This value can be override by 11 | // Config.OpenCacheSize. 12 | DefaultOpenCacheSize int = 100 13 | 14 | // BatchSize is the default size of cached operations before 15 | // a write batch occurs. You can override this value with 16 | // Config.BatchSize. 17 | DefaultBatchSize int = 5000 18 | 19 | // DefaultKVStore set the default KVStore 20 | DefaultKVStore string = goleveldb.KVName 21 | ) 22 | 23 | // Config configure the Engine 24 | type Config struct { 25 | // OpenCacheSize adjust the length of maximum number of 26 | // open indices. 
This is a LRU cache, the least used 27 | // database open will be closed when needed. 28 | OpenCacheSize int `yaml:"openCacheSize"` 29 | 30 | // BatchSize batch write size 31 | BatchSize int `yaml:"batchSize"` 32 | 33 | // KVStore configure the kvstore to be used 34 | KVStore string `yaml:"kvstore"` 35 | 36 | // KVStore specific options to kvstore 37 | KVConfig store.KVConfig `yaml:"kvconfig"` 38 | } 39 | 40 | // New creates new Config 41 | func NewConfig() *Config { 42 | return &Config{ 43 | DefaultOpenCacheSize, 44 | DefaultBatchSize, 45 | DefaultKVStore, 46 | nil, 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /lib/neosearch/engine/engine.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/NeowayLabs/neosearch/lib/neosearch/cache" 7 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 9 | ) 10 | 11 | // Engine type 12 | type Engine struct { 13 | stores cache.Cache 14 | config *Config 15 | debug bool 16 | } 17 | 18 | const ( 19 | _ = iota 20 | TypeNil 21 | TypeUint 22 | TypeInt 23 | TypeFloat 24 | TypeString 25 | TypeDate 26 | TypeBool 27 | TypeBinary // TODO: TBD 28 | ) 29 | 30 | // New creates a new Engine instance 31 | // Engine is the generic interface to access database/index files. 32 | // You can execute commands directly to database using Execute method 33 | // acquire direct iterators using the Store interface. 
34 | func New(cfg *Config) *Engine { 35 | if cfg.OpenCacheSize == 0 { 36 | cfg.OpenCacheSize = DefaultOpenCacheSize 37 | } 38 | 39 | if cfg.BatchSize == 0 { 40 | cfg.BatchSize = DefaultBatchSize 41 | } 42 | 43 | if cfg.KVStore == "" { 44 | cfg.KVStore = DefaultKVStore 45 | } 46 | 47 | if cfg.KVConfig == nil { 48 | cfg.KVConfig = store.KVConfig{} 49 | } 50 | 51 | ng := &Engine{ 52 | config: cfg, 53 | stores: cache.NewLRUCache(cfg.OpenCacheSize), 54 | } 55 | 56 | if debug, ok := cfg.KVConfig["debug"].(bool); ok { 57 | ng.debug = debug 58 | } 59 | 60 | ng.stores.OnRemove(func(key string, value interface{}) { 61 | storekv, ok := value.(store.KVStore) 62 | 63 | if !ok { 64 | panic("Unexpected value in cache") 65 | } 66 | 67 | if storekv.IsOpen() { 68 | storekv.Close() 69 | } 70 | }) 71 | 72 | return ng 73 | } 74 | 75 | // Open the index and cache then for future uses 76 | func (ng *Engine) open(indexName, databaseName string) (store.KVStore, error) { 77 | var ( 78 | err error 79 | storekv store.KVStore 80 | ok bool 81 | value interface{} 82 | ) 83 | 84 | value, ok = ng.stores.Get(indexName + "." 
+ databaseName) 85 | 86 | if ok == false || value == nil { 87 | storeConstructor := store.KVStoreConstructorByName(ng.config.KVStore) 88 | if storeConstructor == nil { 89 | return nil, errors.New("Unknown storage type") 90 | } 91 | 92 | storekv, err = storeConstructor(ng.config.KVConfig) 93 | if err != nil { 94 | return nil, err 95 | } 96 | 97 | ng.stores.Add(indexName+"."+databaseName, storekv) 98 | err = storekv.Open(indexName, databaseName) 99 | 100 | return storekv, err 101 | } 102 | 103 | storekv, ok = value.(store.KVStore) 104 | 105 | if ok { 106 | return storekv, nil 107 | } 108 | 109 | return nil, errors.New("Failed to convert cache entry to KVStore") 110 | } 111 | 112 | // Execute the given command 113 | func (ng *Engine) Execute(cmd Command) ([]byte, error) { 114 | var err error 115 | 116 | store, err := ng.GetStore(cmd.Index, cmd.Database) 117 | 118 | if ng.debug { 119 | cmd.Println() 120 | } 121 | 122 | if err != nil { 123 | return nil, err 124 | } 125 | 126 | writer := store.Writer() 127 | 128 | reader := store.Reader() 129 | defer func() { 130 | reader.Close() 131 | }() 132 | 133 | switch cmd.Command { 134 | case "batch": 135 | writer.StartBatch() 136 | return nil, nil 137 | case "flushbatch": 138 | err = writer.FlushBatch() 139 | return nil, err 140 | case "set": 141 | err = writer.Set(cmd.Key, cmd.Value) 142 | return nil, err 143 | case "get": 144 | return reader.Get(cmd.Key) 145 | case "mergeset": 146 | v := utils.BytesToUint64(cmd.Value) 147 | return nil, writer.MergeSet(cmd.Key, v) 148 | case "delete": 149 | err = writer.Delete(cmd.Key) 150 | return nil, err 151 | } 152 | 153 | return nil, errors.New("Failed to execute command.") 154 | } 155 | 156 | // GetStore returns a instance of KVStore for the given index name 157 | // If the given index name isn't open, then this method will open 158 | // and cache the index for next use. 
159 | func (ng *Engine) GetStore(indexName string, databaseName string) (store.KVStore, error) { 160 | var ( 161 | err error 162 | storekv store.KVStore 163 | ) 164 | 165 | storekv, err = ng.open(indexName, databaseName) 166 | 167 | if err != nil { 168 | return nil, err 169 | } 170 | 171 | return storekv, nil 172 | } 173 | 174 | // Close all of the open databases 175 | func (ng *Engine) Close() { 176 | // Clean will un-ref and Close the databases 177 | ng.stores.Clean() 178 | } 179 | -------------------------------------------------------------------------------- /lib/neosearch/engine/engine_test.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 10 | ) 11 | 12 | var DataDirTmp string 13 | var sampleIndex = "sample" 14 | 15 | func init() { 16 | var err error 17 | DataDirTmp, err = ioutil.TempDir("/tmp", "neosearch-index-") 18 | 19 | if err != nil { 20 | panic(err) 21 | } 22 | 23 | os.Mkdir(DataDirTmp+"/"+sampleIndex, 0755) 24 | } 25 | 26 | func execSequence(t *testing.T, ng *Engine, cmds []Command) { 27 | for _, cmd := range cmds { 28 | _, err := ng.Execute(cmd) 29 | 30 | if err != nil { 31 | t.Error(err) 32 | } 33 | } 34 | } 35 | 36 | func cmpIterator(t *testing.T, itReturns []map[int64]string, ng *Engine, seek []byte, index, database string) { 37 | storekv, err := ng.GetStore(index, database) 38 | 39 | if err != nil { 40 | t.Error(err) 41 | } 42 | 43 | reader := storekv.Reader() 44 | it := reader.GetIterator() 45 | defer func() { 46 | it.Close() 47 | reader.Close() 48 | }() 49 | 50 | it.Seek(seek) 51 | 52 | for i := 0; i < len(itReturns); i++ { 53 | for key, value := range itReturns[i] { 54 | if !it.Valid() { 55 | t.Errorf("Failed to seek to '%d' key", key) 56 | } 57 | 58 | val := it.Value() 59 | 60 | if len(val) == 0 || string(val) != 
value { 61 | t.Errorf("Failed to get '%d' key", key) 62 | } else if err := it.GetError(); err != nil { 63 | t.Error(err) 64 | } 65 | } 66 | 67 | it.Next() 68 | } 69 | } 70 | 71 | // TestEngineIntegerKeyOrder verifies if the chosen storage engine is really 72 | // a LSM database ordered by key with ByteWise comparator. 73 | func TestEngineIntegerKeyOrder(t *testing.T) { 74 | ng := New(&Config{ 75 | KVConfig: store.KVConfig{ 76 | "dataDir": DataDirTmp, 77 | }, 78 | OpenCacheSize: 1, 79 | }) 80 | 81 | cmds := []Command{ 82 | { 83 | Index: "sample", 84 | Database: "test.idx", 85 | Key: []byte("AAA"), 86 | Value: []byte("value AAA"), 87 | Command: "set", 88 | }, 89 | { 90 | Index: "sample", 91 | Database: "test.idx", 92 | Key: utils.Uint64ToBytes(1), 93 | Value: []byte("value 1"), 94 | Command: "set", 95 | }, 96 | { 97 | Index: "sample", 98 | Database: "test.idx", 99 | Key: []byte("BBB"), 100 | Value: []byte("value BBB"), 101 | Command: "set", 102 | }, 103 | { 104 | Index: "sample", 105 | Database: "test.idx", 106 | Key: utils.Uint64ToBytes(2), 107 | Value: []byte("value 2"), 108 | Command: "set", 109 | }, 110 | { 111 | Index: "sample", 112 | Database: "test.idx", 113 | Key: utils.Uint64ToBytes(2000), 114 | Value: []byte("value 2000"), 115 | Command: "set", 116 | }, 117 | { 118 | Index: "sample", 119 | Database: "test.idx", 120 | Key: []byte("2000"), 121 | Value: []byte("value 2000"), 122 | Command: "set", 123 | }, 124 | { 125 | Index: "sample", 126 | Database: "test.idx", 127 | Key: utils.Uint64ToBytes(100000), 128 | Value: []byte("value 100000"), 129 | Command: "set", 130 | }, 131 | { 132 | Index: "sample", 133 | Database: "test.idx", 134 | Key: utils.Uint64ToBytes(1000000), 135 | Value: []byte("value 1000000"), 136 | Command: "set", 137 | }, 138 | { 139 | Index: "sample", 140 | Database: "test.idx", 141 | Key: utils.Uint64ToBytes(10000000000000), 142 | Value: []byte("value 10000000000000"), 143 | Command: "set", 144 | }, 145 | } 146 | 147 | execSequence(t, ng, cmds) 
148 | 149 | itReturns := []map[int64]string{ 150 | { 151 | 1: "value 1", 152 | }, 153 | { 154 | 2: "value 2", 155 | }, 156 | { 157 | 2000: "value 2000", 158 | }, 159 | { 160 | 100000: "value 100000", 161 | }, 162 | { 163 | 1000000: "value 1000000", 164 | }, 165 | { 166 | 10000000000000: "value 10000000000000", 167 | }, 168 | } 169 | 170 | cmpIterator(t, itReturns, ng, utils.Uint64ToBytes(1), "sample", "test.idx") 171 | 172 | defer ng.Close() 173 | os.RemoveAll(DataDirTmp) 174 | } 175 | -------------------------------------------------------------------------------- /lib/neosearch/examples_test.go: -------------------------------------------------------------------------------- 1 | package neosearch_test 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch" 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 10 | ) 11 | 12 | func OnErrorPanic(err error) { 13 | if err != nil { 14 | panic(err) 15 | } 16 | } 17 | 18 | func Example() { 19 | dataDir, err := ioutil.TempDir("", "neosearchExample") 20 | defer os.RemoveAll(dataDir) 21 | 22 | OnErrorPanic(err) 23 | 24 | cfg := config.NewConfig() 25 | cfg.Option(config.DataDir(dataDir)) 26 | 27 | neo := neosearch.New(cfg) 28 | defer neo.Close() 29 | 30 | index, err := neo.CreateIndex("test") 31 | OnErrorPanic(err) 32 | 33 | err = index.Add(1, []byte(`{"id": 1, "name": "Neoway Business Solution"}`), nil) 34 | OnErrorPanic(err) 35 | 36 | err = index.Add(2, []byte(`{"id": 2, "name": "Google Inc."}`), nil) 37 | OnErrorPanic(err) 38 | 39 | err = index.Add(3, []byte(`{"id": 3, "name": "Facebook Company"}`), nil) 40 | OnErrorPanic(err) 41 | 42 | err = index.Add(4, []byte(`{"id": 4, "name": "Neoway Teste"}`), nil) 43 | OnErrorPanic(err) 44 | 45 | data, err := index.Get(1) 46 | OnErrorPanic(err) 47 | 48 | fmt.Println(string(data)) 49 | // Output: 50 | // {"id": 1, "name": "Neoway Business Solution"} 51 | } 52 | 53 | func ExampleMatchPrefix() { 54 | dataDir, err := 
ioutil.TempDir("", "neosearchExample") 55 | defer os.RemoveAll(dataDir) 56 | 57 | OnErrorPanic(err) 58 | 59 | cfg := config.NewConfig() 60 | cfg.Option(config.DataDir(dataDir)) 61 | cfg.Option(config.Debug(false)) 62 | 63 | neo := neosearch.New(cfg) 64 | defer neo.Close() 65 | 66 | index, err := neo.CreateIndex("test") 67 | OnErrorPanic(err) 68 | 69 | err = index.Add(1, []byte(`{"id": 1, "name": "Neoway Business Solution"}`), nil) 70 | OnErrorPanic(err) 71 | 72 | err = index.Add(2, []byte(`{"id": 2, "name": "Google Inc."}`), nil) 73 | OnErrorPanic(err) 74 | 75 | err = index.Add(3, []byte(`{"id": 3, "name": "Facebook Company"}`), nil) 76 | OnErrorPanic(err) 77 | 78 | err = index.Add(4, []byte(`{"id": 4, "name": "Neoway Teste"}`), nil) 79 | OnErrorPanic(err) 80 | 81 | values, err := index.MatchPrefix([]byte("name"), []byte("neoway")) 82 | OnErrorPanic(err) 83 | 84 | for _, value := range values { 85 | fmt.Println(value) 86 | } 87 | 88 | // Output: 89 | // {"id": 1, "name": "Neoway Business Solution"} 90 | // {"id": 4, "name": "Neoway Teste"} 91 | } 92 | -------------------------------------------------------------------------------- /lib/neosearch/index/filter.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "bytes" 5 | 6 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 7 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 8 | ) 9 | 10 | func (i *Index) FilterTermID(field, value []byte, limit uint64) ([]uint64, uint64, error) { 11 | cmd := engine.Command{} 12 | cmd.Index = i.Name 13 | // TODO: implement search for every type 14 | cmd.Database = utils.FieldNorm(string(field)) + "_string.idx" 15 | cmd.Command = "get" 16 | cmd.Key = value 17 | cmd.KeyType = engine.TypeString 18 | data, err := i.engine.Execute(cmd) 19 | 20 | if err != nil { 21 | return nil, 0, err 22 | } 23 | 24 | dataLimit := uint64(len(data) / 8) 25 | total := dataLimit 26 | 27 | if limit > 0 && limit < dataLimit { 
28 | dataLimit = limit 29 | } 30 | 31 | docIDs := make([]uint64, dataLimit) 32 | 33 | if len(data) > 0 { 34 | for i, j := uint64(0), uint64(0); i < dataLimit*8; i, j = i+8, j+1 { 35 | v := utils.BytesToUint64(data[i : i+8]) 36 | docIDs[j] = v 37 | } 38 | 39 | } 40 | 41 | return docIDs, total, nil 42 | } 43 | 44 | // FilterTerm filter the index for all documents that have `value` in the 45 | // field `field` and returns upto `limit` documents. A limit of 0 (zero) is 46 | // the same as no limit (all of the records will return).. 47 | func (i *Index) FilterTerm(field []byte, value []byte, limit uint64) ([]string, uint64, error) { 48 | docIDs, total, err := i.FilterTermID(field, value, limit) 49 | 50 | if err != nil { 51 | return nil, 0, err 52 | } 53 | 54 | docs := make([]string, len(docIDs)) 55 | 56 | for idx, docID := range docIDs { 57 | if byteDoc, err := i.Get(docID); err == nil { 58 | docs[idx] = string(byteDoc) 59 | } else { 60 | return nil, 0, err 61 | } 62 | } 63 | 64 | return docs, total, nil 65 | } 66 | 67 | func (i *Index) matchPrefix(field []byte, value []byte) ([]uint64, error) { 68 | var ( 69 | docIDs []uint64 70 | ) 71 | 72 | // TODO: Implement search for all of field types 73 | storekv, err := i.engine.GetStore(i.Name, utils.FieldNorm(string(field))+"_string.idx") 74 | 75 | if err != nil { 76 | return nil, err 77 | } 78 | 79 | reader := storekv.Reader() 80 | it := reader.GetIterator() 81 | defer func() { 82 | it.Close() 83 | reader.Close() 84 | }() 85 | 86 | defer it.Close() 87 | 88 | for it.Seek(value); it.Valid(); it.Next() { 89 | if bytes.HasPrefix(it.Key(), value) { 90 | var ids []uint64 91 | dataBytes := it.Value() 92 | 93 | if len(dataBytes) == 0 { 94 | continue 95 | } 96 | 97 | for i := 0; i < len(dataBytes); i += 8 { 98 | v := utils.BytesToUint64(dataBytes[i : i+8]) 99 | ids = append(ids, v) 100 | } 101 | 102 | if len(docIDs) == 0 { 103 | docIDs = ids 104 | continue 105 | } 106 | 107 | for _, id := range ids { 108 | docIDs = 
utils.UniqueUint64Add(docIDs, id) 109 | } 110 | } 111 | } 112 | 113 | if err := it.GetError(); err != nil { 114 | return nil, err 115 | } 116 | 117 | return docIDs, nil 118 | } 119 | 120 | // MatchPrefix search documents where field `field` starts with `value`. 121 | func (i *Index) MatchPrefix(field []byte, value []byte) ([]string, error) { 122 | var docs []string 123 | 124 | docIDs, err := i.matchPrefix(field, value) 125 | 126 | if err != nil { 127 | return nil, err 128 | } 129 | 130 | for _, docID := range docIDs { 131 | d, err := i.Get(docID) 132 | 133 | if err != nil { 134 | return nil, err 135 | } 136 | 137 | docs = append(docs, string(d)) 138 | } 139 | 140 | return docs, nil 141 | } 142 | -------------------------------------------------------------------------------- /lib/neosearch/index/index_build_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 10 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 11 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 12 | ) 13 | 14 | // TODO; test buildIndex* functions 15 | // use table-driven tests 16 | 17 | func TestIndexBuildUintCommands(t *testing.T) { 18 | var ( 19 | idx *Index 20 | err error 21 | indexDir = DataDirTmp + "/test-uint" 22 | commands []engine.Command 23 | cmd engine.Command 24 | ) 25 | 26 | cfg := config.NewConfig() 27 | cfg.Option(config.DataDir(DataDirTmp)) 28 | 29 | err = os.MkdirAll(DataDirTmp, 0755) 30 | 31 | if err != nil { 32 | t.Error("Failed to create directory") 33 | goto cleanup 34 | } 35 | 36 | idx, err = New("test-uint", cfg, true) 37 | 38 | if err != nil { 39 | t.Error(err) 40 | goto cleanup 41 | } 42 | 43 | commands, err = idx.buildIndexUint64(uint64(1), "teste", uint64(1)) 44 | 45 | if err != nil { 46 | t.Error(err) 47 | goto cleanup 48 | } 49 | 50 | if len(commands) != 1 { 51 | 
t.Error("invalid commands returned by buildUint") 52 | goto cleanup 53 | } 54 | 55 | cmd = commands[0] 56 | 57 | if cmd.Index != "test-uint" || 58 | cmd.Database != "teste_uint.idx" || 59 | utils.BytesToUint64(cmd.Key) != uint64(1) || 60 | utils.BytesToUint64(cmd.Value) != uint64(1) || 61 | strings.ToLower(cmd.Command) != "mergeset" { 62 | t.Error("commands differs") 63 | fmt.Println("Key: ", utils.BytesToUint64(cmd.Key)) 64 | fmt.Println("Value: ", utils.BytesToUint64(cmd.Value)) 65 | fmt.Println("Index: ", cmd.Index) 66 | cmd.Println() 67 | goto cleanup 68 | } 69 | 70 | cleanup: 71 | idx.Close() 72 | os.RemoveAll(indexDir) 73 | } 74 | 75 | func TestIndexBuildBoolCommands(t *testing.T) { 76 | var ( 77 | idx *Index 78 | err error 79 | indexDir = DataDirTmp + "/test-bool" 80 | commands []engine.Command 81 | cmd engine.Command 82 | ) 83 | 84 | cfg := config.NewConfig() 85 | cfg.Option(config.DataDir(DataDirTmp)) 86 | 87 | err = os.MkdirAll(DataDirTmp, 0755) 88 | 89 | if err != nil { 90 | t.Error("Failed to create directory") 91 | goto cleanup 92 | } 93 | 94 | idx, err = New("test-bool", cfg, true) 95 | 96 | if err != nil { 97 | t.Error(err) 98 | goto cleanup 99 | } 100 | 101 | commands, err = idx.buildIndexBool(uint64(1), "teste", true) 102 | 103 | if err != nil { 104 | t.Error(err) 105 | goto cleanup 106 | } 107 | 108 | if len(commands) != 1 { 109 | t.Error("invalid commands returned by buildUint") 110 | goto cleanup 111 | } 112 | 113 | cmd = commands[0] 114 | 115 | if cmd.Index != "test-bool" || 116 | cmd.Database != "teste_bool.idx" || 117 | utils.BytesToBool(cmd.Key) != true || 118 | utils.BytesToUint64(cmd.Value) != uint64(1) || 119 | strings.ToLower(cmd.Command) != "mergeset" { 120 | t.Error("commands differs") 121 | fmt.Println("Key: ", utils.BytesToUint64(cmd.Key)) 122 | fmt.Println("Value: ", utils.BytesToUint64(cmd.Value)) 123 | fmt.Println("Index: ", cmd.Index) 124 | cmd.Println() 125 | goto cleanup 126 | } 127 | 128 | cleanup: 129 | idx.Close() 130 | 
os.RemoveAll(indexDir) 131 | } 132 | -------------------------------------------------------------------------------- /lib/neosearch/index/index_metadata_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | "time" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 10 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 11 | ) 12 | 13 | func TestSimpleIndexWithMetadata(t *testing.T) { 14 | var ( 15 | indexName = "document-sample-metadata" 16 | indexDir = DataDirTmp + "/" + indexName 17 | commands, expectedCommands []engine.Command 18 | docJSON = []byte(`{"id": 1}`) 19 | err error 20 | index *Index 21 | metadata = Metadata{ 22 | "id": Metadata{ 23 | "type": "uint", 24 | }, 25 | } 26 | ) 27 | 28 | cfg := config.NewConfig() 29 | cfg.Option(config.DataDir(DataDirTmp)) 30 | 31 | err = os.MkdirAll(DataDirTmp, 0755) 32 | 33 | if err != nil { 34 | goto cleanup 35 | } 36 | 37 | index, err = New(indexName, cfg, true) 38 | 39 | if err != nil { 40 | t.Error(err) 41 | goto cleanup 42 | } 43 | 44 | if _, err := os.Stat(indexDir); os.IsNotExist(err) { 45 | t.Errorf("no such file or directory: %s", indexDir) 46 | goto cleanup 47 | } 48 | 49 | commands, err = index.BuildAdd(1, docJSON, metadata) 50 | 51 | if err != nil { 52 | t.Error(err.Error()) 53 | goto cleanup 54 | } 55 | 56 | expectedCommands = []engine.Command{ 57 | { 58 | Index: indexName, 59 | Database: "document.db", 60 | Key: utils.Uint64ToBytes(1), 61 | KeyType: engine.TypeUint, 62 | Value: docJSON, 63 | ValueType: engine.TypeString, 64 | Command: "set", 65 | }, 66 | { 67 | Index: indexName, 68 | Database: "id_uint.idx", 69 | Key: utils.Uint64ToBytes(1), 70 | KeyType: engine.TypeUint, 71 | Value: utils.Uint64ToBytes(1), 72 | ValueType: engine.TypeUint, 73 | Command: "mergeset", 74 | }, 75 | } 76 | 77 | if !compareCommands(t, commands, expectedCommands) 
{ 78 | goto cleanup 79 | } 80 | 81 | docJSON = []byte(`{ 82 | "title": "NeoSearch - Reverse Index", 83 | "description": "Neoway Full Text Search" 84 | }`) 85 | 86 | metadata = Metadata{ 87 | "title": Metadata{ 88 | "type": "string", 89 | }, 90 | "description": Metadata{ 91 | "type": "string", 92 | }, 93 | } 94 | 95 | expectedCommands = []engine.Command{ 96 | { 97 | Index: indexName, 98 | Database: "document.db", 99 | Command: "set", 100 | Key: utils.Uint64ToBytes(2), 101 | KeyType: engine.TypeUint, 102 | Value: docJSON, 103 | ValueType: engine.TypeString, 104 | }, 105 | { 106 | Index: indexName, 107 | Database: "description_string.idx", 108 | Command: "mergeset", 109 | Key: []byte("neoway"), 110 | KeyType: engine.TypeString, 111 | Value: utils.Uint64ToBytes(2), 112 | ValueType: engine.TypeUint, 113 | }, 114 | { 115 | Index: indexName, 116 | Database: "description_string.idx", 117 | Command: "mergeset", 118 | Key: []byte("full"), 119 | KeyType: engine.TypeString, 120 | Value: utils.Uint64ToBytes(2), 121 | ValueType: engine.TypeUint, 122 | }, 123 | { 124 | Index: indexName, 125 | Database: "description_string.idx", 126 | Command: "mergeset", 127 | Key: []byte("text"), 128 | KeyType: engine.TypeString, 129 | Value: utils.Uint64ToBytes(2), 130 | ValueType: engine.TypeUint, 131 | }, 132 | { 133 | Index: indexName, 134 | Database: "description_string.idx", 135 | Command: "mergeset", 136 | Key: []byte("search"), 137 | KeyType: engine.TypeString, 138 | Value: utils.Uint64ToBytes(2), 139 | ValueType: engine.TypeUint, 140 | }, 141 | { 142 | Index: indexName, 143 | Database: "description_string.idx", 144 | Command: "mergeset", 145 | Key: []byte("neoway full text search"), 146 | KeyType: engine.TypeString, 147 | Value: utils.Uint64ToBytes(2), 148 | ValueType: engine.TypeUint, 149 | }, 150 | { 151 | Index: indexName, 152 | Database: "title_string.idx", 153 | Command: "mergeset", 154 | Key: []byte("neosearch"), 155 | KeyType: engine.TypeString, 156 | Value: 
utils.Uint64ToBytes(2), 157 | ValueType: engine.TypeUint, 158 | }, 159 | { 160 | Index: indexName, 161 | Database: "title_string.idx", 162 | Command: "mergeset", 163 | Key: []byte("-"), 164 | KeyType: engine.TypeString, 165 | Value: utils.Uint64ToBytes(2), 166 | ValueType: engine.TypeUint, 167 | }, 168 | { 169 | Index: indexName, 170 | Database: "title_string.idx", 171 | Command: "mergeset", 172 | Key: []byte("reverse"), 173 | KeyType: engine.TypeString, 174 | Value: utils.Uint64ToBytes(2), 175 | ValueType: engine.TypeUint, 176 | }, 177 | { 178 | Index: indexName, 179 | Database: "title_string.idx", 180 | Command: "mergeset", 181 | Key: []byte("index"), 182 | KeyType: engine.TypeString, 183 | Value: utils.Uint64ToBytes(2), 184 | ValueType: engine.TypeUint, 185 | }, 186 | { 187 | Index: indexName, 188 | Database: "title_string.idx", 189 | Command: "mergeset", 190 | Key: []byte("neosearch - reverse index"), 191 | KeyType: engine.TypeString, 192 | Value: utils.Uint64ToBytes(2), 193 | ValueType: engine.TypeUint, 194 | }, 195 | } 196 | 197 | commands, err = index.BuildAdd(2, docJSON, metadata) 198 | 199 | if err != nil { 200 | t.Error(err) 201 | goto cleanup 202 | } 203 | 204 | if !compareCommands(t, commands, expectedCommands) { 205 | goto cleanup 206 | } 207 | 208 | cleanup: 209 | index.Close() 210 | os.RemoveAll(indexDir) 211 | } 212 | 213 | func TestDateIndex(t *testing.T) { 214 | var ( 215 | indexName = "document-sample-date" 216 | indexDir = DataDirTmp + "/" + indexName 217 | commands, expectedCommands []engine.Command 218 | currentDate = time.Now() 219 | currentDateStr = currentDate.Format(time.ANSIC) 220 | expectedTime, _ = time.Parse(time.ANSIC, currentDateStr) 221 | expectedNano = expectedTime.UnixNano() 222 | docJSON = []byte(`{"id": 1, "createAt": "` + currentDateStr + `"}`) 223 | err error 224 | index *Index 225 | metadata = Metadata{ 226 | "id": Metadata{ 227 | "type": "uint", 228 | }, 229 | "createAt": Metadata{ 230 | "type": "date", 231 | }, 232 | } 233 
| ) 234 | 235 | cfg := config.NewConfig() 236 | cfg.Option(config.DataDir(DataDirTmp)) 237 | 238 | err = os.MkdirAll(DataDirTmp, 0755) 239 | 240 | if err != nil { 241 | goto cleanup 242 | } 243 | 244 | index, err = New(indexName, cfg, true) 245 | 246 | if err != nil { 247 | t.Error(err) 248 | goto cleanup 249 | } 250 | 251 | if _, err := os.Stat(indexDir); os.IsNotExist(err) { 252 | t.Errorf("no such file or directory: %s", indexDir) 253 | goto cleanup 254 | } 255 | 256 | commands, err = index.BuildAdd(1, docJSON, metadata) 257 | 258 | if err != nil { 259 | t.Error(err.Error()) 260 | goto cleanup 261 | } 262 | 263 | expectedCommands = []engine.Command{ 264 | { 265 | Index: indexName, 266 | Database: "document.db", 267 | Key: utils.Uint64ToBytes(1), 268 | KeyType: engine.TypeUint, 269 | Value: docJSON, 270 | ValueType: engine.TypeString, 271 | Command: "set", 272 | }, 273 | { 274 | Index: indexName, 275 | Database: "createat_int.idx", 276 | Key: utils.Int64ToBytes(expectedNano), 277 | KeyType: engine.TypeInt, 278 | Value: utils.Uint64ToBytes(1), 279 | ValueType: engine.TypeUint, 280 | Command: "mergeset", 281 | }, 282 | { 283 | Index: indexName, 284 | Database: "id_uint.idx", 285 | Key: utils.Uint64ToBytes(1), 286 | KeyType: engine.TypeUint, 287 | Value: utils.Uint64ToBytes(1), 288 | ValueType: engine.TypeUint, 289 | Command: "mergeset", 290 | }, 291 | } 292 | 293 | if !compareCommands(t, commands, expectedCommands) { 294 | goto cleanup 295 | } 296 | 297 | cleanup: 298 | index.Close() 299 | os.RemoveAll(indexDir) 300 | } 301 | -------------------------------------------------------------------------------- /lib/neosearch/index/index_object_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/utils" 10 | 
) 11 | 12 | func TestBuildAddObjectDocument(t *testing.T) { 13 | var ( 14 | indexName = "document-with-object-sample" 15 | indexDir = DataDirTmp + "/" + indexName 16 | commands, expectedCommands []engine.Command 17 | docJSON = []byte(` 18 | { 19 | "id": 1, 20 | "address": { 21 | "city": "florianópolis", 22 | "district": "Itacorubi", 23 | "street": "Patricio Farias", 24 | "latlon": [ 25 | -27.545198, 26 | -48.504827 27 | ] 28 | } 29 | }`) 30 | err error 31 | index *Index 32 | ) 33 | 34 | cfg := config.NewConfig() 35 | cfg.Option(config.DataDir(DataDirTmp)) 36 | 37 | err = os.MkdirAll(DataDirTmp, 0755) 38 | 39 | if err != nil { 40 | goto cleanup 41 | } 42 | 43 | index, err = New(indexName, cfg, true) 44 | 45 | if err != nil { 46 | t.Error(err) 47 | goto cleanup 48 | } 49 | 50 | if _, err := os.Stat(indexDir); os.IsNotExist(err) { 51 | t.Errorf("no such file or directory: %s", indexDir) 52 | goto cleanup 53 | } 54 | 55 | commands, err = index.BuildAdd(1, docJSON, nil) 56 | 57 | if err != nil { 58 | t.Error(err.Error()) 59 | goto cleanup 60 | } 61 | 62 | expectedCommands = []engine.Command{ 63 | { 64 | Index: indexName, 65 | Database: "document.db", 66 | Key: utils.Uint64ToBytes(1), 67 | KeyType: engine.TypeUint, 68 | Value: docJSON, 69 | ValueType: engine.TypeString, 70 | Command: "set", 71 | }, 72 | { 73 | Index: indexName, 74 | Database: "address.city_string.idx", 75 | Key: []byte("florianópolis"), 76 | KeyType: engine.TypeString, 77 | Value: utils.Uint64ToBytes(1), 78 | ValueType: engine.TypeUint, 79 | Command: "mergeset", 80 | }, 81 | { 82 | Index: indexName, 83 | Database: "address.district_string.idx", 84 | Key: []byte("itacorubi"), 85 | KeyType: engine.TypeString, 86 | Value: utils.Uint64ToBytes(1), 87 | ValueType: engine.TypeUint, 88 | Command: "mergeset", 89 | }, 90 | { 91 | Index: indexName, 92 | Database: "address.latlon_float.idx", 93 | Key: utils.Float64ToBytes(-27.545198), 94 | KeyType: engine.TypeFloat, 95 | Value: utils.Uint64ToBytes(1), 96 | 
ValueType: engine.TypeUint, 97 | Command: "mergeset", 98 | }, 99 | { 100 | Index: indexName, 101 | Database: "address.latlon_float.idx", 102 | Key: utils.Float64ToBytes(-48.504827), 103 | KeyType: engine.TypeFloat, 104 | Value: utils.Uint64ToBytes(1), 105 | ValueType: engine.TypeUint, 106 | Command: "mergeset", 107 | }, 108 | { 109 | Index: indexName, 110 | Database: "address.street_string.idx", 111 | Key: []byte("patricio"), 112 | KeyType: engine.TypeString, 113 | Value: utils.Uint64ToBytes(1), 114 | ValueType: engine.TypeUint, 115 | Command: "mergeset", 116 | }, 117 | { 118 | Index: indexName, 119 | Database: "address.street_string.idx", 120 | Key: []byte("farias"), 121 | KeyType: engine.TypeString, 122 | Value: utils.Uint64ToBytes(1), 123 | ValueType: engine.TypeUint, 124 | Command: "mergeset", 125 | }, 126 | { 127 | Index: indexName, 128 | Database: "address.street_string.idx", 129 | Key: []byte("patricio farias"), 130 | KeyType: engine.TypeString, 131 | Value: utils.Uint64ToBytes(1), 132 | ValueType: engine.TypeUint, 133 | Command: "mergeset", 134 | }, 135 | { 136 | Index: indexName, 137 | Database: "id_float.idx", 138 | Key: utils.Float64ToBytes(1), 139 | KeyType: engine.TypeFloat, 140 | Value: utils.Uint64ToBytes(1), 141 | ValueType: engine.TypeUint, 142 | Command: "mergeset", 143 | }, 144 | } 145 | 146 | if !compareCommands(t, commands, expectedCommands) { 147 | goto cleanup 148 | } 149 | 150 | cleanup: 151 | index.Close() 152 | os.RemoveAll(indexDir) 153 | } 154 | -------------------------------------------------------------------------------- /lib/neosearch/index/metadata.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | type Metadata map[string]interface{} 4 | -------------------------------------------------------------------------------- /lib/neosearch/neosearch.go: -------------------------------------------------------------------------------- 1 | // Package neosearch is a full-text-search 
library focused on fast search across 2 | // multiple indices, doing data join between queries. 3 | // 4 | // NeoSearch - Neoway Full Text Search Index 5 | // 6 | // NeoSearch is a feature-limited full-text-search library with a focus on 7 | // index relationships; its main goal is to provide very fast JOIN 8 | // operations between information stored on different indices. 9 | // It's not a complete FTS (Full Text Search) engine, in the common sense, but 10 | // aims to solve very specific problems of FTS. At the moment, NeoSearch is a 11 | // laboratory for research, not recommended for production usage; here we will 12 | // test various technologies for fast storage and search algorithms. In the 13 | // future, maybe, we can be proud of a very nice tech for solving search in big 14 | // data companies. 15 | // 16 | // NeoSearch is like a Lucene library but without all of the complexities of 17 | // a complete FTS engine, written in Go, focusing on high performance search 18 | // with data relationships. 19 | // 20 | // It's not yet complete, still in active development, so stay tuned for 21 | // updates. 22 | // 23 | // Dependencies 24 | // 25 | // - snappy (optional, only required for compressed data) 26 | // - Go > 1.3 27 | // 28 | // Install 29 | // 30 | // go get -u -v github.com/NeowayLabs/neosearch 31 | // cd $GOPATH/src/github.com/NeowayLabs/neosearch 32 | // go test -v . 
33 | // 34 | // Create and add documents to NeoSearch is very easy, see below: 35 | // 36 | // func main() { 37 | // cfg := NewConfig() 38 | // cfg.Option(neosearch.DataDir("/tmp")) 39 | // cfg.Option(neosearch.Debug(false)) 40 | // 41 | // neo := neosearch.New(cfg) 42 | // 43 | // index, err := neosearch.CreateIndex("test") 44 | // if err != nil { 45 | // panic(err) 46 | // } 47 | // 48 | // err = index.Add(1, `{"name": "Neoway Business Solution", "type": "company"}`) 49 | // if err != nil { 50 | // panic(err) 51 | // } 52 | // 53 | // err = index.Add(2, `{"name": "Facebook Inc", "type": "company"}`) 54 | // if err != nil { 55 | // panic(err) 56 | // } 57 | // 58 | // values, err := index.MatchPrefix([]byte("name"), []byte("neoway")) 59 | // if err != nil { 60 | // panic(err) 61 | // } 62 | // 63 | // for _, value := range values { 64 | // fmt.Println(value) 65 | // } 66 | // } 67 | // 68 | // NeoSearch supports the features below: 69 | // 70 | // - Create/Delete index 71 | // - Index JSON documents (No schema) 72 | // - Bulk writes 73 | // - Analysers 74 | // - Tokenizer 75 | // - Search 76 | // - MatchPrefix 77 | // - FilterTerm 78 | // 79 | // This project is in active development stage, it is not recommended for 80 | // production environments. 81 | package neosearch 82 | 83 | import ( 84 | "errors" 85 | "expvar" 86 | "fmt" 87 | "os" 88 | 89 | "github.com/NeowayLabs/neosearch/lib/neosearch/cache" 90 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 91 | "github.com/NeowayLabs/neosearch/lib/neosearch/index" 92 | ) 93 | 94 | var ( 95 | cachedIndices = expvar.NewInt("cachedIndices") 96 | ) 97 | 98 | // NeoSearch is the core of the neosearch package. 99 | // This structure handles all of the user's interactions with the indices, 100 | // like CreateIndex, DeleteIndex, UpdateIndex and others. 101 | type NeoSearch struct { 102 | indices cache.Cache 103 | config *config.Config 104 | } 105 | 106 | // New creates the NeoSearch high-level interface. 
107 | // Use that for index/update/delete JSON documents. 108 | func New(cfg *config.Config) *NeoSearch { 109 | if cfg.DataDir == "" { 110 | panic(errors.New("DataDir is required for NeoSearch interface")) 111 | } 112 | 113 | if cfg.DataDir[len(cfg.DataDir)-1] == '/' { 114 | cfg.DataDir = cfg.DataDir[0 : len(cfg.DataDir)-1] 115 | } 116 | 117 | if cfg.MaxIndicesOpen == 0 { 118 | cfg.MaxIndicesOpen = config.DefaultMaxIndicesOpen 119 | } 120 | 121 | neo := &NeoSearch{ 122 | config: cfg, 123 | indices: cache.NewLRUCache(cfg.MaxIndicesOpen), 124 | } 125 | 126 | neo.indices.OnRemove(func(key string, value interface{}) { 127 | v, ok := value.(*index.Index) 128 | 129 | if ok { 130 | v.Close() 131 | } 132 | }) 133 | 134 | return neo 135 | } 136 | 137 | // GetIndices returns cache indices 138 | func (neo *NeoSearch) GetIndices() cache.Cache { 139 | return neo.indices 140 | } 141 | 142 | // CreateIndex creates and setup a new index 143 | func (neo *NeoSearch) CreateIndex(name string) (*index.Index, error) { 144 | indx, err := index.New(name, neo.config, true) 145 | if err != nil { 146 | return nil, err 147 | } 148 | 149 | neo.indices.Add(name, indx) 150 | cachedIndices.Set(int64(neo.indices.Len())) 151 | return indx, nil 152 | } 153 | 154 | // DeleteIndex does exactly what the name says. 155 | func (neo *NeoSearch) DeleteIndex(name string) error { 156 | // closes the index on remove 157 | neo.indices.Remove(name) 158 | idxLen := neo.indices.Len() 159 | cachedIndices.Set(int64(idxLen)) 160 | 161 | if exists, err := neo.IndexExists(name); exists == true && err == nil { 162 | err := os.RemoveAll(neo.config.DataDir + "/" + name) 163 | return err 164 | } 165 | 166 | return errors.New("Index '" + name + "' not found.") 167 | } 168 | 169 | // OpenIndex open a existing index for read/write operations. 
170 | func (neo *NeoSearch) OpenIndex(name string) (*index.Index, error) { 171 | var ( 172 | ok bool 173 | cacheIndex interface{} 174 | indx *index.Index 175 | err error 176 | ) 177 | 178 | cacheIndex, ok = neo.indices.Get(name) 179 | 180 | if ok && cacheIndex != nil { 181 | indx, ok = cacheIndex.(*index.Index) 182 | 183 | if !ok { 184 | panic("NeoSearch has inconsistent data stored in cache") 185 | } 186 | 187 | return indx, nil 188 | } 189 | 190 | ok, err = neo.IndexExists(name) 191 | 192 | if err == nil && !ok { 193 | return nil, fmt.Errorf("Index '%s' not found in directory '%s'.", name, neo.config.DataDir) 194 | } else if err != nil { 195 | return nil, err 196 | } 197 | 198 | indx, err = index.New(name, neo.config, false) 199 | if err != nil { 200 | return nil, err 201 | } 202 | 203 | neo.indices.Add(name, indx) 204 | cachedIndices.Set(int64(neo.indices.Len())) 205 | return indx, nil 206 | } 207 | 208 | // IndexExists verifies if the directory of the index given by name exists 209 | func (neo *NeoSearch) IndexExists(name string) (bool, error) { 210 | indexPath := neo.config.DataDir + "/" + name 211 | _, err := os.Stat(indexPath) 212 | 213 | if err == nil { 214 | return true, nil 215 | } 216 | 217 | if os.IsNotExist(err) { 218 | return false, nil 219 | } 220 | 221 | return false, err 222 | } 223 | 224 | // Close all of the open indices 225 | func (neo *NeoSearch) Close() { 226 | neo.indices.Clean() 227 | cachedIndices.Set(int64(neo.indices.Len())) 228 | } 229 | -------------------------------------------------------------------------------- /lib/neosearch/search/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | "github.com/NeowayLabs/neosearch/lib/neosearch/index" 8 | ) 9 | 10 | type ( 11 | DSL map[string]interface{} 12 | Result string 13 | ResultSet []Result 14 | ) 15 | 16 | func (d DSL) Map() map[string]interface{} { return map[string]interface{}(d) } 
17 | 18 | func Search(ind *index.Index, dsl DSL, limit uint) ([]string, uint64, error) { 19 | var ( 20 | listOp []interface{} 21 | hasAnd, hasOr bool 22 | resultDocIDs []uint64 23 | ) 24 | 25 | listOp, hasAnd = dsl["$and"].([]interface{}) 26 | 27 | if !hasAnd { 28 | listOp, hasOr = dsl["$or"].([]interface{}) 29 | } 30 | 31 | if !hasAnd && !hasOr { 32 | return nil, 0, errors.New("Invalid search DSL. No $and or $or clause found.") 33 | } 34 | 35 | for _, clause := range listOp { 36 | filter, ok := clause.(map[string]interface{}) 37 | 38 | if !ok { 39 | return nil, 0, fmt.Errorf("Invalid clause '%s'.", clause) 40 | } 41 | 42 | field, value := getFieldValue(filter) 43 | 44 | if field == "" || value == nil { 45 | return nil, 0, fmt.Errorf("Invalid clause '%s'.", clause) 46 | } 47 | 48 | strValue, ok := value.(string) 49 | 50 | if !ok { 51 | return nil, 0, fmt.Errorf("Invalid field value: %s", value) 52 | } 53 | 54 | docIDs, _, err := ind.FilterTermID([]byte(field), []byte(strValue), 0) 55 | 56 | if err != nil { 57 | return nil, 0, err 58 | } 59 | 60 | if len(resultDocIDs) == 0 { 61 | resultDocIDs = docIDs 62 | continue 63 | } 64 | 65 | if hasAnd { 66 | resultDocIDs = and(resultDocIDs, docIDs) 67 | } 68 | } 69 | 70 | results, err := ind.GetDocs(resultDocIDs, limit) 71 | return results, uint64(len(resultDocIDs)), err 72 | } 73 | 74 | // TODO: we need benchmark this algorithm and optimize 75 | func and(a, b []uint64) []uint64 { 76 | var ( 77 | aSize, bSize = len(a), len(b) 78 | maxSize, i, j, resIdx = 0, 0, 0, 0 79 | result []uint64 80 | ) 81 | 82 | if aSize > bSize { 83 | maxSize = aSize 84 | } else { 85 | maxSize = bSize 86 | } 87 | 88 | result = make([]uint64, maxSize) 89 | 90 | for i < aSize && j < bSize { 91 | if a[i] == b[j] { 92 | result[resIdx] = a[i] 93 | i++ 94 | j++ 95 | resIdx++ 96 | } else if a[i] < b[j] { 97 | i++ 98 | continue 99 | } else if a[i] > b[j] { 100 | j++ 101 | continue 102 | } 103 | } 104 | 105 | return result[0:resIdx] 106 | } 107 | 108 | func 
// LVDBIterator wraps a goleveldb iterator.Iterator; every method below
// delegates to the wrapped iterator.
type LVDBIterator struct {
	iterator iterator.Iterator
}

// newIterator returns an iterator created directly from the database handle
// of store, using the default read options and no key range.
func newIterator(store *LVDB) *LVDBIterator {
	options := defaultReadOptions()
	iter := store.db.NewIterator(nil, options)
	return &LVDBIterator{
		iterator: iter,
	}
}

// newIteratorWithSnapshot returns an iterator created from snapshot, using
// the default read options and no key range. The store argument is not used.
func newIteratorWithSnapshot(store *LVDB, snapshot *leveldb.Snapshot) *LVDBIterator {
	options := defaultReadOptions()
	iter := snapshot.NewIterator(nil, options)
	return &LVDBIterator{
		iterator: iter,
	}
}

// Next calls Next on the wrapped iterator; its boolean result is discarded,
// use Valid to check the new position.
func (ldi *LVDBIterator) Next() {
	ldi.iterator.Next()
}

// Prev calls Prev on the wrapped iterator; its boolean result is discarded,
// use Valid to check the new position.
func (ldi *LVDBIterator) Prev() {
	ldi.iterator.Prev()
}

// Valid returns the result of Valid on the wrapped iterator.
func (ldi *LVDBIterator) Valid() bool {
	return ldi.iterator.Valid()
}

// SeekToFirst calls First on the wrapped iterator.
func (ldi *LVDBIterator) SeekToFirst() {
	ldi.iterator.First()
}

// SeekToLast calls Last on the wrapped iterator.
func (ldi *LVDBIterator) SeekToLast() {
	ldi.iterator.Last()
}

// Seek calls Seek(key) on the wrapped iterator.
func (ldi *LVDBIterator) Seek(key []byte) {
	ldi.iterator.Seek(key)
}

// Key returns the key reported by the wrapped iterator.
func (ldi *LVDBIterator) Key() []byte {
	return ldi.iterator.Key()
}

// Value returns the value reported by the wrapped iterator.
func (ldi *LVDBIterator) Value() []byte {
	return ldi.iterator.Value()
}

// GetError returns the error reported by the wrapped iterator.
func (ldi *LVDBIterator) GetError() error {
	return ldi.iterator.Error()
}

// Close releases the wrapped iterator. It always returns nil.
func (ldi *LVDBIterator) Close() error {
	ldi.iterator.Release()
	return nil
}
-------------------------------------------------------------------------------- /lib/neosearch/store/goleveldb/reader.go: -------------------------------------------------------------------------------- 1 | package goleveldb 2 | 3 | import ( 4 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 5 | "github.com/syndtr/goleveldb/leveldb" 6 | ) 7 | 8 | type LVDBReader struct { 9 | store *LVDB 10 | snapshot *leveldb.Snapshot 11 | } 12 | 13 | func newReader(store *LVDB) *LVDBReader { 14 | snapshot, _ := store.db.GetSnapshot() 15 | return &LVDBReader{ 16 | store: store, 17 | snapshot: snapshot, 18 | } 19 | } 20 | 21 | // Get returns the value of the given key 22 | func (reader *LVDBReader) Get(key []byte) ([]byte, error) { 23 | options := defaultReadOptions() 24 | b, err := reader.snapshot.Get(key, options) 25 | if err == leveldb.ErrNotFound { 26 | return nil, nil 27 | } 28 | return b, err 29 | } 30 | 31 | func (reader *LVDBReader) GetIterator() store.KVIterator { 32 | return newIteratorWithSnapshot(reader.store, reader.snapshot) 33 | } 34 | 35 | func (reader *LVDBReader) Close() error { 36 | reader.snapshot.Release() 37 | return nil 38 | } 39 | -------------------------------------------------------------------------------- /lib/neosearch/store/goleveldb/store.go: -------------------------------------------------------------------------------- 1 | package goleveldb 2 | 3 | import ( 4 | "fmt" 5 | "path/filepath" 6 | "sync" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 9 | "github.com/syndtr/goleveldb/leveldb" 10 | "github.com/syndtr/goleveldb/leveldb/filter" 11 | "github.com/syndtr/goleveldb/leveldb/opt" 12 | ) 13 | 14 | // KVName is the name of goleveldb data store 15 | const KVName = "goleveldb" 16 | 17 | type LVDB struct { 18 | debug bool 19 | isBatch bool 20 | dataDir string 21 | 22 | opts *opt.Options 23 | db *leveldb.DB 24 | writeBatch *leveldb.Batch 25 | 26 | onceWriter sync.Once 27 | defWriter *LVDBWriter 28 | } 29 | 30 | func 
LVDBConstructor(config store.KVConfig) (store.KVStore, error) { 31 | return NewLVDB(config) 32 | } 33 | 34 | func init() { 35 | store.RegisterKVStore(KVName, LVDBConstructor) 36 | } 37 | 38 | func NewLVDB(config store.KVConfig) (*LVDB, error) { 39 | lvdb := LVDB{ 40 | debug: false, 41 | isBatch: false, 42 | } 43 | 44 | lvdb.setup(config) 45 | 46 | return &lvdb, nil 47 | } 48 | 49 | func (lvdb *LVDB) setup(config store.KVConfig) { 50 | opts := &opt.Options{} 51 | 52 | debug, ok := config["debug"].(bool) 53 | if ok { 54 | lvdb.debug = debug 55 | } 56 | 57 | if debug { 58 | fmt.Println("Setup goleveldb") 59 | } 60 | 61 | dataDir, ok := config["dataDir"].(string) 62 | if ok { 63 | lvdb.dataDir = dataDir 64 | } else { 65 | lvdb.dataDir = "/tmp" 66 | } 67 | 68 | ro, ok := config["readOnly"].(bool) 69 | if ok { 70 | opts.ReadOnly = ro 71 | } 72 | 73 | cim, ok := config["errorIfMissing"].(bool) 74 | if ok { 75 | opts.ErrorIfMissing = cim 76 | } 77 | 78 | eie, ok := config["errorIfExist"].(bool) 79 | if ok { 80 | opts.ErrorIfExist = eie 81 | } 82 | 83 | wbs, ok := config["writeBuffer"].(float64) 84 | if ok { 85 | opts.WriteBuffer = int(wbs) 86 | } 87 | 88 | bs, ok := config["wlockSize"].(float64) 89 | if ok { 90 | opts.BlockSize = int(bs) 91 | } 92 | 93 | bri, ok := config["blockRestartInterval"].(float64) 94 | if ok { 95 | opts.BlockRestartInterval = int(bri) 96 | } 97 | 98 | lcc, ok := config["blockCacheCapacity"].(float64) 99 | if ok { 100 | opts.BlockCacheCapacity = int(lcc) 101 | } 102 | 103 | bfbpk, ok := config["bloomFilterBitsPerKey"].(float64) 104 | if ok { 105 | bf := filter.NewBloomFilter(int(bfbpk)) 106 | opts.Filter = bf 107 | } 108 | 109 | lvdb.opts = opts 110 | } 111 | 112 | func (lvdb *LVDB) Open(indexName, databaseName string) error { 113 | var err error 114 | 115 | if !store.ValidateDatabaseName(databaseName) { 116 | return fmt.Errorf("Invalid name: %s", databaseName) 117 | } 118 | 119 | // index should exists 120 | fullPath := (lvdb.dataDir + 
string(filepath.Separator) + 121 | indexName + string(filepath.Separator) + databaseName) 122 | 123 | lvdb.db, err = leveldb.OpenFile(fullPath, lvdb.opts) 124 | if err != nil { 125 | return err 126 | } 127 | 128 | if lvdb.debug { 129 | fmt.Printf("Database '%s' open: %s\n", fullPath, err) 130 | } 131 | return nil 132 | } 133 | 134 | // IsOpen returns true if database is open 135 | func (lvdb *LVDB) IsOpen() bool { 136 | return lvdb.db != nil 137 | } 138 | 139 | // Close the database 140 | func (lvdb *LVDB) Close() error { 141 | if lvdb.db != nil { 142 | lvdb.db.Close() 143 | lvdb.db = nil 144 | } 145 | 146 | if lvdb.writeBatch != nil { 147 | lvdb.writeBatch.Reset() 148 | lvdb.writeBatch = nil 149 | lvdb.isBatch = false 150 | } 151 | return nil 152 | } 153 | 154 | // Reader returns a LVDBReader instance 155 | func (lvdb *LVDB) Reader() store.KVReader { 156 | return newReader(lvdb) 157 | } 158 | 159 | // Writer returns the singleton writer 160 | func (lvdb *LVDB) Writer() store.KVWriter { 161 | lvdb.onceWriter.Do(func() { 162 | lvdb.defWriter = newWriter(lvdb) 163 | }) 164 | return lvdb.defWriter 165 | } 166 | -------------------------------------------------------------------------------- /lib/neosearch/store/goleveldb/store_test.go: -------------------------------------------------------------------------------- 1 | package goleveldb 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 10 | "github.com/NeowayLabs/neosearch/lib/neosearch/store/test" 11 | ) 12 | 13 | var DataDirTmp string 14 | 15 | func init() { 16 | var err error 17 | DataDirTmp, err = ioutil.TempDir("/tmp", "neosearch-goleveldb-") 18 | 19 | if err != nil { 20 | panic(err) 21 | } 22 | } 23 | 24 | func openDatabase(t *testing.T, indexName, dbName string) store.KVStore { 25 | var ( 26 | err error 27 | kv store.KVStore 28 | ) 29 | 30 | cfg := store.KVConfig{ 31 | "dataDir": DataDirTmp, 32 | } 33 | 34 | kv, err = 
NewLVDB(cfg) 35 | if err != nil { 36 | t.Error(err) 37 | return nil 38 | } else if kv == nil { 39 | t.Error("Failed to allocate store") 40 | return nil 41 | } 42 | 43 | err = kv.Open(indexName, dbName) 44 | if err != nil { 45 | t.Error(err) 46 | return nil 47 | } 48 | 49 | return kv 50 | } 51 | 52 | func openDatabaseFail(t *testing.T, indexName, dbName string) { 53 | var ( 54 | err error 55 | kv store.KVStore 56 | ) 57 | 58 | cfg := store.KVConfig{ 59 | "dataDir": DataDirTmp, 60 | } 61 | 62 | kv, err = NewLVDB(cfg) 63 | if err != nil { 64 | t.Error(err) 65 | return 66 | } else if kv == nil { 67 | t.Error("Failed to allocate store") 68 | return 69 | } 70 | 71 | err = kv.Open(indexName, dbName) 72 | 73 | if err == nil { 74 | t.Errorf("Should fail... Invalid database name: %s", dbName) 75 | return 76 | } 77 | } 78 | 79 | func TestStoreHasBackend(t *testing.T) { 80 | cfg := store.KVConfig{ 81 | "dataDir": DataDirTmp, 82 | } 83 | 84 | kv, err := NewLVDB(cfg) 85 | if err != nil { 86 | t.Errorf("You need compile this package with -tags : %s", err) 87 | return 88 | } 89 | 90 | if kv == nil { 91 | t.Error("Failed to allocate KVStore") 92 | } 93 | } 94 | 95 | func TestOpenDatabase(t *testing.T) { 96 | shouldPass := []string{ 97 | "123.tt", 98 | /* "9999.db", 99 | "sample.db", 100 | "sample.idx", 101 | "sample_test.db", 102 | "_id.db", 103 | "_all.idx", 104 | "__.idx",*/ 105 | } 106 | 107 | shouldFail := []string{ 108 | "", 109 | "1", 110 | "12", 111 | "123", 112 | "1234", 113 | ".db", 114 | ".idx", 115 | "...db", 116 | "sample", 117 | "sample.", 118 | "sample.a", 119 | "sample/test.db", 120 | } 121 | 122 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-ok", 0755) 123 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-fail", 0755) 124 | 125 | for _, dbname := range shouldPass { 126 | st := openDatabase(t, "sample-ok", dbname) 127 | if st != nil { 128 | st.Close() 129 | } 130 | 131 | os.RemoveAll(DataDirTmp + "/" + dbname) 132 | } 133 | 134 | for _, dbname := 
range shouldFail { 135 | openDatabaseFail(t, "sample-fail", dbname) 136 | //os.RemoveAll(DataDirTmp + "/" + dbname) 137 | } 138 | } 139 | 140 | func TestStoreSetGet(t *testing.T) { 141 | var ( 142 | testDb = "test_set.db" 143 | kv store.KVStore 144 | ) 145 | 146 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-store-set-get", 0755) 147 | if kv = openDatabase(t, "sample-store-set-get", testDb); kv == nil { 148 | return 149 | } 150 | 151 | test.CommonTestStoreSetGet(t, kv) 152 | 153 | kv.Close() 154 | os.RemoveAll(DataDirTmp + "/" + testDb) 155 | } 156 | 157 | func TestBatchWrite(t *testing.T) { 158 | var ( 159 | kv store.KVStore 160 | testDb = "testbatch.db" 161 | ) 162 | 163 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-batch-write", 0755) 164 | if kv = openDatabase(t, "sample-batch-write", testDb); kv == nil { 165 | return 166 | } 167 | 168 | test.CommonTestBatchWrite(t, kv) 169 | 170 | kv.Close() 171 | os.RemoveAll(DataDirTmp + "/" + testDb) 172 | } 173 | 174 | func TestBatchMultiWrite(t *testing.T) { 175 | var ( 176 | kv store.KVStore 177 | testDb = "test_set-multi.db" 178 | ) 179 | 180 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-batch-multi-write", 0755) 181 | if kv = openDatabase(t, "sample-batch-multi-write", testDb); kv == nil { 182 | return 183 | } 184 | 185 | test.CommonTestBatchMultiWrite(t, kv) 186 | 187 | kv.Close() 188 | os.RemoveAll(DataDirTmp + "/" + testDb) 189 | } 190 | 191 | func TestStoreMergeSet(t *testing.T) { 192 | var ( 193 | kv store.KVStore 194 | testDb = "test_mergeset.db" 195 | ) 196 | 197 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-store-merge-set", 0755) 198 | if kv = openDatabase(t, "sample-store-merge-set", testDb); kv == nil { 199 | return 200 | } 201 | 202 | test.CommonTestStoreMergeSet(t, kv) 203 | 204 | kv.Close() 205 | os.RemoveAll(DataDirTmp + "/" + testDb) 206 | } 207 | 208 | func TestStoreIterator(t *testing.T) { 209 | var ( 210 | kv store.KVStore 211 | testDb = "test_iterator.db" 
212 | ) 213 | 214 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-store-iterator", 0755) 215 | if kv = openDatabase(t, "sample-store-iterator", testDb); kv == nil { 216 | return 217 | } 218 | 219 | test.CommonTestStoreIterator(t, kv) 220 | 221 | kv.Close() 222 | os.RemoveAll(DataDirTmp + "/" + testDb) 223 | } 224 | -------------------------------------------------------------------------------- /lib/neosearch/store/goleveldb/util.go: -------------------------------------------------------------------------------- 1 | package goleveldb 2 | 3 | import ( 4 | "github.com/syndtr/goleveldb/leveldb/opt" 5 | ) 6 | 7 | func defaultWriteOptions() *opt.WriteOptions { 8 | wo := &opt.WriteOptions{} 9 | // request fsync on write for safety 10 | wo.Sync = true 11 | return wo 12 | } 13 | 14 | func defaultReadOptions() *opt.ReadOptions { 15 | ro := &opt.ReadOptions{} 16 | return ro 17 | } 18 | -------------------------------------------------------------------------------- /lib/neosearch/store/goleveldb/writer.go: -------------------------------------------------------------------------------- 1 | package goleveldb 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 7 | "github.com/syndtr/goleveldb/leveldb" 8 | ) 9 | 10 | type LVDBWriter struct { 11 | store *LVDB 12 | mutex sync.Mutex 13 | isBatch bool 14 | } 15 | 16 | // newWriter returns a new writer 17 | func newWriter(lvdb *LVDB) *LVDBWriter { 18 | return &LVDBWriter{ 19 | store: lvdb, 20 | } 21 | } 22 | 23 | // Set put or update the key with the given value 24 | func (w *LVDBWriter) Set(key, value []byte) error { 25 | w.mutex.Lock() 26 | defer w.mutex.Unlock() 27 | 28 | if w.isBatch { 29 | // isBatch == true, we can safely access _writeBatch pointer 30 | w.store.writeBatch.Put(key, value) 31 | return nil 32 | } 33 | 34 | options := defaultWriteOptions() 35 | return w.store.db.Put(key, value, options) 36 | } 37 | 38 | // Get returns the value of the given key 39 | func (w 
*LVDBWriter) Get(key []byte) ([]byte, error) { 40 | options := defaultReadOptions() 41 | b, err := w.store.db.Get(key, options) 42 | if err == leveldb.ErrNotFound { 43 | return nil, nil 44 | } 45 | return b, err 46 | } 47 | 48 | // MergeSet add value to a ordered set of integers stored in key. If value 49 | // is already on the key, than the set will be skipped. 50 | func (w *LVDBWriter) MergeSet(key []byte, value uint64) error { 51 | return store.MergeSet(w, key, value, w.store.debug) 52 | } 53 | 54 | func (w *LVDBWriter) Delete(key []byte) error { 55 | w.mutex.Lock() 56 | defer w.mutex.Unlock() 57 | 58 | if w.isBatch { 59 | w.store.writeBatch.Delete(key) 60 | return nil 61 | } 62 | 63 | options := defaultWriteOptions() 64 | return w.store.db.Delete(key, options) 65 | } 66 | 67 | // StartBatch start a new batch write processing 68 | func (w *LVDBWriter) StartBatch() { 69 | w.mutex.Lock() 70 | defer w.mutex.Unlock() 71 | 72 | if w.store.writeBatch == nil { 73 | w.store.writeBatch = new(leveldb.Batch) 74 | } else { 75 | w.store.writeBatch.Reset() 76 | } 77 | 78 | w.isBatch = true 79 | } 80 | 81 | // IsBatch returns true if LVDB is in batch mode 82 | func (w *LVDBWriter) IsBatch() bool { 83 | return w.isBatch 84 | } 85 | 86 | // FlushBatch writes the batch to disk 87 | func (w *LVDBWriter) FlushBatch() error { 88 | var err error 89 | 90 | w.mutex.Lock() 91 | defer w.mutex.Unlock() 92 | 93 | if w.store.writeBatch != nil { 94 | options := defaultWriteOptions() 95 | err = w.store.db.Write(w.store.writeBatch, options) 96 | // After flush, release the writeBatch for future uses 97 | w.store.writeBatch.Reset() 98 | w.isBatch = false 99 | } 100 | 101 | return err 102 | } 103 | -------------------------------------------------------------------------------- /lib/neosearch/store/leveldb/iterator.go: -------------------------------------------------------------------------------- 1 | // +build leveldb 2 | 3 | package leveldb 4 | 5 | import "github.com/jmhodges/levigo" 6 | 7 | 
// Simple wrapper around levigo.Iterator to proper implement the KVIterator interface 8 | type LVDBIterator struct { 9 | *levigo.Iterator 10 | } 11 | 12 | // Close the iterator. It's only a wrapper for levigo.Iterator, that does not returns error in Close 13 | // method 14 | func (i LVDBIterator) Close() error { 15 | i.Iterator.Close() 16 | return nil 17 | } 18 | -------------------------------------------------------------------------------- /lib/neosearch/store/leveldb/reader.go: -------------------------------------------------------------------------------- 1 | // +build leveldb 2 | 3 | package leveldb 4 | 5 | import ( 6 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 7 | "github.com/jmhodges/levigo" 8 | ) 9 | 10 | // LVDBReader is the readonly view of database. It implements the KVReader interface 11 | type LVDBReader struct { 12 | store *LVDB 13 | snapshot *levigo.Snapshot 14 | } 15 | 16 | // newReader returns a new reader 17 | func newReader(store *LVDB) store.KVReader { 18 | return &LVDBReader{ 19 | store: store, 20 | snapshot: store.db.NewSnapshot(), 21 | } 22 | } 23 | 24 | // Get returns the value of the given key 25 | func (reader *LVDBReader) Get(key []byte) ([]byte, error) { 26 | options := defaultReadOptions() 27 | options.SetSnapshot(reader.snapshot) 28 | b, err := reader.store.db.Get(options, key) 29 | options.Close() 30 | return b, err 31 | } 32 | 33 | // GetIterator returns a new KVIterator 34 | func (reader *LVDBReader) GetIterator() store.KVIterator { 35 | options := defaultReadOptions() 36 | options.SetSnapshot(reader.snapshot) 37 | it := reader.store.db.NewIterator(options) 38 | options.Close() 39 | return &LVDBIterator{it} 40 | } 41 | 42 | func (reader *LVDBReader) Close() error { 43 | reader.store.db.ReleaseSnapshot(reader.snapshot) 44 | return nil 45 | } 46 | -------------------------------------------------------------------------------- /lib/neosearch/store/leveldb/store.go: 
-------------------------------------------------------------------------------- 1 | // +build leveldb 2 | 3 | package leveldb 4 | 5 | import ( 6 | "fmt" 7 | "path/filepath" 8 | "sync" 9 | 10 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 11 | "github.com/jmhodges/levigo" 12 | ) 13 | 14 | // KVName is the name of leveldb data store 15 | const KVName = "leveldb" 16 | 17 | // LVDB is the leveldb interface exposed by NeoSearch 18 | type LVDB struct { 19 | debug bool 20 | isBatch bool 21 | dataDir string 22 | 23 | opts *levigo.Options 24 | db *levigo.DB 25 | writeBatch *levigo.WriteBatch 26 | 27 | onceWriter sync.Once 28 | defWriter store.KVWriter 29 | } 30 | 31 | // LVDBConstructor build the constructor 32 | func LVDBConstructor(config store.KVConfig) (store.KVStore, error) { 33 | return NewLVDB(config) 34 | } 35 | 36 | // Registry the leveldb module 37 | func init() { 38 | store.RegisterKVStore(KVName, LVDBConstructor) 39 | } 40 | 41 | // NewLVDB creates a new leveldb instance 42 | func NewLVDB(config store.KVConfig) (*LVDB, error) { 43 | lvdb := LVDB{ 44 | debug: false, 45 | isBatch: false, 46 | } 47 | 48 | lvdb.setup(config) 49 | 50 | return &lvdb, nil 51 | } 52 | 53 | // Setup the leveldb instance 54 | func (lvdb *LVDB) setup(config store.KVConfig) { 55 | debug, ok := config["debug"].(bool) 56 | if ok { 57 | lvdb.debug = debug 58 | } 59 | 60 | if debug { 61 | fmt.Println("Setup leveldb") 62 | } 63 | 64 | dataDir, ok := config["dataDir"].(string) 65 | if ok { 66 | lvdb.dataDir = dataDir 67 | } else { 68 | lvdb.dataDir = "/tmp" 69 | } 70 | 71 | lvdb.opts = levigo.NewOptions() 72 | 73 | enableCache, ok := config["enableCache"].(bool) 74 | if ok && enableCache { 75 | cacheSize, _ := config["cacheSize"].(int) 76 | if cacheSize == 0 && enableCache { 77 | cacheSize = 3 << 30 78 | } 79 | lvdb.opts.SetCache(levigo.NewLRUCache(cacheSize)) 80 | } 81 | 82 | lvdb.opts.SetCreateIfMissing(true) 83 | } 84 | 85 | // Open the database 86 | func (lvdb *LVDB) Open(indexName, 
// Reader returns a new LVDBReader on every call (it is not a singleton):
// newReader takes a fresh database snapshot for each reader. Callers should
// Close the reader when done so that the snapshot is released.
func (lvdb *LVDB) Reader() store.KVReader {
	return newReader(lvdb)
}
string 16 | 17 | func init() { 18 | var err error 19 | DataDirTmp, err = ioutil.TempDir("/tmp", "neosearch-leveldb-") 20 | 21 | if err != nil { 22 | panic(err) 23 | } 24 | } 25 | 26 | func openDatabase(t *testing.T, indexName, dbName string) store.KVStore { 27 | var ( 28 | err error 29 | kv store.KVStore 30 | ) 31 | 32 | cfg := store.KVConfig{ 33 | "dataDir": DataDirTmp, 34 | } 35 | 36 | kv, err = NewLVDB(cfg) 37 | if err != nil { 38 | t.Error(err) 39 | return nil 40 | } else if kv == nil { 41 | t.Error("Failed to allocate store") 42 | return nil 43 | } 44 | 45 | err = kv.Open(indexName, dbName) 46 | if err != nil { 47 | t.Error(err) 48 | return nil 49 | } 50 | 51 | return kv 52 | } 53 | 54 | func openDatabaseFail(t *testing.T, indexName, dbName string) { 55 | var ( 56 | err error 57 | kv store.KVStore 58 | ) 59 | 60 | cfg := store.KVConfig{ 61 | "dataDir": DataDirTmp, 62 | } 63 | 64 | kv, err = NewLVDB(cfg) 65 | if err != nil { 66 | t.Error(err) 67 | return 68 | } else if kv == nil { 69 | t.Error("Failed to allocate store") 70 | return 71 | } 72 | 73 | err = kv.Open(indexName, dbName) 74 | 75 | if err == nil { 76 | t.Errorf("Should fail... 
Invalid database name: %s", dbName) 77 | return 78 | } 79 | } 80 | 81 | func TestStoreHasBackend2(t *testing.T) { 82 | cfg := store.KVConfig{ 83 | "dataDir": DataDirTmp, 84 | } 85 | 86 | kv, err := NewLVDB(cfg) 87 | if err != nil { 88 | t.Errorf("You need compile this package with -tags : %s", err) 89 | return 90 | } 91 | 92 | if kv == nil { 93 | t.Error("Failed to allocate KVStore") 94 | } 95 | } 96 | 97 | func TestOpenDatabase2(t *testing.T) { 98 | shouldPass := []string{ 99 | "123.tt", 100 | /* "9999.db", 101 | "sample.db", 102 | "sample.idx", 103 | "sample_test.db", 104 | "_id.db", 105 | "_all.idx", 106 | "__.idx",*/ 107 | } 108 | 109 | shouldFail := []string{ 110 | "", 111 | "1", 112 | "12", 113 | "123", 114 | "1234", 115 | ".db", 116 | ".idx", 117 | "...db", 118 | "sample", 119 | "sample.", 120 | "sample.a", 121 | "sample/test.db", 122 | } 123 | 124 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-ok", 0755) 125 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-fail", 0755) 126 | 127 | for _, dbname := range shouldPass { 128 | st := openDatabase(t, "sample-ok", dbname) 129 | if st != nil { 130 | st.Close() 131 | } 132 | 133 | os.RemoveAll(DataDirTmp + "/" + dbname) 134 | } 135 | 136 | for _, dbname := range shouldFail { 137 | openDatabaseFail(t, "sample-fail", dbname) 138 | //os.RemoveAll(DataDirTmp + "/" + dbname) 139 | } 140 | } 141 | 142 | func TestStoreSetGet2(t *testing.T) { 143 | var ( 144 | kv store.KVStore 145 | testDb = "test_set.db" 146 | ) 147 | 148 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-store-set-get", 0755) 149 | if kv = openDatabase(t, "sample-store-set-get", testDb); kv == nil { 150 | return 151 | } 152 | 153 | test.CommonTestStoreSetGet(t, kv) 154 | 155 | kv.Close() 156 | os.RemoveAll(DataDirTmp + "/" + testDb) 157 | } 158 | 159 | func TestBatchWrite2(t *testing.T) { 160 | var ( 161 | kv store.KVStore 162 | testDb = "testbatch.db" 163 | ) 164 | 165 | 
os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-batch-write", 0755) 166 | if kv = openDatabase(t, "sample-batch-write", testDb); kv == nil { 167 | return 168 | } 169 | 170 | test.CommonTestBatchWrite(t, kv) 171 | 172 | kv.Close() 173 | os.RemoveAll(DataDirTmp + "/" + testDb) 174 | } 175 | 176 | func TestBatchMultiWrite2(t *testing.T) { 177 | var ( 178 | kv store.KVStore 179 | testDb = "test_set-multi.db" 180 | ) 181 | 182 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-batch-multi-write", 0755) 183 | if kv = openDatabase(t, "sample-batch-multi-write", testDb); kv == nil { 184 | return 185 | } 186 | 187 | test.CommonTestBatchMultiWrite(t, kv) 188 | 189 | kv.Close() 190 | os.RemoveAll(DataDirTmp + "/" + testDb) 191 | } 192 | 193 | func TestStoreMergeSet2(t *testing.T) { 194 | var ( 195 | kv store.KVStore 196 | testDb = "test_mergeset.db" 197 | ) 198 | 199 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-store-merge-set", 0755) 200 | if kv = openDatabase(t, "sample-store-merge-set", testDb); kv == nil { 201 | return 202 | } 203 | 204 | test.CommonTestStoreMergeSet(t, kv) 205 | 206 | kv.Close() 207 | os.RemoveAll(DataDirTmp + "/" + testDb) 208 | } 209 | 210 | func TestStoreIterator2(t *testing.T) { 211 | var ( 212 | kv store.KVStore 213 | testDb = "test_iterator.db" 214 | ) 215 | 216 | os.Mkdir(DataDirTmp+string(filepath.Separator)+"sample-store-iterator", 0755) 217 | if kv = openDatabase(t, "sample-store-iterator", testDb); kv == nil { 218 | return 219 | } 220 | 221 | test.CommonTestStoreIterator(t, kv) 222 | 223 | kv.Close() 224 | os.RemoveAll(DataDirTmp + "/" + testDb) 225 | } 226 | -------------------------------------------------------------------------------- /lib/neosearch/store/leveldb/util.go: -------------------------------------------------------------------------------- 1 | // +build leveldb 2 | 3 | package leveldb 4 | 5 | import ( 6 | "github.com/jmhodges/levigo" 7 | ) 8 | 9 | func defaultWriteOptions() *levigo.WriteOptions { 10 | 
wo := levigo.NewWriteOptions() 11 | // request fsync on write for safety 12 | wo.SetSync(true) 13 | return wo 14 | } 15 | 16 | func defaultReadOptions() *levigo.ReadOptions { 17 | ro := levigo.NewReadOptions() 18 | return ro 19 | } 20 | -------------------------------------------------------------------------------- /lib/neosearch/store/leveldb/writer.go: -------------------------------------------------------------------------------- 1 | // +build leveldb 2 | 3 | package leveldb 4 | 5 | import ( 6 | "sync" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/store" 9 | "github.com/jmhodges/levigo" 10 | ) 11 | 12 | type LVDBWriter struct { 13 | store *LVDB 14 | mutex sync.Mutex 15 | isBatch bool 16 | } 17 | 18 | // newWriter returns a new writer 19 | func newWriter(lvdb *LVDB) *LVDBWriter { 20 | return &LVDBWriter{ 21 | store: lvdb, 22 | } 23 | } 24 | 25 | // Set put or update the key with the given value 26 | func (w *LVDBWriter) Set(key []byte, value []byte) error { 27 | w.mutex.Lock() 28 | defer w.mutex.Unlock() 29 | 30 | if w.isBatch { 31 | // isBatch == true, we can safely access _writeBatch pointer 32 | w.store.writeBatch.Put(key, value) 33 | return nil 34 | } 35 | 36 | options := defaultWriteOptions() 37 | err := w.store.db.Put(options, key, value) 38 | options.Close() 39 | return err 40 | } 41 | 42 | // Get returns the value of the given key 43 | func (w *LVDBWriter) Get(key []byte) ([]byte, error) { 44 | options := defaultReadOptions() 45 | b, e := w.store.db.Get(options, key) 46 | options.Close() 47 | return b, e 48 | } 49 | 50 | // MergeSet add value to a ordered set of integers stored in key. If value 51 | // is already on the key, than the set will be skipped. 
52 | func (w *LVDBWriter) MergeSet(key []byte, value uint64) error { 53 | return store.MergeSet(w, key, value, w.store.debug) 54 | } 55 | 56 | // Delete remove the given key 57 | func (w *LVDBWriter) Delete(key []byte) error { 58 | w.mutex.Lock() 59 | defer w.mutex.Unlock() 60 | 61 | if w.isBatch { 62 | w.store.writeBatch.Delete(key) 63 | return nil 64 | } 65 | 66 | options := defaultWriteOptions() 67 | err := w.store.db.Delete(options, key) 68 | options.Close() 69 | return err 70 | } 71 | 72 | // StartBatch start a new batch write processing 73 | func (w *LVDBWriter) StartBatch() { 74 | w.mutex.Lock() 75 | defer w.mutex.Unlock() 76 | 77 | if w.store.writeBatch == nil { 78 | w.store.writeBatch = levigo.NewWriteBatch() 79 | } else { 80 | w.store.writeBatch.Clear() 81 | } 82 | 83 | w.isBatch = true 84 | } 85 | 86 | // IsBatch returns true if LVDB is in batch mode 87 | func (w *LVDBWriter) IsBatch() bool { 88 | return w.isBatch 89 | } 90 | 91 | // FlushBatch writes the batch to disk 92 | func (w *LVDBWriter) FlushBatch() error { 93 | var err error 94 | 95 | w.mutex.Lock() 96 | defer w.mutex.Unlock() 97 | 98 | if w.store.writeBatch != nil { 99 | options := defaultWriteOptions() 100 | err = w.store.db.Write(options, w.store.writeBatch) 101 | options.Close() 102 | // After flush, release the writeBatch for future uses 103 | w.store.writeBatch.Clear() 104 | w.isBatch = false 105 | } 106 | 107 | return err 108 | } 109 | -------------------------------------------------------------------------------- /lib/neosearch/store/registry.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import "fmt" 4 | 5 | // KVStoreConstructor is the register function that every store backend need to implement. 
6 | type KVStoreConstructor func(KVConfig) (KVStore, error) 7 | 8 | type KVStoreRegistry map[string]KVStoreConstructor 9 | 10 | var stores = make(KVStoreRegistry, 0) 11 | 12 | func RegisterKVStore(name string, constructor KVStoreConstructor) { 13 | _, exists := stores[name] 14 | if exists { 15 | panic(fmt.Errorf("attempted to register duplicate store named '%s'", name)) 16 | } 17 | stores[name] = constructor 18 | } 19 | 20 | func KVStoreConstructorByName(name string) KVStoreConstructor { 21 | return stores[name] 22 | } 23 | -------------------------------------------------------------------------------- /lib/neosearch/store/store.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | // KVReader is a reader safe for concurrent reads. 4 | type KVReader interface { 5 | Get([]byte) ([]byte, error) 6 | GetIterator() KVIterator 7 | Close() error 8 | } 9 | 10 | // KVWriter is a writer safe for concurrent writes. 11 | type KVWriter interface { 12 | Set([]byte, []byte) error 13 | Get([]byte) ([]byte, error) 14 | MergeSet([]byte, uint64) error 15 | Delete([]byte) error 16 | 17 | StartBatch() 18 | FlushBatch() error 19 | IsBatch() bool 20 | } 21 | 22 | // KVStore is the key/value store interface for backend kv stores. 23 | type KVStore interface { 24 | // Open the database 25 | Open(string, string) error 26 | IsOpen() bool 27 | 28 | Reader() KVReader 29 | Writer() KVWriter 30 | 31 | // Close the database 32 | Close() error 33 | } 34 | 35 | // KVIterator expose the interface for database iterators. 
// databaseNamePartRx matches one dot-separated component of a database name.
// It is compiled once at package initialisation instead of on every call.
var databaseNamePartRx = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)

// ValidateDatabaseName reports whether name is an acceptable database name.
// A valid name is at least 3 characters long, consists of two or more
// dot-separated components made only of letters, digits, '_' and '-', and
// its last component (the extension) has at least 2 characters.
func ValidateDatabaseName(name string) bool {
	if len(name) < 3 {
		return false
	}

	parts := strings.Split(name, ".")
	if len(parts) < 2 {
		return false
	}

	// invalid extension
	if len(parts[len(parts)-1]) < 2 {
		return false
	}

	for _, part := range parts {
		if !databaseNamePartRx.MatchString(part) {
			return false
		}
	}

	return true
}
41 | func MergeSet(writer KVWriter, key []byte, value uint64, debug bool) error { 42 | var ( 43 | buf *bytes.Buffer 44 | err error 45 | v uint64 46 | i uint64 47 | inserted bool 48 | ) 49 | 50 | data, err := writer.Get(key) 51 | if err != nil { 52 | return err 53 | } 54 | 55 | if debug { 56 | fmt.Printf("[INFO] %d ids == %d GB of ids\n", len(data)/8, len(data)/(1024*1024*1024)) 57 | } 58 | 59 | buf = new(bytes.Buffer) 60 | lenBytes := uint64(len(data)) 61 | 62 | // O(n) 63 | for i = 0; i < lenBytes; i += 8 { 64 | v = utils.BytesToUint64(data[i : i+8]) 65 | 66 | // returns if value is already stored 67 | if v == value { 68 | return nil 69 | } 70 | 71 | if value < v { 72 | err = binary.Write(buf, binary.BigEndian, value) 73 | if err != nil { 74 | return err 75 | } 76 | inserted = true 77 | } 78 | 79 | err = binary.Write(buf, binary.BigEndian, v) 80 | if err != nil { 81 | return err 82 | } 83 | } 84 | 85 | if lenBytes == 0 || !inserted { 86 | err = binary.Write(buf, binary.BigEndian, value) 87 | if err != nil { 88 | return err 89 | } 90 | } 91 | 92 | return writer.Set(key, buf.Bytes()) 93 | } 94 | -------------------------------------------------------------------------------- /lib/neosearch/utils/arrays.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | type Uint64Slice []uint64 4 | 5 | func (p Uint64Slice) Len() int { return len(p) } 6 | func (p Uint64Slice) Less(i, j int) bool { return p[i] < p[j] } 7 | func (p Uint64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 8 | 9 | func UniqueUint64Add(slices []uint64, i uint64) []uint64 { 10 | for _, e := range slices { 11 | if e == i { 12 | return slices 13 | } 14 | } 15 | 16 | return append(slices, i) 17 | } 18 | 19 | func UniqueIntAdd(slices []int, i int) []int { 20 | for _, e := range slices { 21 | if e == i { 22 | return slices 23 | } 24 | } 25 | 26 | return append(slices, i) 27 | } 28 | 
// BytesToBool decodes a boolean from the first byte of b: zero is false and
// any other value is true. It panics when b is empty.
func BytesToBool(b []byte) bool {
	if len(b) == 0 {
		panic("invalid boolean byte-array")
	}

	return b[0] != 0
}
// GetUint64Array decodes data as a sequence of 8-byte big-endian unsigned
// integers and returns them in order. Trailing bytes that do not form a
// complete 8-byte group are ignored. The result is never nil.
func GetUint64Array(data []byte) []uint64 {
	count := len(data) / 8
	uints := make([]uint64, count)

	// idx counts decoded values; the byte offset of value idx is idx*8.
	// (Indexing the result by byte offset would run out of range as soon as
	// data holds more than one value.)
	for idx := range uints {
		offset := idx * 8
		uints[idx] = binary.BigEndian.Uint64(data[offset : offset+8])
	}

	return uints
}
// Int64FromInterface converts value to an int64, using kind to select how the
// dynamic value is interpreted:
//
//   - reflect.String: value must be a string holding a base-10 integer;
//   - reflect.Float32/Float64: value must be a float64, which is truncated;
//   - signed integer kinds: value must be an int64 and is returned as is;
//   - unsigned integer kinds: value must be a uint64 and is converted.
//
// The concrete types accepted (string, float64, int64, uint64) mirror the
// assertions used by the sibling converters in this file.
// NOTE(review): presumably callers normalise numbers to those 64-bit types
// before calling — confirm.
//
// Any other kind, or a value whose dynamic type does not match the kind,
// yields an error instead of a panic.
func Int64FromInterface(value interface{}, kind reflect.Kind) (int64, error) {
	switch kind {
	case reflect.String:
		text, ok := value.(string)
		if !ok {
			break
		}

		// ParseInt with bitSize 64 keeps the full int64 range on every
		// platform, unlike Atoi which is limited to the native int width.
		parsed, err := strconv.ParseInt(text, 10, 64)
		if err != nil {
			return 0, fmt.Errorf("Failed to convert '%s' to int64", text)
		}

		return parsed, nil
	case reflect.Float32, reflect.Float64:
		if f, ok := value.(float64); ok {
			return int64(f), nil
		}
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		if i, ok := value.(int64); ok {
			return i, nil
		}
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		if u, ok := value.(uint64); ok {
			return int64(u), nil
		}
	}

	// %v (not %s) so that non-string values are rendered readably.
	return 0, fmt.Errorf("Impossible to convert '%v' to int64", value)
}
-------------------------------------------------------------------------------- 1 | from neowaylabs/neosearch-dev-env:latest 2 | 3 | MAINTAINER Tiago Natel de Moura (@tiago4orion) 4 | 5 | RUN cd /tmp && git clone https://github.com/NeowayLabs/neosearch.git && \ 6 | cp -R /tmp/neosearch/* /go/src/github.com/NeowayLabs/neosearch/ 7 | 8 | WORKDIR /go/src/github.com/NeowayLabs/neosearch 9 | 10 | ENV STORAGE_ENGINE leveldb 11 | RUN hack/make.sh server 12 | 13 | VOLUME ["/data"] 14 | 15 | EXPOSE 9500 16 | 17 | CMD ["./bundles/0.1.0/server/neosearch", "-d", "/data"] -------------------------------------------------------------------------------- /service/neosearch/README.md: -------------------------------------------------------------------------------- 1 | # NeoSearch Server 2 | 3 | # Install 4 | 5 | ``` 6 | go get -tags leveldb -v github.com/NeowayLabs/neosearch 7 | ``` 8 | 9 | # Run 10 | 11 | ``` 12 | $GOPATH/bin/neosearch -d /data 13 | ``` 14 | 15 | # Hacking 16 | 17 | ``` 18 | go get -tags leveldb github.com/NeowayLabs/neosearch 19 | cd $GOPATH/src/github.com/NeowayLabs/neosearch 20 | make check 21 | ``` 22 | 23 | Docs soon ... -------------------------------------------------------------------------------- /service/neosearch/config.yml: -------------------------------------------------------------------------------- 1 | # neosearch configuration file 2 | 3 | # Root directory where all of the indices will be written. 4 | dataDir: /data 5 | 6 | # Enables debug in every neosearch module 7 | debug: false 8 | 9 | # maxIndicesOpen is the max number of indices maintained open by neosearch 10 | # for cached searchs 11 | maxIndicesOpen: 50 12 | 13 | engine: 14 | # openCacheSize is the value for the maximum number of 15 | # open database files. 16 | openCacheSize: 100 17 | # batchSize is the size of cached operations before 18 | # a write batch occurs. 
19 | batchSize: 5000 20 | # kvstore set the kvstore to be used 21 | kvstore: goleveldb 22 | # kvstoreConfig set specific options for the kvstore 23 | kvconfig: *KVSTORE_CONFIG 24 | 25 | goleveldb: &KVSTORE_CONFIG 26 | # WriteBuffer defines maximum size of a 'memdb' before flushed to 27 | # 'sorted table'. 'memdb' is an in-memory DB backed by an on-disk 28 | # unsorted journal. 29 | writeBuffer: 4194304 30 | # BlockSize is the minimum uncompressed size in bytes of each 'sorted table' 31 | # block. 32 | blockSize: 4096 33 | # BlockRestartInterval is the number of keys between restart points for 34 | # delta encoding of keys. 35 | blockRestartInterval: 16 36 | # BlockCacheCapacity defines the capacity of the 'sorted table' block caching. 37 | # Default is 4MB 38 | blockCacheCapacity: 1073741824 39 | bloomFilterBitsPerKey: 16 40 | 41 | leveldb: 42 | # enable/disable cache support 43 | enableCache: true 44 | # CacheSize is the length of LRU cache used by the storage engine 45 | # Default is 1GB 46 | cacheSize: 1073741824 47 | -------------------------------------------------------------------------------- /service/neosearch/handler/default.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "encoding/json" 5 | "log" 6 | "net/http" 7 | 8 | "github.com/julienschmidt/httprouter" 9 | ) 10 | 11 | type DefaultHandler struct { 12 | requestVars map[string]string 13 | } 14 | 15 | func (h *DefaultHandler) Error(res http.ResponseWriter, errMessage string) { 16 | errObject := map[string]interface{}{ 17 | "error": errMessage, 18 | } 19 | 20 | body, err := json.Marshal(errObject) 21 | 22 | if err != nil { 23 | log.Println("Failed to marshal error object") 24 | return 25 | } 26 | 27 | h.WriteJSON(res, body) 28 | } 29 | 30 | func (h *DefaultHandler) WriteJSON(res http.ResponseWriter, content []byte) { 31 | res.Header().Set("Content-Type", "application/json") 32 | res.Write(content) 33 | } 34 | 35 | func (h 
*DefaultHandler) WriteJSONObject(res http.ResponseWriter, content interface{}) { 36 | res.Header().Set("Content-Type", "application/json") 37 | 38 | body, err := json.Marshal(content) 39 | 40 | if err != nil { 41 | log.Printf("Failed to marshal JSON: %s", err.Error()) 42 | return 43 | } 44 | 45 | h.WriteJSON(res, body) 46 | } 47 | 48 | func (h *DefaultHandler) ProcessVars(ps httprouter.Params) map[string]string { 49 | h.requestVars = make(map[string]string) 50 | h.requestVars["index"] = ps.ByName("index") 51 | h.requestVars["id"] = ps.ByName("id") 52 | 53 | return h.requestVars 54 | } 55 | 56 | func (h *DefaultHandler) GetIndexName() string { 57 | if h.requestVars == nil { 58 | return "" 59 | } 60 | 61 | return h.requestVars["index"] 62 | } 63 | 64 | func (h *DefaultHandler) GetDocumentID() string { 65 | if h.requestVars == nil { 66 | return "" 67 | } 68 | 69 | return h.requestVars["id"] 70 | } 71 | -------------------------------------------------------------------------------- /service/neosearch/home/home.go: -------------------------------------------------------------------------------- 1 | package home 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/NeowayLabs/neosearch/lib/neosearch" 7 | "github.com/NeowayLabs/neosearch/lib/neosearch/version" 8 | "github.com/NeowayLabs/neosearch/service/neosearch/handler" 9 | "github.com/julienschmidt/httprouter" 10 | ) 11 | 12 | type HomeHandler struct { 13 | search *neosearch.NeoSearch 14 | handler.DefaultHandler 15 | } 16 | 17 | func NewHomeHandler(ns *neosearch.NeoSearch) *HomeHandler { 18 | return &HomeHandler{ 19 | search: ns, 20 | } 21 | } 22 | 23 | func (handler *HomeHandler) ServeHTTP(res http.ResponseWriter, req *http.Request, ps httprouter.Params) { 24 | response := map[string]string{ 25 | "version": version.Version, 26 | "status": "alive", 27 | } 28 | 29 | handler.WriteJSONObject(res, response) 30 | } 31 | -------------------------------------------------------------------------------- 
/service/neosearch/home/home_test.go: -------------------------------------------------------------------------------- 1 | package home 2 | 3 | import ( 4 | "io/ioutil" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch" 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 10 | "github.com/julienschmidt/httprouter" 11 | ) 12 | 13 | var dataDirTmp string 14 | 15 | func init() { 16 | var err error 17 | dataDirTmp, err = ioutil.TempDir("/tmp", "neosearch-service-home-") 18 | if err != nil { 19 | panic(err) 20 | } 21 | } 22 | 23 | func getHomeHandler() *HomeHandler { 24 | cfg := config.NewConfig() 25 | cfg.Option(config.DataDir(dataDirTmp)) 26 | ns := neosearch.New(cfg) 27 | return NewHomeHandler(ns) 28 | } 29 | 30 | func TestHomeInfo(t *testing.T) { 31 | handler := getHomeHandler() 32 | 33 | router := httprouter.New() 34 | 35 | router.Handle("GET", "/", handler.ServeHTTP) 36 | 37 | // router.HandleFunc("/{index}", func(res http.ResponseWriter, req *http.Request) { 38 | // handler.ServeHTTP(res, req) 39 | // }).Methods("DELETE") 40 | 41 | ts := httptest.NewServer(router) 42 | 43 | defer func() { 44 | ts.Close() 45 | handler.search.Close() 46 | }() 47 | } 48 | -------------------------------------------------------------------------------- /service/neosearch/index/add.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "io/ioutil" 8 | "net/http" 9 | "strconv" 10 | 11 | "github.com/NeowayLabs/neosearch/lib/neosearch" 12 | nsindex "github.com/NeowayLabs/neosearch/lib/neosearch/index" 13 | "github.com/NeowayLabs/neosearch/service/neosearch/handler" 14 | "github.com/julienschmidt/httprouter" 15 | ) 16 | 17 | type AddHandler struct { 18 | handler.DefaultHandler 19 | search *neosearch.NeoSearch 20 | } 21 | 22 | func NewAddHandler(search *neosearch.NeoSearch) *AddHandler { 23 | return &AddHandler{ 24 | search: 
search, 25 | } 26 | } 27 | 28 | func (handler *AddHandler) ServeHTTP(res http.ResponseWriter, req *http.Request, ps httprouter.Params) { 29 | var ( 30 | document []byte 31 | err error 32 | exists bool 33 | docID string 34 | docIntID int 35 | ) 36 | 37 | handler.ProcessVars(ps) 38 | indexName := handler.GetIndexName() 39 | 40 | if exists, err = handler.search.IndexExists(indexName); exists != true && err == nil { 41 | response := map[string]string{ 42 | "error": "Index '" + indexName + "' doesn't exists.", 43 | } 44 | 45 | handler.WriteJSONObject(res, response) 46 | return 47 | } else if exists == false && err != nil { 48 | res.WriteHeader(http.StatusInternalServerError) 49 | handler.Error(res, err.Error()) 50 | return 51 | } 52 | 53 | if req.Method != "POST" { 54 | err = errors.New("Add document expect a POST request") 55 | goto error_fatal 56 | } 57 | 58 | docID = handler.GetDocumentID() 59 | docIntID, err = strconv.Atoi(docID) 60 | 61 | if err != nil { 62 | goto error_fatal 63 | } 64 | 65 | document, err = ioutil.ReadAll(req.Body) 66 | 67 | if err != nil { 68 | goto error_fatal 69 | } 70 | 71 | err = handler.addDocument(indexName, uint64(docIntID), document) 72 | 73 | if err != nil { 74 | goto error_fatal 75 | } 76 | 77 | handler.WriteJSON(res, []byte(fmt.Sprintf("{\"status\": \"Document %d indexed.\"}", docIntID))) 78 | 79 | return 80 | 81 | error_fatal: 82 | if err != nil { 83 | res.WriteHeader(http.StatusBadRequest) 84 | handler.Error(res, err.Error()) 85 | return 86 | } 87 | } 88 | 89 | func (handler *AddHandler) addDocument(indexName string, id uint64, document []byte) error { 90 | docmeta := make(map[string]interface{}) 91 | 92 | err := json.Unmarshal(document, &docmeta) 93 | 94 | if err != nil { 95 | return err 96 | } 97 | 98 | metadata, ok := docmeta["metadata"].(map[string]interface{}) 99 | 100 | if !ok { 101 | if docmeta["metadata"] == nil { 102 | metadata = nsindex.Metadata{} 103 | } else { 104 | return fmt.Errorf("Invalid document metadata: %s", 
string(document)) 105 | } 106 | } 107 | 108 | doc, ok := docmeta["doc"].(map[string]interface{}) 109 | 110 | if !ok { 111 | return fmt.Errorf("Invalid document: %s", string(document)) 112 | } 113 | 114 | index, err := handler.search.OpenIndex(indexName) 115 | 116 | if err != nil { 117 | return err 118 | } 119 | 120 | docJSON, err := json.Marshal(doc) 121 | 122 | if err != nil { 123 | return err 124 | } 125 | 126 | return index.Add(id, docJSON, nsindex.Metadata(metadata)) 127 | } 128 | -------------------------------------------------------------------------------- /service/neosearch/index/add_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io/ioutil" 8 | "net/http" 9 | "net/http/httptest" 10 | "strconv" 11 | "testing" 12 | 13 | "github.com/NeowayLabs/neosearch/lib/neosearch" 14 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 15 | "github.com/julienschmidt/httprouter" 16 | ) 17 | 18 | func getAddDocHandler() *AddHandler { 19 | cfg := config.NewConfig() 20 | cfg.Option(config.DataDir(dataDirTmp)) 21 | ns := neosearch.New(cfg) 22 | 23 | handler := NewAddHandler(ns) 24 | 25 | return handler 26 | } 27 | 28 | func TestAddDocumentsOK(t *testing.T) { 29 | handler := getAddDocHandler() 30 | 31 | defer func() { 32 | handler.search.DeleteIndex("test-ok") 33 | handler.search.Close() 34 | }() 35 | 36 | _, err := handler.search.CreateIndex("test-ok") 37 | 38 | if err != nil { 39 | t.Error(err) 40 | return 41 | } 42 | 43 | for i, doc := range []string{ 44 | `{"doc": {"id": 0, "bleh": "test"}}`, 45 | `{"doc": {"id": 1, "title": "ldjfjl"}}`, 46 | `{"doc": {"id": 2, "title": "hjdfskhfk"}}`, 47 | } { 48 | 49 | err = handler.addDocument("test-ok", uint64(i), []byte(doc)) 50 | 51 | if err != nil { 52 | t.Error(err) 53 | return 54 | } 55 | } 56 | } 57 | 58 | func TestAddDocumentsREST_OK(t *testing.T) { 59 | handler := getAddDocHandler() 60 | router := 
httprouter.New() 61 | router.Handle("POST", "/:index/:id", handler.ServeHTTP) 62 | ts := httptest.NewServer(router) 63 | 64 | defer func() { 65 | handler.search.DeleteIndex("test-rest-add-ok") 66 | ts.Close() 67 | handler.search.Close() 68 | }() 69 | 70 | _, err := handler.search.CreateIndex("test-rest-add-ok") 71 | 72 | if err != nil { 73 | t.Error(err) 74 | return 75 | } 76 | 77 | for i, doc := range []string{ 78 | `{"doc": {"id": 0, "bleh": "test"}}`, 79 | `{"doc": {"id": 1, "title": "ldjfjl"}}`, 80 | `{"doc": {"id": 2, "title": "hjdfskhfk"}}`, 81 | } { 82 | addURL := ts.URL + "/test-rest-add-ok/" + strconv.Itoa(i) 83 | 84 | req, err := http.NewRequest("POST", addURL, bytes.NewBufferString(doc)) 85 | 86 | if err != nil { 87 | t.Error(err) 88 | return 89 | } 90 | 91 | client := &http.Client{} 92 | res, err := client.Do(req) 93 | 94 | if err != nil { 95 | t.Error(err) 96 | } 97 | 98 | content, err := ioutil.ReadAll(res.Body) 99 | res.Body.Close() 100 | if err != nil { 101 | t.Error(err) 102 | return 103 | } 104 | 105 | resObj := map[string]interface{}{} 106 | 107 | err = json.Unmarshal(content, &resObj) 108 | 109 | if err != nil { 110 | t.Error(err) 111 | t.Errorf("Returned value: %s", string(content)) 112 | return 113 | } 114 | 115 | if resObj["error"] != nil { 116 | t.Error(resObj["error"]) 117 | return 118 | } 119 | 120 | if resObj["status"] == nil { 121 | t.Error("Failed to add document") 122 | return 123 | } 124 | 125 | status := resObj["status"] 126 | expected := "Document " + strconv.Itoa(i) + " indexed." 
127 | 128 | if status != expected { 129 | t.Errorf("Differs: %s != %s", status, expected) 130 | t.Errorf("Failed to add document: %s", status) 131 | return 132 | } 133 | } 134 | } 135 | 136 | func TestAddDocumentsFail(t *testing.T) { 137 | handler := getAddDocHandler() 138 | 139 | defer func() { 140 | handler.search.DeleteIndex("test-fail") 141 | handler.search.Close() 142 | }() 143 | 144 | _, err := handler.search.CreateIndex("test-fail") 145 | 146 | if err != nil { 147 | t.Error(err) 148 | return 149 | } 150 | 151 | for i, doc := range []string{ 152 | `{}`, 153 | `{"metadata": {}}`, 154 | `{"t": "sçdçs"}`, 155 | ``, 156 | `test`, 157 | ` `, 158 | `[]`, 159 | `[{}]`, 160 | } { 161 | 162 | err = handler.addDocument("test-fail", uint64(i), []byte(doc)) 163 | 164 | if err == nil { 165 | t.Error(fmt.Errorf("Invalid document: %s", doc)) 166 | return 167 | } 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /service/neosearch/index/create.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "github.com/NeowayLabs/neosearch/lib/neosearch" 8 | "github.com/NeowayLabs/neosearch/service/neosearch/handler" 9 | "github.com/julienschmidt/httprouter" 10 | ) 11 | 12 | type CreateIndexHandler struct { 13 | handler.DefaultHandler 14 | search *neosearch.NeoSearch 15 | } 16 | 17 | func NewCreateHandler(search *neosearch.NeoSearch) *CreateIndexHandler { 18 | return &CreateIndexHandler{ 19 | search: search, 20 | } 21 | } 22 | 23 | func (handler *CreateIndexHandler) ServeHTTP(res http.ResponseWriter, req *http.Request, ps httprouter.Params) { 24 | handler.ProcessVars(ps) 25 | indexName := handler.GetIndexName() 26 | 27 | if exists, err := handler.search.IndexExists(indexName); exists == true && err == nil { 28 | response := map[string]string{ 29 | "error": "Index '" + indexName + "' already exists.", 30 | } 31 | 32 | 
handler.WriteJSONObject(res, response) 33 | return 34 | } else if exists == false && err != nil { 35 | handler.Error(res, err.Error()) 36 | return 37 | } 38 | 39 | body, err := handler.createIndex(indexName) 40 | 41 | if err != nil { 42 | handler.Error(res, string(body)) 43 | return 44 | } 45 | 46 | handler.WriteJSON(res, body) 47 | } 48 | 49 | func (handler *CreateIndexHandler) createIndex(name string) ([]byte, error) { 50 | _, err := handler.search.CreateIndex(name) 51 | 52 | if err != nil { 53 | return nil, err 54 | } 55 | 56 | response := []byte(fmt.Sprintf("{\"status\": \"Index '%s' created.\"}", name)) 57 | return response, nil 58 | } 59 | -------------------------------------------------------------------------------- /service/neosearch/index/create_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/NeowayLabs/neosearch/lib/neosearch" 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 9 | ) 10 | 11 | func getCreateHandler() *CreateIndexHandler { 12 | cfg := config.NewConfig() 13 | cfg.Option(config.DataDir(dataDirTmp)) 14 | ns := neosearch.New(cfg) 15 | 16 | handler := NewCreateHandler(ns) 17 | 18 | return handler 19 | } 20 | 21 | func TestCreateIndexOK(t *testing.T) { 22 | handler := getCreateHandler() 23 | 24 | defer func() { 25 | handler.search.Close() 26 | }() 27 | 28 | for _, name := range []string{ 29 | "test", 30 | "about", 31 | "company", 32 | "people", 33 | "apple", 34 | "sucks", 35 | } { 36 | body, err := handler.createIndex(name) 37 | 38 | if err != nil { 39 | t.Error(err) 40 | continue 41 | } 42 | 43 | expected := fmt.Sprintf("{\"status\": \"Index '%s' created.\"}", name) 44 | 45 | if string(body) != expected { 46 | t.Errorf("REST response differs: Received (%s)\nExpected: (%s)", 47 | string(body), expected) 48 | } 49 | 50 | deleteIndex(t, handler.search, name) 51 | } 52 | } 53 | 54 | func TestCreateIndexFail(t *testing.T) 
{ 55 | handler := getCreateHandler() 56 | 57 | defer func() { 58 | handler.search.Close() 59 | }() 60 | 61 | for _, name := range []string{ 62 | "_____", 63 | "87)*()*)", 64 | "@#$%*()", 65 | "a", 66 | "aa", 67 | } { 68 | body, err := handler.createIndex(name) 69 | 70 | if err == nil { 71 | t.Errorf("Invalid index name '%s' should fail", name) 72 | deleteIndex(t, handler.search, name) 73 | continue 74 | } 75 | 76 | if body != nil { 77 | t.Errorf("JSON response should be nil for index '%s'", name) 78 | return 79 | } 80 | 81 | if err.Error() != "Invalid index name" { 82 | t.Error("Unexpected error") 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /service/neosearch/index/delete.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "github.com/NeowayLabs/neosearch/lib/neosearch" 8 | "github.com/NeowayLabs/neosearch/service/neosearch/handler" 9 | "github.com/julienschmidt/httprouter" 10 | ) 11 | 12 | type DeleteIndexHandler struct { 13 | handler.DefaultHandler 14 | search *neosearch.NeoSearch 15 | } 16 | 17 | func NewDeleteHandler(search *neosearch.NeoSearch) *DeleteIndexHandler { 18 | return &DeleteIndexHandler{ 19 | search: search, 20 | } 21 | } 22 | 23 | func (handler *DeleteIndexHandler) ServeHTTP(res http.ResponseWriter, req *http.Request, ps httprouter.Params) { 24 | handler.ProcessVars(ps) 25 | indexName := handler.GetIndexName() 26 | 27 | if exists, err := handler.search.IndexExists(indexName); exists == false && err == nil { 28 | response := map[string]string{ 29 | "error": "Index '" + indexName + "' doesn't exists.", 30 | } 31 | 32 | handler.WriteJSONObject(res, response) 33 | return 34 | } else if exists == false && err != nil { 35 | handler.Error(res, err.Error()) 36 | return 37 | } 38 | 39 | err := handler.deleteIndex(indexName) 40 | 41 | if err != nil { 42 | handler.Error(res, err.Error()) 43 | 
return 44 | } 45 | 46 | handler.WriteJSON(res, []byte(fmt.Sprintf("{\"status\": \"Index '%s' deleted.\"}", indexName))) 47 | } 48 | 49 | func (handler *DeleteIndexHandler) deleteIndex(indexName string) error { 50 | return handler.search.DeleteIndex(indexName) 51 | } 52 | -------------------------------------------------------------------------------- /service/neosearch/index/delete_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "errors" 7 | "io/ioutil" 8 | "net/http" 9 | "net/http/httptest" 10 | "testing" 11 | 12 | "github.com/NeowayLabs/neosearch/lib/neosearch" 13 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 14 | "github.com/julienschmidt/httprouter" 15 | ) 16 | 17 | func getDeleteHandler() *DeleteIndexHandler { 18 | cfg := config.NewConfig() 19 | cfg.Option(config.DataDir(dataDirTmp)) 20 | ns := neosearch.New(cfg) 21 | 22 | handler := NewDeleteHandler(ns) 23 | 24 | return handler 25 | 26 | } 27 | 28 | func TestDeleteServeHTTP_OK(t *testing.T) { 29 | handler := getDeleteHandler() 30 | 31 | router := httprouter.New() 32 | 33 | router.Handle("DELETE", "/:index", handler.ServeHTTP) 34 | 35 | ts := httptest.NewServer(router) 36 | 37 | defer func() { 38 | ts.Close() 39 | handler.search.Close() 40 | }() 41 | 42 | for _, name := range []string{ 43 | "test-delete-serve-http", 44 | "delete-this-index", 45 | "lsdfjlsjflsdjfl", 46 | "LOL", 47 | } { 48 | 49 | _, err := handler.search.CreateIndex(name) 50 | 51 | if err != nil { 52 | t.Error(err) 53 | return 54 | } 55 | 56 | deleteURL := ts.URL + "/" + name 57 | 58 | req, err := http.NewRequest("DELETE", deleteURL, bytes.NewBufferString("")) 59 | 60 | if err != nil { 61 | t.Error(err) 62 | return 63 | } 64 | 65 | client := &http.Client{} 66 | res, err := client.Do(req) 67 | 68 | if err != nil { 69 | t.Error(err) 70 | } 71 | 72 | content, err := ioutil.ReadAll(res.Body) 73 | res.Body.Close() 74 | if err 
!= nil { 75 | t.Error(err) 76 | return 77 | } 78 | 79 | resObj := map[string]interface{}{} 80 | 81 | err = json.Unmarshal(content, &resObj) 82 | 83 | if err != nil { 84 | t.Error(err) 85 | return 86 | } 87 | 88 | if resObj["error"] != nil { 89 | t.Error(resObj["error"]) 90 | return 91 | } 92 | 93 | if resObj["status"] == nil { 94 | t.Error("Failed to delete index") 95 | return 96 | } 97 | 98 | status := resObj["status"] 99 | 100 | if status != "Index '"+name+"' deleted." { 101 | t.Errorf("Failed to delete index: %s", status) 102 | return 103 | } 104 | 105 | _, err = handler.search.OpenIndex(name) 106 | 107 | if err == nil { 108 | t.Error(errors.New("Index '" + name + "' should not exist.")) 109 | return 110 | } 111 | } 112 | } 113 | 114 | func TestDeleteIndex(t *testing.T) { 115 | handler := getDeleteHandler() 116 | _, err := handler.search.CreateIndex("test-delete") 117 | 118 | if err != nil { 119 | t.Error(err) 120 | return 121 | } 122 | 123 | defer func() { 124 | handler.search.Close() 125 | }() 126 | 127 | err = handler.deleteIndex("test-delete") 128 | 129 | if err != nil { 130 | t.Error(err) 131 | return 132 | } 133 | 134 | err = handler.deleteIndex("ldjfklsjfl") 135 | 136 | if err == nil { 137 | t.Error(errors.New("should fail: index doesn't exist")) 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /service/neosearch/index/get.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "net/http" 5 | "strconv" 6 | 7 | "github.com/NeowayLabs/neosearch/lib/neosearch" 8 | "github.com/NeowayLabs/neosearch/service/neosearch/handler" 9 | "github.com/julienschmidt/httprouter" 10 | ) 11 | 12 | type GetHandler struct { 13 | handler.DefaultHandler 14 | search *neosearch.NeoSearch 15 | } 16 | 17 | func NewGetHandler(search *neosearch.NeoSearch) *GetHandler { 18 | return &GetHandler{ 19 | search: search, 20 | } 21 | } 22 | 23 | func (handler *GetHandler) 
ServeHTTP(res http.ResponseWriter, req *http.Request, ps httprouter.Params) { 24 | var ( 25 | err error 26 | document []byte 27 | exists bool 28 | ) 29 | 30 | handler.ProcessVars(ps) 31 | indexName := handler.GetIndexName() 32 | 33 | if exists, err = handler.search.IndexExists(indexName); exists != true && err == nil { 34 | response := map[string]string{ 35 | "error": "Index '" + indexName + "' doesn't exists.", 36 | } 37 | 38 | handler.WriteJSONObject(res, response) 39 | return 40 | } else if exists == false && err != nil { 41 | res.WriteHeader(http.StatusInternalServerError) 42 | handler.Error(res, err.Error()) 43 | return 44 | } 45 | 46 | docID := handler.GetDocumentID() 47 | 48 | docIntID, err := strconv.Atoi(docID) 49 | 50 | if err != nil { 51 | res.WriteHeader(http.StatusBadRequest) 52 | handler.Error(res, "Invalid document id") 53 | return 54 | } 55 | 56 | index, err := handler.search.OpenIndex(indexName) 57 | 58 | if err != nil { 59 | handler.Error(res, err.Error()) 60 | return 61 | } 62 | 63 | document, err = index.Get(uint64(docIntID)) 64 | 65 | if err != nil { 66 | res.WriteHeader(http.StatusBadRequest) 67 | handler.Error(res, err.Error()) 68 | return 69 | } 70 | 71 | handler.WriteJSON(res, document) 72 | } 73 | -------------------------------------------------------------------------------- /service/neosearch/index/get_analyze.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "net/http" 5 | "strconv" 6 | 7 | "github.com/NeowayLabs/neosearch/lib/neosearch" 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/engine" 9 | "github.com/NeowayLabs/neosearch/service/neosearch/handler" 10 | "github.com/julienschmidt/httprouter" 11 | ) 12 | 13 | type GetAnalyseHandler struct { 14 | handler.DefaultHandler 15 | 16 | search *neosearch.NeoSearch 17 | } 18 | 19 | func NewGetAnalyzeHandler(search *neosearch.NeoSearch) *GetAnalyseHandler { 20 | handler := GetAnalyseHandler{ 21 | search: search, 22 
| } 23 | 24 | return &handler 25 | } 26 | 27 | func (handler *GetAnalyseHandler) ServeHTTP(res http.ResponseWriter, req *http.Request, ps httprouter.Params) { 28 | var ( 29 | err error 30 | cmd engine.Command 31 | exists bool 32 | ) 33 | 34 | handler.ProcessVars(ps) 35 | indexName := handler.GetIndexName() 36 | 37 | if exists, err = handler.search.IndexExists(indexName); exists != true && err == nil { 38 | response := map[string]string{ 39 | "error": "Index '" + indexName + "' doesn't exists.", 40 | } 41 | 42 | handler.WriteJSONObject(res, response) 43 | return 44 | } else if exists == false && err != nil { 45 | res.WriteHeader(http.StatusInternalServerError) 46 | handler.Error(res, err.Error()) 47 | return 48 | } 49 | 50 | docID := handler.GetDocumentID() 51 | 52 | docIntID, err := strconv.Atoi(docID) 53 | 54 | if err != nil { 55 | res.WriteHeader(http.StatusBadRequest) 56 | handler.Error(res, "Invalid document id: "+docID) 57 | return 58 | } 59 | 60 | index, err := handler.search.OpenIndex(indexName) 61 | 62 | if err != nil { 63 | handler.Error(res, err.Error()) 64 | return 65 | } 66 | 67 | cmd, err = index.GetAnalyze(uint64(docIntID)) 68 | 69 | if err != nil { 70 | res.WriteHeader(http.StatusBadRequest) 71 | handler.Error(res, err.Error()) 72 | return 73 | } 74 | 75 | res.Write([]byte(cmd.Reverse())) 76 | } 77 | -------------------------------------------------------------------------------- /service/neosearch/index/get_analyze_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "io/ioutil" 9 | "net/http" 10 | "net/http/httptest" 11 | "testing" 12 | 13 | "github.com/NeowayLabs/neosearch/lib/neosearch" 14 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 15 | "github.com/julienschmidt/httprouter" 16 | ) 17 | 18 | func getAnalyzeGetHandler() *GetAnalyseHandler { 19 | cfg := config.NewConfig() 20 | 
cfg.Option(config.DataDir(dataDirTmp)) 21 | ns := neosearch.New(cfg) 22 | 23 | handler := NewGetAnalyzeHandler(ns) 24 | return handler 25 | } 26 | 27 | func TestGetAnalyze(t *testing.T) { 28 | handler := getAnalyzeGetHandler() 29 | 30 | router := httprouter.New() 31 | 32 | router.Handle("GET", "/:index/:id/_analyze", handler.ServeHTTP) 33 | 34 | ts := httptest.NewServer(router) 35 | 36 | defer func() { 37 | ts.Close() 38 | handler.search.DeleteIndex("test-analyze-ok") 39 | handler.search.Close() 40 | }() 41 | 42 | _, err := handler.search.CreateIndex("test-analyze-ok") 43 | 44 | if err != nil { 45 | t.Error(err) 46 | return 47 | } 48 | 49 | for _, testPair := range []struct { 50 | id string 51 | out string 52 | }{ 53 | {"1", `USING test-analyze-ok.document.db GET uint(1);`}, 54 | } { 55 | id := testPair.id 56 | out := testPair.out 57 | 58 | analyzeURL := ts.URL + "/test-analyze-ok/" + id + "/_analyze" 59 | 60 | req, err := http.NewRequest("GET", analyzeURL, bytes.NewBufferString("")) 61 | 62 | if err != nil { 63 | t.Error(err) 64 | return 65 | } 66 | 67 | client := &http.Client{} 68 | res, err := client.Do(req) 69 | 70 | if err != nil { 71 | t.Error(err) 72 | } 73 | 74 | content, err := ioutil.ReadAll(res.Body) 75 | res.Body.Close() 76 | if err != nil { 77 | t.Error(err) 78 | return 79 | } 80 | 81 | resObj := map[string]interface{}{} 82 | 83 | err = json.Unmarshal(content, &resObj) 84 | 85 | if err == nil { 86 | t.Error(errors.New("should return a neosearch-cli commands: " + string(content))) 87 | return 88 | } 89 | 90 | if string(content) != out { 91 | t.Error(fmt.Errorf("analyze differs: (%s) != (%s)", string(content), out)) 92 | return 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /service/neosearch/index/get_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "io/ioutil" 7 | "net/http" 8 | 
"net/http/httptest" 9 | "strconv" 10 | "testing" 11 | 12 | "github.com/NeowayLabs/neosearch/lib/neosearch" 13 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 14 | "github.com/julienschmidt/httprouter" 15 | ) 16 | 17 | func getGetHandler() *GetHandler { 18 | cfg := config.NewConfig() 19 | cfg.Option(config.DataDir(dataDirTmp)) 20 | ns := neosearch.New(cfg) 21 | 22 | handler := NewGetHandler(ns) 23 | 24 | return handler 25 | } 26 | 27 | func TestGetDocumentsOK(t *testing.T) { 28 | handler := getGetHandler() 29 | 30 | defer func() { 31 | handler.search.DeleteIndex("test-get-ok") 32 | handler.search.Close() 33 | }() 34 | 35 | ind, err := handler.search.CreateIndex("test-get-ok") 36 | 37 | if err != nil { 38 | t.Error(err) 39 | return 40 | } 41 | 42 | router := httprouter.New() 43 | 44 | router.Handle("GET", "/:index/:id", handler.ServeHTTP) 45 | 46 | ts := httptest.NewServer(router) 47 | 48 | for i, doc := range []string{ 49 | `{"id": 0, "bleh": "test"}`, 50 | `{"id": 1, "title": "ldjfjl"}`, 51 | `{"id": 2, "title": "hjdfskhfk"}`, 52 | } { 53 | 54 | err = ind.Add(uint64(i), []byte(doc), nil) 55 | 56 | if err != nil { 57 | t.Error(err) 58 | return 59 | } 60 | 61 | getURL := ts.URL + "/test-get-ok/" + strconv.Itoa(i) 62 | 63 | req, err := http.NewRequest("GET", getURL, bytes.NewBufferString("")) 64 | 65 | if err != nil { 66 | t.Error(err) 67 | return 68 | } 69 | 70 | client := &http.Client{} 71 | res, err := client.Do(req) 72 | 73 | if err != nil { 74 | t.Error(err) 75 | } 76 | 77 | content, err := ioutil.ReadAll(res.Body) 78 | res.Body.Close() 79 | if err != nil { 80 | t.Error(err) 81 | return 82 | } 83 | 84 | resObj := map[string]interface{}{} 85 | 86 | err = json.Unmarshal(content, &resObj) 87 | 88 | if err != nil { 89 | t.Error(err) 90 | t.Errorf("Returned value: %s", string(content)) 91 | return 92 | } 93 | 94 | if resObj["error"] != nil { 95 | t.Error(resObj["error"]) 96 | return 97 | } 98 | 99 | if string(content) != doc { 100 | t.Errorf("Differs: %s != 
%s", string(content), doc) 101 | t.Errorf("Failed to get document: %s", string(content)) 102 | return 103 | } 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /service/neosearch/index/index.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "encoding/json" 5 | "net/http" 6 | 7 | "github.com/NeowayLabs/neosearch/lib/neosearch" 8 | "github.com/NeowayLabs/neosearch/lib/neosearch/index" 9 | "github.com/NeowayLabs/neosearch/service/neosearch/handler" 10 | "github.com/julienschmidt/httprouter" 11 | ) 12 | 13 | type IndexHandler struct { 14 | handler.DefaultHandler 15 | 16 | search *neosearch.NeoSearch 17 | } 18 | 19 | func New(search *neosearch.NeoSearch) *IndexHandler { 20 | handler := IndexHandler{} 21 | handler.search = search 22 | 23 | return &handler 24 | } 25 | 26 | func (handler *IndexHandler) ServeHTTP(res http.ResponseWriter, req *http.Request, ps httprouter.Params) { 27 | handler.ProcessVars(ps) 28 | indexName := handler.GetIndexName() 29 | 30 | if indexName == "" { 31 | handler.Error(res, "no index supplied") 32 | return 33 | } else if !index.ValidateIndexName(indexName) { 34 | handler.Error(res, "Invalid index name: "+indexName) 35 | return 36 | } 37 | 38 | body, err := handler.serveIndex(indexName) 39 | 40 | if err != nil { 41 | handler.Error(res, err.Error()) 42 | return 43 | } 44 | 45 | res.Write(body) 46 | } 47 | 48 | func (handler *IndexHandler) serveIndex(name string) ([]byte, error) { 49 | index, err := handler.search.OpenIndex(name) 50 | 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | body, err := json.Marshal(&index) 56 | 57 | if err != nil { 58 | return nil, err 59 | } 60 | 61 | return body, nil 62 | } 63 | -------------------------------------------------------------------------------- /service/neosearch/index/index_test.go: -------------------------------------------------------------------------------- 1 
| package index 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "testing" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch" 9 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 10 | "github.com/NeowayLabs/neosearch/lib/neosearch/index" 11 | ) 12 | 13 | var dataDirTmp string 14 | 15 | func init() { 16 | var err error 17 | dataDirTmp, err = ioutil.TempDir("/tmp", "neosearch-service-index-") 18 | if err != nil { 19 | panic(err) 20 | } 21 | } 22 | 23 | func getIndexHandler() *IndexHandler { 24 | cfg := config.NewConfig() 25 | cfg.Option(config.DataDir(dataDirTmp)) 26 | ns := neosearch.New(cfg) 27 | 28 | handler := New(ns) 29 | 30 | return handler 31 | } 32 | 33 | func deleteIndex(t *testing.T, search *neosearch.NeoSearch, name string) { 34 | err := search.DeleteIndex(name) 35 | 36 | if err != nil { 37 | t.Error(err) 38 | } 39 | } 40 | 41 | func TestIndexNotExist(t *testing.T) { 42 | handler := getIndexHandler() 43 | 44 | for _, name := range []string{ 45 | "test", 46 | "info", 47 | "lsajldkjal", 48 | "__", 49 | "about", 50 | "hack", 51 | } { 52 | _, err := handler.serveIndex(name) 53 | 54 | if err == nil { 55 | t.Error(fmt.Errorf("Index '%s' shall not exist", name)) 56 | return 57 | } 58 | } 59 | } 60 | 61 | func addDocs(t *testing.T, index *index.Index) { 62 | err := index.Add(1, []byte(`{"title": "teste"}`), nil) 63 | 64 | if err != nil { 65 | t.Error(err) 66 | return 67 | } 68 | } 69 | 70 | func TestIndexInfo(t *testing.T) { 71 | handler := getIndexHandler() 72 | 73 | defer func() { 74 | deleteIndex(t, handler.search, "test-index-info") 75 | handler.search.Close() 76 | }() 77 | 78 | index, err := handler.search.CreateIndex("test-index-info") 79 | 80 | addDocs(t, index) 81 | 82 | body, err := handler.serveIndex("test-index-info") 83 | 84 | if err != nil { 85 | t.Error(err) 86 | return 87 | } 88 | 89 | if string(body) != `{"name":"test-index-info"}` { 90 | t.Errorf("Invalid index info: %s", string(body)) 91 | return 92 | } 93 | } 94 | 
-------------------------------------------------------------------------------- /service/neosearch/index/search.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "net/http" 8 | 9 | "github.com/NeowayLabs/neosearch/lib/neosearch" 10 | "github.com/NeowayLabs/neosearch/lib/neosearch/search" 11 | "github.com/NeowayLabs/neosearch/service/neosearch/handler" 12 | "github.com/julienschmidt/httprouter" 13 | ) 14 | 15 | type SearchHandler struct { 16 | handler.DefaultHandler 17 | search *neosearch.NeoSearch 18 | } 19 | 20 | func NewSearchHandler(search *neosearch.NeoSearch) *SearchHandler { 21 | return &SearchHandler{ 22 | search: search, 23 | } 24 | } 25 | 26 | func (handler *SearchHandler) ServeHTTP(res http.ResponseWriter, req *http.Request, ps httprouter.Params) { 27 | var ( 28 | err error 29 | exists bool 30 | documents []map[string]interface{} 31 | outputJSON []byte 32 | ) 33 | 34 | handler.ProcessVars(ps) 35 | indexName := handler.GetIndexName() 36 | 37 | if exists, err = handler.search.IndexExists(indexName); exists != true && err == nil { 38 | response := map[string]string{ 39 | "error": "Index '" + indexName + "' doesn't exists.", 40 | } 41 | 42 | handler.WriteJSONObject(res, response) 43 | return 44 | } else if exists == false && err != nil { 45 | res.WriteHeader(http.StatusInternalServerError) 46 | handler.Error(res, err.Error()) 47 | return 48 | } 49 | 50 | dslBytes, err := ioutil.ReadAll(req.Body) 51 | 52 | if err != nil { 53 | res.WriteHeader(http.StatusBadRequest) 54 | handler.Error(res, err.Error()) 55 | return 56 | } 57 | 58 | dsl := make(map[string]interface{}) 59 | 60 | err = json.Unmarshal(dslBytes, &dsl) 61 | 62 | if err != nil { 63 | res.WriteHeader(http.StatusBadRequest) 64 | handler.Error(res, err.Error()) 65 | return 66 | } 67 | 68 | index, err := handler.search.OpenIndex(indexName) 69 | 70 | if err != nil { 71 | 
handler.Error(res, err.Error()) 72 | return 73 | } 74 | 75 | if dsl["query"] == nil { 76 | res.WriteHeader(http.StatusBadRequest) 77 | handler.Error(res, "No query field specified") 78 | return 79 | } 80 | 81 | query, ok := dsl["query"].(map[string]interface{}) 82 | 83 | if !ok { 84 | res.WriteHeader(http.StatusBadRequest) 85 | handler.Error(res, "Search 'query' field is not a JSON object") 86 | return 87 | } 88 | 89 | output := make(map[string]interface{}) 90 | var total uint64 91 | 92 | docs, total, err := search.Search(index, query, 10) 93 | 94 | if err != nil { 95 | res.WriteHeader(http.StatusBadRequest) 96 | handler.Error(res, err.Error()) 97 | return 98 | } 99 | 100 | documents = make([]map[string]interface{}, len(docs)) 101 | 102 | for idx, doc := range docs { 103 | obj := make(map[string]interface{}) 104 | err = json.Unmarshal([]byte(doc), &obj) 105 | 106 | if err != nil { 107 | fmt.Println("Failed to unmarshal: ", doc) 108 | goto error 109 | } 110 | 111 | documents[idx] = obj 112 | } 113 | 114 | output["total"] = total 115 | output["results"] = documents 116 | 117 | outputJSON, err = json.Marshal(output) 118 | 119 | if err != nil { 120 | goto error 121 | } 122 | 123 | res.WriteHeader(http.StatusOK) 124 | handler.WriteJSON(res, outputJSON) 125 | 126 | return 127 | 128 | error: 129 | if err != nil { 130 | res.WriteHeader(http.StatusBadRequest) 131 | handler.Error(res, err.Error()) 132 | return 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /service/neosearch/index/search_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "io/ioutil" 7 | "net/http" 8 | "net/http/httptest" 9 | "testing" 10 | 11 | "github.com/NeowayLabs/neosearch/lib/neosearch" 12 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 13 | "github.com/julienschmidt/httprouter" 14 | ) 15 | 16 | func getSearchHandler() 
*SearchHandler { 17 | cfg := config.NewConfig() 18 | cfg.Option(config.DataDir(dataDirTmp)) 19 | ns := neosearch.New(cfg) 20 | 21 | handler := NewSearchHandler(ns) 22 | return handler 23 | } 24 | 25 | func addDocumentsForSearch(indexName string) (*SearchHandler, error) { 26 | handler := getSearchHandler() 27 | 28 | ind, err := handler.search.CreateIndex(indexName) 29 | 30 | if err != nil { 31 | return nil, err 32 | } 33 | 34 | for i, doc := range []string{ 35 | `{"id": 0, "name": "Neoway Business Solution"}`, 36 | `{"id": 1, "name": "Facebook Inc"}`, 37 | `{"id": 2, "name": "Google Inc"}`, 38 | } { 39 | 40 | err = ind.Add(uint64(i), []byte(doc), nil) 41 | 42 | if err != nil { 43 | return nil, err 44 | } 45 | } 46 | 47 | return handler, nil 48 | } 49 | 50 | func TestSimpleSearch(t *testing.T) { 51 | handler, err := addDocumentsForSearch("search-simple") 52 | 53 | if err != nil { 54 | t.Error(err) 55 | return 56 | } 57 | 58 | router := httprouter.New() 59 | 60 | router.Handle("POST", "/:index", handler.ServeHTTP) 61 | 62 | ts := httptest.NewServer(router) 63 | 64 | defer func() { 65 | handler.search.DeleteIndex("search-simple") 66 | ts.Close() 67 | handler.search.Close() 68 | }() 69 | 70 | searchURL := ts.URL + "/search-simple" 71 | 72 | dsl := ` 73 | { 74 | "from": 0, 75 | "size": 10, 76 | "query": { 77 | "$and": [ 78 | {"name": "neoway"} 79 | ] 80 | } 81 | }` 82 | 83 | req, err := http.NewRequest("POST", searchURL, bytes.NewBufferString(dsl)) 84 | 85 | if err != nil { 86 | t.Error(err) 87 | return 88 | } 89 | 90 | client := &http.Client{} 91 | res, err := client.Do(req) 92 | 93 | if err != nil { 94 | t.Error(err) 95 | return 96 | } 97 | 98 | content, err := ioutil.ReadAll(res.Body) 99 | res.Body.Close() 100 | if err != nil { 101 | t.Error(err) 102 | return 103 | } 104 | 105 | resObj := map[string]interface{}{} 106 | 107 | err = json.Unmarshal(content, &resObj) 108 | 109 | if err != nil { 110 | t.Error(err) 111 | t.Errorf("Returned value: %s", string(content)) 112 | 
return 113 | } 114 | 115 | if resObj["error"] != nil { 116 | t.Error(resObj["error"]) 117 | return 118 | } 119 | 120 | if resObj["total"] == nil { 121 | t.Error("Invalid results response. Field 'total' is required.") 122 | return 123 | } 124 | 125 | total, ok := resObj["total"].(float64) 126 | 127 | if !ok { 128 | t.Errorf("Total must be an float: %+v", total) 129 | return 130 | } 131 | 132 | if int(total) != 1 { 133 | t.Errorf("Search problem. Returns %d but the correct is %d", total, 1) 134 | return 135 | } 136 | } 137 | 138 | func TestSimpleANDSearch(t *testing.T) { 139 | handler, err := addDocumentsForSearch("simple-and-search") 140 | 141 | if err != nil { 142 | t.Error(err) 143 | return 144 | } 145 | 146 | router := httprouter.New() 147 | 148 | router.Handle("POST", "/:index", handler.ServeHTTP) 149 | 150 | ts := httptest.NewServer(router) 151 | 152 | defer func() { 153 | handler.search.DeleteIndex("simple-and-search") 154 | ts.Close() 155 | handler.search.Close() 156 | }() 157 | 158 | searchURL := ts.URL + "/simple-and-search" 159 | 160 | dsl := ` 161 | { 162 | "from": 0, 163 | "size": 10, 164 | "query": { 165 | "$and": [ 166 | {"name": "inc"}, 167 | {"name": "facebook"} 168 | ] 169 | } 170 | }` 171 | 172 | req, err := http.NewRequest("POST", searchURL, bytes.NewBufferString(dsl)) 173 | 174 | if err != nil { 175 | t.Error(err) 176 | return 177 | } 178 | 179 | client := &http.Client{} 180 | res, err := client.Do(req) 181 | 182 | if err != nil { 183 | t.Error(err) 184 | return 185 | } 186 | 187 | content, err := ioutil.ReadAll(res.Body) 188 | res.Body.Close() 189 | if err != nil { 190 | t.Error(err) 191 | return 192 | } 193 | 194 | resObj := map[string]interface{}{} 195 | 196 | err = json.Unmarshal(content, &resObj) 197 | 198 | if err != nil { 199 | t.Error(err) 200 | t.Errorf("Returned value: %s", string(content)) 201 | return 202 | } 203 | 204 | if resObj["error"] != nil { 205 | t.Error(resObj["error"]) 206 | return 207 | } 208 | 209 | if resObj["total"] == 
nil { 210 | t.Error("Invalid results response. Field 'total' is required.") 211 | return 212 | } 213 | 214 | total, ok := resObj["total"].(float64) 215 | 216 | if !ok { 217 | t.Errorf("Total must be an float: %+v", total) 218 | return 219 | } 220 | 221 | if int(total) != 1 { 222 | t.Errorf("Search problem. Returns %d but the correct is %d", total, 1) 223 | return 224 | } 225 | 226 | results, ok := resObj["results"].([]interface{}) 227 | 228 | if !ok || results == nil { 229 | t.Errorf("No results: %+v", resObj["results"]) 230 | return 231 | } 232 | 233 | if len(results) != 1 { 234 | t.Errorf("Results length is invalid: %d", len(results)) 235 | return 236 | } 237 | 238 | r := results[0].(map[string]interface{}) 239 | 240 | if r["name"].(string) != "Facebook Inc" { 241 | t.Errorf("Invalid result: %+v", r) 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /service/neosearch/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "os/signal" 8 | "strconv" 9 | "strings" 10 | 11 | "github.com/NeowayLabs/neosearch/lib/neosearch" 12 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 13 | "github.com/NeowayLabs/neosearch/service/neosearch/server" 14 | "github.com/jteeuwen/go-pkg-optarg" 15 | ) 16 | 17 | const ( 18 | DefaultPort = uint16(9500) 19 | DefaultHost = "0.0.0.0" 20 | ) 21 | 22 | func main() { 23 | var ( 24 | configOpt, dataDirOpt string 25 | kvstoreOpt, hostOpt string 26 | goProcsOpt uint64 27 | portOpt uint16 28 | helpOpt, debugOpt bool 29 | err error 30 | cfg *config.Config 31 | cfgServer *server.ServerConfig 32 | ) 33 | 34 | cfg = config.NewConfig() 35 | cfgServer = server.NewConfig() 36 | 37 | optarg.Header("General options") 38 | optarg.Add("c", "config", "Configurations file", "") 39 | optarg.Add("d", "data-dir", "Data directory", "") 40 | optarg.Add("k", "default-kvstore", "Default kvstore", "") 41 | 
optarg.Add("g", "maximum-concurrence", "Set the maximum number of concurrent go routines", 0) 42 | optarg.Add("t", "trace-debug", "Enable debug traces", false) 43 | optarg.Add("s", "server-address", "Server host and port", "0.0.0.0:9500") 44 | optarg.Add("h", "help", "Display this help", false) 45 | 46 | for opt := range optarg.Parse() { 47 | switch opt.ShortName { 48 | case "c": 49 | configOpt = opt.String() 50 | case "d": 51 | dataDirOpt = opt.String() 52 | case "k": 53 | kvstoreOpt = opt.String() 54 | case "s": 55 | address := opt.String() 56 | addrParts := strings.Split(address, ":") 57 | 58 | if len(addrParts) > 1 { 59 | hostOpt = addrParts[0] 60 | port := addrParts[1] 61 | 62 | portInt, err := strconv.Atoi(port) 63 | 64 | if err == nil { 65 | portOpt = uint16(portInt) 66 | } else { 67 | log.Fatalf("Invalid port number: %s (%s)", port, err) 68 | return 69 | } 70 | } else { 71 | hostOpt = addrParts[0] 72 | portOpt = DefaultPort 73 | } 74 | case "t": 75 | debugOpt = opt.Bool() 76 | case "g": 77 | goprocsStr := opt.String() 78 | goProcsInt, err := strconv.Atoi(goprocsStr) 79 | 80 | if err != nil || goProcsInt <= 0 { 81 | log.Fatal("Invalid -g option. Should be a unsigned integer value greater than zero.") 82 | return 83 | } 84 | 85 | goProcsOpt = uint64(goProcsInt) 86 | case "h": 87 | helpOpt = true 88 | } 89 | } 90 | 91 | if helpOpt { 92 | optarg.Usage() 93 | os.Exit(0) 94 | } 95 | 96 | if dataDirOpt == "" { 97 | dataDirOpt, _ = os.Getwd() 98 | } 99 | 100 | if configOpt == "" { 101 | log.Println("No configuration file supplied. 
Using defaults...") 102 | cfg.Debug = false 103 | cfg.DataDir = "/data" 104 | } else { 105 | cfg, err = config.ConfigFromFile(configOpt) 106 | if err != nil { 107 | log.Fatalf("Failed to read configuration file: %s", err.Error()) 108 | return 109 | } 110 | } 111 | 112 | if hostOpt == "" { 113 | hostOpt = DefaultHost 114 | } 115 | 116 | if portOpt == 0 { 117 | portOpt = DefaultPort 118 | } 119 | 120 | // override config options by argument options 121 | cfg.Option(config.DataDir(dataDirOpt)) 122 | cfg.Option(config.KVStore(kvstoreOpt)) 123 | cfg.Option(config.Debug(debugOpt)) 124 | 125 | cfgServer.Host = hostOpt 126 | cfgServer.Port = portOpt 127 | 128 | search := neosearch.New(cfg) 129 | defer func() { 130 | search.Close() 131 | }() 132 | 133 | httpServer, err := server.New(search, cfgServer) 134 | 135 | _ = goProcsOpt 136 | 137 | if err != nil { 138 | log.Fatal(err.Error()) 139 | return 140 | } 141 | 142 | // Wait for a SIGINT (perhaps triggered by user with CTRL-C) 143 | // Run cleanup when signal is received 144 | signalChan := make(chan os.Signal, 1) 145 | signal.Notify(signalChan, os.Interrupt) 146 | go func() { 147 | for _ = range signalChan { 148 | fmt.Println("\nReceived an interrupt, closing indexes...\n") 149 | search.Close() 150 | os.Exit(0) 151 | } 152 | }() 153 | 154 | err = httpServer.Start() 155 | 156 | if err != nil { 157 | log.Fatalf("Failed to start http server: %s", err.Error()) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /service/neosearch/server/server.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "log" 5 | "net/http" 6 | "strconv" 7 | 8 | "github.com/NeowayLabs/neosearch/lib/neosearch" 9 | "github.com/NeowayLabs/neosearch/service/neosearch/home" 10 | "github.com/NeowayLabs/neosearch/service/neosearch/index" 11 | "github.com/julienschmidt/httprouter" 12 | ) 13 | 14 | type ServerConfig struct { 15 | Host 
string 16 | Port uint16 17 | } 18 | 19 | type HTTPServer struct { 20 | config *ServerConfig 21 | router *httprouter.Router 22 | search *neosearch.NeoSearch 23 | } 24 | 25 | func NewConfig() *ServerConfig { 26 | return &ServerConfig{} 27 | } 28 | 29 | func New(search *neosearch.NeoSearch, config *ServerConfig) (*HTTPServer, error) { 30 | server := HTTPServer{} 31 | server.config = config 32 | server.search = search 33 | 34 | server.router = httprouter.New() 35 | 36 | server.createRoutes() 37 | return &server, nil 38 | } 39 | 40 | func (server *HTTPServer) createRoutes() { 41 | homeHandler := home.HomeHandler{} 42 | indexHandler := index.New(server.search) 43 | createIndexHandler := index.NewCreateHandler(server.search) 44 | deleteIndexHandler := index.NewDeleteHandler(server.search) 45 | getIndexHandler := index.NewGetHandler(server.search) 46 | getAnalyzeIndexHandler := index.NewGetAnalyzeHandler(server.search) 47 | addIndexHandler := index.NewAddHandler(server.search) 48 | searchIndexHandler := index.NewSearchHandler(server.search) 49 | 50 | server.router.Handle("GET", "/", homeHandler.ServeHTTP) 51 | server.router.Handle("GET", "/:index", indexHandler.ServeHTTP) 52 | server.router.Handle("PUT", "/:index", createIndexHandler.ServeHTTP) 53 | server.router.Handle("DELETE", "/:index", deleteIndexHandler.ServeHTTP) 54 | server.router.Handle("POST", "/:index", searchIndexHandler.ServeHTTP) 55 | server.router.Handle("GET", "/:index/:id", getIndexHandler.ServeHTTP) 56 | server.router.Handle("GET", "/:index/:id/_analyze", getAnalyzeIndexHandler.ServeHTTP) 57 | server.router.Handle("POST", "/:index/:id", addIndexHandler.ServeHTTP) 58 | } 59 | 60 | func (server *HTTPServer) GetRoutes() *httprouter.Router { 61 | return server.router 62 | } 63 | 64 | func (server *HTTPServer) Start() error { 65 | hostPort := server.config.Host + ":" + strconv.Itoa(int(server.config.Port)) 66 | log.Printf("Listening on %s", hostPort) 67 | err := http.ListenAndServe(hostPort, server.router) 68 
| 69 | return err 70 | } 71 | -------------------------------------------------------------------------------- /service/neosearch/server/server_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "io/ioutil" 7 | "log" 8 | "net/http" 9 | "net/http/httptest" 10 | "testing" 11 | 12 | "github.com/NeowayLabs/neosearch/lib/neosearch" 13 | "github.com/NeowayLabs/neosearch/lib/neosearch/config" 14 | ) 15 | 16 | var dataDirTmp string 17 | 18 | func init() { 19 | var err error 20 | dataDirTmp, err = ioutil.TempDir("/tmp", "neosearch-service-server-") 21 | if err != nil { 22 | panic(err) 23 | } 24 | } 25 | 26 | func getServer(t *testing.T) (*httptest.Server, *neosearch.NeoSearch, error) { 27 | cfg := config.NewConfig() 28 | cfg.Option(config.DataDir(dataDirTmp)) 29 | search := neosearch.New(cfg) 30 | serverConfig := ServerConfig{ 31 | Host: "0.0.0.0", 32 | Port: 9500, 33 | } 34 | 35 | srv, err := New(search, &serverConfig) 36 | 37 | if err != nil { 38 | t.Error(err.Error()) 39 | return nil, nil, err 40 | } 41 | 42 | ts := httptest.NewServer(srv.GetRoutes()) 43 | 44 | return ts, search, nil 45 | } 46 | 47 | func addDocs(t *testing.T, ts *httptest.Server) { 48 | indexURL := ts.URL + "/company" 49 | 50 | req, err := http.NewRequest("PUT", indexURL, bytes.NewBufferString("")) 51 | 52 | if err != nil { 53 | t.Error(err.Error()) 54 | return 55 | } 56 | 57 | client := &http.Client{} 58 | _, err = client.Do(req) 59 | 60 | if err != nil { 61 | t.Error(err.Error()) 62 | return 63 | } 64 | 65 | req, err = http.NewRequest("POST", indexURL+"/1", bytes.NewBufferString(`{"doc": {"id": 1, "name": "neoway"}}`)) 66 | 67 | if err != nil { 68 | t.Error(err.Error()) 69 | return 70 | } 71 | 72 | client = &http.Client{} 73 | _, err = client.Do(req) 74 | 75 | if err != nil { 76 | t.Error(err.Error()) 77 | return 78 | } 79 | 80 | req, err = http.NewRequest("POST", indexURL+"/2", 
bytes.NewBufferString(`{"doc": {"id": 2, "name": "facebook"}}`)) 81 | 82 | if err != nil { 83 | t.Error(err.Error()) 84 | return 85 | } 86 | 87 | client = &http.Client{} 88 | _, err = client.Do(req) 89 | 90 | if err != nil { 91 | t.Error(err.Error()) 92 | return 93 | } 94 | 95 | req, err = http.NewRequest("POST", indexURL+"/3", bytes.NewBufferString(`{"doc": {"id": 3, "name": "google"}}`)) 96 | 97 | if err != nil { 98 | t.Error(err.Error()) 99 | return 100 | } 101 | 102 | client = &http.Client{} 103 | _, err = client.Do(req) 104 | 105 | if err != nil { 106 | t.Error(err.Error()) 107 | return 108 | } 109 | 110 | } 111 | 112 | func deleteIndex(t *testing.T, search *neosearch.NeoSearch, name string) { 113 | err := search.DeleteIndex(name) 114 | 115 | if err != nil { 116 | t.Error(err) 117 | } 118 | } 119 | 120 | func TestRESTCreateIndex(t *testing.T) { 121 | ts, search, _ := getServer(t) 122 | defer func() { 123 | deleteIndex(t, search, "company") 124 | ts.Close() 125 | search.Close() 126 | }() 127 | 128 | createURL := ts.URL + "/company" 129 | 130 | req, err := http.NewRequest("PUT", createURL, bytes.NewBufferString("")) 131 | 132 | if err != nil { 133 | t.Error(err) 134 | return 135 | } 136 | 137 | client := &http.Client{} 138 | 139 | res, err := client.Do(req) 140 | 141 | if err != nil { 142 | t.Error(err) 143 | } 144 | 145 | content, err := ioutil.ReadAll(res.Body) 146 | res.Body.Close() 147 | if err != nil { 148 | t.Error(err) 149 | return 150 | } 151 | 152 | resObj := map[string]interface{}{} 153 | 154 | err = json.Unmarshal(content, &resObj) 155 | 156 | if err != nil { 157 | t.Error(err) 158 | return 159 | } 160 | 161 | if resObj["error"] != nil { 162 | t.Error(resObj["error"]) 163 | return 164 | } 165 | 166 | if resObj["status"] == nil { 167 | t.Error("Failed to create index") 168 | return 169 | } 170 | 171 | status := resObj["status"] 172 | 173 | if status != "Index 'company' created." 
{ 174 | t.Errorf("Failed to create index: %s", status) 175 | return 176 | } 177 | } 178 | 179 | func TestRESTIndexInfo(t *testing.T) { 180 | ts, search, _ := getServer(t) 181 | defer func() { 182 | ts.Close() 183 | search.Close() 184 | }() 185 | 186 | infoURL := ts.URL + "/test" 187 | 188 | res, err := http.Get(infoURL) 189 | 190 | if err != nil { 191 | log.Fatal(err) 192 | } 193 | content, err := ioutil.ReadAll(res.Body) 194 | res.Body.Close() 195 | if err != nil { 196 | t.Error(err) 197 | } 198 | 199 | resObj := map[string]interface{}{} 200 | 201 | err = json.Unmarshal(content, &resObj) 202 | 203 | if err != nil { 204 | t.Errorf("Failed to unmarshal json response: %s", err.Error()) 205 | } 206 | 207 | if resObj["error"] == nil { 208 | t.Error("Failed: test index should not exists") 209 | } 210 | 211 | errMsg := resObj["error"] 212 | 213 | if errMsg.(string) != "Index 'test' not found in directory '"+dataDirTmp+"'." { 214 | t.Error("Wrong error message") 215 | } 216 | } 217 | 218 | func TestRESTGetDocuments(t *testing.T) { 219 | ts, search, _ := getServer(t) 220 | defer ts.Close() 221 | 222 | addDocs(t, ts) 223 | 224 | indexURL := ts.URL + "/company" 225 | 226 | res, err := http.Get(indexURL + "/1") 227 | 228 | if err != nil { 229 | t.Error(err) 230 | return 231 | } 232 | 233 | content, err := ioutil.ReadAll(res.Body) 234 | res.Body.Close() 235 | if err != nil { 236 | t.Error(err) 237 | return 238 | } 239 | 240 | resObj := map[string]interface{}{} 241 | 242 | err = json.Unmarshal(content, &resObj) 243 | 244 | if err != nil { 245 | t.Error(err) 246 | return 247 | } 248 | 249 | if resObj["error"] != nil { 250 | t.Error(resObj["error"]) 251 | return 252 | } 253 | 254 | if resObj["name"] != "neoway" { 255 | t.Error("Invalid document") 256 | } 257 | 258 | deleteIndex(t, search, "company") 259 | } 260 | --------------------------------------------------------------------------------