├── .github └── workflows │ └── tests.yml ├── .gitignore ├── README.md ├── batcher ├── README.md ├── batcher.go └── batcher_test.go ├── cznicb ├── README.md ├── iterator.go ├── reader.go ├── store.go ├── store_test.go └── writer.go ├── go.mod ├── go.sum ├── icu ├── boundary.go └── boundary_test.go ├── lang ├── da │ └── stemmer_da.go ├── de │ └── stemmer_de.go ├── en │ ├── README.md │ ├── stemmer_en.go │ └── stemmer_en_test.go ├── es │ └── stemmer_es.go ├── fi │ └── stemmer_fi.go ├── fr │ ├── README.md │ └── stemmer_fr.go ├── hu │ └── stemmer_hu.go ├── it │ ├── README.md │ └── stemmer_it.go ├── ja │ ├── analyzer_ja.go │ ├── analyzer_ja_test.go │ ├── ja_morph_kagome.go │ └── ja_morph_kagome_test.go ├── nl │ └── stemmer_nl.go ├── no │ └── stemmer_no.go ├── pt │ ├── README.md │ └── stemmer_pt.go ├── ro │ └── stemmer_ro.go ├── ru │ └── stemmer_ru.go ├── sv │ └── stemmer_sv.go ├── th │ ├── analyzer_th.go │ ├── analyzer_th_test.go │ ├── stop_filter_th.go │ ├── stop_words_th.go │ └── unicode_tokenizer_th.go └── tr │ └── stemmer_tr.go ├── leveldb ├── batch.go ├── config.go ├── iterator.go ├── reader.go ├── store.go ├── store_test.go ├── util.go └── writer.go ├── preload ├── README.md ├── cmd │ └── bleve_export │ │ └── main.go ├── export.go ├── import.go ├── kvutil.pb.go ├── kvutil.proto ├── preload_test.go ├── reader.go ├── store.go └── writer.go ├── rocksdb ├── batch.go ├── batchex.go ├── config.go ├── iterator.go ├── reader.go ├── store.go ├── store_test.go └── writer.go └── stemmer ├── README.md ├── stemmer_filter.go └── stemmer_filter_test.go /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | name: Tests 7 | jobs: 8 | test: 9 | strategy: 10 | matrix: 11 | go-version: [1.18.x, 1.19.x, 1.20.x] 12 | platform: [ubuntu-20.04] 13 | runs-on: ${{ matrix.platform }} 14 | steps: 15 | - name: Install Dependencies 16 | run: | 17 | sudo apt-get install 
libicu-dev libleveldb-dev librocksdb-dev 18 | - name: Install Go 19 | uses: actions/setup-go@v1 20 | with: 21 | go-version: ${{ matrix.go-version }} 22 | - name: Checkout code 23 | uses: actions/checkout@v2 24 | - name: Test 25 | run: | 26 | go version 27 | CGO_LDFLAGS="-lrocksdb" go test -race ./... 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #* 2 | *.sublime-* 3 | *~ 4 | .#* 5 | .project 6 | .settings 7 | **/.idea/ 8 | **/*.iml 9 | .DS_Store 10 | /preload/cmd/bleve_export/bleve_export 11 | *.test 12 | tags 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bleve Extensions 2 | 3 | [![Tests](https://github.com/blevesearch/blevex/workflows/Tests/badge.svg?branch=master&event=push)](https://github.com/blevesearch/blevex/actions?query=workflow%3ATests+event%3Apush+branch%3Amaster) 4 | 5 | This repo contains extensions to Bleve that cannot be included in the core for one of the following reasons: 6 | 7 | - C dependency 8 | - Does not fully satisfy interface contracts 9 | - Experimental in nature 10 | -------------------------------------------------------------------------------- /batcher/README.md: -------------------------------------------------------------------------------- 1 | # Index Batcher 2 | 3 | A wrapper to automatically batch modifications 4 | 5 | IndexBatcher aggregates modifications (Index(), Delete(), SetInternal(), DeleteInternal()) that occur within close proximity into a batch execution for increased throughput at the cost of an amortized period / 2 latency increase. It is a fairly transparent wrapper around the Index interface that is itself an Index. The period is adjustable. 
When experimenting with Cassandra as a backend, I ran into throughput bottlenecks as a result of the number of keys being inserted at the KVStore layer. Guiding individual insertions into batches via this layer nearly doubled throughput. 6 | 7 | The IndexBatcher is a contribution from [Rob McColl](https://github.com/robmccoll). 8 | -------------------------------------------------------------------------------- /batcher/batcher.go: -------------------------------------------------------------------------------- 1 | package bleve 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/blevesearch/bleve/v2" 9 | ) 10 | 11 | // NOTE As this approach uses carefully orchestrated interactions 12 | // between a mutex, channels, and reference swapping, be sure to carefully 13 | // consider the impact of reordering statements or making changes to the 14 | // patterns present in the code. Specifically, individual operations follow the 15 | // pattern of: 16 | // - Construct empty references for result error, signal channel 17 | // - Acquire lock 18 | // - Assign references to current result error and signal channel 19 | // - Add operation to batch 20 | // - Release lock 21 | // - Wait on signal channel to close 22 | // - Return result error 23 | // While the batch loop follows the pattern on timer event: 24 | // - Acquire lock 25 | // - Execute batch 26 | // - Store result in current result error 27 | // - Make new current result error for use by operations in the next batch 28 | // - Close signal channel so that operations waiting on the completion of this 29 | // batch will return the result pointed to by the previous result error that 30 | // they are still holding. 
31 | // - Make new current signal channel for use by operations in the next batch 32 | // - Release lock 33 | // This design minimizes the need for allocating channels, uses only one go 34 | // routine for the batching, and doesn't require tracking the number of 35 | // operations waiting on a response or looping to notify each waiting operation. 36 | 37 | // bleveIndex is an alias for Index used by the IndexBatcher to avoid a conflict 38 | // between the embedded Index field and the overridden Index method 39 | type bleveIndex bleve.Index 40 | 41 | // IndexBatcher can be wrapped around a Index to aggregate operations 42 | // in a concurrent / parallel context for increased throughput. 43 | type IndexBatcher struct { 44 | bleveIndex 45 | 46 | period time.Duration 47 | closer chan bool 48 | 49 | // lock is used to protect applying / resetting the batch, updating / 50 | // replacing the result, and signalling / replacing the signal channel from 51 | // the batch loop. Elsewhere it is used for getting a reference to the 52 | // current result, getting a reference to the current signal channel, 53 | // and adding operations to the batch. 54 | lock sync.Mutex 55 | batch *bleve.Batch 56 | result *error 57 | signal chan bool 58 | } 59 | 60 | // NewIndexBatcher returns an index that will aggregate and fire modifying 61 | // requests as a batch every period time. All other Index methods are 62 | // passed straight through to the underlying index. Period time should be 63 | // tuned to the underlying KVStore, the concurrent load, latency requirements, 64 | // and the documents / document mappings being used. Single digit (7~8) 65 | // milliseconds is a reasonable place to start. 
66 | func NewIndexBatcher(index bleve.Index, period time.Duration) bleve.Index { 67 | ib := &IndexBatcher{ 68 | bleveIndex: index, 69 | 70 | batch: index.NewBatch(), 71 | period: period, 72 | closer: make(chan bool), 73 | signal: make(chan bool), 74 | result: new(error), 75 | } 76 | 77 | go ib.batchloop() 78 | return ib 79 | } 80 | 81 | // batchloop processes batches every period and implements a clean close 82 | // operation 83 | func (ib *IndexBatcher) batchloop() { 84 | t := time.NewTicker(ib.period) 85 | 86 | BatchLoop: 87 | for { 88 | select { 89 | case <-t.C: 90 | ib.lock.Lock() 91 | func() { 92 | defer func() { 93 | if r := recover(); r != nil { 94 | (*ib.result) = fmt.Errorf("IndexBatcher caught a panic: %v", r) 95 | } 96 | }() 97 | (*ib.result) = ib.Batch(ib.batch) 98 | }() 99 | ib.batch.Reset() 100 | ib.result = new(error) 101 | close(ib.signal) 102 | ib.signal = make(chan bool) 103 | ib.lock.Unlock() 104 | case <-ib.closer: 105 | break BatchLoop 106 | } 107 | } 108 | 109 | t.Stop() 110 | (*ib.result) = fmt.Errorf("IndexBatcher has been closed") 111 | close(ib.signal) 112 | } 113 | 114 | // Close stops the batcher returning an error to currently waiting operations 115 | // and closes the underlying Index 116 | func (ib *IndexBatcher) Close() error { 117 | ib.closer <- true 118 | return ib.bleveIndex.Close() 119 | } 120 | 121 | // Index the object with the specified identifier. May hold the operation for up 122 | // to ib.period time before executing in a batch. 123 | func (ib *IndexBatcher) Index(id string, data interface{}) error { 124 | var result *error 125 | var signal chan bool 126 | ib.lock.Lock() 127 | result = ib.result 128 | signal = ib.signal 129 | err := ib.batch.Index(id, data) 130 | ib.lock.Unlock() 131 | 132 | if err != nil { 133 | return err 134 | } 135 | 136 | <-signal 137 | return *result 138 | } 139 | 140 | // Delete entries for the specified identifier from the index. 
May hold the 141 | // operation for up to ib.period time before executing in a batch. 142 | func (ib *IndexBatcher) Delete(id string) error { 143 | var result *error 144 | var signal chan bool 145 | ib.lock.Lock() 146 | result = ib.result 147 | signal = ib.signal 148 | ib.batch.Delete(id) 149 | ib.lock.Unlock() 150 | 151 | <-signal 152 | return *result 153 | } 154 | 155 | // SetInternal mappings directly in the kvstore. May hold the 156 | // operation for up to ib.period time before executing in a batch. 157 | func (ib *IndexBatcher) SetInternal(key, val []byte) error { 158 | var result *error 159 | var signal chan bool 160 | ib.lock.Lock() 161 | result = ib.result 162 | signal = ib.signal 163 | ib.batch.SetInternal(key, val) 164 | ib.lock.Unlock() 165 | 166 | <-signal 167 | return *result 168 | } 169 | 170 | // DeleteInternal mappings directly from the kvstore. May hold the 171 | // operation for up to ib.period time before executing in a batch. 172 | func (ib *IndexBatcher) DeleteInternal(key []byte) error { 173 | var result *error 174 | var signal chan bool 175 | ib.lock.Lock() 176 | result = ib.result 177 | signal = ib.signal 178 | ib.batch.DeleteInternal(key) 179 | ib.lock.Unlock() 180 | 181 | <-signal 182 | return *result 183 | } 184 | -------------------------------------------------------------------------------- /batcher/batcher_test.go: -------------------------------------------------------------------------------- 1 | package bleve 2 | 3 | import ( 4 | "os" 5 | "sync" 6 | "testing" 7 | "time" 8 | 9 | "github.com/blevesearch/bleve/v2" 10 | ) 11 | 12 | func TestIndexBatcherConcurrentCrud(t *testing.T) { 13 | defer func() { 14 | err := os.RemoveAll("testidx") 15 | if err != nil { 16 | t.Fatal(err) 17 | } 18 | }() 19 | 20 | index, err := bleve.New("testidx", bleve.NewIndexMapping()) 21 | if err != nil { 22 | t.Fatal(err) 23 | } 24 | 25 | index = NewIndexBatcher(index, 2*time.Millisecond) 26 | 27 | { 28 | var wg sync.WaitGroup 29 | 30 | wg.Add(1) 31 | go 
func() { 32 | doca := map[string]interface{}{ 33 | "name": "marty", 34 | "desc": "gophercon india", 35 | } 36 | err2 := index.Index("a", doca) 37 | if err2 != nil { 38 | t.Error(err2) 39 | } 40 | wg.Done() 41 | }() 42 | 43 | wg.Add(1) 44 | go func() { 45 | docy := map[string]interface{}{ 46 | "name": "jasper", 47 | "desc": "clojure", 48 | } 49 | err3 := index.Index("y", docy) 50 | if err3 != nil { 51 | t.Error(err3) 52 | } 53 | wg.Done() 54 | }() 55 | 56 | wg.Add(1) 57 | go func() { 58 | docy := map[string]interface{}{ 59 | "name": "jasper2", 60 | "desc": "clojure2", 61 | } 62 | err4 := index.Index("y2", docy) 63 | if err4 != nil { 64 | t.Error(err4) 65 | } 66 | wg.Done() 67 | }() 68 | 69 | wg.Add(1) 70 | go func() { 71 | err5 := index.SetInternal([]byte("status2"), []byte("pending")) 72 | if err5 != nil { 73 | t.Error(err5) 74 | } 75 | wg.Done() 76 | }() 77 | 78 | wg.Add(1) 79 | go func() { 80 | docx := map[string]interface{}{ 81 | "name": "rose", 82 | "desc": "googler", 83 | } 84 | err6 := index.Index("x", docx) 85 | if err6 != nil { 86 | t.Error(err6) 87 | } 88 | wg.Done() 89 | }() 90 | 91 | wg.Add(1) 92 | go func() { 93 | err7 := index.SetInternal([]byte("status"), []byte("pending")) 94 | if err7 != nil { 95 | t.Error(err7) 96 | } 97 | wg.Done() 98 | }() 99 | 100 | wg.Wait() 101 | } 102 | 103 | val, err := index.GetInternal([]byte("status2")) 104 | if err != nil { 105 | t.Error(err) 106 | } 107 | if string(val) != "pending" { 108 | t.Errorf("expected pending, got '%s'", val) 109 | } 110 | 111 | { 112 | var wg sync.WaitGroup 113 | 114 | wg.Add(1) 115 | go func() { 116 | err8 := index.Delete("y") 117 | if err8 != nil { 118 | t.Error(err8) 119 | } 120 | wg.Done() 121 | }() 122 | 123 | wg.Add(1) 124 | go func() { 125 | err9 := index.Delete("y2") 126 | if err9 != nil { 127 | t.Error(err9) 128 | } 129 | wg.Done() 130 | }() 131 | 132 | wg.Add(1) 133 | go func() { 134 | err10 := index.DeleteInternal([]byte("status2")) 135 | if err10 != nil { 136 | t.Error(err10) 137 | 
} 138 | wg.Done() 139 | }() 140 | 141 | wg.Add(1) 142 | go func() { 143 | err11 := index.SetInternal([]byte("status"), []byte("ready")) 144 | if err11 != nil { 145 | t.Error(err11) 146 | } 147 | wg.Done() 148 | }() 149 | 150 | wg.Wait() 151 | } 152 | 153 | val, err = index.GetInternal([]byte("status2")) 154 | if err != nil { 155 | t.Error(err) 156 | } 157 | if val != nil { 158 | t.Errorf("expected nil, got '%s'", val) 159 | } 160 | 161 | docb := map[string]interface{}{ 162 | "name": "steve", 163 | "desc": "cbft master", 164 | } 165 | batch := index.NewBatch() 166 | err = batch.Index("b", docb) 167 | if err != nil { 168 | t.Error(err) 169 | } 170 | batch.Delete("x") 171 | batch.SetInternal([]byte("batchi"), []byte("batchv")) 172 | batch.DeleteInternal([]byte("status")) 173 | err = index.Batch(batch) 174 | if err != nil { 175 | t.Error(err) 176 | } 177 | val, err = index.GetInternal([]byte("batchi")) 178 | if err != nil { 179 | t.Error(err) 180 | } 181 | if string(val) != "batchv" { 182 | t.Errorf("expected 'batchv', got '%s'", val) 183 | } 184 | val, err = index.GetInternal([]byte("status")) 185 | if err != nil { 186 | t.Error(err) 187 | } 188 | if val != nil { 189 | t.Errorf("expected nil, got '%s'", val) 190 | } 191 | 192 | { 193 | var wg sync.WaitGroup 194 | 195 | wg.Add(1) 196 | go func() { 197 | err12 := index.SetInternal([]byte("seqno"), []byte("7")) 198 | if err12 != nil { 199 | t.Error(err12) 200 | } 201 | wg.Done() 202 | }() 203 | 204 | wg.Add(1) 205 | go func() { 206 | err13 := index.DeleteInternal([]byte("status")) 207 | if err13 != nil { 208 | t.Error(err13) 209 | } 210 | wg.Done() 211 | }() 212 | 213 | wg.Wait() 214 | } 215 | val, err = index.GetInternal([]byte("status")) 216 | if err != nil { 217 | t.Error(err) 218 | } 219 | if val != nil { 220 | t.Errorf("expected nil, got '%s'", val) 221 | } 222 | 223 | val, err = index.GetInternal([]byte("seqno")) 224 | if err != nil { 225 | t.Error(err) 226 | } 227 | if string(val) != "7" { 228 | t.Errorf("expected 
'7', got '%s'", val) 229 | } 230 | 231 | count, err := index.DocCount() 232 | if err != nil { 233 | t.Fatal(err) 234 | } 235 | if count != 2 { 236 | t.Errorf("expected doc count 2, got %d", count) 237 | } 238 | 239 | doc, err := index.Document("a") 240 | if err != nil { 241 | t.Fatal(err) 242 | } 243 | if doc == nil { 244 | t.Errorf("expected doc not nil, got nil") 245 | } 246 | 247 | doc, err = index.Document("y2") 248 | if err != nil { 249 | t.Fatal(err) 250 | } 251 | if doc != nil { 252 | t.Errorf("expected doc nil, got not nil") 253 | } 254 | } 255 | -------------------------------------------------------------------------------- /cznicb/README.md: -------------------------------------------------------------------------------- 1 | # cznicb Bleve KV store 2 | 3 | Although the cznicb KV store is pure-Go, it is in the extensions package since it does not fully satisfy the Bleve contract, which requires reader isolation. 4 | 5 | If, for example, you always load the entire dataset, and then **ONLY** query it, you can safely use this store. -------------------------------------------------------------------------------- /cznicb/iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the 4 | // License. You may obtain a copy of the License at 5 | // http://www.apache.org/licenses/LICENSE-2.0 6 | // Unless required by applicable law or agreed to in writing, 7 | // software distributed under the License is distributed on an "AS 8 | // IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | // express or implied. See the License for the specific language 10 | // governing permissions and limitations under the License. 
11 | 12 | package cznicb 13 | 14 | import ( 15 | "bytes" 16 | "errors" 17 | 18 | "github.com/cznic/b" 19 | ) 20 | 21 | var iteratorDoneErr = errors.New("iteratorDoneErr") // A sentinel value. 22 | 23 | type Iterator struct { 24 | s *Store 25 | e *b.Enumerator 26 | 27 | currK interface{} 28 | currV interface{} 29 | currErr error 30 | 31 | prefix []byte 32 | start []byte 33 | end []byte 34 | } 35 | 36 | func (i *Iterator) Seek(k []byte) { 37 | if i.start != nil && bytes.Compare(k, i.start) < 0 { 38 | k = i.start 39 | } 40 | if i.prefix != nil && !bytes.HasPrefix(k, i.prefix) { 41 | if bytes.Compare(k, i.prefix) < 0 { 42 | k = i.prefix 43 | } else { 44 | var end []byte 45 | for x := len(i.prefix) - 1; x >= 0; x-- { 46 | c := i.prefix[x] 47 | if c < 0xff { 48 | end = make([]byte, x+1) 49 | copy(end, i.prefix) 50 | end[x] = c + 1 51 | break 52 | } 53 | } 54 | k = end 55 | } 56 | } 57 | 58 | i.currK = nil 59 | i.currV = nil 60 | i.currErr = nil 61 | 62 | i.s.m.RLock() 63 | i.e, _ = i.s.t.Seek(k) 64 | i.s.m.RUnlock() // cannot defer, must unlock before Next 65 | 66 | i.Next() 67 | } 68 | 69 | func (i *Iterator) Next() { 70 | if i.currErr != nil { 71 | i.currK = nil 72 | i.currV = nil 73 | i.currErr = iteratorDoneErr 74 | return 75 | } 76 | 77 | i.s.m.RLock() 78 | defer i.s.m.RUnlock() 79 | i.currK, i.currV, i.currErr = i.e.Next() 80 | } 81 | 82 | func (i *Iterator) Current() ([]byte, []byte, bool) { 83 | if i.currErr == iteratorDoneErr || 84 | i.currK == nil || 85 | i.currV == nil { 86 | return nil, nil, false 87 | } 88 | if i.prefix != nil && !bytes.HasPrefix(i.currK.([]byte), i.prefix) { 89 | return nil, nil, false 90 | } else if i.end != nil && bytes.Compare(i.currK.([]byte), i.end) >= 0 { 91 | return nil, nil, false 92 | } 93 | 94 | return i.currK.([]byte), i.currV.([]byte), true 95 | } 96 | 97 | func (i *Iterator) Key() []byte { 98 | k, _, ok := i.Current() 99 | if !ok { 100 | return nil 101 | } 102 | return k 103 | } 104 | 105 | func (i *Iterator) Value() []byte { 
106 | _, v, ok := i.Current() 107 | if !ok { 108 | return nil 109 | } 110 | return v 111 | } 112 | 113 | func (i *Iterator) Valid() bool { 114 | _, _, ok := i.Current() 115 | return ok 116 | } 117 | 118 | func (i *Iterator) Close() error { 119 | if i.e != nil { 120 | i.e.Close() 121 | } 122 | i.e = nil 123 | return nil 124 | } 125 | -------------------------------------------------------------------------------- /cznicb/reader.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the 4 | // License. You may obtain a copy of the License at 5 | // http://www.apache.org/licenses/LICENSE-2.0 6 | // Unless required by applicable law or agreed to in writing, 7 | // software distributed under the License is distributed on an "AS 8 | // IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | // express or implied. See the License for the specific language 10 | // governing permissions and limitations under the License. 
11 | 12 | package cznicb 13 | 14 | import ( 15 | store "github.com/blevesearch/upsidedown_store_api" 16 | ) 17 | 18 | type Reader struct { 19 | s *Store 20 | } 21 | 22 | func (r *Reader) Get(key []byte) ([]byte, error) { 23 | r.s.m.RLock() 24 | defer r.s.m.RUnlock() 25 | v, ok := r.s.t.Get(key) 26 | if !ok || v == nil { 27 | return nil, nil 28 | } 29 | rv := make([]byte, len(v.([]byte))) 30 | copy(rv, v.([]byte)) 31 | return rv, nil 32 | } 33 | 34 | func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) { 35 | return store.MultiGet(r, keys) 36 | } 37 | 38 | func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator { 39 | e, _ := r.s.t.SeekFirst() 40 | rv := Iterator{ 41 | s: r.s, 42 | e: e, 43 | prefix: prefix, 44 | } 45 | rv.Seek(prefix) 46 | return &rv 47 | } 48 | 49 | func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { 50 | e, _ := r.s.t.SeekFirst() 51 | rv := Iterator{ 52 | s: r.s, 53 | e: e, 54 | start: start, 55 | end: end, 56 | } 57 | rv.Seek(start) 58 | return &rv 59 | } 60 | 61 | func (r *Reader) Close() error { 62 | return nil 63 | } 64 | -------------------------------------------------------------------------------- /cznicb/store.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the 4 | // License. You may obtain a copy of the License at 5 | // http://www.apache.org/licenses/LICENSE-2.0 6 | // Unless required by applicable law or agreed to in writing, 7 | // software distributed under the License is distributed on an "AS 8 | // IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | // express or implied. See the License for the specific language 10 | // governing permissions and limitations under the License. 
11 | 12 | // Package cznicb provides an in-memory implementation of the KVStore 13 | // interfaces using the cznic/b in-memory btree. Of note: this 14 | // implementation does not have reader isolation. 15 | package cznicb 16 | 17 | import ( 18 | "bytes" 19 | "fmt" 20 | "os" 21 | "sync" 22 | 23 | store "github.com/blevesearch/upsidedown_store_api" 24 | "github.com/blevesearch/bleve/v2/registry" 25 | 26 | "github.com/cznic/b" 27 | ) 28 | 29 | const Name = "cznicb" 30 | 31 | type Store struct { 32 | m sync.RWMutex 33 | t *b.Tree 34 | mo store.MergeOperator 35 | } 36 | 37 | func itemCompare(a, b interface{}) int { 38 | return bytes.Compare(a.([]byte), b.([]byte)) 39 | } 40 | 41 | func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { 42 | path, ok := config["path"].(string) 43 | if !ok { 44 | return nil, fmt.Errorf("must specify path") 45 | } 46 | if path != "" { 47 | return nil, os.ErrInvalid 48 | } 49 | s := &Store{ 50 | t: b.TreeNew(itemCompare), 51 | mo: mo, 52 | } 53 | return s, nil 54 | } 55 | 56 | func (s *Store) Close() error { 57 | return nil 58 | } 59 | 60 | func (s *Store) Reader() (store.KVReader, error) { 61 | return &Reader{s: s}, nil 62 | } 63 | 64 | func (s *Store) Writer() (store.KVWriter, error) { 65 | return &Writer{s: s}, nil 66 | } 67 | 68 | func init() { 69 | registry.RegisterKVStore(Name, New) 70 | } 71 | -------------------------------------------------------------------------------- /cznicb/store_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the 5 | // License. 
You may obtain a copy of the License at 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // Unless required by applicable law or agreed to in writing, 8 | // software distributed under the License is distributed on an "AS 9 | // IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 10 | // express or implied. See the License for the specific language 11 | // governing permissions and limitations under the License. 12 | 13 | package cznicb 14 | 15 | import ( 16 | "testing" 17 | 18 | store "github.com/blevesearch/upsidedown_store_api" 19 | "github.com/blevesearch/upsidedown_store_api/test" 20 | ) 21 | 22 | func open(t *testing.T, mo store.MergeOperator) store.KVStore { 23 | rv, err := New(mo, map[string]interface{}{ 24 | "path": "", 25 | }) 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | return rv 30 | } 31 | 32 | func cleanup(t *testing.T, s store.KVStore) { 33 | err := s.Close() 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | } 38 | 39 | func TestCznicbKVCrud(t *testing.T) { 40 | s := open(t, nil) 41 | defer cleanup(t, s) 42 | test.CommonTestKVCrud(t, s) 43 | } 44 | 45 | // Cznicb does NOT provide reader isolation, use accordingly 46 | 47 | // func TestCznicbReaderIsolation(t *testing.T) { 48 | // s, err := open(t, nil) 49 | // defer cleanup(t, s) 50 | // test.CommonTestReaderIsolation(t, s) 51 | // } 52 | 53 | func TestCznicbReaderOwnsGetBytes(t *testing.T) { 54 | s := open(t, nil) 55 | defer cleanup(t, s) 56 | test.CommonTestReaderOwnsGetBytes(t, s) 57 | } 58 | 59 | func TestCznicbWriterOwnsBytes(t *testing.T) { 60 | s := open(t, nil) 61 | defer cleanup(t, s) 62 | test.CommonTestWriterOwnsBytes(t, s) 63 | } 64 | 65 | func TestCznicbPrefixIterator(t *testing.T) { 66 | s := open(t, nil) 67 | defer cleanup(t, s) 68 | test.CommonTestPrefixIterator(t, s) 69 | } 70 | 71 | func TestCznicbPrefixIteratorSeek(t *testing.T) { 72 | s := open(t, nil) 73 | defer cleanup(t, s) 74 | test.CommonTestPrefixIteratorSeek(t, s) 75 | } 76 | 77 | func 
TestCznicbRangeIterator(t *testing.T) { 78 | s := open(t, nil) 79 | defer cleanup(t, s) 80 | test.CommonTestRangeIterator(t, s) 81 | } 82 | 83 | func TestCznicbRangeIteratorSeek(t *testing.T) { 84 | s := open(t, nil) 85 | defer cleanup(t, s) 86 | test.CommonTestRangeIteratorSeek(t, s) 87 | } 88 | 89 | func TestCznicbMerge(t *testing.T) { 90 | s := open(t, &test.TestMergeCounter{}) 91 | defer cleanup(t, s) 92 | test.CommonTestMerge(t, s) 93 | } 94 | -------------------------------------------------------------------------------- /cznicb/writer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package cznicb 11 | 12 | import ( 13 | "fmt" 14 | 15 | store "github.com/blevesearch/upsidedown_store_api" 16 | ) 17 | 18 | type Writer struct { 19 | s *Store 20 | } 21 | 22 | func (w *Writer) NewBatch() store.KVBatch { 23 | return store.NewEmulatedBatch(w.s.mo) 24 | } 25 | 26 | func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) { 27 | return make([]byte, options.TotalBytes), w.NewBatch(), nil 28 | } 29 | 30 | func (w *Writer) ExecuteBatch(batch store.KVBatch) error { 31 | 32 | emulatedBatch, ok := batch.(*store.EmulatedBatch) 33 | if !ok { 34 | return fmt.Errorf("wrong type of batch") 35 | } 36 | 37 | w.s.m.Lock() 38 | defer w.s.m.Unlock() 39 | 40 | t := w.s.t 41 | for key, mergeOps := range emulatedBatch.Merger.Merges { 42 | k := []byte(key) 43 | t.Put(k, func(oldV interface{}, exists bool) (newV interface{}, write bool) { 44 | ob := []byte(nil) 45 | if exists && oldV != nil { 46 | ob = oldV.([]byte) 47 | } 48 | mergedVal, fullMergeOk := w.s.mo.FullMerge(k, ob, mergeOps) 49 | if !fullMergeOk { 50 | return nil, false 51 | } 52 | return mergedVal, true 53 | }) 54 | } 55 | 56 | for _, op := range emulatedBatch.Ops { 57 | if op.V != nil { 58 | t.Set(op.K, op.V) 59 | } else { 60 | t.Delete(op.K) 61 | } 62 | } 63 | 64 | return nil 65 | } 66 | 67 | func (w *Writer) Close() error { 68 | w.s.m.Lock() 69 | defer w.s.m.Unlock() 70 | w.s = nil 71 | return nil 72 | } 73 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/blevesearch/blevex/v2 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/blevesearch/bleve/v2 v2.3.10 7 | github.com/blevesearch/bleve_index_api v1.0.6 8 | github.com/blevesearch/upsidedown_store_api v1.0.2 9 | github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d 10 | github.com/golang/protobuf v1.3.2 11 | github.com/ikawaha/kagome.ipadic v1.1.2 12 | 
github.com/jmhodges/levigo v1.0.0 13 | github.com/tebeka/snowball v0.4.2 14 | github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c 15 | ) 16 | 17 | require ( 18 | github.com/RoaringBitmap/roaring v1.2.3 // indirect 19 | github.com/bits-and-blooms/bitset v1.2.0 // indirect 20 | github.com/blevesearch/geo v0.1.18 // indirect 21 | github.com/blevesearch/go-porterstemmer v1.0.3 // indirect 22 | github.com/blevesearch/goleveldb v1.0.1 // indirect 23 | github.com/blevesearch/gtreap v0.1.1 // indirect 24 | github.com/blevesearch/mmap-go v1.0.4 // indirect 25 | github.com/blevesearch/scorch_segment_api/v2 v2.1.6 // indirect 26 | github.com/blevesearch/segment v0.9.1 // indirect 27 | github.com/blevesearch/snowballstem v0.9.0 // indirect 28 | github.com/blevesearch/stempel v0.2.0 // indirect 29 | github.com/blevesearch/vellum v1.0.10 // indirect 30 | github.com/blevesearch/zapx/v11 v11.3.10 // indirect 31 | github.com/blevesearch/zapx/v12 v12.3.10 // indirect 32 | github.com/blevesearch/zapx/v13 v13.3.10 // indirect 33 | github.com/blevesearch/zapx/v14 v14.3.10 // indirect 34 | github.com/blevesearch/zapx/v15 v15.3.13 // indirect 35 | github.com/couchbase/ghistogram v0.1.0 // indirect 36 | github.com/couchbase/moss v0.2.0 // indirect 37 | github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect 38 | github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect 39 | github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c // indirect 40 | github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect 41 | github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 // indirect 42 | github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect 43 | github.com/golang/snappy v0.0.1 // indirect 44 | github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede // indirect 45 | github.com/mschoch/smat v0.2.0 // indirect 46 | github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 // indirect 47 | 
go.etcd.io/bbolt v1.3.7 // indirect 48 | golang.org/x/sys v0.5.0 // indirect 49 | golang.org/x/text v0.8.0 // indirect 50 | ) 51 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVOVmhWBY= 2 | github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE= 3 | github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= 4 | github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= 5 | github.com/blevesearch/bleve/v2 v2.3.10 h1:z8V0wwGoL4rp7nG/O3qVVLYxUqCbEwskMt4iRJsPLgg= 6 | github.com/blevesearch/bleve/v2 v2.3.10/go.mod h1:RJzeoeHC+vNHsoLR54+crS1HmOWpnH87fL70HAUCzIA= 7 | github.com/blevesearch/bleve_index_api v1.0.6 h1:gyUUxdsrvmW3jVhhYdCVL6h9dCjNT/geNU7PxGn37p8= 8 | github.com/blevesearch/bleve_index_api v1.0.6/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms= 9 | github.com/blevesearch/geo v0.1.18 h1:Np8jycHTZ5scFe7VEPLrDoHnnb9C4j636ue/CGrhtDw= 10 | github.com/blevesearch/geo v0.1.18/go.mod h1:uRMGWG0HJYfWfFJpK3zTdnnr1K+ksZTuWKhXeSokfnM= 11 | github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= 12 | github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= 13 | github.com/blevesearch/goleveldb v1.0.1 h1:iAtV2Cu5s0GD1lwUiekkFHe2gTMCCNVj2foPclDLIFI= 14 | github.com/blevesearch/goleveldb v1.0.1/go.mod h1:WrU8ltZbIp0wAoig/MHbrPCXSOLpe79nz5lv5nqfYrQ= 15 | github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y= 16 | github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= 17 | github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA= 18 | github.com/blevesearch/mmap-go v1.0.4 
h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= 19 | github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= 20 | github.com/blevesearch/scorch_segment_api/v2 v2.1.6 h1:CdekX/Ob6YCYmeHzD72cKpwzBjvkOGegHOqhAkXp6yA= 21 | github.com/blevesearch/scorch_segment_api/v2 v2.1.6/go.mod h1:nQQYlp51XvoSVxcciBjtvuHPIVjlWrN1hX4qwK2cqdc= 22 | github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= 23 | github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= 24 | github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s= 25 | github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= 26 | github.com/blevesearch/stempel v0.2.0 h1:CYzVPaScODMvgE9o+kf6D4RJ/VRomyi9uHF+PtB+Afc= 27 | github.com/blevesearch/stempel v0.2.0/go.mod h1:wjeTHqQv+nQdbPuJ/YcvOjTInA2EIc6Ks1FoSUzSLvc= 28 | github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A= 29 | github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ= 30 | github.com/blevesearch/vellum v1.0.10 h1:HGPJDT2bTva12hrHepVT3rOyIKFFF4t7Gf6yMxyMIPI= 31 | github.com/blevesearch/vellum v1.0.10/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k= 32 | github.com/blevesearch/zapx/v11 v11.3.10 h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk= 33 | github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ= 34 | github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s= 35 | github.com/blevesearch/zapx/v12 v12.3.10/go.mod h1:0yeZg6JhaGxITlsS5co73aqPtM04+ycnI6D1v0mhbCs= 36 | github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIqMGahDE8= 37 | github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk= 38 | github.com/blevesearch/zapx/v14 v14.3.10 
h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU= 39 | github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns= 40 | github.com/blevesearch/zapx/v15 v15.3.13 h1:6EkfaZiPlAxqXz0neniq35my6S48QI94W/wyhnpDHHQ= 41 | github.com/blevesearch/zapx/v15 v15.3.13/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg= 42 | github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= 43 | github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= 44 | github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o= 45 | github.com/couchbase/moss v0.2.0/go.mod h1:9MaHIaRuy9pvLPUJxB8sh8OrLfyDczECVL37grCIubs= 46 | github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d h1:SwD98825d6bdB+pEuTxWOXiSjBrHdOl/UVp75eI7JT8= 47 | github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d/go.mod h1:URriBxXwVq5ijiJ12C7iIZqlA69nTlI+LgI6/pwftG8= 48 | github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 h1:iwZdTE0PVqJCos1vaoKsclOGD3ADKpshg3SRtYBbwso= 49 | github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM= 50 | github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 h1:MZRmHqDBd0vxNwenEbKSQqRVT24d3C05ft8kduSwlqM= 51 | github.com/cznic/strutil v0.0.0-20181122101858-275e90344537/go.mod h1:AHHPPPXTw0h6pVabbcbyGRK1DckRn7r/STdZEeIDzZc= 52 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 53 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 54 | github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c h1:8ISkoahWXwZR41ois5lSJBSVw4D0OV19Ht/JSTzvSv0= 55 | github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c/go.mod h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64= 56 | github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 h1:JWuenKqqX8nojtoVVWjGfOF9635RETekkoH6Cc9SX0A= 57 | github.com/facebookgo/stack 
v0.0.0-20160209184415-751773369052/go.mod h1:UbMTZqLaRiH3MsBH8va0n7s1pQYcu3uTb8G4tygF4Zg= 58 | github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 h1:7HZCaLC5+BZpmbhCOZJ293Lz68O7PYrF2EzeiFMwCLk= 59 | github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4/go.mod h1:5tD+neXqOorC30/tWg0LCSkrqj/AR6gu8yY8/fpw1q0= 60 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 61 | github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 h1:gtexQ/VGyN+VVFRXSFiguSNcXmS6rkKT+X7FdIrTtfo= 62 | github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI= 63 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 64 | github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= 65 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 66 | github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 67 | github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= 68 | github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 69 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= 70 | github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= 71 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 72 | github.com/ikawaha/kagome.ipadic v1.1.2 h1:pFxZ1PpMpc6ZoBK712YN5cVK0u/ju2DZ+gRIOriJFFs= 73 | github.com/ikawaha/kagome.ipadic v1.1.2/go.mod h1:DPSBbU0czaJhAb/5uKQZHMc9MTVRpDugJfX+HddPHHg= 74 | github.com/jmhodges/levigo v1.0.0 h1:q5EC36kV79HWeTBWsod3mG11EgStG3qArTKcvlksN1U= 75 | github.com/jmhodges/levigo v1.0.0/go.mod h1:Q6Qx+uH3RAqyK4rFQroq9RL7mdkABMcfhEI+nNuzMJQ= 76 | github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede h1:YrgBGwxMRK0Vq0WSCWFaZUnTsrA/PZE/xs1QZh+/edg= 77 | 
github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= 78 | github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= 79 | github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= 80 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 81 | github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= 82 | github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 83 | github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU= 84 | github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= 85 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 86 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 87 | github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= 88 | github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= 89 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 90 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 91 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 92 | github.com/tebeka/snowball v0.4.2 h1:ujvgLOr6IHbsvB2Vgz27IcxWqDrNu9/oPhhe74lN/Kc= 93 | github.com/tebeka/snowball v0.4.2/go.mod h1:4IfL14h1lvwZcp1sfXuuc7/7yCsvVffTWxWxCLfFpYg= 94 | github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c h1:g+WoO5jjkqGAzHWCjJB1zZfXPIAaDpzXIEJ0eS6B5Ok= 95 | github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8= 96 | go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ= 97 | go.etcd.io/bbolt v1.3.7/go.mod 
h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw= 98 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd h1:nTDtHvHSdCn1m6ITfMRqtOd/9+7a3s8RBNOZ3eYZzJA= 99 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 100 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 101 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 102 | golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 103 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 104 | golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= 105 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 106 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 107 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 108 | golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= 109 | golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 110 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 111 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 112 | gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= 113 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 114 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 115 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 116 | gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE= 117 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 
118 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 119 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 120 | -------------------------------------------------------------------------------- /icu/boundary.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package icu 11 | 12 | // #cgo LDFLAGS: -licuuc -licudata 13 | // #cgo darwin LDFLAGS: -L/usr/local/opt/icu4c/lib 14 | // #cgo darwin CPPFLAGS: -I/usr/local/opt/icu4c/include 15 | // #include 16 | // #include 17 | // #include "unicode/utypes.h" 18 | // #include "unicode/uchar.h" 19 | // #include "unicode/ubrk.h" 20 | // #include "unicode/ustring.h" 21 | import "C" 22 | 23 | import ( 24 | "unsafe" 25 | 26 | "github.com/blevesearch/bleve/v2/analysis" 27 | "github.com/blevesearch/bleve/v2/registry" 28 | ) 29 | 30 | const Name = "icu" 31 | 32 | type UnicodeWordBoundaryTokenizer struct { 33 | locale *C.char 34 | } 35 | 36 | func NewUnicodeWordBoundaryTokenizer() *UnicodeWordBoundaryTokenizer { 37 | return &UnicodeWordBoundaryTokenizer{} 38 | } 39 | 40 | func NewUnicodeWordBoundaryCustomLocaleTokenizer(locale string) *UnicodeWordBoundaryTokenizer { 41 | return &UnicodeWordBoundaryTokenizer{ 42 | locale: C.CString(locale), 43 | } 44 | } 45 | 46 | func (t *UnicodeWordBoundaryTokenizer) 
Tokenize(input []byte) analysis.TokenStream { 47 | rv := make(analysis.TokenStream, 0) 48 | 49 | if len(input) < 1 { 50 | return rv 51 | } 52 | 53 | // works 54 | var myUnsafePointer = unsafe.Pointer(&(input[0])) 55 | var myCCharPointer *C.char = (*C.char)(myUnsafePointer) 56 | 57 | var inlen C.int32_t = C.int32_t(len(input)) 58 | var buflen C.int32_t = C.int32_t(2*len(input) + 1) // worse case each byte becomes 2 59 | var stringToExamine []C.UChar = make([]C.UChar, buflen) 60 | var myUnsafePointerToExamine = unsafe.Pointer(&(stringToExamine[0])) 61 | var myUCharPointer *C.UChar = (*C.UChar)(myUnsafePointerToExamine) 62 | C.u_uastrncpy(myUCharPointer, myCCharPointer, inlen) 63 | 64 | var err C.UErrorCode = C.U_ZERO_ERROR 65 | bi := C.ubrk_open(C.UBRK_WORD, t.locale, myUCharPointer, -1, &err) 66 | 67 | if err > C.U_ZERO_ERROR { 68 | return rv 69 | } 70 | 71 | defer C.ubrk_close(bi) 72 | 73 | position := 0 74 | var prev C.int32_t 75 | p := C.ubrk_first(bi) 76 | for p != C.UBRK_DONE { 77 | 78 | q := C.ubrk_getRuleStatus(bi) 79 | 80 | // convert boundaries back to utf8 positions 81 | var nilCString *C.char 82 | var indexA C.int32_t 83 | 84 | C.u_strToUTF8(nilCString, 0, &indexA, myUCharPointer, prev, &err) 85 | if err > C.U_ZERO_ERROR && err != C.U_BUFFER_OVERFLOW_ERROR { 86 | return rv 87 | } else { 88 | err = C.U_ZERO_ERROR 89 | } 90 | 91 | var indexB C.int32_t 92 | C.u_strToUTF8(nilCString, 0, &indexB, myUCharPointer, p, &err) 93 | if err > C.U_ZERO_ERROR && err != C.U_BUFFER_OVERFLOW_ERROR { 94 | return rv 95 | } else { 96 | err = C.U_ZERO_ERROR 97 | } 98 | 99 | if q != 0 { 100 | position += 1 101 | token := analysis.Token{ 102 | Start: int(indexA), 103 | End: int(indexB), 104 | Term: input[indexA:indexB], 105 | Position: position, 106 | Type: analysis.AlphaNumeric, 107 | } 108 | if q == 100 { 109 | token.Type = analysis.Numeric 110 | } 111 | if q == 400 { 112 | token.Type = analysis.Ideographic 113 | } 114 | rv = append(rv, &token) 115 | } 116 | prev = p 117 | p = 
C.ubrk_next(bi) 118 | } 119 | 120 | return rv 121 | } 122 | 123 | func UnicodeWordBoundaryTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { 124 | locale := "" 125 | localeVal, ok := config["locale"].(string) 126 | if ok { 127 | locale = localeVal 128 | } 129 | if locale == "" { 130 | return NewUnicodeWordBoundaryTokenizer(), nil 131 | } else { 132 | return NewUnicodeWordBoundaryCustomLocaleTokenizer(locale), nil 133 | } 134 | } 135 | 136 | func init() { 137 | registry.RegisterTokenizer(Name, UnicodeWordBoundaryTokenizerConstructor) 138 | } 139 | -------------------------------------------------------------------------------- /icu/boundary_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package icu 11 | 12 | import ( 13 | "reflect" 14 | "testing" 15 | 16 | "github.com/blevesearch/bleve/v2/analysis" 17 | ) 18 | 19 | func TestBoundary(t *testing.T) { 20 | 21 | tests := []struct { 22 | input []byte 23 | locale string 24 | output analysis.TokenStream 25 | }{ 26 | { 27 | []byte("Hello World"), 28 | "en_US", 29 | analysis.TokenStream{ 30 | { 31 | Start: 0, 32 | End: 5, 33 | Term: []byte("Hello"), 34 | Position: 1, 35 | Type: analysis.AlphaNumeric, 36 | }, 37 | { 38 | Start: 6, 39 | End: 11, 40 | Term: []byte("World"), 41 | Position: 2, 42 | Type: analysis.AlphaNumeric, 43 | }, 44 | }, 45 | }, 46 | { 47 | []byte("steven's"), 48 | "en_US", 49 | analysis.TokenStream{ 50 | { 51 | Start: 0, 52 | End: 8, 53 | Term: []byte("steven's"), 54 | Position: 1, 55 | Type: analysis.AlphaNumeric, 56 | }, 57 | }, 58 | }, 59 | { 60 | []byte("こんにちは世界"), 61 | "en_US", 62 | analysis.TokenStream{ 63 | { 64 | Start: 0, 65 | End: 15, 66 | Term: []byte("こんにちは"), 67 | Position: 1, 68 | Type: analysis.Ideographic, 69 | }, 70 | { 71 | Start: 15, 72 | End: 21, 73 | Term: []byte("世界"), 74 | Position: 2, 75 | Type: analysis.Ideographic, 76 | }, 77 | }, 78 | }, 79 | { 80 | []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"), 81 | "th_TH", 82 | analysis.TokenStream{ 83 | { 84 | Start: 0, 85 | End: 9, 86 | Term: []byte("แยก"), 87 | Position: 1, 88 | Type: analysis.AlphaNumeric, 89 | }, 90 | { 91 | Start: 9, 92 | End: 15, 93 | Term: []byte("คำ"), 94 | Position: 2, 95 | Type: analysis.AlphaNumeric, 96 | }, 97 | { 98 | Start: 15, 99 | End: 27, 100 | Term: []byte("ภาษา"), 101 | Position: 3, 102 | Type: analysis.AlphaNumeric, 103 | }, 104 | { 105 | Start: 27, 106 | End: 36, 107 | Term: []byte("ไทย"), 108 | Position: 4, 109 | Type: analysis.AlphaNumeric, 110 | }, 111 | { 112 | Start: 36, 113 | End: 42, 114 | Term: []byte("ก็"), 115 | Position: 5, 116 | Type: analysis.AlphaNumeric, 117 | }, 118 | { 119 | Start: 42, 120 | End: 57, 121 | Term: []byte("ทำได้"), 122 | Position: 6, 123 | Type: 
analysis.AlphaNumeric, 124 | }, 125 | { 126 | Start: 57, 127 | End: 63, 128 | Term: []byte("นะ"), 129 | Position: 7, 130 | Type: analysis.AlphaNumeric, 131 | }, 132 | { 133 | Start: 63, 134 | End: 72, 135 | Term: []byte("จ้ะ"), 136 | Position: 8, 137 | Type: analysis.AlphaNumeric, 138 | }, 139 | }, 140 | }, 141 | { 142 | []byte("age 25"), 143 | "en_US", 144 | analysis.TokenStream{ 145 | { 146 | Start: 0, 147 | End: 3, 148 | Term: []byte("age"), 149 | Position: 1, 150 | Type: analysis.AlphaNumeric, 151 | }, 152 | { 153 | Start: 4, 154 | End: 6, 155 | Term: []byte("25"), 156 | Position: 2, 157 | Type: analysis.Numeric, 158 | }, 159 | }, 160 | }, 161 | } 162 | 163 | for _, test := range tests { 164 | tokenizer := NewUnicodeWordBoundaryCustomLocaleTokenizer(test.locale) 165 | actual := tokenizer.Tokenize(test.input) 166 | 167 | if !reflect.DeepEqual(actual, test.output) { 168 | t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input)) 169 | } 170 | } 171 | } 172 | 173 | var sampleLargeInput = []byte(`There are three characteristics of liquids which are relevant to the discussion of a BLEVE: 174 | If a liquid in a sealed container is boiled, the pressure inside the container increases. As the liquid changes to a gas it expands - this expansion in a vented container would cause the gas and liquid to take up more space. In a sealed container the gas and liquid are not able to take up more space and so the pressure rises. Pressurized vessels containing liquids can reach an equilibrium where the liquid stops boiling and the pressure stops rising. This occurs when no more heat is being added to the system (either because it has reached ambient temperature or has had a heat source removed). 175 | The boiling temperature of a liquid is dependent on pressure - high pressures will yield high boiling temperatures, and low pressures will yield low boiling temperatures. 
A common simple experiment is to place a cup of water in a vacuum chamber, and then reduce the pressure in the chamber until the water boils. By reducing the pressure the water will boil even at room temperature. This works both ways - if the pressure is increased beyond normal atmospheric pressures, the boiling of hot water could be suppressed far beyond normal temperatures. The cooling system of a modern internal combustion engine is a real-world example. 176 | When a liquid boils it turns into a gas. The resulting gas takes up far more space than the liquid did. 177 | Typically, a BLEVE starts with a container of liquid which is held above its normal, atmospheric-pressure boiling temperature. Many substances normally stored as liquids, such as CO2, oxygen, and other similar industrial gases have boiling temperatures, at atmospheric pressure, far below room temperature. In the case of water, a BLEVE could occur if a pressurized chamber of water is heated far beyond the standard 100 °C (212 °F). That container, because the boiling water pressurizes it, is capable of holding liquid water at very high temperatures. 178 | If the pressurized vessel, containing liquid at high temperature (which may be room temperature, depending on the substance) ruptures, the pressure which prevents the liquid from boiling is lost. If the rupture is catastrophic, where the vessel is immediately incapable of holding any pressure at all, then there suddenly exists a large mass of liquid which is at very high temperature and very low pressure. This causes the entire volume of liquid to instantaneously boil, which in turn causes an extremely rapid expansion. 
Depending on temperatures, pressures and the substance involved, that expansion may be so rapid that it can be classified as an explosion, fully capable of inflicting severe damage on its surroundings.`) 179 | 180 | func BenchmarkTokenizeEnglishText(b *testing.B) { 181 | 182 | tokenizer := NewUnicodeWordBoundaryCustomLocaleTokenizer("en_US") 183 | b.ResetTimer() 184 | 185 | for i := 0; i < b.N; i++ { 186 | tokenizer.Tokenize(sampleLargeInput) 187 | } 188 | 189 | } 190 | -------------------------------------------------------------------------------- /lang/da/stemmer_da.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package da 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_da" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("da") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/de/stemmer_de.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package de 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | "github.com/blevesearch/blevex/v2/stemmer" 21 | ) 22 | 23 | const StemmerName = "stemmer_de" 24 | 25 | func StemmerFilterConstructor(config map[string]interface{}, 26 | cache *registry.Cache) (analysis.TokenFilter, error) { 27 | return stemmer.NewStemmerFilter("de") 28 | } 29 | 30 | func init() { 31 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 32 | } 33 | -------------------------------------------------------------------------------- /lang/en/README.md: -------------------------------------------------------------------------------- 1 | This package exposes the English stemmer using libstemmer, however it is NOT required/used by the default Bleve English anayzer which uses the porter stemmer instead. -------------------------------------------------------------------------------- /lang/en/stemmer_en.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package en 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_en" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("en") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/en/stemmer_en_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package en 11 | 12 | import ( 13 | "reflect" 14 | "testing" 15 | 16 | "github.com/blevesearch/bleve/v2/analysis" 17 | "github.com/blevesearch/bleve/v2/registry" 18 | ) 19 | 20 | func TestEnglishStemmer(t *testing.T) { 21 | tests := []struct { 22 | input analysis.TokenStream 23 | output analysis.TokenStream 24 | }{ 25 | { 26 | input: analysis.TokenStream{ 27 | &analysis.Token{ 28 | Term: []byte("walking"), 29 | }, 30 | &analysis.Token{ 31 | Term: []byte("talked"), 32 | }, 33 | &analysis.Token{ 34 | Term: []byte("business"), 35 | }, 36 | &analysis.Token{ 37 | Term: []byte("protected"), 38 | KeyWord: true, 39 | }, 40 | }, 41 | output: analysis.TokenStream{ 42 | &analysis.Token{ 43 | Term: []byte("walk"), 44 | }, 45 | &analysis.Token{ 46 | Term: []byte("talk"), 47 | }, 48 | &analysis.Token{ 49 | Term: []byte("busi"), 50 | }, 51 | &analysis.Token{ 52 | Term: []byte("protected"), 53 | KeyWord: true, 54 | }, 55 | }, 56 | }, 57 | } 58 | 59 | cache := registry.NewCache() 60 | stemmerFilter, err := cache.TokenFilterNamed(StemmerName) 61 | if err != nil { 62 | t.Fatal(err) 63 | } 64 | for _, test := range tests { 65 | actual := stemmerFilter.Filter(test.input) 66 | if !reflect.DeepEqual(actual, test.output) { 67 | t.Errorf("expected %s, got %s", test.output, actual) 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /lang/es/stemmer_es.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package es 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | "github.com/blevesearch/blevex/v2/stemmer" 21 | ) 22 | 23 | const StemmerName = "stemmer_es" 24 | 25 | func StemmerFilterConstructor(config map[string]interface{}, 26 | cache *registry.Cache) (analysis.TokenFilter, error) { 27 | return stemmer.NewStemmerFilter("es") 28 | } 29 | 30 | func init() { 31 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 32 | } 33 | -------------------------------------------------------------------------------- /lang/fi/stemmer_fi.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package fi 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_fi" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("fi") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/fr/README.md: -------------------------------------------------------------------------------- 1 | This package exposes the French stemmer using libstemmer, however it is NOT required/used by the default Bleve French anayzer which uses the French light stemmer instead. -------------------------------------------------------------------------------- /lang/fr/stemmer_fr.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package fr 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_fr" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("fr") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/hu/stemmer_hu.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package hu 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_hu" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("hu") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/it/README.md: -------------------------------------------------------------------------------- 1 | This package exposes the Italian stemmer using libstemmer; however, it is NOT required/used by the default Bleve Italian analyzer, which uses the Italian light stemmer instead. -------------------------------------------------------------------------------- /lang/it/stemmer_it.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package it 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_it" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("it") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/ja/analyzer_ja.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package ja 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 | "github.com/blevesearch/bleve/v2/registry" 16 | ) 17 | 18 | const AnalyzerName = "ja" 19 | 20 | func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) { 21 | kagomeTokenizer, err := cache.TokenizerNamed(TokenizerName) 22 | if err != nil { 23 | return nil, err 24 | } 25 | normalizeFilter := unicodenorm.MustNewUnicodeNormalizeFilter(unicodenorm.NFKD) 26 | rv := analysis.DefaultAnalyzer{ 27 | Tokenizer: kagomeTokenizer, 28 | TokenFilters: []analysis.TokenFilter{ 29 | normalizeFilter, 30 | }, 31 | } 32 | return &rv, nil 33 | } 34 | 35 | func init() { 36 | registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) 37 | } 38 | -------------------------------------------------------------------------------- /lang/ja/analyzer_ja_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package ja 11 | 12 | import ( 13 | "reflect" 14 | "testing" 15 | 16 | "github.com/blevesearch/bleve/v2/analysis" 17 | "github.com/blevesearch/bleve/v2/registry" 18 | ) 19 | 20 | func TestJaAnalyzer(t *testing.T) { 21 | tests := []struct { 22 | input []byte 23 | output analysis.TokenStream 24 | }{ 25 | { 26 | input: []byte("こんにちは世界"), 27 | output: analysis.TokenStream{ 28 | &analysis.Token{ 29 | Term: []byte("こんにちは"), 30 | Type: analysis.Ideographic, 31 | Position: 1, 32 | Start: 0, 33 | End: 15, 34 | }, 35 | &analysis.Token{ 36 | Term: []byte("世界"), 37 | Type: analysis.Ideographic, 38 | Position: 2, 39 | Start: 15, 40 | End: 21, 41 | }, 42 | }, 43 | }, 44 | { 45 | input: []byte("カタカナ"), 46 | output: analysis.TokenStream{ 47 | &analysis.Token{ 48 | Term: []byte("カタカナ"), 49 | Type: analysis.Ideographic, 50 | Position: 1, 51 | Start: 0, 52 | End: 12, 53 | }, 54 | }, 55 | }, 56 | } 57 | 58 | cache := registry.NewCache() 59 | for _, test := range tests { 60 | analyzer, err := cache.AnalyzerNamed(AnalyzerName) 61 | if err != nil { 62 | t.Fatal(err) 63 | } 64 | actual := analyzer.Analyze(test.input) 65 | if !reflect.DeepEqual(actual, test.output) { 66 | t.Errorf("expected %v, got %v", test.output, actual) 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /lang/ja/ja_morph_kagome.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. 
See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package ja 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | 16 | "github.com/ikawaha/kagome.ipadic/tokenizer" 17 | ) 18 | 19 | const TokenizerName = "kagome" 20 | 21 | type KagomeMorphTokenizer struct { 22 | tok tokenizer.Tokenizer 23 | } 24 | 25 | func init() { 26 | _ = tokenizer.SysDic() // prepare system dictionary 27 | } 28 | 29 | func NewKagomeMorphTokenizer() *KagomeMorphTokenizer { 30 | return &KagomeMorphTokenizer{ 31 | tok: tokenizer.New(), 32 | } 33 | } 34 | 35 | func NewKagomeMorphTokenizerWithUserDic(userdic tokenizer.UserDic) *KagomeMorphTokenizer { 36 | k := tokenizer.New() 37 | k.SetUserDic(userdic) 38 | return &KagomeMorphTokenizer{ 39 | tok: k, 40 | } 41 | } 42 | 43 | func (t *KagomeMorphTokenizer) Tokenize(input []byte) analysis.TokenStream { 44 | var ( 45 | morphs []tokenizer.Token 46 | prevstart int 47 | ) 48 | 49 | rv := make(analysis.TokenStream, 0, len(input)) 50 | if len(input) < 1 { 51 | return rv 52 | } 53 | 54 | morphs = t.tok.Analyze(string(input), tokenizer.Search) 55 | 56 | for i, m := range morphs { 57 | if m.Surface == "EOS" || m.Surface == "BOS" { 58 | continue 59 | } 60 | 61 | surfacelen := len(m.Surface) 62 | token := &analysis.Token{ 63 | Term: []byte(m.Surface), 64 | Position: i, 65 | Start: prevstart, 66 | End: prevstart + surfacelen, 67 | Type: analysis.Ideographic, 68 | } 69 | 70 | prevstart = prevstart + surfacelen 71 | rv = append(rv, token) 72 | } 73 | 74 | return rv 75 | } 76 | 77 | func KagomeMorphTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { 78 | return NewKagomeMorphTokenizer(), nil 79 | } 80 | 81 | func init() { 82 | registry.RegisterTokenizer(TokenizerName, KagomeMorphTokenizerConstructor) 83 | } 84 | 
-------------------------------------------------------------------------------- /lang/ja/ja_morph_kagome_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package ja 11 | 12 | import ( 13 | "reflect" 14 | "testing" 15 | 16 | "github.com/blevesearch/bleve/v2/analysis" 17 | ) 18 | 19 | func TestKagome(t *testing.T) { 20 | 21 | tests := []struct { 22 | input []byte 23 | output analysis.TokenStream 24 | }{ 25 | { 26 | []byte("こんにちは世界"), 27 | analysis.TokenStream{ 28 | { 29 | Start: 0, 30 | End: 15, 31 | Term: []byte("こんにちは"), 32 | Position: 1, 33 | Type: analysis.Ideographic, 34 | }, 35 | { 36 | Start: 15, 37 | End: 21, 38 | Term: []byte("世界"), 39 | Position: 2, 40 | Type: analysis.Ideographic, 41 | }, 42 | }, 43 | }, 44 | { 45 | []byte("関西国際空港"), 46 | analysis.TokenStream{ 47 | { 48 | Start: 0, 49 | End: 6, 50 | Term: []byte("関西"), 51 | Position: 1, 52 | Type: analysis.Ideographic, 53 | }, 54 | { 55 | Start: 6, 56 | End: 12, 57 | Term: []byte("国際"), 58 | Position: 2, 59 | Type: analysis.Ideographic, 60 | }, 61 | { 62 | Start: 12, 63 | End: 18, 64 | Term: []byte("空港"), 65 | Position: 3, 66 | Type: analysis.Ideographic, 67 | }, 68 | }, 69 | }, 70 | } 71 | 72 | tokenizer := NewKagomeMorphTokenizer() 73 | for _, test := range tests { 74 | actuals := tokenizer.Tokenize(test.input) 75 | 76 | if !reflect.DeepEqual(actuals, 
test.output) { 77 | t.Errorf("Expected %v, got %v for %s", test.output, actuals, string(test.input)) 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lang/nl/stemmer_nl.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package nl 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_nl" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("nl") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/no/stemmer_no.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. 
You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package no 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_no" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("no") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/pt/README.md: -------------------------------------------------------------------------------- 1 | This package exposes the Portuguese stemmer using libstemmer; however, it is NOT required/used by the default Bleve Portuguese analyzer, which uses the Portuguese light stemmer instead. -------------------------------------------------------------------------------- /lang/pt/stemmer_pt.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. 
See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package pt 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_pt" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("pt") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/ro/stemmer_ro.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package ro 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_ro" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("ro") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/ru/stemmer_ru.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package ru 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_ru" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("ru") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/sv/stemmer_sv.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package sv 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_sv" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("sv") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/th/analyzer_th.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package th 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 15 | "github.com/blevesearch/bleve/v2/registry" 16 | ) 17 | 18 | const AnalyzerName = "th" 19 | 20 | func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) { 21 | unicodeTokenizer, err := cache.TokenizerNamed(TokenizerName) 22 | if err != nil { 23 | return nil, err 24 | } 25 | toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) 26 | if err != nil { 27 | return nil, err 28 | } 29 | stopThFilter, err := cache.TokenFilterNamed(StopName) 30 | if err != nil { 31 | return nil, err 32 | } 33 | rv := analysis.DefaultAnalyzer{ 34 | Tokenizer: unicodeTokenizer, 35 | TokenFilters: []analysis.TokenFilter{ 36 | toLowerFilter, 37 | stopThFilter, 38 | }, 39 | } 40 | return &rv, nil 41 | } 42 | 43 | func init() { 44 | registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) 45 | } 46 | -------------------------------------------------------------------------------- /lang/th/analyzer_th_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package th 11 | 12 | import ( 13 | "reflect" 14 | "testing" 15 | 16 | "github.com/blevesearch/bleve/v2/analysis" 17 | "github.com/blevesearch/bleve/v2/registry" 18 | ) 19 | 20 | // tried to adapt these from the lucene tests, most of which either 21 | // use the empty stop dictionary or the english one. 22 | 23 | func TestThaiAnalyzer(t *testing.T) { 24 | tests := []struct { 25 | input []byte 26 | output analysis.TokenStream 27 | }{ 28 | // stop words 29 | { 30 | input: []byte("การที่ได้ต้องแสดงว่างานดี"), 31 | output: analysis.TokenStream{ 32 | &analysis.Token{ 33 | Term: []byte("แสดง"), 34 | Position: 5, 35 | Start: 39, 36 | End: 51, 37 | }, 38 | &analysis.Token{ 39 | Term: []byte("งาน"), 40 | Position: 7, 41 | Start: 60, 42 | End: 69, 43 | }, 44 | &analysis.Token{ 45 | Term: []byte("ดี"), 46 | Position: 8, 47 | Start: 69, 48 | End: 75, 49 | }, 50 | }, 51 | }, 52 | } 53 | 54 | cache := registry.NewCache() 55 | analyzer, err := cache.AnalyzerNamed(AnalyzerName) 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | for _, test := range tests { 60 | actual := analyzer.Analyze(test.input) 61 | if !reflect.DeepEqual(actual, test.output) { 62 | t.Errorf("expected %v, got %v", test.output, actual) 63 | } 64 | } 65 | } 66 | 67 | func TestThaiAnalyzerWihtoutOffsets(t *testing.T) { 68 | tests := []struct { 69 | input []byte 70 | output analysis.TokenStream 71 | }{ 72 | // stop words 73 | { 74 | input: []byte("บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com"), 75 | output: analysis.TokenStream{ 76 | &analysis.Token{ 77 | Term: []byte("บริษัท"), 78 | }, 79 | &analysis.Token{ 80 | Term: []byte("ชื่อ"), 81 | }, 82 | &analysis.Token{ 83 | Term: []byte("xy"), 84 | }, 85 | &analysis.Token{ 86 | Term: []byte("z"), 87 | }, 88 | &analysis.Token{ 89 | Term: []byte("คุย"), 90 | }, 91 | &analysis.Token{ 92 | Term: []byte("xyz"), 93 | }, 94 | &analysis.Token{ 95 | Term: []byte("demo.com"), 96 | }, 97 | }, 98 | }, 99 | } 100 | 101 | cache := registry.NewCache() 102 | analyzer, err := 
cache.AnalyzerNamed(AnalyzerName) 103 | if err != nil { 104 | t.Fatal(err) 105 | } 106 | for _, test := range tests { 107 | actual := analyzer.Analyze(test.input) 108 | if len(actual) != len(test.output) { 109 | t.Fatalf("expected length: %d, got %d", len(test.output), len(actual)) // fatal, not Errorf: the loop below indexes test.output[i] and would panic if actual is longer 110 | } 111 | for i, tok := range actual { 112 | if !reflect.DeepEqual(tok.Term, test.output[i].Term) { 113 | t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term) 114 | } 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /lang/th/stop_filter_th.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package th 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 15 | "github.com/blevesearch/bleve/v2/registry" 16 | ) 17 | 18 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 19 | tokenMap, err := cache.TokenMapNamed(StopName) 20 | if err != nil { 21 | return nil, err 22 | } 23 | return stop.NewStopTokensFilter(tokenMap), nil 24 | } 25 | 26 | func init() { 27 | registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 28 | } 29 | -------------------------------------------------------------------------------- /lang/th/stop_words_th.go: -------------------------------------------------------------------------------- 1 | package th 2 | 3 | import ( 4 | "github.com/blevesearch/bleve/v2/analysis" 5 | "github.com/blevesearch/bleve/v2/registry" 6 | ) 7 | 8 | const StopName = "stop_th" 9 | 10 | // this content was obtained from: 11 | // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ 12 | // ` was changed to ' to allow for literal string 13 | 14 | var ThaiStopWords = []byte(`# Thai stopwords from: 15 | # "Opinion Detection in Thai Political News Columns 16 | # Based on Subjectivity Analysis" 17 | # Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak 18 | ไว้ 19 | ไม่ 20 | ไป 21 | ได้ 22 | ให้ 23 | ใน 24 | โดย 25 | แห่ง 26 | แล้ว 27 | และ 28 | แรก 29 | แบบ 30 | แต่ 31 | เอง 32 | เห็น 33 | เลย 34 | เริ่ม 35 | เรา 36 | เมื่อ 37 | เพื่อ 38 | เพราะ 39 | เป็นการ 40 | เป็น 41 | เปิดเผย 42 | เปิด 43 | เนื่องจาก 44 | เดียวกัน 45 | เดียว 46 | เช่น 47 | เฉพาะ 48 | เคย 49 | เข้า 50 | เขา 51 | อีก 52 | อาจ 53 | อะไร 54 | ออก 55 | อย่าง 56 | อยู่ 57 | อยาก 58 | หาก 59 | หลาย 60 | หลังจาก 61 | หลัง 62 | หรือ 63 | หนึ่ง 64 | ส่วน 65 | ส่ง 66 | สุด 67 | สําหรับ 68 | ว่า 69 | วัน 70 | ลง 71 | ร่วม 72 | ราย 73 | รับ 74 | ระหว่าง 75 | รวม 76 | ยัง 77 | มี 78 | มาก 79 | มา 80 | พร้อม 81 | พบ 
82 | ผ่าน 83 | ผล 84 | บาง 85 | น่า 86 | นี้ 87 | นํา 88 | นั้น 89 | นัก 90 | นอกจาก 91 | ทุก 92 | ที่สุด 93 | ที่ 94 | ทําให้ 95 | ทํา 96 | ทาง 97 | ทั้งนี้ 98 | ทั้ง 99 | ถ้า 100 | ถูก 101 | ถึง 102 | ต้อง 103 | ต่างๆ 104 | ต่าง 105 | ต่อ 106 | ตาม 107 | ตั้งแต่ 108 | ตั้ง 109 | ด้าน 110 | ด้วย 111 | ดัง 112 | ซึ่ง 113 | ช่วง 114 | จึง 115 | จาก 116 | จัด 117 | จะ 118 | คือ 119 | ความ 120 | ครั้ง 121 | คง 122 | ขึ้น 123 | ของ 124 | ขอ 125 | ขณะ 126 | ก่อน 127 | ก็ 128 | การ 129 | กับ 130 | กัน 131 | กว่า 132 | กล่าว 133 | `) 134 | 135 | func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { 136 | rv := analysis.NewTokenMap() 137 | err := rv.LoadBytes(ThaiStopWords) 138 | return rv, err 139 | } 140 | 141 | func init() { 142 | registry.RegisterTokenMap(StopName, TokenMapConstructor) 143 | } 144 | -------------------------------------------------------------------------------- /lang/th/unicode_tokenizer_th.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package th 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/icu" 16 | ) 17 | 18 | const TokenizerName = "icu_th" 19 | 20 | func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { 21 | return icu.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH"), nil 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenizer(TokenizerName, TokenizerConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /lang/tr/stemmer_tr.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package tr 11 | 12 | import ( 13 | "github.com/blevesearch/bleve/v2/analysis" 14 | "github.com/blevesearch/bleve/v2/registry" 15 | "github.com/blevesearch/blevex/v2/stemmer" 16 | ) 17 | 18 | const StemmerName = "stemmer_tr" 19 | 20 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 21 | return stemmer.NewStemmerFilter("tr") 22 | } 23 | 24 | func init() { 25 | registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor) 26 | } 27 | -------------------------------------------------------------------------------- /leveldb/batch.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package leveldb 11 | 12 | import ( 13 | store "github.com/blevesearch/upsidedown_store_api" 14 | "github.com/jmhodges/levigo" 15 | ) 16 | 17 | type Batch struct { 18 | w *Writer 19 | merge *store.EmulatedMerge 20 | batch *levigo.WriteBatch 21 | } 22 | 23 | func (b *Batch) Set(key, val []byte) { 24 | b.batch.Put(key, val) 25 | } 26 | 27 | func (b *Batch) Delete(key []byte) { 28 | b.batch.Delete(key) 29 | } 30 | 31 | func (b *Batch) Merge(key, val []byte) { 32 | b.merge.Merge(key, val) 33 | } 34 | 35 | func (b *Batch) Reset() { 36 | b.batch.Clear() 37 | b.merge = store.NewEmulatedMerge(b.w.store.mo) 38 | } 39 | 40 | func (b *Batch) Close() error { 41 | b.batch.Close() 42 | b.batch = nil 43 | b.merge = nil 44 | return nil 45 | } 46 | -------------------------------------------------------------------------------- /leveldb/config.go: -------------------------------------------------------------------------------- 1 | package leveldb 2 | 3 | import "github.com/jmhodges/levigo" 4 | 5 | func applyConfig(o *levigo.Options, config map[string]interface{}) ( 6 | *levigo.Options, error) { 7 | 8 | cim, ok := config["create_if_missing"].(bool) 9 | if ok { 10 | o.SetCreateIfMissing(cim) 11 | } 12 | 13 | eie, ok := config["error_if_exists"].(bool) 14 | if ok { 15 | o.SetErrorIfExists(eie) 16 | } 17 | 18 | wbs, ok := config["write_buffer_size"].(float64) 19 | if ok { 20 | o.SetWriteBufferSize(int(wbs)) 21 | } 22 | 23 | bs, ok := config["block_size"].(float64) 24 | if ok { 25 | o.SetBlockSize(int(bs)) 26 | } 27 | 28 | bri, ok := config["block_restart_interval"].(float64) 29 | if ok { 30 | o.SetBlockRestartInterval(int(bri)) 31 | } 32 | 33 | lcc, ok := config["lru_cache_capacity"].(float64) 34 | if ok { 35 | lruCache := levigo.NewLRUCache(int(lcc)) 36 | o.SetCache(lruCache) 37 | } 38 | 39 | bfbpk, ok := config["bloom_filter_bits_per_key"].(float64) 40 | if ok { 41 | bf := levigo.NewBloomFilter(int(bfbpk)) 42 | o.SetFilterPolicy(bf) 43 | } 44 | 45 | mof, ok := 
config["max_open_files"].(float64) 46 | if ok { 47 | o.SetMaxOpenFiles(int(mof)) 48 | } 49 | 50 | return o, nil 51 | } 52 | -------------------------------------------------------------------------------- /leveldb/iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package leveldb 11 | 12 | import ( 13 | "bytes" 14 | 15 | "github.com/jmhodges/levigo" 16 | ) 17 | 18 | type Iterator struct { 19 | store *Store 20 | iterator *levigo.Iterator 21 | 22 | prefix []byte 23 | start []byte 24 | end []byte 25 | } 26 | 27 | func (i *Iterator) Seek(key []byte) { 28 | if key == nil { 29 | key = []byte{0} 30 | } 31 | if i.start != nil && bytes.Compare(key, i.start) < 0 { 32 | key = i.start 33 | } 34 | if i.prefix != nil && !bytes.HasPrefix(key, i.prefix) { 35 | if bytes.Compare(key, i.prefix) < 0 { 36 | key = i.prefix 37 | } else { 38 | var end []byte 39 | for x := len(i.prefix) - 1; x >= 0; x-- { 40 | c := i.prefix[x] 41 | if c < 0xff { 42 | end = make([]byte, x+1) 43 | copy(end, i.prefix) 44 | end[x] = c + 1 45 | break 46 | } 47 | } 48 | key = end 49 | } 50 | } 51 | i.iterator.Seek(key) 52 | } 53 | 54 | func (i *Iterator) Next() { 55 | i.iterator.Next() 56 | } 57 | 58 | func (i *Iterator) Current() ([]byte, []byte, bool) { 59 | if i.Valid() { 60 | return i.Key(), i.Value(), true 61 | } 62 | return nil, nil, false 63 | } 64 | 65 | 
func (i *Iterator) Key() []byte { 66 | return i.iterator.Key() 67 | } 68 | 69 | func (i *Iterator) Value() []byte { 70 | return i.iterator.Value() 71 | } 72 | 73 | func (i *Iterator) Valid() bool { 74 | if !i.iterator.Valid() { 75 | return false 76 | } else if i.prefix != nil && !bytes.HasPrefix(i.iterator.Key(), i.prefix) { 77 | return false 78 | } else if i.end != nil && bytes.Compare(i.iterator.Key(), i.end) >= 0 { 79 | return false 80 | } 81 | return true 82 | } 83 | 84 | func (i *Iterator) Close() error { 85 | i.iterator.Close() 86 | return nil 87 | } 88 | -------------------------------------------------------------------------------- /leveldb/reader.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package leveldb 11 | 12 | import ( 13 | store "github.com/blevesearch/upsidedown_store_api" 14 | "github.com/jmhodges/levigo" 15 | ) 16 | 17 | type Reader struct { 18 | store *Store 19 | snapshot *levigo.Snapshot 20 | options *levigo.ReadOptions 21 | } 22 | 23 | func (r *Reader) Get(key []byte) ([]byte, error) { 24 | b, err := r.store.db.Get(r.options, key) 25 | return b, err 26 | } 27 | 28 | func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) { 29 | return store.MultiGet(r, keys) 30 | } 31 | 32 | func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator { 33 | rv := Iterator{ 34 | store: r.store, 35 | iterator: r.store.db.NewIterator(r.options), 36 | prefix: prefix, 37 | } 38 | rv.Seek(prefix) 39 | return &rv 40 | } 41 | 42 | func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { 43 | rv := Iterator{ 44 | store: r.store, 45 | iterator: r.store.db.NewIterator(r.options), 46 | start: start, 47 | end: end, 48 | } 49 | rv.Seek(start) 50 | return &rv 51 | } 52 | 53 | func (r *Reader) Close() error { 54 | r.options.Close() 55 | r.store.db.ReleaseSnapshot(r.snapshot) 56 | return nil 57 | } 58 | -------------------------------------------------------------------------------- /leveldb/store.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package leveldb 11 | 12 | import ( 13 | "fmt" 14 | "os" 15 | "sync" 16 | 17 | store "github.com/blevesearch/upsidedown_store_api" 18 | "github.com/blevesearch/bleve/v2/registry" 19 | "github.com/jmhodges/levigo" 20 | ) 21 | 22 | const Name = "leveldb" 23 | 24 | type Store struct { 25 | path string 26 | opts *levigo.Options 27 | db *levigo.DB 28 | mo store.MergeOperator 29 | 30 | mergeMutex sync.Mutex 31 | } 32 | 33 | func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { 34 | path, ok := config["path"].(string) 35 | if !ok { 36 | return nil, fmt.Errorf("must specify path") 37 | } 38 | if path == "" { 39 | return nil, os.ErrInvalid 40 | } 41 | 42 | rv := Store{ 43 | path: path, 44 | opts: levigo.NewOptions(), 45 | mo: mo, 46 | } 47 | 48 | _, err := applyConfig(rv.opts, config) 49 | if err != nil { 50 | return nil, err 51 | } 52 | 53 | rv.db, err = levigo.Open(rv.path, rv.opts) 54 | if err != nil { 55 | return nil, err 56 | } 57 | return &rv, nil 58 | } 59 | 60 | func (s *Store) Close() error { 61 | s.db.Close() 62 | s.opts.Close() 63 | return nil 64 | } 65 | 66 | func (s *Store) Reader() (store.KVReader, error) { 67 | snapshot := s.db.NewSnapshot() 68 | options := defaultReadOptions() 69 | options.SetSnapshot(snapshot) 70 | return &Reader{ 71 | store: s, 72 | snapshot: snapshot, 73 | options: options, 74 | }, nil 75 | } 76 | 77 | func (s *Store) Writer() (store.KVWriter, error) { 78 | return &Writer{ 79 | store: s, 80 | options: defaultWriteOptions(), 81 | }, nil 82 | } 83 | 84 | func (s *Store) Compact() error { 85 | // workaround for google/leveldb#227 86 | // NULL batch means just wait for earlier writes to be done 87 | err := s.db.Write(defaultWriteOptions(), &levigo.WriteBatch{}) 88 | if err != nil { 89 | return err 90 | } 91 | s.db.CompactRange(levigo.Range{}) 92 | return nil 93 | } 94 | 95 | func init() { 96 | registry.RegisterKVStore(Name, New) 97 | } 98 | 
-------------------------------------------------------------------------------- /leveldb/store_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package leveldb 11 | 12 | import ( 13 | "os" 14 | "testing" 15 | 16 | store "github.com/blevesearch/upsidedown_store_api" 17 | "github.com/blevesearch/upsidedown_store_api/test" 18 | ) 19 | 20 | func open(t *testing.T, mo store.MergeOperator) store.KVStore { 21 | rv, err := New(mo, map[string]interface{}{ 22 | "path": "test", 23 | "create_if_missing": true, 24 | }) 25 | if err != nil { 26 | t.Fatal(err) 27 | } 28 | return rv 29 | } 30 | 31 | func cleanup(t *testing.T, s store.KVStore) { 32 | err := s.Close() 33 | if err != nil { 34 | t.Fatal(err) 35 | } 36 | err = os.RemoveAll("test") 37 | if err != nil { 38 | t.Fatal(err) 39 | } 40 | } 41 | 42 | func TestLevelDBKVCrud(t *testing.T) { 43 | s := open(t, nil) 44 | defer cleanup(t, s) 45 | test.CommonTestKVCrud(t, s) 46 | } 47 | 48 | func TestLevelDBReaderIsolation(t *testing.T) { 49 | s := open(t, nil) 50 | defer cleanup(t, s) 51 | test.CommonTestReaderIsolation(t, s) 52 | } 53 | 54 | func TestLevelDBReaderOwnsGetBytes(t *testing.T) { 55 | s := open(t, nil) 56 | defer cleanup(t, s) 57 | test.CommonTestReaderOwnsGetBytes(t, s) 58 | } 59 | 60 | func TestLevelDBWriterOwnsBytes(t *testing.T) { 61 | s := open(t, nil) 62 | defer 
cleanup(t, s) 63 | test.CommonTestWriterOwnsBytes(t, s) 64 | } 65 | 66 | func TestLevelDBPrefixIterator(t *testing.T) { 67 | s := open(t, nil) 68 | defer cleanup(t, s) 69 | test.CommonTestPrefixIterator(t, s) 70 | } 71 | 72 | func TestLevelDBPrefixIteratorSeek(t *testing.T) { 73 | s := open(t, nil) 74 | defer cleanup(t, s) 75 | test.CommonTestPrefixIteratorSeek(t, s) 76 | } 77 | 78 | func TestLevelDBRangeIterator(t *testing.T) { 79 | s := open(t, nil) 80 | defer cleanup(t, s) 81 | test.CommonTestRangeIterator(t, s) 82 | } 83 | 84 | func TestLevelDBRangeIteratorSeek(t *testing.T) { 85 | s := open(t, nil) 86 | defer cleanup(t, s) 87 | test.CommonTestRangeIteratorSeek(t, s) 88 | } 89 | 90 | func TestLevelDBMerge(t *testing.T) { 91 | s := open(t, &test.TestMergeCounter{}) 92 | defer cleanup(t, s) 93 | test.CommonTestMerge(t, s) 94 | } 95 | -------------------------------------------------------------------------------- /leveldb/util.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package leveldb 11 | 12 | import ( 13 | "github.com/jmhodges/levigo" 14 | ) 15 | 16 | func defaultWriteOptions() *levigo.WriteOptions { 17 | wo := levigo.NewWriteOptions() 18 | // request fsync on write for safety 19 | wo.SetSync(true) 20 | return wo 21 | } 22 | 23 | func defaultReadOptions() *levigo.ReadOptions { 24 | ro := levigo.NewReadOptions() 25 | return ro 26 | } 27 | -------------------------------------------------------------------------------- /leveldb/writer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package leveldb 11 | 12 | import ( 13 | "fmt" 14 | 15 | store "github.com/blevesearch/upsidedown_store_api" 16 | "github.com/jmhodges/levigo" 17 | ) 18 | 19 | type Writer struct { 20 | store *Store 21 | options *levigo.WriteOptions 22 | } 23 | 24 | func (w *Writer) NewBatch() store.KVBatch { 25 | rv := Batch{ 26 | w: w, 27 | merge: store.NewEmulatedMerge(w.store.mo), 28 | batch: levigo.NewWriteBatch(), 29 | } 30 | return &rv 31 | } 32 | 33 | func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) { 34 | return make([]byte, options.TotalBytes), w.NewBatch(), nil 35 | } 36 | 37 | func (w *Writer) ExecuteBatch(b store.KVBatch) error { 38 | 39 | batch, ok := b.(*Batch) 40 | if !ok { 41 | return fmt.Errorf("wrong type of batch") 42 | } 43 | 44 | // get a lock, because we can't allow 45 | // concurrent writes during the merge 46 | w.store.mergeMutex.Lock() 47 | defer w.store.mergeMutex.Unlock() 48 | 49 | // get a snapshot 50 | snapshot := w.store.db.NewSnapshot() 51 | ro := defaultReadOptions() 52 | ro.SetSnapshot(snapshot) 53 | defer w.store.db.ReleaseSnapshot(snapshot) 54 | defer ro.Close() 55 | 56 | for key, mergeOps := range batch.merge.Merges { 57 | k := []byte(key) 58 | orig, err := w.store.db.Get(ro, k) 59 | if err != nil { 60 | return err 61 | } 62 | mergedVal, fullMergeOk := w.store.mo.FullMerge(k, orig, mergeOps) 63 | if !fullMergeOk { 64 | return fmt.Errorf("unable to merge") 65 | } 66 | batch.Set(k, mergedVal) 67 | } 68 | 69 | err := w.store.db.Write(w.options, batch.batch) 70 | return err 71 | 72 | } 73 | 74 | func (w *Writer) Close() error { 75 | w.options.Close() 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /preload/README.md: -------------------------------------------------------------------------------- 1 | ### Preload 2 | 3 | The `preload` KV store is a wrapper KV store which will first load a set of KV pairs from an external source prior to 
opening the KV store. 4 | 5 | #### Preparing the KV pairs 6 | 7 | Assuming you have an existing bleve index named search.bleve: 8 | 9 | ``` 10 | $ bleve_export search.bleve search.blexport 11 | ``` 12 | 13 | This creates a new file search.blexport which is a gzipped sequence of KV pairs. 14 | 15 | #### Preloading a KV store with these KV pairs 16 | 17 | Create a new in-memory index with the NewUsing() method as follows: 18 | 19 | ``` 20 | i, err := bleve.NewUsing( 21 | "", 22 | bleve.NewIndexMapping(), 23 | bleve.Config.DefaultIndexType, 24 | preload.Name, 25 | map[string]interface{}{ 26 | "kvStoreName_actual": gtreap.Name, 27 | "preloadpath": pathToBleveExport, 28 | }) 29 | ``` 30 | 31 | #### Why? 32 | 33 | Why would you want to use this? Unfortunately, all of the KV stores supported by bleve either use the `syscall` or the `unsafe` package. This means they aren't suitable for environments like Google App Engine. By exporting the KV pairs of an existing bleve index into a simple format, we can then package them up, and preload them into an in-memory index. 34 | -------------------------------------------------------------------------------- /preload/cmd/bleve_export/main.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package main 11 | 12 | import ( 13 | "compress/gzip" 14 | "flag" 15 | "log" 16 | "os" 17 | 18 | "github.com/blevesearch/bleve/v2" 19 | "github.com/blevesearch/blevex/v2/preload" 20 | 21 | _ "github.com/blevesearch/bleve/v2/config" 22 | ) 23 | 24 | func main() { 25 | flag.Parse() 26 | if flag.NArg() < 1 { 27 | log.Fatalf("must specify path to index") 28 | } 29 | if flag.NArg() < 2 { 30 | log.Fatalf("must specify path to export to") 31 | } 32 | i, err := bleve.Open(flag.Arg(0)) 33 | if err != nil { 34 | log.Fatalf("error opening index: %v", err) 35 | } 36 | f, err := os.OpenFile(flag.Arg(1), os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666) 37 | if err != nil { 38 | log.Fatalf("error opening export path: %v", err) 39 | } 40 | ii, err := i.Advanced() 41 | if err != nil { 42 | log.Fatalf("error getting internal index: %v", err) 43 | } 44 | gzf := gzip.NewWriter(f) 45 | err = preload.ExportBleve(ii, gzf) 46 | if err != nil { 47 | log.Fatalf("error exporting bleve index: %v", err) 48 | } 49 | err = gzf.Close() 50 | if err != nil { 51 | log.Fatalf("error closing gzip: %v", err) 52 | } 53 | err = f.Close() 54 | if err != nil { 55 | log.Fatalf("error closing export file: %v", err) 56 | } 57 | err = i.Close() 58 | if err != nil { 59 | log.Fatalf("error closing index: %v", err) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /preload/export.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. 
See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package preload 11 | 12 | import ( 13 | "fmt" 14 | "github.com/blevesearch/bleve/v2/index/upsidedown" 15 | "io" 16 | 17 | index "github.com/blevesearch/bleve_index_api" 18 | ) 19 | 20 | // ExportBleve will dump all the index rows from 21 | // the provided index and serialize them to the 22 | // provided Writer 23 | func ExportBleve(i index.Index, w io.Writer) error { 24 | kvpw := NewWriter(w) 25 | 26 | r, err := i.Reader() 27 | if err != nil { 28 | return fmt.Errorf("error getting reader: %v", err) 29 | } 30 | 31 | upsideDownReader, ok := r.(*upsidedown.IndexReader) 32 | if !ok { 33 | return fmt.Errorf("dump is only supported by index type upsidedown") 34 | } 35 | 36 | var dumpChan chan interface{} 37 | dumpChan = upsideDownReader.DumpAll() 38 | 39 | for dumpValue := range dumpChan { 40 | switch dumpValue := dumpValue.(type) { 41 | case upsidedown.IndexRow: 42 | p := KVPair{K: dumpValue.Key(), V: dumpValue.Value()} 43 | err = kvpw.Write(&p) 44 | if err != nil { 45 | return fmt.Errorf("error writing row: %v", err) 46 | } 47 | 48 | case error: 49 | return fmt.Errorf("error dumping row: %v", dumpValue) 50 | } 51 | } 52 | return nil 53 | } 54 | -------------------------------------------------------------------------------- /preload/import.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. 
See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package preload 11 | 12 | import ( 13 | "io" 14 | 15 | store "github.com/blevesearch/upsidedown_store_api" 16 | ) 17 | 18 | // Import reads KVPairs from the Reader 19 | // and sets them in the KVStore 20 | // all work is done in batches of the requested size 21 | func Import(s store.KVStore, r io.Reader, batchSize int) error { 22 | kvw, err := s.Writer() 23 | if err != nil { 24 | return err 25 | } 26 | kvpr := NewReader(r) 27 | b := kvw.NewBatch() 28 | bsize := 0 29 | p := &KVPair{} 30 | p, err = kvpr.Read(p) 31 | for err == nil { 32 | b.Set(p.K, p.V) 33 | bsize++ 34 | if bsize > batchSize { 35 | err = kvw.ExecuteBatch(b) 36 | if err != nil { 37 | return err 38 | } 39 | bsize = 0 40 | b = kvw.NewBatch() 41 | } 42 | p, err = kvpr.Read(p) 43 | } 44 | if err != nil && err != io.EOF { 45 | return err 46 | } 47 | // close last batch 48 | if bsize > 0 { 49 | err = kvw.ExecuteBatch(b) 50 | if err != nil { 51 | return err 52 | } 53 | } 54 | return nil 55 | } 56 | -------------------------------------------------------------------------------- /preload/kvutil.pb.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-gogo. 2 | // source: kvutil.proto 3 | // DO NOT EDIT! 4 | 5 | /* 6 | Package preload is a generated protocol buffer package. 7 | 8 | It is generated from these files: 9 | kvutil.proto 10 | 11 | It has these top-level messages: 12 | KVPair 13 | */ 14 | package preload 15 | 16 | import proto "github.com/golang/protobuf/proto" 17 | import math "math" 18 | 19 | import io "io" 20 | import fmt "fmt" 21 | import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto" 22 | 23 | // Reference imports to suppress errors if they are not otherwise used. 
24 | var _ = proto.Marshal 25 | var _ = math.Inf 26 | 27 | type KVPair struct { 28 | K []byte `protobuf:"bytes,1,req,name=k" json:"k,omitempty"` 29 | V []byte `protobuf:"bytes,2,req,name=v" json:"v,omitempty"` 30 | XXX_unrecognized []byte `json:"-"` 31 | } 32 | 33 | func (m *KVPair) Reset() { *m = KVPair{} } 34 | func (m *KVPair) String() string { return proto.CompactTextString(m) } 35 | func (*KVPair) ProtoMessage() {} 36 | 37 | func (m *KVPair) GetK() []byte { 38 | if m != nil { 39 | return m.K 40 | } 41 | return nil 42 | } 43 | 44 | func (m *KVPair) GetV() []byte { 45 | if m != nil { 46 | return m.V 47 | } 48 | return nil 49 | } 50 | 51 | func (m *KVPair) Unmarshal(data []byte) error { 52 | var hasFields [1]uint64 53 | l := len(data) 54 | iNdEx := 0 55 | for iNdEx < l { 56 | var wire uint64 57 | for shift := uint(0); ; shift += 7 { 58 | if iNdEx >= l { 59 | return io.ErrUnexpectedEOF 60 | } 61 | b := data[iNdEx] 62 | iNdEx++ 63 | wire |= (uint64(b) & 0x7F) << shift 64 | if b < 0x80 { 65 | break 66 | } 67 | } 68 | fieldNum := int32(wire >> 3) 69 | wireType := int(wire & 0x7) 70 | switch fieldNum { 71 | case 1: 72 | if wireType != 2 { 73 | return fmt.Errorf("proto: wrong wireType = %d for field K", wireType) 74 | } 75 | var byteLen int 76 | for shift := uint(0); ; shift += 7 { 77 | if iNdEx >= l { 78 | return io.ErrUnexpectedEOF 79 | } 80 | b := data[iNdEx] 81 | iNdEx++ 82 | byteLen |= (int(b) & 0x7F) << shift 83 | if b < 0x80 { 84 | break 85 | } 86 | } 87 | if byteLen < 0 { 88 | return ErrInvalidLengthKvutil 89 | } 90 | postIndex := iNdEx + byteLen 91 | if postIndex > l { 92 | return io.ErrUnexpectedEOF 93 | } 94 | m.K = append([]byte{}, data[iNdEx:postIndex]...) 
95 | iNdEx = postIndex 96 | hasFields[0] |= uint64(0x00000001) 97 | case 2: 98 | if wireType != 2 { 99 | return fmt.Errorf("proto: wrong wireType = %d for field V", wireType) 100 | } 101 | var byteLen int 102 | for shift := uint(0); ; shift += 7 { 103 | if iNdEx >= l { 104 | return io.ErrUnexpectedEOF 105 | } 106 | b := data[iNdEx] 107 | iNdEx++ 108 | byteLen |= (int(b) & 0x7F) << shift 109 | if b < 0x80 { 110 | break 111 | } 112 | } 113 | if byteLen < 0 { 114 | return ErrInvalidLengthKvutil 115 | } 116 | postIndex := iNdEx + byteLen 117 | if postIndex > l { 118 | return io.ErrUnexpectedEOF 119 | } 120 | m.V = append([]byte{}, data[iNdEx:postIndex]...) 121 | iNdEx = postIndex 122 | hasFields[0] |= uint64(0x00000002) 123 | default: 124 | var sizeOfWire int 125 | for { 126 | sizeOfWire++ 127 | wire >>= 7 128 | if wire == 0 { 129 | break 130 | } 131 | } 132 | iNdEx -= sizeOfWire 133 | skippy, err := skipKvutil(data[iNdEx:]) 134 | if err != nil { 135 | return err 136 | } 137 | if skippy < 0 { 138 | return ErrInvalidLengthKvutil 139 | } 140 | if (iNdEx + skippy) > l { 141 | return io.ErrUnexpectedEOF 142 | } 143 | m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) 
144 | iNdEx += skippy 145 | } 146 | } 147 | if hasFields[0]&uint64(0x00000001) == 0 { 148 | return new(github_com_golang_protobuf_proto.RequiredNotSetError) 149 | } 150 | if hasFields[0]&uint64(0x00000002) == 0 { 151 | return new(github_com_golang_protobuf_proto.RequiredNotSetError) 152 | } 153 | 154 | return nil 155 | } 156 | func skipKvutil(data []byte) (n int, err error) { 157 | l := len(data) 158 | iNdEx := 0 159 | for iNdEx < l { 160 | var wire uint64 161 | for shift := uint(0); ; shift += 7 { 162 | if iNdEx >= l { 163 | return 0, io.ErrUnexpectedEOF 164 | } 165 | b := data[iNdEx] 166 | iNdEx++ 167 | wire |= (uint64(b) & 0x7F) << shift 168 | if b < 0x80 { 169 | break 170 | } 171 | } 172 | wireType := int(wire & 0x7) 173 | switch wireType { 174 | case 0: 175 | for { 176 | if iNdEx >= l { 177 | return 0, io.ErrUnexpectedEOF 178 | } 179 | iNdEx++ 180 | if data[iNdEx-1] < 0x80 { 181 | break 182 | } 183 | } 184 | return iNdEx, nil 185 | case 1: 186 | iNdEx += 8 187 | return iNdEx, nil 188 | case 2: 189 | var length int 190 | for shift := uint(0); ; shift += 7 { 191 | if iNdEx >= l { 192 | return 0, io.ErrUnexpectedEOF 193 | } 194 | b := data[iNdEx] 195 | iNdEx++ 196 | length |= (int(b) & 0x7F) << shift 197 | if b < 0x80 { 198 | break 199 | } 200 | } 201 | iNdEx += length 202 | if length < 0 { 203 | return 0, ErrInvalidLengthKvutil 204 | } 205 | return iNdEx, nil 206 | case 3: 207 | for { 208 | var innerWire uint64 209 | var start int = iNdEx 210 | for shift := uint(0); ; shift += 7 { 211 | if iNdEx >= l { 212 | return 0, io.ErrUnexpectedEOF 213 | } 214 | b := data[iNdEx] 215 | iNdEx++ 216 | innerWire |= (uint64(b) & 0x7F) << shift 217 | if b < 0x80 { 218 | break 219 | } 220 | } 221 | innerWireType := int(innerWire & 0x7) 222 | if innerWireType == 4 { 223 | break 224 | } 225 | next, err := skipKvutil(data[start:]) 226 | if err != nil { 227 | return 0, err 228 | } 229 | iNdEx = start + next 230 | } 231 | return iNdEx, nil 232 | case 4: 233 | return iNdEx, nil 234 | 
case 5: 235 | iNdEx += 4 236 | return iNdEx, nil 237 | default: 238 | return 0, fmt.Errorf("proto: illegal wireType %d", wireType) 239 | } 240 | } 241 | panic("unreachable") 242 | } 243 | 244 | var ( 245 | ErrInvalidLengthKvutil = fmt.Errorf("proto: negative length found during unmarshaling") 246 | ) 247 | 248 | func (m *KVPair) Size() (n int) { 249 | var l int 250 | _ = l 251 | if m.K != nil { 252 | l = len(m.K) 253 | n += 1 + l + sovKvutil(uint64(l)) 254 | } 255 | if m.V != nil { 256 | l = len(m.V) 257 | n += 1 + l + sovKvutil(uint64(l)) 258 | } 259 | if m.XXX_unrecognized != nil { 260 | n += len(m.XXX_unrecognized) 261 | } 262 | return n 263 | } 264 | 265 | func sovKvutil(x uint64) (n int) { 266 | for { 267 | n++ 268 | x >>= 7 269 | if x == 0 { 270 | break 271 | } 272 | } 273 | return n 274 | } 275 | func sozKvutil(x uint64) (n int) { 276 | return sovKvutil(uint64((x << 1) ^ uint64((int64(x) >> 63)))) 277 | } 278 | func (m *KVPair) Marshal() (data []byte, err error) { 279 | size := m.Size() 280 | data = make([]byte, size) 281 | n, err := m.MarshalTo(data) 282 | if err != nil { 283 | return nil, err 284 | } 285 | return data[:n], nil 286 | } 287 | 288 | func (m *KVPair) MarshalTo(data []byte) (n int, err error) { 289 | var i int 290 | _ = i 291 | var l int 292 | _ = l 293 | if m.K == nil { 294 | return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) 295 | } else { 296 | data[i] = 0xa 297 | i++ 298 | i = encodeVarintKvutil(data, i, uint64(len(m.K))) 299 | i += copy(data[i:], m.K) 300 | } 301 | if m.V == nil { 302 | return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) 303 | } else { 304 | data[i] = 0x12 305 | i++ 306 | i = encodeVarintKvutil(data, i, uint64(len(m.V))) 307 | i += copy(data[i:], m.V) 308 | } 309 | if m.XXX_unrecognized != nil { 310 | i += copy(data[i:], m.XXX_unrecognized) 311 | } 312 | return i, nil 313 | } 314 | 315 | func encodeFixed64Kvutil(data []byte, offset int, v uint64) int { 316 | data[offset] = uint8(v) 317 | 
data[offset+1] = uint8(v >> 8) 318 | data[offset+2] = uint8(v >> 16) 319 | data[offset+3] = uint8(v >> 24) 320 | data[offset+4] = uint8(v >> 32) 321 | data[offset+5] = uint8(v >> 40) 322 | data[offset+6] = uint8(v >> 48) 323 | data[offset+7] = uint8(v >> 56) 324 | return offset + 8 325 | } 326 | func encodeFixed32Kvutil(data []byte, offset int, v uint32) int { 327 | data[offset] = uint8(v) 328 | data[offset+1] = uint8(v >> 8) 329 | data[offset+2] = uint8(v >> 16) 330 | data[offset+3] = uint8(v >> 24) 331 | return offset + 4 332 | } 333 | func encodeVarintKvutil(data []byte, offset int, v uint64) int { 334 | for v >= 1<<7 { 335 | data[offset] = uint8(v&0x7f | 0x80) 336 | v >>= 7 337 | offset++ 338 | } 339 | data[offset] = uint8(v) 340 | return offset + 1 341 | } 342 | -------------------------------------------------------------------------------- /preload/kvutil.proto: -------------------------------------------------------------------------------- 1 | package preload; 2 | 3 | message KVPair { 4 | required bytes k = 1; 5 | required bytes v = 2; 6 | } 7 | -------------------------------------------------------------------------------- /preload/preload_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package preload 11 | 12 | import ( 13 | "io" 14 | "io/ioutil" 15 | "os" 16 | "reflect" 17 | "testing" 18 | ) 19 | 20 | func TestRoundtrip(t *testing.T) { 21 | data := []*KVPair{ 22 | &KVPair{ 23 | K: []byte("cat"), 24 | V: []byte("taffy"), 25 | }, 26 | } 27 | 28 | tmp, err := ioutil.TempFile("/tmp", "blevekvp") 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | defer func() { 33 | err := os.RemoveAll(tmp.Name()) 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | }() 38 | 39 | kvpw := NewWriter(tmp) 40 | for _, d := range data { 41 | err = kvpw.Write(d) 42 | if err != nil { 43 | t.Fatal(err) 44 | } 45 | } 46 | 47 | err = tmp.Close() 48 | if err != nil { 49 | t.Fatal(err) 50 | } 51 | 52 | tmp, err = os.Open(tmp.Name()) 53 | if err != nil { 54 | t.Fatal(err) 55 | } 56 | 57 | kvpr := NewReader(tmp) 58 | read := 0 59 | kvp := &KVPair{} 60 | kvp, err = kvpr.Read(kvp) 61 | for err == nil { 62 | if !reflect.DeepEqual(kvp, data[read]) { 63 | t.Errorf("expected %v got %v", data[read], kvp) 64 | } 65 | read++ 66 | kvp, err = kvpr.Read(kvp) 67 | } 68 | if err != nil && err != io.EOF { 69 | t.Fatal(err) 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /preload/reader.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package preload 11 | 12 | import ( 13 | "bufio" 14 | "encoding/binary" 15 | "fmt" 16 | "io" 17 | 18 | "github.com/golang/protobuf/proto" 19 | ) 20 | 21 | // Reader reads KVPairs 22 | type Reader struct { 23 | r *bufio.Reader 24 | buf []byte 25 | } 26 | 27 | // NewReader creates a new KVPair reader 28 | // that reads from the provided reader 29 | func NewReader(r io.Reader) *Reader { 30 | return &Reader{ 31 | r: bufio.NewReader(r), 32 | buf: make([]byte, 1024), 33 | } 34 | } 35 | 36 | // Read will read the next KVPair into p 37 | // if p is nil a new KVPair is allocated 38 | func (r *Reader) Read(p *KVPair) (*KVPair, error) { 39 | if p == nil { 40 | p = &KVPair{} 41 | } 42 | size, err := binary.ReadUvarint(r.r) 43 | if err != nil { 44 | return nil, err 45 | } 46 | if cap(r.buf) < int(size) { 47 | r.buf = make([]byte, 0, size) 48 | } 49 | read, err := io.ReadFull(r.r, r.buf[:size]) //r.r.Read(r.buf[:size]) 50 | if err != nil { 51 | return nil, err 52 | } 53 | if read != int(size) { 54 | return nil, fmt.Errorf("read incomplete kv pair") 55 | } 56 | err = proto.Unmarshal(r.buf[:size], p) 57 | if err != nil { 58 | return nil, err 59 | } 60 | return p, nil 61 | } 62 | -------------------------------------------------------------------------------- /preload/store.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | //go:generate protoc --gofast_out=. kvutil.proto 11 | 12 | package preload 13 | 14 | import ( 15 | "compress/gzip" 16 | "fmt" 17 | "os" 18 | 19 | store "github.com/blevesearch/upsidedown_store_api" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | const Name = "preload" 24 | 25 | type Store struct { 26 | o store.KVStore 27 | } 28 | 29 | func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { 30 | name, ok := config["kvStoreName_actual"].(string) 31 | if !ok || name == "" { 32 | return nil, fmt.Errorf("preload: missing kvStoreName_actual,"+ 33 | " config: %#v", config) 34 | } 35 | 36 | if name == Name { 37 | return nil, fmt.Errorf("preload: circular kvStoreName_actual") 38 | } 39 | 40 | ctr := registry.KVStoreConstructorByName(name) 41 | if ctr == nil { 42 | return nil, fmt.Errorf("preload: no kv store constructor,"+ 43 | " kvStoreName_actual: %s", name) 44 | } 45 | 46 | kvs, err := ctr(mo, config) 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | rv := &Store{ 52 | o: kvs, 53 | } 54 | 55 | if preloadPath, ok := config["preloadpath"].(string); ok { 56 | f, err := os.Open(preloadPath) 57 | if err != nil { 58 | return nil, err 59 | } 60 | gzr, err := gzip.NewReader(f) 61 | if err != nil { 62 | return nil, err 63 | } 64 | err = Import(rv, gzr, 1024) 65 | if err != nil { 66 | return nil, err 67 | } 68 | err = gzr.Close() 69 | if err != nil { 70 | return nil, err 71 | } 72 | err = f.Close() 73 | if err != nil { 74 | return nil, err 75 | } 76 | } 77 | 78 | return rv, nil 79 | } 80 | 81 | func (s *Store) Close() error { 82 | return s.o.Close() 83 | } 84 | 85 | func (s *Store) Reader() (store.KVReader, error) { 86 | return s.o.Reader() 87 | } 88 | 89 | func (s *Store) Writer() (store.KVWriter, error) { 90 | return s.o.Writer() 91 | } 92 | 93 | func init() { 94 | registry.RegisterKVStore(Name, New) 95 | } 96 | -------------------------------------------------------------------------------- /preload/writer.go: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package preload 11 | 12 | import ( 13 | "encoding/binary" 14 | "fmt" 15 | "io" 16 | ) 17 | 18 | // Writer writes KVPairs 19 | type Writer struct { 20 | w io.Writer 21 | buf []byte 22 | sbuf []byte 23 | } 24 | 25 | // NewWriter returns a KVPair Writer which writes to the provided Writer 26 | func NewWriter(w io.Writer) *Writer { 27 | return &Writer{ 28 | w: w, 29 | buf: make([]byte, 0, 1024), 30 | sbuf: make([]byte, binary.MaxVarintLen64), 31 | } 32 | } 33 | 34 | func (w *Writer) Write(p *KVPair) error { 35 | kvpsize := p.Size() 36 | if cap(w.buf) < kvpsize { 37 | w.buf = make([]byte, 0, kvpsize) 38 | } 39 | n, err := p.MarshalTo(w.buf[:kvpsize]) 40 | if err != nil { 41 | return fmt.Errorf("error marshaling row: %v", err) 42 | } 43 | sn := binary.PutUvarint(w.sbuf, uint64(kvpsize)) 44 | _, err = w.w.Write(w.sbuf[:sn]) 45 | if err != nil { 46 | return fmt.Errorf("error writing row size: %v", err) 47 | } 48 | _, err = w.w.Write(w.buf[:n]) 49 | if err != nil { 50 | return fmt.Errorf("error writing row data: %v", err) 51 | } 52 | return nil 53 | } 54 | -------------------------------------------------------------------------------- /rocksdb/batch.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 
2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package rocksdb 11 | 12 | import ( 13 | "github.com/tecbot/gorocksdb" 14 | ) 15 | 16 | type Batch struct { 17 | batch *gorocksdb.WriteBatch 18 | } 19 | 20 | func (b *Batch) Set(key, val []byte) { 21 | b.batch.Put(key, val) 22 | } 23 | 24 | func (b *Batch) Delete(key []byte) { 25 | b.batch.Delete(key) 26 | } 27 | 28 | func (b *Batch) Merge(key, val []byte) { 29 | b.batch.Merge(key, val) 30 | } 31 | 32 | func (b *Batch) Reset() { 33 | b.batch.Clear() 34 | } 35 | 36 | func (b *Batch) Close() error { 37 | b.batch.Destroy() 38 | b.batch = nil 39 | return nil 40 | } 41 | -------------------------------------------------------------------------------- /rocksdb/batchex.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package rocksdb 11 | 12 | /* 13 | #include 14 | #include 15 | #include "rocksdb/c.h" 16 | 17 | char *blevex_rocksdb_execute_direct_batch( 18 | rocksdb_t* db, 19 | const unsigned char writeoptions_sync, 20 | const unsigned char writeoptions_disable_WAL, 21 | const int num_sets, 22 | const char* const* set_keys, 23 | const size_t* set_keys_sizes, 24 | const char* const* set_vals, 25 | const size_t* set_vals_sizes, 26 | int num_deletes, 27 | const char* const* delete_keys, 28 | const size_t* delete_keys_sizes, 29 | int num_merges, 30 | const char* const* merge_keys, 31 | const size_t* merge_keys_sizes, 32 | const char* const* merge_vals, 33 | const size_t* merge_vals_sizes) { 34 | rocksdb_writebatch_t* b = rocksdb_writebatch_create(); 35 | 36 | if (num_sets > 0) { 37 | rocksdb_writebatch_putv(b, 38 | num_sets, set_keys, set_keys_sizes, 39 | num_sets, set_vals, set_vals_sizes); 40 | } 41 | if (num_deletes > 0) { 42 | rocksdb_writebatch_deletev(b, 43 | num_deletes, delete_keys, delete_keys_sizes); 44 | } 45 | if (num_merges > 0) { 46 | rocksdb_writebatch_mergev(b, 47 | num_merges, merge_keys, merge_keys_sizes, 48 | num_merges, merge_vals, merge_vals_sizes); 49 | } 50 | 51 | char *errMsg = NULL; 52 | 53 | rocksdb_writeoptions_t *options = rocksdb_writeoptions_create(); 54 | 55 | rocksdb_writeoptions_set_sync(options, writeoptions_sync); 56 | rocksdb_writeoptions_disable_WAL(options, writeoptions_disable_WAL); 57 | 58 | rocksdb_write(db, options, b, &errMsg); 59 | 60 | rocksdb_writeoptions_destroy(options); 61 | 62 | rocksdb_writebatch_destroy(b); 63 | 64 | return errMsg; 65 | } 66 | 67 | void blevex_rocksdb_alloc_direct_batch(size_t totalBytes, size_t n, void **out) { 68 | out[0] = malloc(totalBytes); 69 | out[1] = malloc(n * sizeof(char *)); 70 | out[2] = malloc(n * sizeof(size_t)); 71 | } 72 | 73 | void blevex_rocksdb_free_direct_batch(void **bufs) { 74 | free(bufs[0]); 75 | free(bufs[1]); 76 | free(bufs[2]); 77 | } 78 | */ 79 | import "C" 80 | 81 | import ( 
82 | "errors" 83 | "reflect" 84 | "unsafe" 85 | 86 | store "github.com/blevesearch/upsidedown_store_api" 87 | ) 88 | 89 | type BatchEx struct { 90 | cbufs []unsafe.Pointer 91 | buf []byte 92 | 93 | num_sets int 94 | set_keys []*C.char 95 | set_keys_sizes []C.size_t 96 | set_vals []*C.char 97 | set_vals_sizes []C.size_t 98 | 99 | num_deletes int 100 | delete_keys []*C.char 101 | delete_keys_sizes []C.size_t 102 | 103 | num_merges int 104 | merge_keys []*C.char 105 | merge_keys_sizes []C.size_t 106 | merge_vals []*C.char 107 | merge_vals_sizes []C.size_t 108 | } 109 | 110 | func newBatchEx(o store.KVBatchOptions) *BatchEx { 111 | s := o.NumSets 112 | ss := s + o.NumSets 113 | ssd := ss + o.NumDeletes 114 | ssdm := ssd + o.NumMerges 115 | ssdmm := ssdm + o.NumMerges 116 | 117 | cbufs := make([]unsafe.Pointer, 3) 118 | 119 | C.blevex_rocksdb_alloc_direct_batch(C.size_t(o.TotalBytes), 120 | C.size_t(ssdmm), (*unsafe.Pointer)(&cbufs[0])) 121 | 122 | buf := unsafeToByteSlice(cbufs[0], o.TotalBytes) 123 | arr_ptr_char := unsafeToCPtrCharSlice(cbufs[1], ssdmm) 124 | arr_size_t := unsafeToCSizeTSlice(cbufs[2], ssdmm) 125 | 126 | return &BatchEx{ 127 | cbufs: cbufs, 128 | buf: buf, 129 | set_keys: arr_ptr_char[0:s], 130 | set_keys_sizes: arr_size_t[0:s], 131 | set_vals: arr_ptr_char[s:ss], 132 | set_vals_sizes: arr_size_t[s:ss], 133 | delete_keys: arr_ptr_char[ss:ssd], 134 | delete_keys_sizes: arr_size_t[ss:ssd], 135 | merge_keys: arr_ptr_char[ssd:ssdm], 136 | merge_keys_sizes: arr_size_t[ssd:ssdm], 137 | merge_vals: arr_ptr_char[ssdm:ssdmm], 138 | merge_vals_sizes: arr_size_t[ssdm:ssdmm], 139 | } 140 | } 141 | 142 | func (b *BatchEx) Set(key, val []byte) { 143 | b.set_keys[b.num_sets] = (*C.char)(unsafe.Pointer(&key[0])) 144 | b.set_keys_sizes[b.num_sets] = (C.size_t)(len(key)) 145 | b.set_vals[b.num_sets] = (*C.char)(unsafe.Pointer(&val[0])) 146 | b.set_vals_sizes[b.num_sets] = (C.size_t)(len(val)) 147 | b.num_sets += 1 148 | } 149 | 150 | func (b *BatchEx) Delete(key 
[]byte) { 151 | b.delete_keys[b.num_deletes] = (*C.char)(unsafe.Pointer(&key[0])) 152 | b.delete_keys_sizes[b.num_deletes] = (C.size_t)(len(key)) 153 | b.num_deletes += 1 154 | } 155 | 156 | func (b *BatchEx) Merge(key, val []byte) { 157 | b.merge_keys[b.num_merges] = (*C.char)(unsafe.Pointer(&key[0])) 158 | b.merge_keys_sizes[b.num_merges] = (C.size_t)(len(key)) 159 | b.merge_vals[b.num_merges] = (*C.char)(unsafe.Pointer(&val[0])) 160 | b.merge_vals_sizes[b.num_merges] = (C.size_t)(len(val)) 161 | b.num_merges += 1 162 | } 163 | 164 | func (b *BatchEx) Reset() { 165 | b.num_sets = 0 166 | b.num_deletes = 0 167 | b.num_merges = 0 168 | } 169 | 170 | func (b *BatchEx) Close() error { 171 | b.Reset() 172 | 173 | C.blevex_rocksdb_free_direct_batch((*unsafe.Pointer)(&b.cbufs[0])) 174 | 175 | b.cbufs = nil 176 | b.buf = nil 177 | b.set_keys = nil 178 | b.set_keys_sizes = nil 179 | b.set_vals = nil 180 | b.set_vals_sizes = nil 181 | b.delete_keys = nil 182 | b.delete_keys_sizes = nil 183 | b.merge_keys = nil 184 | b.merge_keys_sizes = nil 185 | b.merge_vals = nil 186 | b.merge_vals_sizes = nil 187 | 188 | return nil 189 | } 190 | 191 | func (b *BatchEx) execute(w *Writer) error { 192 | var num_sets C.int 193 | var set_keys **C.char 194 | var set_keys_sizes *C.size_t 195 | var set_vals **C.char 196 | var set_vals_sizes *C.size_t 197 | 198 | var num_deletes C.int 199 | var delete_keys **C.char 200 | var delete_keys_sizes *C.size_t 201 | 202 | var num_merges C.int 203 | var merge_keys **C.char 204 | var merge_keys_sizes *C.size_t 205 | var merge_vals **C.char 206 | var merge_vals_sizes *C.size_t 207 | 208 | if b.num_sets > 0 { 209 | num_sets = (C.int)(b.num_sets) 210 | set_keys = (**C.char)(unsafe.Pointer(&b.set_keys[0])) 211 | set_keys_sizes = (*C.size_t)(unsafe.Pointer(&b.set_keys_sizes[0])) 212 | set_vals = (**C.char)(unsafe.Pointer(&b.set_vals[0])) 213 | set_vals_sizes = (*C.size_t)(unsafe.Pointer(&b.set_vals_sizes[0])) 214 | } 215 | 216 | if b.num_deletes > 0 { 217 | 
num_deletes = (C.int)(b.num_deletes) 218 | delete_keys = (**C.char)(unsafe.Pointer(&b.delete_keys[0])) 219 | delete_keys_sizes = (*C.size_t)(unsafe.Pointer(&b.delete_keys_sizes[0])) 220 | } 221 | 222 | if b.num_merges > 0 { 223 | num_merges = (C.int)(b.num_merges) 224 | merge_keys = (**C.char)(unsafe.Pointer(&b.merge_keys[0])) 225 | merge_keys_sizes = (*C.size_t)(unsafe.Pointer(&b.merge_keys_sizes[0])) 226 | merge_vals = (**C.char)(unsafe.Pointer(&b.merge_vals[0])) 227 | merge_vals_sizes = (*C.size_t)(unsafe.Pointer(&b.merge_vals_sizes[0])) 228 | } 229 | 230 | // request fsync on write for safety by default (bleve's convention), 231 | // although rocksdb writeoptions normal default is false for sync. 232 | woptSync := C.uchar(1) 233 | if w.store.woptSyncUse { 234 | woptSync = boolToChar(w.store.woptSync) 235 | } 236 | 237 | woptDisableWAL := C.uchar(0) 238 | if w.store.woptDisableWALUse { 239 | woptDisableWAL = boolToChar(w.store.woptDisableWAL) 240 | } 241 | 242 | cErr := C.blevex_rocksdb_execute_direct_batch( 243 | (*C.rocksdb_t)(w.store.db.UnsafeGetDB()), 244 | woptSync, 245 | woptDisableWAL, 246 | num_sets, 247 | set_keys, 248 | set_keys_sizes, 249 | set_vals, 250 | set_vals_sizes, 251 | num_deletes, 252 | delete_keys, 253 | delete_keys_sizes, 254 | num_merges, 255 | merge_keys, 256 | merge_keys_sizes, 257 | merge_vals, 258 | merge_vals_sizes) 259 | if cErr != nil { 260 | err := errors.New(C.GoString(cErr)) 261 | C.free(unsafe.Pointer(cErr)) 262 | return err 263 | } 264 | 265 | return nil 266 | } 267 | 268 | // Originally from github.com/tecbot/gorocksdb/util.go. 
// unsafeToByteSlice returns a []byte whose length and capacity are len and
// whose backing storage is the memory at data. Nothing is copied; the
// slice header is filled in directly.
// Originally from github.com/tecbot/gorocksdb/util.go.
func unsafeToByteSlice(data unsafe.Pointer, len int) []byte {
	var value []byte

	sH := (*reflect.SliceHeader)(unsafe.Pointer(&value))
	sH.Cap, sH.Len, sH.Data = len, len, uintptr(data)

	return value
}

// unsafeToCPtrCharSlice is the []*C.char counterpart of unsafeToByteSlice:
// it overlays a slice of len char pointers on the memory at data.
func unsafeToCPtrCharSlice(data unsafe.Pointer, len int) []*C.char {
	var value []*C.char

	sH := (*reflect.SliceHeader)(unsafe.Pointer(&value))
	sH.Cap, sH.Len, sH.Data = len, len, uintptr(data)

	return value
}

// unsafeToCSizeTSlice is the []C.size_t counterpart of unsafeToByteSlice:
// it overlays a slice of len size_t values on the memory at data.
func unsafeToCSizeTSlice(data unsafe.Pointer, len int) []C.size_t {
	var value []C.size_t

	sH := (*reflect.SliceHeader)(unsafe.Pointer(&value))
	sH.Cap, sH.Len, sH.Data = len, len, uintptr(data)

	return value
}

// boolToChar converts a Go bool to the 1/0 C.uchar form.
func boolToChar(b bool) C.uchar {
	if b {
		return 1
	}
	return 0
}

// applyConfig copies recognized entries of config onto o and returns o.
//
// Each option is looked up by its key and applied only when the entry is
// present with the expected dynamic type: bool for flags, float64 for
// every numeric option (presumably because the config originates from
// decoded JSON; confirm against callers). Entries with an unexpected type
// are silently ignored. Options are applied in the order written below.
// A block-based table factory is always installed at the end, carrying the
// optional LRU cache and bloom filter settings. The returned error is
// always nil in the current implementation.
func applyConfig(o *gorocksdb.Options, config map[string]interface{}) (
	*gorocksdb.Options, error) {

	cim, ok := config["create_if_missing"].(bool)
	if ok {
		o.SetCreateIfMissing(cim)
	}

	eie, ok := config["error_if_exists"].(bool)
	if ok {
		o.SetErrorIfExists(eie)
	}

	pc, ok := config["paranoid_checks"].(bool)
	if ok {
		o.SetParanoidChecks(pc)
	}

	ill, ok := config["info_log_level"].(float64)
	if ok {
		o.SetInfoLogLevel(gorocksdb.InfoLogLevel(int(ill)))
	}

	tt, ok := config["total_threads"].(float64)
	if ok {
		o.IncreaseParallelism(int(tt))
	}

	ofpl, ok := config["optimize_for_point_lookup"].(float64)
	if ok {
		o.OptimizeForPointLookup(uint64(ofpl))
	}

	olsc, ok := config["optimize_level_style_compaction"].(float64)
	if ok {
		o.OptimizeLevelStyleCompaction(uint64(olsc))
	}

	ousc, ok := config["optimize_universal_style_compaction"].(float64)
	if ok {
		o.OptimizeUniversalStyleCompaction(uint64(ousc))
	}

	wbs, ok := config["write_buffer_size"].(float64)
	if ok {
		o.SetWriteBufferSize(int(wbs))
	}

	mwbn, ok := config["max_write_buffer_number"].(float64)
	if ok {
		o.SetMaxWriteBufferNumber(int(mwbn))
	}

	mwbntm, ok := config["min_write_buffer_number_to_merge"].(float64)
	if ok {
		o.SetMinWriteBufferNumberToMerge(int(mwbntm))
	}

	mof, ok := config["max_open_files"].(float64)
	if ok {
		o.SetMaxOpenFiles(int(mof))
	}

	c, ok := config["compression"].(float64)
	if ok {
		o.SetCompression(gorocksdb.CompressionType(int(c)))
	}

	mltc, ok := config["min_level_to_compress"].(float64)
	if ok {
		o.SetMinLevelToCompress(int(mltc))
	}

	nl, ok := config["num_levels"].(float64)
	if ok {
		o.SetNumLevels(int(nl))
	}

	lfnct, ok := config["level0_file_num_compaction_trigger"].(float64)
	if ok {
		o.SetLevel0FileNumCompactionTrigger(int(lfnct))
	}

	lswt, ok := config["level0_slowdown_writes_trigger"].(float64)
	if ok {
		o.SetLevel0SlowdownWritesTrigger(int(lswt))
	}

	lstopwt, ok := config["level0_stop_writes_trigger"].(float64)
	if ok {
		o.SetLevel0StopWritesTrigger(int(lstopwt))
	}

	mmcl, ok := config["max_mem_compaction_level"].(float64)
	if ok {
		o.SetMaxMemCompactionLevel(int(mmcl))
	}

	tfsb, ok := config["target_file_size_base"].(float64)
	if ok {
		o.SetTargetFileSizeBase(uint64(tfsb))
	}

	tfsm, ok := config["target_file_size_multiplier"].(float64)
	if ok {
		o.SetTargetFileSizeMultiplier(int(tfsm))
	}

	mbflb, ok := config["max_bytes_for_level_base"].(float64)
	if ok {
		o.SetMaxBytesForLevelBase(uint64(mbflb))
	}

	mbflm, ok := config["max_bytes_for_level_multiplier"].(float64)
	if ok {
		o.SetMaxBytesForLevelMultiplier(mbflm)
	}

	uf, ok := config["use_fsync"].(bool)
	if ok {
		o.SetUseFsync(uf)
	}

	dofpm, ok := config["delete_obsolete_files_period_micros"].(float64)
	if ok {
		o.SetDeleteObsoleteFilesPeriodMicros(uint64(dofpm))
	}

	mbc, ok := config["max_background_compactions"].(float64)
	if ok {
		o.SetMaxBackgroundCompactions(int(mbc))
	}

	mbf, ok := config["max_background_flushes"].(float64)
	if ok {
		o.SetMaxBackgroundFlushes(int(mbf))
	}

	mlfs, ok := config["max_log_file_size"].(float64)
	if ok {
		o.SetMaxLogFileSize(int(mlfs))
	}

	lfttr, ok := config["log_file_time_to_roll"].(float64)
	if ok {
		o.SetLogFileTimeToRoll(int(lfttr))
	}

	klfn, ok := config["keep_log_file_num"].(float64)
	if ok {
		o.SetKeepLogFileNum(int(klfn))
	}

	hrl, ok := config["hard_rate_limit"].(float64)
	if ok {
		o.SetHardRateLimit(hrl)
	}

	rldmm, ok := config["rate_limit_delay_max_millisecond"].(float64)
	if ok {
		o.SetRateLimitDelayMaxMilliseconds(uint(rldmm))
	}

	mmfs, ok := config["max_manifest_file_size"].(float64)
	if ok {
		o.SetMaxManifestFileSize(uint64(mmfs))
	}

	tcnsb, ok := config["table_cache_numshardbits"].(float64)
	if ok {
		o.SetTableCacheNumshardbits(int(tcnsb))
	}

	tcrscl, ok := config["table_cache_remove_scan_count_limit"].(float64)
	if ok {
		o.SetTableCacheRemoveScanCountLimit(int(tcrscl))
	}

	abs, ok := config["arena_block_size"].(float64)
	if ok {
		o.SetArenaBlockSize(int(abs))
	}

	dac, ok := config["disable_auto_compactions"].(bool)
	if ok {
		o.SetDisableAutoCompactions(dac)
	}

	wts, ok := config["WAL_ttl_seconds"].(float64)
	if ok {
		o.SetWALTtlSeconds(uint64(wts))
	}

	wslm, ok := config["WAL_size_limit_MB"].(float64)
	if ok {
		o.SetWalSizeLimitMb(uint64(wslm))
	}

	mps, ok := config["manifest_preallocation_size"].(float64)
	if ok {
		o.SetManifestPreallocationSize(int(mps))
	}

	prkwf, ok := config["purge_redundant_kvs_while_flush"].(bool)
	if ok {
		o.SetPurgeRedundantKvsWhileFlush(prkwf)
	}

	amr, ok := config["allow_mmap_reads"].(bool)
	if ok {
		o.SetAllowMmapReads(amr)
	}

	amw, ok := config["allow_mmap_writes"].(bool)
	if ok {
		o.SetAllowMmapWrites(amw)
	}

	sleor, ok := config["skip_log_error_on_recovery"].(bool)
	if ok {
		o.SetSkipLogErrorOnRecovery(sleor)
	}

	sdps, ok := config["stats_dump_period_sec"].(float64)
	if ok {
		o.SetStatsDumpPeriodSec(uint(sdps))
	}

	aroo, ok := config["advise_random_on_open"].(bool)
	if ok {
		o.SetAdviseRandomOnOpen(aroo)
	}

	ahocs, ok := config["access_hint_on_compaction_start"].(float64)
	if ok {
		o.SetAccessHintOnCompactionStart(gorocksdb.CompactionAccessPattern(uint(ahocs)))
	}

	uam, ok := config["use_adaptive_mutex"].(bool)
	if ok {
		o.SetUseAdaptiveMutex(uam)
	}

	bps, ok := config["bytes_per_sync"].(float64)
	if ok {
		o.SetBytesPerSync(uint64(bps))
	}

	cs, ok := config["compaction_style"].(float64)
	if ok {
		o.SetCompactionStyle(gorocksdb.CompactionStyle(uint(cs)))
	}

	mssii, ok := config["max_sequential_skip_in_iterations"].(float64)
	if ok {
		o.SetMaxSequentialSkipInIterations(uint64(mssii))
	}

	ius, ok := config["inplace_update_support"].(bool)
	if ok {
		o.SetInplaceUpdateSupport(ius)
	}

	iunl, ok := config["inplace_update_num_locks"].(float64)
	if ok {
		o.SetInplaceUpdateNumLocks(int(iunl))
	}

	// The next two are one-way switches: they are only acted on when the
	// configured value is true.
	es, ok := config["enable_statistics"].(bool)
	if ok && es {
		o.EnableStatistics()
	}

	pfbl, ok := config["prepare_for_bulk_load"].(bool)
	if ok && pfbl {
		o.PrepareForBulkLoad()
	}

	// options in the block based table options object
	bbto := gorocksdb.NewDefaultBlockBasedTableOptions()

	lcc, ok := config["lru_cache_capacity"].(float64)
	if ok {
		c := gorocksdb.NewLRUCache(uint64(lcc))
		bbto.SetBlockCache(c)
	}

	bfbpk, ok := config["bloom_filter_bits_per_key"].(float64)
	if ok {
		bf := gorocksdb.NewBloomFilter(int(bfbpk))
		bbto.SetFilterPolicy(bf)
	}

	// set the block based table options
	o.SetBlockBasedTableFactory(bbto)

	return o, nil
}

// newWriteOptions builds the write options used by writers of this store.
// Sync is set from the store's configured value when one was supplied and
// defaults to true otherwise; DisableWAL is only touched when configured.
func (s *Store) newWriteOptions() *gorocksdb.WriteOptions {
	wo := gorocksdb.NewDefaultWriteOptions()

	if s.woptSyncUse {
		wo.SetSync(s.woptSync)
	} else {
		// request fsync on write for safety by default
		wo.SetSync(true)
	}
	if s.woptDisableWALUse {
		wo.DisableWAL(s.woptDisableWAL)
	}

	return wo
}

// newReadOptions builds the read options used by readers of this store.
// Each of verify-checksums, fill-cache and read-tier is overridden only
// when the store recorded an explicit value for it; otherwise the
// gorocksdb default is left in place.
func (s *Store) newReadOptions() *gorocksdb.ReadOptions {
	ro := gorocksdb.NewDefaultReadOptions()

	if s.roptVerifyChecksumsUse {
		ro.SetVerifyChecksums(s.roptVerifyChecksums)
	}
	if s.roptFillCacheUse {
		ro.SetFillCache(s.roptFillCache)
	}
	if s.roptReadTierUse {
		ro.SetReadTier(gorocksdb.ReadTier(s.roptReadTier))
	}

	return ro
}
2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package rocksdb 11 | 12 | import ( 13 | "bytes" 14 | 15 | "github.com/tecbot/gorocksdb" 16 | ) 17 | 18 | type Iterator struct { 19 | store *Store 20 | iterator *gorocksdb.Iterator 21 | 22 | prefix []byte 23 | start []byte 24 | end []byte 25 | } 26 | 27 | func (i *Iterator) Seek(key []byte) { 28 | if i.start != nil && bytes.Compare(key, i.start) < 0 { 29 | key = i.start 30 | } 31 | if i.prefix != nil && !bytes.HasPrefix(key, i.prefix) { 32 | if bytes.Compare(key, i.prefix) < 0 { 33 | key = i.prefix 34 | } else { 35 | var end []byte 36 | for x := len(i.prefix) - 1; x >= 0; x-- { 37 | c := i.prefix[x] 38 | if c < 0xff { 39 | end = make([]byte, x+1) 40 | copy(end, i.prefix) 41 | end[x] = c + 1 42 | break 43 | } 44 | } 45 | key = end 46 | } 47 | } 48 | i.iterator.Seek(key) 49 | } 50 | 51 | func (i *Iterator) Next() { 52 | i.iterator.Next() 53 | } 54 | 55 | func (i *Iterator) Current() ([]byte, []byte, bool) { 56 | if i.Valid() { 57 | return i.Key(), i.Value(), true 58 | } 59 | return nil, nil, false 60 | } 61 | 62 | func (i *Iterator) Key() []byte { 63 | return i.iterator.Key().Data() 64 | } 65 | 66 | func (i *Iterator) Value() []byte { 67 | return i.iterator.Value().Data() 68 | } 69 | 70 | func (i *Iterator) Valid() bool { 71 | if !i.iterator.Valid() { 72 | return false 73 | } else if i.prefix != nil && !bytes.HasPrefix(i.iterator.Key().Data(), i.prefix) { 74 | return false 75 | } 
else if i.end != nil && bytes.Compare(i.iterator.Key().Data(), i.end) >= 0 { 76 | return false 77 | } 78 | 79 | return true 80 | } 81 | 82 | func (i *Iterator) Close() error { 83 | i.iterator.Close() 84 | return nil 85 | } 86 | -------------------------------------------------------------------------------- /rocksdb/reader.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package rocksdb 11 | 12 | import ( 13 | store "github.com/blevesearch/upsidedown_store_api" 14 | "github.com/tecbot/gorocksdb" 15 | ) 16 | 17 | type Reader struct { 18 | store *Store 19 | snapshot *gorocksdb.Snapshot 20 | options *gorocksdb.ReadOptions 21 | } 22 | 23 | func (r *Reader) Get(key []byte) ([]byte, error) { 24 | b, err := r.store.db.Get(r.options, key) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return b.Data(), err 29 | } 30 | 31 | func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) { 32 | return store.MultiGet(r, keys) 33 | } 34 | 35 | func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator { 36 | rv := Iterator{ 37 | store: r.store, 38 | iterator: r.store.db.NewIterator(r.options), 39 | prefix: prefix, 40 | } 41 | rv.Seek(prefix) 42 | return &rv 43 | } 44 | 45 | func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { 46 | rv := Iterator{ 47 | store: r.store, 48 | iterator: r.store.db.NewIterator(r.options), 49 | start: start, 50 | end: end, 51 | } 52 | rv.Seek(start) 53 | return &rv 54 | } 55 | 56 | func (r *Reader) Close() error { 57 | r.options.Destroy() 58 | r.store.db.ReleaseSnapshot(r.snapshot) 59 | return nil 60 | } 61 | -------------------------------------------------------------------------------- /rocksdb/store.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package rocksdb 11 | 12 | import ( 13 | "fmt" 14 | "os" 15 | 16 | store "github.com/blevesearch/upsidedown_store_api" 17 | "github.com/blevesearch/bleve/v2/registry" 18 | "github.com/tecbot/gorocksdb" 19 | ) 20 | 21 | const Name = "rocksdb" 22 | 23 | type Store struct { 24 | path string 25 | opts *gorocksdb.Options 26 | config map[string]interface{} 27 | db *gorocksdb.DB 28 | 29 | roptVerifyChecksums bool 30 | roptVerifyChecksumsUse bool 31 | roptFillCache bool 32 | roptFillCacheUse bool 33 | roptReadTier int 34 | roptReadTierUse bool 35 | 36 | woptSync bool 37 | woptSyncUse bool 38 | woptDisableWAL bool 39 | woptDisableWALUse bool 40 | } 41 | 42 | func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { 43 | 44 | path, ok := config["path"].(string) 45 | if !ok { 46 | return nil, fmt.Errorf("must specify path") 47 | } 48 | if path == "" { 49 | return nil, os.ErrInvalid 50 | } 51 | 52 | rv := Store{ 53 | path: path, 54 | config: config, 55 | opts: gorocksdb.NewDefaultOptions(), 56 | } 57 | 58 | if mo != nil { 59 | rv.opts.SetMergeOperator(mo) 60 | } 61 | 62 | _, err := applyConfig(rv.opts, config) 63 | if err != nil { 64 | return nil, err 65 | } 66 | 67 | b, ok := config["read_only"].(bool) 68 | if ok && b { 69 | rv.db, err = gorocksdb.OpenDbForReadOnly(rv.opts, rv.path, false) 70 | } else { 71 | rv.db, err = gorocksdb.OpenDb(rv.opts, rv.path) 72 | } 73 | 74 | if err != nil { 75 | return nil, err 76 | } 77 | 78 | b, ok = config["readoptions_verify_checksum"].(bool) 79 | if ok { 80 | rv.roptVerifyChecksums, rv.roptVerifyChecksumsUse = b, true 81 | } 82 | 83 | b, ok = config["readoptions_fill_cache"].(bool) 84 | if ok { 85 | rv.roptFillCache, rv.roptFillCacheUse = b, true 86 | } 87 | 88 | v, ok := config["readoptions_read_tier"].(float64) 89 | if ok { 90 | rv.roptReadTier, rv.roptReadTierUse = int(v), true 91 | } 92 | 93 | b, ok = config["writeoptions_sync"].(bool) 94 | if ok { 95 | rv.woptSync, rv.woptSyncUse = b, true 96 | } 
97 | 98 | b, ok = config["writeoptions_disable_WAL"].(bool) 99 | if ok { 100 | rv.woptDisableWAL, rv.woptDisableWALUse = b, true 101 | } 102 | 103 | return &rv, nil 104 | } 105 | 106 | func (s *Store) Close() error { 107 | s.db.Close() 108 | s.db = nil 109 | 110 | s.opts.Destroy() 111 | 112 | s.opts = nil 113 | 114 | return nil 115 | } 116 | 117 | func (s *Store) Reader() (store.KVReader, error) { 118 | snapshot := s.db.NewSnapshot() 119 | options := s.newReadOptions() 120 | options.SetSnapshot(snapshot) 121 | return &Reader{ 122 | store: s, 123 | snapshot: snapshot, 124 | options: options, 125 | }, nil 126 | } 127 | 128 | func (s *Store) Writer() (store.KVWriter, error) { 129 | return &Writer{ 130 | store: s, 131 | options: s.newWriteOptions(), 132 | }, nil 133 | } 134 | 135 | func (s *Store) Compact() error { 136 | s.db.CompactRange(gorocksdb.Range{}) 137 | return nil 138 | } 139 | 140 | func init() { 141 | registry.RegisterKVStore(Name, New) 142 | } 143 | -------------------------------------------------------------------------------- /rocksdb/store_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package rocksdb 11 | 12 | import ( 13 | "os" 14 | "testing" 15 | 16 | store "github.com/blevesearch/upsidedown_store_api" 17 | "github.com/blevesearch/upsidedown_store_api/test" 18 | ) 19 | 20 | func open(t *testing.T, mo store.MergeOperator) store.KVStore { 21 | rv, err := New(mo, map[string]interface{}{ 22 | "path": "test", 23 | "create_if_missing": true, 24 | }) 25 | if err != nil { 26 | t.Fatal(err) 27 | } 28 | return rv 29 | } 30 | 31 | func cleanup(t *testing.T, s store.KVStore) { 32 | err := s.Close() 33 | if err != nil { 34 | t.Fatal(err) 35 | } 36 | err = os.RemoveAll("test") 37 | if err != nil { 38 | t.Fatal(err) 39 | } 40 | } 41 | 42 | func TestRocksDBKVCrud(t *testing.T) { 43 | s := open(t, nil) 44 | defer cleanup(t, s) 45 | test.CommonTestKVCrud(t, s) 46 | } 47 | 48 | func TestRocksDBReaderIsolation(t *testing.T) { 49 | s := open(t, nil) 50 | defer cleanup(t, s) 51 | test.CommonTestReaderIsolation(t, s) 52 | } 53 | 54 | func TestRocksDBReaderOwnsGetBytes(t *testing.T) { 55 | s := open(t, nil) 56 | defer cleanup(t, s) 57 | test.CommonTestReaderOwnsGetBytes(t, s) 58 | } 59 | 60 | func TestRocksDBWriterOwnsBytes(t *testing.T) { 61 | s := open(t, nil) 62 | defer cleanup(t, s) 63 | test.CommonTestWriterOwnsBytes(t, s) 64 | } 65 | 66 | func TestRocksDBPrefixIterator(t *testing.T) { 67 | s := open(t, nil) 68 | defer cleanup(t, s) 69 | test.CommonTestPrefixIterator(t, s) 70 | } 71 | 72 | func TestRocksDBPrefixIteratorSeek(t *testing.T) { 73 | s := open(t, nil) 74 | defer cleanup(t, s) 75 | test.CommonTestPrefixIteratorSeek(t, s) 76 | } 77 | 78 | func TestRocksDBRangeIterator(t *testing.T) { 79 | s := open(t, nil) 80 | defer cleanup(t, s) 81 | test.CommonTestRangeIterator(t, s) 82 | } 83 | 84 | func TestRocksDBRangeIteratorSeek(t *testing.T) { 85 | s := open(t, nil) 86 | defer cleanup(t, s) 87 | test.CommonTestRangeIteratorSeek(t, s) 88 | } 89 | 90 | func TestRocksDBMerge(t *testing.T) { 91 | s := open(t, &test.TestMergeCounter{}) 92 | defer cleanup(t, 
s) 93 | test.CommonTestMerge(t, s) 94 | } 95 | -------------------------------------------------------------------------------- /rocksdb/writer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package rocksdb 11 | 12 | import ( 13 | "fmt" 14 | 15 | store "github.com/blevesearch/upsidedown_store_api" 16 | "github.com/tecbot/gorocksdb" 17 | ) 18 | 19 | type Writer struct { 20 | store *Store 21 | options *gorocksdb.WriteOptions 22 | } 23 | 24 | func (w *Writer) NewBatch() store.KVBatch { 25 | rv := Batch{ 26 | batch: gorocksdb.NewWriteBatch(), 27 | } 28 | return &rv 29 | } 30 | 31 | func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) { 32 | // Disabled due to https://github.com/blevesearch/blevex/issues/22 33 | // rv := newBatchEx(options) 34 | // return rv.buf, rv, nil 35 | 36 | return make([]byte, options.TotalBytes), w.NewBatch(), nil 37 | } 38 | 39 | func (w *Writer) ExecuteBatch(b store.KVBatch) error { 40 | batchex, ok := b.(*BatchEx) 41 | if ok { 42 | return batchex.execute(w) 43 | } 44 | batch, ok := b.(*Batch) 45 | if ok { 46 | return w.store.db.Write(w.options, batch.batch) 47 | } 48 | return fmt.Errorf("wrong type of batch") 49 | } 50 | 51 | func (w *Writer) Close() error { 52 | w.options.Destroy() 53 | return nil 54 | } 55 | 
-------------------------------------------------------------------------------- /stemmer/README.md: -------------------------------------------------------------------------------- 1 | ## Languages supported 2 | 3 | "danish", 4 | "dutch", 5 | "english", 6 | "finnish", 7 | "french", 8 | "german", 9 | "hungarian", 10 | "italian", 11 | "norwegian", 12 | "porter", 13 | "portuguese", 14 | "romanian", 15 | "russian", 16 | "spanish", 17 | "swedish", 18 | "turkish" -------------------------------------------------------------------------------- /stemmer/stemmer_filter.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 
9 | 10 | package stemmer 11 | 12 | import ( 13 | "fmt" 14 | 15 | "github.com/tebeka/snowball" 16 | "github.com/blevesearch/bleve/v2/analysis" 17 | "github.com/blevesearch/bleve/v2/registry" 18 | ) 19 | 20 | const Name = "stem" 21 | 22 | type StemmerFilter struct { 23 | lang string 24 | stemmerPool chan *snowball.Stemmer 25 | } 26 | 27 | func NewStemmerFilter(lang string) (*StemmerFilter, error) { 28 | stemmerPool := make(chan *snowball.Stemmer, 4) 29 | for i := 0; i < 4; i++ { 30 | stemmer, err := snowball.New(lang) 31 | if err != nil { 32 | return nil, err 33 | } 34 | stemmerPool <- stemmer 35 | } 36 | return &StemmerFilter{ 37 | lang: lang, 38 | stemmerPool: stemmerPool, 39 | }, nil 40 | } 41 | 42 | func MustNewStemmerFilter(lang string) *StemmerFilter { 43 | sf, err := NewStemmerFilter(lang) 44 | if err != nil { 45 | panic(err) 46 | } 47 | return sf 48 | } 49 | 50 | func (s *StemmerFilter) List() []string { 51 | return snowball.LangList() 52 | } 53 | 54 | func (s *StemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 55 | for _, token := range input { 56 | // if it is not a protected keyword, stem it 57 | if !token.KeyWord { 58 | stemmer := <-s.stemmerPool 59 | stemmed := stemmer.Stem(string(token.Term)) 60 | s.stemmerPool <- stemmer 61 | token.Term = []byte(stemmed) 62 | } 63 | } 64 | return input 65 | } 66 | 67 | func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 68 | langVal, ok := config["lang"].(string) 69 | if !ok { 70 | return nil, fmt.Errorf("must specify stemmer language") 71 | } 72 | lang := langVal 73 | return NewStemmerFilter(lang) 74 | } 75 | 76 | func init() { 77 | registry.RegisterTokenFilter(Name, StemmerFilterConstructor) 78 | } 79 | -------------------------------------------------------------------------------- /stemmer/stemmer_filter_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, 
Inc. 2 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 3 | // except in compliance with the License. You may obtain a copy of the License at 4 | // http://www.apache.org/licenses/LICENSE-2.0 5 | // Unless required by applicable law or agreed to in writing, software distributed under the 6 | // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 7 | // either express or implied. See the License for the specific language governing permissions 8 | // and limitations under the License. 9 | 10 | package stemmer 11 | 12 | import ( 13 | "reflect" 14 | "testing" 15 | 16 | "github.com/blevesearch/bleve/v2/analysis" 17 | ) 18 | 19 | func TestStemmerFilter(t *testing.T) { 20 | 21 | inputTokenStream := analysis.TokenStream{ 22 | &analysis.Token{ 23 | Term: []byte("walking"), 24 | }, 25 | &analysis.Token{ 26 | Term: []byte("talked"), 27 | }, 28 | &analysis.Token{ 29 | Term: []byte("business"), 30 | }, 31 | &analysis.Token{ 32 | Term: []byte("protected"), 33 | KeyWord: true, 34 | }, 35 | } 36 | 37 | expectedTokenStream := analysis.TokenStream{ 38 | &analysis.Token{ 39 | Term: []byte("walk"), 40 | }, 41 | &analysis.Token{ 42 | Term: []byte("talk"), 43 | }, 44 | &analysis.Token{ 45 | Term: []byte("busi"), 46 | }, 47 | &analysis.Token{ 48 | Term: []byte("protected"), 49 | KeyWord: true, 50 | }, 51 | } 52 | 53 | filter, err := NewStemmerFilter("en") 54 | if err != nil { 55 | t.Fatal(err) 56 | } 57 | ouputTokenStream := filter.Filter(inputTokenStream) 58 | if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) { 59 | t.Errorf("expected %#v got %#v", expectedTokenStream[3], ouputTokenStream[3]) 60 | } 61 | } 62 | --------------------------------------------------------------------------------