├── .gitignore ├── README.md ├── ROADMAP.md ├── gen.sh ├── test ├── wal_replay_2 │ └── phase2.go ├── wal_bench │ └── benchmark.go ├── wal_replay_1 │ └── phase1.go ├── parse_words │ └── benchmark.go ├── indexer │ └── benchmark.go └── pilosa_range │ └── benchmark.go ├── cmd └── indexer_dump │ └── indexer_dump.go ├── cql ├── parser │ ├── CQL.tokens │ ├── CQLLexer.tokens │ ├── cql_visitor.go │ ├── cql_base_visitor.go │ ├── CQL.g4 │ ├── cql_listener.go │ ├── CQL.interp │ ├── cql_base_listener.go │ ├── cql_antlr_test.go │ ├── CQLLexer.interp │ └── cql_lexer.go ├── doc.proto └── cql_test.go ├── wal ├── walpb │ ├── record.go │ └── record.proto ├── wal_bench_test.go ├── file_pipeline.go ├── record_test.go ├── util.go ├── encoder.go ├── decoder.go ├── wal_test.go └── wal.go ├── terms_test.go ├── terms.go ├── copy.go ├── index_test.go ├── text_frame_test.go ├── int_frame.go ├── text_frame.go ├── index.go ├── LICENSE └── indexer.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # indexer 2 | Indexing library written in Golang, similar to Lucene (https://lucene.apache.org/core/) and Bleve (https://github.com/blevesearch/bleve). 3 | 4 | It supports numerical fields and text fields. A numerical value can be a multi-dimensional uint64 point. A text value can be a UTF-8 string, and it is tokenized by splitting the text on UTF-8 whitespace characters. 
5 | 6 | 7 | 8 | Documentation 9 | 10 | https://godoc.org/github.com/deepfabric/indexer 11 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | This document defines the roadmap for indexer development. 4 | 5 | - [D] CQL spec and parser 6 | - [D] index of uint properties: Create, Destroy, Insert, Del 7 | - [D] index of uint properties: Select 8 | - [D] index persistence: Open and Close 9 | - [D] index of string properties: create and query term id 10 | - [D] index of string properties: matrix bitmap 11 | - [D] index of string properties 12 | - [D] index of enum properties 13 | - [ ] improve Chinese tokenizer 14 | - [ ] multi-dimension uint property 15 | -------------------------------------------------------------------------------- /gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Generate all indexer protobuf bindings. 4 | # Run from repository root. 5 | # 6 | set -e 7 | 8 | # directories containing protos to be built 9 | DIRS="./cql ./wal/walpb" 10 | 11 | GOGOPROTO_ROOT="${GOPATH}/src/github.com/gogo/protobuf" 12 | GOGOPROTO_PATH="${GOGOPROTO_ROOT}:${GOGOPROTO_ROOT}/protobuf" 13 | 14 | for dir in ${DIRS}; do 15 | pushd ${dir} 16 | protoc --gofast_out=. 
-I=.:"${GOGOPROTO_PATH}" *.proto 17 | sed -i.bak -E 's/import _ \"gogoproto\"//g' *.pb.go 18 | rm -f *.bak 19 | popd 20 | done 21 | -------------------------------------------------------------------------------- /test/wal_replay_2/phase2.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/deepfabric/indexer" 5 | "github.com/deepfabric/indexer/cql" 6 | log "github.com/sirupsen/logrus" 7 | ) 8 | 9 | func main() { 10 | var err error 11 | var ir2 *indexer.Indexer 12 | 13 | //create indexer with existing data 14 | ir2, err = indexer.NewIndexer("/tmp/indexer_test", false, true) 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | 19 | //query 20 | var qr *indexer.QueryResult 21 | low := 30 22 | high := 600 23 | cs := &cql.CqlSelect{ 24 | Index: "orders", 25 | UintPreds: map[string]cql.UintPred{ 26 | "price": cql.UintPred{ 27 | Name: "price", 28 | Low: uint64(low), 29 | High: uint64(high), 30 | }, 31 | }, 32 | } 33 | qr, err = ir2.Select(cs) 34 | if err != nil { 35 | log.Fatal(err) 36 | } 37 | log.Infoln(qr.Bm.Bits()) 38 | } 39 | -------------------------------------------------------------------------------- /cmd/indexer_dump/indexer_dump.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/deepfabric/indexer" 9 | ) 10 | 11 | func main() { 12 | var err error 13 | var ir *indexer.Indexer 14 | var sum string 15 | 16 | if len(os.Args) <= 1 { 17 | log.Fatalf("paths required\n") 18 | } 19 | 20 | for i := 1; i < len(os.Args); i++ { 21 | fp := os.Args[i] 22 | if _, err := os.Stat(fp); err != nil || os.IsNotExist(err) { 23 | log.Fatalf("path %s doesn't exists", fp) 24 | } 25 | 26 | if ir, err = indexer.NewIndexer(fp, false, false); err != nil { 27 | log.Fatalf("%+v", err) 28 | } 29 | 30 | if sum, err = ir.Summary(); err != nil { 31 | log.Fatalf("%+v", err) 32 | } 33 | 34 | 
fmt.Printf("%s\n%s\n", fp, sum) 35 | 36 | //close indexer 37 | if err = ir.Close(); err != nil { 38 | log.Fatalf("%+v", err) 39 | } 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /cql/parser/CQL.tokens: -------------------------------------------------------------------------------- 1 | T__0=1 2 | T__1=2 3 | T__2=3 4 | T__3=4 5 | T__4=5 6 | T__5=6 7 | T__6=7 8 | T__7=8 9 | T__8=9 10 | T__9=10 11 | T__10=11 12 | T__11=12 13 | T__12=13 14 | K_UINT8=14 15 | K_UINT16=15 16 | K_UINT32=16 17 | K_UINT64=17 18 | K_FLOAT32=18 19 | K_FLOAT64=19 20 | K_ENUM=20 21 | K_STRING=21 22 | K_IN=22 23 | K_CONTAINS=23 24 | K_LT=24 25 | K_BT=25 26 | K_EQ=26 27 | K_LE=27 28 | K_BE=28 29 | FLOAT_LIT=29 30 | STRING=30 31 | INT=31 32 | IDENTIFIER=32 33 | WS=33 34 | 'IDX.CREATE'=1 35 | 'SCHEMA'=2 36 | 'IDX.DESTROY'=3 37 | 'IDX.INSERT'=4 38 | 'IDX.DEL'=5 39 | 'IDX.SELECT'=6 40 | 'QUERY'=7 41 | 'WHERE'=8 42 | 'ORDERBY'=9 43 | 'LIMIT'=10 44 | '['=11 45 | ','=12 46 | ']'=13 47 | 'UINT8'=14 48 | 'UINT16'=15 49 | 'UINT32'=16 50 | 'UINT64'=17 51 | 'FLOAT32'=18 52 | 'FLOAT64'=19 53 | 'ENUM'=20 54 | 'STRING'=21 55 | 'IN'=22 56 | 'CONTAINS'=23 57 | '<'=24 58 | '>'=25 59 | '='=26 60 | '<='=27 61 | '>='=28 62 | -------------------------------------------------------------------------------- /cql/parser/CQLLexer.tokens: -------------------------------------------------------------------------------- 1 | T__0=1 2 | T__1=2 3 | T__2=3 4 | T__3=4 5 | T__4=5 6 | T__5=6 7 | T__6=7 8 | T__7=8 9 | T__8=9 10 | T__9=10 11 | T__10=11 12 | T__11=12 13 | T__12=13 14 | K_UINT8=14 15 | K_UINT16=15 16 | K_UINT32=16 17 | K_UINT64=17 18 | K_FLOAT32=18 19 | K_FLOAT64=19 20 | K_ENUM=20 21 | K_STRING=21 22 | K_IN=22 23 | K_CONTAINS=23 24 | K_LT=24 25 | K_BT=25 26 | K_EQ=26 27 | K_LE=27 28 | K_BE=28 29 | FLOAT_LIT=29 30 | STRING=30 31 | INT=31 32 | IDENTIFIER=32 33 | WS=33 34 | 'IDX.CREATE'=1 35 | 'SCHEMA'=2 36 | 'IDX.DESTROY'=3 37 | 'IDX.INSERT'=4 38 | 'IDX.DEL'=5 39 
| 'IDX.SELECT'=6 40 | 'QUERY'=7 41 | 'WHERE'=8 42 | 'ORDERBY'=9 43 | 'LIMIT'=10 44 | '['=11 45 | ','=12 46 | ']'=13 47 | 'UINT8'=14 48 | 'UINT16'=15 49 | 'UINT32'=16 50 | 'UINT64'=17 51 | 'FLOAT32'=18 52 | 'FLOAT64'=19 53 | 'ENUM'=20 54 | 'STRING'=21 55 | 'IN'=22 56 | 'CONTAINS'=23 57 | '<'=24 58 | '>'=25 59 | '='=26 60 | '<='=27 61 | '>='=28 62 | -------------------------------------------------------------------------------- /wal/walpb/record.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The etcd Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package walpb 16 | 17 | import "errors" 18 | 19 | var ( 20 | ErrCRCMismatch = errors.New("walpb: crc mismatch") 21 | ) 22 | 23 | func (rec *Record) Validate(crc uint32) error { 24 | if rec.Crc == crc { 25 | return nil 26 | } 27 | rec.Reset() 28 | return ErrCRCMismatch 29 | } 30 | -------------------------------------------------------------------------------- /wal/walpb/record.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | package walpb; 3 | 4 | import "gogoproto/gogo.proto"; 5 | 6 | option (gogoproto.marshaler_all) = true; 7 | option (gogoproto.sizer_all) = true; 8 | option (gogoproto.unmarshaler_all) = true; 9 | option (gogoproto.goproto_getters_all) = false; 10 | 11 | enum EntryType { 12 | EntryNormal = 0; 13 | EntryConfChange = 1; 14 | } 15 | 16 | message Entry { 17 | optional uint64 Term = 2 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations 18 | optional uint64 Index = 3 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations 19 | optional EntryType Type = 1 [(gogoproto.nullable) = false]; 20 | optional bytes Data = 4; 21 | } 22 | 23 | message Record { 24 | optional int64 type = 1 [(gogoproto.nullable) = false]; 25 | optional uint32 crc = 2 [(gogoproto.nullable) = false]; 26 | optional bytes data = 3; 27 | } 28 | 29 | message Snapshot { 30 | optional uint64 index = 1 [(gogoproto.nullable) = false]; 31 | optional uint64 term = 2 [(gogoproto.nullable) = false]; 32 | } 33 | -------------------------------------------------------------------------------- /terms_test.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestTermDict(t *testing.T) { 11 | var err error 12 | var td *TermDict 13 | var td2 *TermDict 14 | 15 | //TESTCASE: query and insert term to an empty dict 16 | if 
err = os.Remove("/tmp/terms"); err != nil && !os.IsNotExist(err) { 17 | t.Fatalf("%+v", err) 18 | } 19 | td, err = NewTermDict("/tmp", true) 20 | require.NoError(t, err) 21 | terms := []string{ 22 | "sunday", 23 | "mon", 24 | "tue", 25 | "wen", 26 | "thurs", 27 | "friday", 28 | "satur", 29 | } 30 | expIds := []uint64{0, 1, 2, 3, 4, 5, 6} 31 | 32 | ids, err := td.CreateTermsIfNotExist(terms) 33 | require.NoError(t, err) 34 | require.Equal(t, expIds, ids) 35 | 36 | //TESTCASE: query and insert term to an existing dict 37 | td2, err = NewTermDict("/tmp", false) 38 | require.NoError(t, err) 39 | terms = []string{ 40 | "friday", 41 | "wikepedia", 42 | "thurs", 43 | } 44 | expIds = []uint64{5, 7, 4} 45 | 46 | ids, err = td2.CreateTermsIfNotExist(terms) 47 | require.NoError(t, err) 48 | require.Equal(t, expIds, ids) 49 | } 50 | -------------------------------------------------------------------------------- /cql/doc.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | package cql; 3 | 4 | import "gogoproto/gogo.proto"; 5 | 6 | option (gogoproto.marshaler_all) = true; 7 | option (gogoproto.sizer_all) = true; 8 | option (gogoproto.unmarshaler_all) = true; 9 | option (gogoproto.goproto_getters_all) = false; 10 | 11 | message UintProp { 12 | optional string name = 1 [(gogoproto.nullable) = false]; 13 | optional bool isFloat = 2 [(gogoproto.nullable) = false]; 14 | optional int32 valLen = 3 [(gogoproto.nullable) = false]; 15 | optional uint64 val = 4 [(gogoproto.nullable) = false]; 16 | } 17 | 18 | message EnumProp { 19 | optional string name = 1 [(gogoproto.nullable) = false]; 20 | optional uint64 val = 2 [(gogoproto.nullable) = false]; 21 | } 22 | 23 | message StrProp { 24 | optional string name = 1 [(gogoproto.nullable) = false]; 25 | optional string val = 2 [(gogoproto.nullable) = false]; 26 | } 27 | 28 | message Document { 29 | optional uint64 docID = 1 [(gogoproto.nullable) = false]; 30 | repeated UintProp 
uintProps = 2; 31 | repeated EnumProp enumProps = 3; 32 | repeated StrProp strProps = 4; 33 | } 34 | 35 | message DocumentWithIdx { 36 | optional Document doc = 1 [(gogoproto.nullable) = false]; 37 | optional string index = 2 [(gogoproto.nullable) = false]; 38 | } 39 | 40 | message DocumentDel { 41 | optional string index = 1 [(gogoproto.nullable) = false]; 42 | optional uint64 docID = 2 [(gogoproto.nullable) = false]; 43 | } 44 | -------------------------------------------------------------------------------- /test/wal_bench/benchmark.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "io/ioutil" 6 | "net/http" 7 | "os" 8 | "time" 9 | 10 | _ "net/http/pprof" 11 | 12 | "github.com/deepfabric/indexer/wal" 13 | "github.com/deepfabric/indexer/wal/walpb" 14 | log "github.com/sirupsen/logrus" 15 | ) 16 | 17 | var ( 18 | pprof = flag.String("addr-pprof", "", "pprof http server address") 19 | ) 20 | 21 | func main() { 22 | // 解析命令行参数 23 | flag.Parse() 24 | 25 | if "" != *pprof { 26 | log.Printf("bootstrap: start pprof at: %s", *pprof) 27 | go func() { 28 | log.Fatalf("bootstrap: start pprof failed, errors:\n%+v", 29 | http.ListenAndServe(*pprof, nil)) 30 | }() 31 | } 32 | 33 | // 记录时间 34 | t0 := time.Now() 35 | 36 | S := 300000 37 | benchmarkWriteEntry(S, 256, 8) 38 | 39 | // record time, and calculate performance 40 | t1 := time.Now() 41 | log.Printf("duration %v", t1.Sub(t0)) 42 | log.Printf("wal write speed %f entries/s", float64(S)/t1.Sub(t0).Seconds()) 43 | } 44 | 45 | func benchmarkWriteEntry(loops int, size int, batch int) { 46 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 47 | if err != nil { 48 | log.Fatalf("err = %v, want nil", err) 49 | } 50 | defer os.RemoveAll(p) 51 | 52 | w, err := wal.Create(p) 53 | if err != nil { 54 | log.Fatalf("err = %v, want nil", err) 55 | } 56 | data := make([]byte, size) 57 | for i := 0; i < size; i++ { 58 | data[i] = byte(i) 59 | } 60 | e := 
&walpb.Entry{Index: uint64(1), Data: data} 61 | 62 | for i := 0; i < loops; i++ { 63 | err := w.SaveEntry(e) 64 | e.Index++ 65 | if err != nil { 66 | log.Fatalf("err = %v, want nil", err) 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /test/wal_replay_1/phase1.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | log "github.com/sirupsen/logrus" 5 | 6 | "github.com/deepfabric/indexer" 7 | "github.com/deepfabric/indexer/cql" 8 | ) 9 | 10 | func newDocProt1() *cql.DocumentWithIdx { 11 | return &cql.DocumentWithIdx{ 12 | Doc: cql.Document{ 13 | DocID: 0, 14 | UintProps: []*cql.UintProp{ 15 | &cql.UintProp{ 16 | Name: "object", 17 | ValLen: 8, 18 | Val: 0, 19 | }, 20 | &cql.UintProp{ 21 | Name: "price", 22 | ValLen: 4, 23 | Val: 0, 24 | }, 25 | &cql.UintProp{ 26 | Name: "number", 27 | ValLen: 4, 28 | Val: 0, 29 | }, 30 | &cql.UintProp{ 31 | Name: "date", 32 | ValLen: 8, 33 | Val: 0, 34 | }, 35 | }, 36 | StrProps: []*cql.StrProp{ 37 | &cql.StrProp{ 38 | Name: "description", 39 | Val: "", 40 | }, 41 | &cql.StrProp{ 42 | Name: "note", 43 | Val: "", 44 | }, 45 | }, 46 | }, 47 | Index: "orders", 48 | } 49 | } 50 | 51 | func main() { 52 | var err error 53 | var docProt *cql.DocumentWithIdx 54 | var ir *indexer.Indexer 55 | 56 | //create empty indexer 57 | ir, err = indexer.NewIndexer("/tmp/indexer_test", true, true) 58 | if err != nil { 59 | log.Fatal(err) 60 | } 61 | 62 | //create index 1 63 | docProt = newDocProt1() 64 | err = ir.CreateIndex(docProt) 65 | if err != nil { 66 | log.Fatal(err) 67 | } 68 | 69 | //insert documents 70 | for i := 0; i < 137; i++ { 71 | doc := newDocProt1() 72 | doc.Doc.DocID = uint64(i) 73 | for j := 0; j < len(doc.Doc.UintProps); j++ { 74 | doc.Doc.UintProps[j].Val = uint64(i * (j + 1)) 75 | } 76 | err = ir.Insert(doc) 77 | if err != nil { 78 | log.Fatal(err) 79 | } 80 | } 81 | log.Infoln("quit without Sync. 
Please verify there are some files under /tmp/indexer_test/wal.") 82 | } 83 | -------------------------------------------------------------------------------- /wal/wal_bench_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The etcd Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package wal 16 | 17 | import ( 18 | "io/ioutil" 19 | "os" 20 | "testing" 21 | 22 | "github.com/deepfabric/indexer/wal/walpb" 23 | "github.com/stretchr/testify/require" 24 | ) 25 | 26 | func BenchmarkWrite100EntryWithoutBatch(b *testing.B) { benchmarkWriteEntry(b, 100, 0) } 27 | func BenchmarkWrite100EntryBatch10(b *testing.B) { benchmarkWriteEntry(b, 100, 10) } 28 | func BenchmarkWrite100EntryBatch100(b *testing.B) { benchmarkWriteEntry(b, 100, 100) } 29 | func BenchmarkWrite100EntryBatch500(b *testing.B) { benchmarkWriteEntry(b, 100, 500) } 30 | func BenchmarkWrite100EntryBatch1000(b *testing.B) { benchmarkWriteEntry(b, 100, 1000) } 31 | 32 | func BenchmarkWrite1000EntryWithoutBatch(b *testing.B) { benchmarkWriteEntry(b, 1000, 0) } 33 | func BenchmarkWrite1000EntryBatch10(b *testing.B) { benchmarkWriteEntry(b, 1000, 10) } 34 | func BenchmarkWrite1000EntryBatch100(b *testing.B) { benchmarkWriteEntry(b, 1000, 100) } 35 | func BenchmarkWrite1000EntryBatch500(b *testing.B) { benchmarkWriteEntry(b, 1000, 500) } 36 | func BenchmarkWrite1000EntryBatch1000(b *testing.B) 
{ benchmarkWriteEntry(b, 1000, 1000) } 37 | 38 | func benchmarkWriteEntry(b *testing.B, size int, batch int) { 39 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 40 | require.NoError(b, err) 41 | defer os.RemoveAll(p) 42 | 43 | w, err := Create(p) 44 | require.NoError(b, err) 45 | data := make([]byte, size) 46 | for i := 0; i < size; i++ { 47 | data[i] = byte(i) 48 | } 49 | e := &walpb.Entry{Data: data} 50 | 51 | b.ResetTimer() 52 | n := 0 53 | b.SetBytes(int64(e.Size())) 54 | for i := 0; i < b.N; i++ { 55 | err = w.saveEntry(e) 56 | require.NoError(b, err) 57 | n++ 58 | if n > batch { 59 | err = w.Sync() 60 | require.NoError(b, err) 61 | n = 0 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /wal/file_pipeline.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The etcd Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package wal 16 | 17 | import ( 18 | "fmt" 19 | "os" 20 | "path/filepath" 21 | 22 | "github.com/coreos/etcd/pkg/fileutil" 23 | log "github.com/sirupsen/logrus" 24 | ) 25 | 26 | // filePipeline pipelines allocating disk space 27 | type filePipeline struct { 28 | // dir to put files 29 | dir string 30 | // size of files to make, in bytes 31 | size int64 32 | // count number of files generated 33 | count int 34 | 35 | filec chan *fileutil.LockedFile 36 | errc chan error 37 | donec chan struct{} 38 | } 39 | 40 | func newFilePipeline(dir string, fileSize int64) *filePipeline { 41 | fp := &filePipeline{ 42 | dir: dir, 43 | size: fileSize, 44 | filec: make(chan *fileutil.LockedFile), 45 | errc: make(chan error, 1), 46 | donec: make(chan struct{}), 47 | } 48 | go fp.run() 49 | return fp 50 | } 51 | 52 | // Open returns a fresh file for writing. Rename the file before calling 53 | // Open again or there will be file collisions. 54 | func (fp *filePipeline) Open() (f *fileutil.LockedFile, err error) { 55 | select { 56 | case f = <-fp.filec: 57 | case err = <-fp.errc: 58 | } 59 | return f, err 60 | } 61 | 62 | func (fp *filePipeline) Close() error { 63 | close(fp.donec) 64 | return <-fp.errc 65 | } 66 | 67 | func (fp *filePipeline) alloc() (f *fileutil.LockedFile, err error) { 68 | // count % 2 so this file isn't the same as the one last published 69 | fpath := filepath.Join(fp.dir, fmt.Sprintf("%d.tmp", fp.count%2)) 70 | if f, err = fileutil.LockFile(fpath, os.O_CREATE|os.O_WRONLY, fileutil.PrivateFileMode); err != nil { 71 | return nil, err 72 | } 73 | if err = fileutil.Preallocate(f.File, fp.size, true); err != nil { 74 | log.Errorf("failed to allocate space when creating new wal file (%v)", err) 75 | f.Close() 76 | return nil, err 77 | } 78 | fp.count++ 79 | return f, nil 80 | } 81 | 82 | func (fp *filePipeline) run() { 83 | defer close(fp.errc) 84 | for { 85 | f, err := fp.alloc() 86 | if err != nil { 87 | fp.errc <- err 88 | return 89 | } 90 | select { 91 | 
case fp.filec <- f: 92 | case <-fp.donec: 93 | os.Remove(f.Name()) 94 | f.Close() 95 | return 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /wal/record_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The etcd Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package wal 16 | 17 | import ( 18 | "bytes" 19 | "hash/crc32" 20 | "io" 21 | "io/ioutil" 22 | "reflect" 23 | "testing" 24 | 25 | "github.com/deepfabric/indexer/wal/walpb" 26 | "github.com/pkg/errors" 27 | ) 28 | 29 | var ( 30 | infoData = []byte("\b\xef\xfd\x02") 31 | infoRecord = append([]byte("\x0e\x00\x00\x00\x00\x00\x00\x00\b\x01\x10\x99\xb5\xe4\xd0\x03\x1a\x04"), infoData...) 
32 | ) 33 | 34 | func TestReadRecord(t *testing.T) { 35 | badInfoRecord := make([]byte, len(infoRecord)) 36 | copy(badInfoRecord, infoRecord) 37 | badInfoRecord[len(badInfoRecord)-1] = 'a' 38 | 39 | tests := []struct { 40 | data []byte 41 | wr *walpb.Record 42 | we error 43 | }{ 44 | {infoRecord, &walpb.Record{Type: 1, Crc: crc32.Checksum(infoData, crcTable), Data: infoData}, nil}, 45 | {[]byte(""), &walpb.Record{}, io.EOF}, 46 | {infoRecord[:8], &walpb.Record{}, io.ErrUnexpectedEOF}, 47 | {infoRecord[:len(infoRecord)-len(infoData)-8], &walpb.Record{}, io.ErrUnexpectedEOF}, 48 | {infoRecord[:len(infoRecord)-len(infoData)], &walpb.Record{}, io.ErrUnexpectedEOF}, 49 | {infoRecord[:len(infoRecord)-8], &walpb.Record{}, io.ErrUnexpectedEOF}, 50 | {badInfoRecord, &walpb.Record{}, walpb.ErrCRCMismatch}, 51 | } 52 | 53 | rec := &walpb.Record{} 54 | for i, tt := range tests { 55 | buf := bytes.NewBuffer(tt.data) 56 | decoder := newDecoder(ioutil.NopCloser(buf)) 57 | e := decoder.decode(rec) 58 | if !reflect.DeepEqual(rec, tt.wr) { 59 | t.Errorf("#%d: block = %v, want %v", i, rec, tt.wr) 60 | } 61 | if !reflect.DeepEqual(errors.Cause(e), tt.we) { 62 | t.Errorf("#%d: err = %v, want %v", i, e, tt.we) 63 | } 64 | rec = &walpb.Record{} 65 | } 66 | } 67 | 68 | func TestWriteRecord(t *testing.T) { 69 | b := &walpb.Record{} 70 | typ := int64(0xABCD) 71 | d := []byte("Hello world!") 72 | buf := new(bytes.Buffer) 73 | e := newEncoder(buf, 0, 0) 74 | e.encode(&walpb.Record{Type: typ, Data: d}) 75 | e.flush() 76 | decoder := newDecoder(ioutil.NopCloser(buf)) 77 | err := decoder.decode(b) 78 | if err != nil { 79 | t.Errorf("err = %v, want nil", err) 80 | } 81 | if b.Type != typ { 82 | t.Errorf("type = %d, want %d", b.Type, typ) 83 | } 84 | if !reflect.DeepEqual(b.Data, d) { 85 | t.Errorf("data = %v, want %v", b.Data, d) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /cql/parser/cql_visitor.go: 
-------------------------------------------------------------------------------- 1 | // Generated from /home/zhichyu/src/github.com/deepfabric/indexer/cql/parser/CQL.g4 by ANTLR 4.7. 2 | 3 | package parser // CQL 4 | 5 | import "github.com/antlr/antlr4/runtime/Go/antlr" 6 | 7 | // A complete Visitor for a parse tree produced by CQLParser. 8 | type CQLVisitor interface { 9 | antlr.ParseTreeVisitor 10 | 11 | // Visit a parse tree produced by CQLParser#cql. 12 | VisitCql(ctx *CqlContext) interface{} 13 | 14 | // Visit a parse tree produced by CQLParser#create. 15 | VisitCreate(ctx *CreateContext) interface{} 16 | 17 | // Visit a parse tree produced by CQLParser#destroy. 18 | VisitDestroy(ctx *DestroyContext) interface{} 19 | 20 | // Visit a parse tree produced by CQLParser#insert. 21 | VisitInsert(ctx *InsertContext) interface{} 22 | 23 | // Visit a parse tree produced by CQLParser#del. 24 | VisitDel(ctx *DelContext) interface{} 25 | 26 | // Visit a parse tree produced by CQLParser#query. 27 | VisitQuery(ctx *QueryContext) interface{} 28 | 29 | // Visit a parse tree produced by CQLParser#indexName. 30 | VisitIndexName(ctx *IndexNameContext) interface{} 31 | 32 | // Visit a parse tree produced by CQLParser#document. 33 | VisitDocument(ctx *DocumentContext) interface{} 34 | 35 | // Visit a parse tree produced by CQLParser#uintPropDef. 36 | VisitUintPropDef(ctx *UintPropDefContext) interface{} 37 | 38 | // Visit a parse tree produced by CQLParser#enumPropDef. 39 | VisitEnumPropDef(ctx *EnumPropDefContext) interface{} 40 | 41 | // Visit a parse tree produced by CQLParser#strPropDef. 42 | VisitStrPropDef(ctx *StrPropDefContext) interface{} 43 | 44 | // Visit a parse tree produced by CQLParser#orderLimit. 45 | VisitOrderLimit(ctx *OrderLimitContext) interface{} 46 | 47 | // Visit a parse tree produced by CQLParser#order. 48 | VisitOrder(ctx *OrderContext) interface{} 49 | 50 | // Visit a parse tree produced by CQLParser#property. 
51 | VisitProperty(ctx *PropertyContext) interface{} 52 | 53 | // Visit a parse tree produced by CQLParser#uintType. 54 | VisitUintType(ctx *UintTypeContext) interface{} 55 | 56 | // Visit a parse tree produced by CQLParser#docId. 57 | VisitDocId(ctx *DocIdContext) interface{} 58 | 59 | // Visit a parse tree produced by CQLParser#value. 60 | VisitValue(ctx *ValueContext) interface{} 61 | 62 | // Visit a parse tree produced by CQLParser#uintPred. 63 | VisitUintPred(ctx *UintPredContext) interface{} 64 | 65 | // Visit a parse tree produced by CQLParser#enumPred. 66 | VisitEnumPred(ctx *EnumPredContext) interface{} 67 | 68 | // Visit a parse tree produced by CQLParser#strPred. 69 | VisitStrPred(ctx *StrPredContext) interface{} 70 | 71 | // Visit a parse tree produced by CQLParser#compare. 72 | VisitCompare(ctx *CompareContext) interface{} 73 | 74 | // Visit a parse tree produced by CQLParser#intList. 75 | VisitIntList(ctx *IntListContext) interface{} 76 | 77 | // Visit a parse tree produced by CQLParser#limit. 
78 | VisitLimit(ctx *LimitContext) interface{} 79 | } 80 | -------------------------------------------------------------------------------- /test/parse_words/benchmark.go: -------------------------------------------------------------------------------- 1 | /* 2 | github.com/go-ego/gse/test/benchmark.go 3 | 4 | 测试 gse 分词速度 5 | 6 | go run benchmark.go 7 | 8 | 输出分词结果到文件: 9 | 10 | go run benchmark.go -output=output.txt 11 | 12 | 分析性能瓶颈: 13 | 14 | go build benchmark.go 15 | ./benchmark -cpuprofile=cpu.prof 16 | go tool pprof -png --output=cpu.png benchmark cpu.prof 17 | 18 | 分析内存占用: 19 | 20 | go build benchmark.go 21 | ./benchmark -memprofile=mem.prof 22 | go tool pprof -png --output=mem.png benchmark mem.prof 23 | 24 | */ 25 | 26 | package main 27 | 28 | import ( 29 | "bufio" 30 | "flag" 31 | "fmt" 32 | "log" 33 | "os" 34 | "runtime" 35 | "runtime/pprof" 36 | "strings" 37 | "time" 38 | 39 | "github.com/deepfabric/indexer" 40 | ) 41 | 42 | var ( 43 | cpuprofile = flag.String("cpuprofile", "", "处理器profile文件") 44 | memprofile = flag.String("memprofile", "", "内存profile文件") 45 | output = flag.String("output", "", "输出分词结果到此文件") 46 | numRuns = 20 47 | ) 48 | 49 | func main() { 50 | // 确保单线程,因为Go从1.5开始默认多线程 51 | runtime.GOMAXPROCS(1) 52 | 53 | // 解析命令行参数 54 | flag.Parse() 55 | 56 | // 记录时间 57 | t0 := time.Now() 58 | 59 | // 记录时间 60 | t1 := time.Now() 61 | log.Printf("载入词典花费时间 %v", t1.Sub(t0)) 62 | 63 | // 打开将要分词的文件 64 | file, err := os.Open("../testdata/bailuyuan.txt") 65 | if err != nil { 66 | log.Fatal(err) 67 | } 68 | defer file.Close() 69 | 70 | // 逐行读入 71 | scanner := bufio.NewScanner(file) 72 | size := 0 73 | lines := [][]byte{} 74 | for scanner.Scan() { 75 | var text string 76 | fmt.Sscanf(scanner.Text(), "%s", &text) 77 | content := []byte(text) 78 | size += len(content) 79 | lines = append(lines, content) 80 | } 81 | 82 | // 当指定输出文件时打开输出文件 83 | var of *os.File 84 | if *output != "" { 85 | of, err = os.Create(*output) 86 | if err != nil { 87 | log.Fatal(err) 88 | } 89 
| defer of.Close() 90 | } 91 | 92 | // cpu profile 93 | if *cpuprofile != "" { 94 | log.Printf("cpuprofile %v\n", *cpuprofile) 95 | f, err := os.Create(*cpuprofile) 96 | if err != nil { 97 | log.Fatal(err) 98 | } 99 | pprof.StartCPUProfile(f) 100 | defer pprof.StopCPUProfile() 101 | } 102 | 103 | // 记录时间 104 | t2 := time.Now() 105 | 106 | // 分词 107 | for i := 0; i < numRuns; i++ { 108 | for _, l := range lines { 109 | words := indexer.ParseWords(string(l)) 110 | if *output != "" { 111 | of.WriteString(strings.Join(words, "/")) 112 | of.WriteString("\n") 113 | } 114 | } 115 | } 116 | 117 | // 记录时间并计算分词速度 118 | t3 := time.Now() 119 | log.Printf("分词花费时间 %v", t3.Sub(t2)) 120 | log.Printf("分词速度 %f MB/s", float64(size*numRuns)/t3.Sub(t2).Seconds()/(1024*1024)) 121 | 122 | // mem profile 123 | if *memprofile != "" { 124 | f, err := os.Create(*memprofile) 125 | if err != nil { 126 | log.Fatal("could not create memory profile: ", err) 127 | } 128 | runtime.GC() // get up-to-date statistics 129 | if err := pprof.WriteHeapProfile(f); err != nil { 130 | log.Fatal("could not write memory profile: ", err) 131 | } 132 | f.Close() 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /cql/parser/cql_base_visitor.go: -------------------------------------------------------------------------------- 1 | // Generated from /home/zhichyu/src/github.com/deepfabric/indexer/cql/parser/CQL.g4 by ANTLR 4.7. 

package parser // CQL

import "github.com/antlr/antlr4/runtime/Go/antlr"

type BaseCQLVisitor struct {
	*antlr.BaseParseTreeVisitor
}

func (v *BaseCQLVisitor) VisitCql(ctx *CqlContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitCreate(ctx *CreateContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitDestroy(ctx *DestroyContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitInsert(ctx *InsertContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitDel(ctx *DelContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitQuery(ctx *QueryContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitIndexName(ctx *IndexNameContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitDocument(ctx *DocumentContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitUintPropDef(ctx *UintPropDefContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitEnumPropDef(ctx *EnumPropDefContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitStrPropDef(ctx *StrPropDefContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitOrderLimit(ctx *OrderLimitContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitOrder(ctx *OrderContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitProperty(ctx *PropertyContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitUintType(ctx *UintTypeContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitDocId(ctx *DocIdContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitValue(ctx *ValueContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitUintPred(ctx *UintPredContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitEnumPred(ctx *EnumPredContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitStrPred(ctx *StrPredContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitCompare(ctx *CompareContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitIntList(ctx *IntListContext) interface{} {
	return v.VisitChildren(ctx)
}

func (v *BaseCQLVisitor) VisitLimit(ctx *LimitContext) interface{} {
	return v.VisitChildren(ctx)
}

--------------------------------------------------------------------------------
/test/indexer/benchmark.go:
--------------------------------------------------------------------------------
package main

import (
	"flag"
	"fmt"
	"log"
	"net/http"
	_ "net/http/pprof"
	"time"

	"github.com/deepfabric/indexer"
	"github.com/deepfabric/indexer/cql"
)

var (
	pprof = flag.String("addr-pprof", "", "pprof http server address")
)

// newDocProt1 builds the document prototype used by the benchmark: index
// "orders" with four uint properties (object, price, number, date) and two
// string properties (description, note), all zero-valued.
func newDocProt1() *cql.DocumentWithIdx {
	return &cql.DocumentWithIdx{
		Doc: cql.Document{
			DocID: 0,
			UintProps: []*cql.UintProp{
				&cql.UintProp{
					Name:   "object",
					ValLen: 8,
					Val:    0,
				},
				&cql.UintProp{
					Name:   "price",
					ValLen: 4,
					Val:    0,
				},
				&cql.UintProp{
					Name:   "number",
					ValLen: 4,
					Val:    0,
				},
				&cql.UintProp{
					Name:   "date",
					ValLen: 8,
					Val:    0,
				},
			},
			StrProps: []*cql.StrProp{
				&cql.StrProp{
					Name: "description",
					Val:  "",
				},
				&cql.StrProp{
					Name: "note",
					Val:  "",
				},
			},
		},
		Index: "orders",
	}
}

// prepareIndexer creates an indexer rooted at /tmp/indexer_test, creates one
// index per prototype in docProts and inserts numDocs documents into each.
//
// Every prototype is mutated in place and re-inserted: for document i the
// DocID is i, the j-th uint property is i*(j+1) and the j-th string property
// is a "%03d%03d " prefix of (i, j) followed by a fixed English sentence.
// The first error aborts the run; ir may be non-nil in that case.
func prepareIndexer(numDocs int, docProts []*cql.DocumentWithIdx) (ir *indexer.Indexer, err error) {
	//create indexer
	// NOTE(review): the meaning of the two boolean arguments is not visible
	// here; confirm against indexer.NewIndexer.
	if ir, err = indexer.NewIndexer("/tmp/indexer_test", true, false); err != nil {
		return
	}

	//insert documents
	for _, docProt := range docProts {
		if err = ir.CreateIndex(docProt); err != nil {
			return
		}
		for i := 0; i < numDocs; i++ {
			docProt.Doc.DocID = uint64(i)
			for j := 0; j < len(docProt.Doc.UintProps); j++ {
				docProt.Doc.UintProps[j].Val = uint64(i * (j + 1))
			}
			for j := 0; j < len(docProt.Doc.StrProps); j++ {
				docProt.Doc.StrProps[j].Val = fmt.Sprintf("%03d%03d ", i, j) + "Go's standard library does not have a function solely intended to check if a file exists or not (like Python's os.path.exists). What is the idiomatic way to do it?"
			}
			if err = ir.Insert(docProt); err != nil {
				return
			}
		}
	}
	return
}

// main times the insertion of N documents and then S consecutive Sync calls,
// logging the duration and throughput of each phase. When -addr-pprof is set
// an HTTP server exposing net/http/pprof is started in the background.
func main() {
	flag.Parse()
	N := 1000000 // number of documents to insert
	S := 1000    // number of Sync calls to time

	if "" != *pprof {
		log.Printf("bootstrap: start pprof at: %s", *pprof)
		go func() {
			// ListenAndServe only returns on failure, so reaching Fatalf
			// means the pprof server could not run.
			log.Fatalf("bootstrap: start pprof failed, errors:\n%+v",
				http.ListenAndServe(*pprof, nil))
		}()
	}

	var ir *indexer.Indexer
	var err error

	// record time
	t0 := time.Now()

	if ir, err = prepareIndexer(N, []*cql.DocumentWithIdx{newDocProt1()}); err != nil {
		log.Fatalf("%+v", err)
	}

	// record time, and calculate performance
	t1 := time.Now()
	log.Printf("duration %v", t1.Sub(t0))
	log.Printf("insertion speed %f docs/s", float64(N)/t1.Sub(t0).Seconds())

	for i := 0; i < S; i++ {
		if err = ir.Sync(); err != nil {
			log.Fatalf("%+v", err)
		}
	}

	// record time, and calculate performance
	t2 := time.Now()
	log.Printf("duration %v", t2.Sub(t1))
	log.Printf("sync speed %f syncs/s", float64(S)/t2.Sub(t1).Seconds())
}

--------------------------------------------------------------------------------
/wal/util.go:
--------------------------------------------------------------------------------
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 | 15 | package wal 16 | 17 | import ( 18 | "fmt" 19 | "strings" 20 | 21 | "github.com/pkg/errors" 22 | 23 | "github.com/coreos/etcd/pkg/fileutil" 24 | log "github.com/sirupsen/logrus" 25 | ) 26 | 27 | var ( 28 | badWalName = errors.New("bad wal name") 29 | ) 30 | 31 | func Exist(dirpath string) bool { 32 | names, err := fileutil.ReadDir(dirpath) 33 | if err != nil { 34 | return false 35 | } 36 | return len(names) != 0 37 | } 38 | 39 | // searchIndex returns the last array index of names whose raft index section is 40 | // equal to or smaller than the given index. 41 | // The given names MUST be sorted. 42 | func searchIndex(names []string, index uint64) (int, bool) { 43 | for i := len(names) - 1; i >= 0; i-- { 44 | name := names[i] 45 | _, curIndex, err := parseWalName(name) 46 | if err != nil { 47 | log.Panicf("parse correct name should never fail: %v", err) 48 | } 49 | if index >= curIndex { 50 | return i, true 51 | } 52 | } 53 | return -1, false 54 | } 55 | 56 | // names should have been sorted based on sequence number. 57 | // isValidSeq checks whether seq increases continuously. 
58 | func isValidSeq(names []string) bool { 59 | var lastSeq uint64 60 | for _, name := range names { 61 | curSeq, _, err := parseWalName(name) 62 | if err != nil { 63 | log.Panicf("parse correct name should never fail: %v", err) 64 | } 65 | if lastSeq != 0 && lastSeq != curSeq-1 { 66 | return false 67 | } 68 | lastSeq = curSeq 69 | } 70 | return true 71 | } 72 | func readWalNames(dirpath string) ([]string, error) { 73 | names, err := fileutil.ReadDir(dirpath) 74 | if err != nil { 75 | return nil, errors.Wrap(err, "") 76 | } 77 | wnames := checkWalNames(names) 78 | if len(wnames) == 0 { 79 | return nil, errors.Wrap(ErrFileNotFound, "") 80 | } 81 | return wnames, nil 82 | } 83 | 84 | func checkWalNames(names []string) []string { 85 | wnames := make([]string, 0) 86 | for _, name := range names { 87 | if _, _, err := parseWalName(name); err != nil { 88 | // don't complain about left over tmp files 89 | if !strings.HasSuffix(name, ".tmp") { 90 | log.Warningf("ignored file %v in wal", name) 91 | } 92 | continue 93 | } 94 | wnames = append(wnames, name) 95 | } 96 | return wnames 97 | } 98 | 99 | func parseWalName(str string) (seq, index uint64, err error) { 100 | if !strings.HasSuffix(str, ".wal") { 101 | return 0, 0, badWalName 102 | } 103 | if _, err = fmt.Sscanf(str, "%016x-%016x.wal", &seq, &index); err != nil { 104 | err = errors.Wrap(err, "") 105 | } 106 | return seq, index, err 107 | } 108 | 109 | func walName(seq, index uint64) string { 110 | return fmt.Sprintf("%016x-%016x.wal", seq, index) 111 | } 112 | -------------------------------------------------------------------------------- /cql/parser/CQL.g4: -------------------------------------------------------------------------------- 1 | 2 | /** Derived from github.com/RedisLabsModules/secondary/docs/Commands.md 3 | 4 | https://stackoverflow.com/questions/44796556/how-to-tell-if-antlr4-parser-consumed-all-tokens-of-the-tokenstream 5 | 6 | Note that Antlr 4 doesn't support case-insensitive token: 7 | 
https://github.com/antlr/antlr4/issues/1002 8 | 9 | */ 10 | 11 | grammar CQL; 12 | 13 | cql 14 | : create EOF 15 | | destroy EOF 16 | | insert EOF 17 | | del EOF 18 | | query EOF 19 | ; 20 | 21 | create: 'IDX.CREATE' indexName 'SCHEMA' (uintPropDef)* (enumPropDef)* (strPropDef)*; 22 | 23 | destroy: 'IDX.DESTROY' indexName; 24 | 25 | insert: 'IDX.INSERT' document; 26 | 27 | del: 'IDX.DEL' document; 28 | 29 | query: ('IDX.SELECT' | 'QUERY') indexName 'WHERE' (uintPred)* (enumPred)* (strPred)* orderLimit?; 30 | 31 | indexName: IDENTIFIER; 32 | 33 | document: indexName docId value+; 34 | 35 | uintPropDef: property uintType; 36 | 37 | enumPropDef: property K_ENUM; 38 | 39 | strPropDef: property K_STRING; 40 | 41 | orderLimit: 'ORDERBY' order ('LIMIT' limit)?; 42 | 43 | order: property; 44 | 45 | property: IDENTIFIER; 46 | 47 | uintType 48 | : K_UINT8 49 | | K_UINT16 50 | | K_UINT32 51 | | K_UINT64 52 | | K_FLOAT32 53 | | K_FLOAT64 54 | ; 55 | 56 | docId: INT; 57 | 58 | value 59 | : INT 60 | | FLOAT_LIT 61 | | STRING 62 | ; 63 | 64 | uintPred: property compare value; 65 | 66 | enumPred: property K_IN intList; 67 | 68 | strPred: property K_CONTAINS STRING; 69 | 70 | compare 71 | : K_LT 72 | | K_BT 73 | | K_EQ 74 | | K_LE 75 | | K_BE 76 | ; 77 | 78 | intList: '[' INT (',' INT)* ']'; 79 | 80 | limit: INT; 81 | 82 | K_UINT8: 'UINT8'; 83 | K_UINT16: 'UINT16'; 84 | K_UINT32: 'UINT32'; 85 | K_UINT64: 'UINT64'; 86 | K_FLOAT32: 'FLOAT32'; 87 | K_FLOAT64: 'FLOAT64'; 88 | K_ENUM: 'ENUM'; 89 | K_STRING: 'STRING'; 90 | K_IN: 'IN'; 91 | K_CONTAINS: 'CONTAINS'; 92 | K_LT: '<'; 93 | K_BT: '>'; 94 | K_EQ: '='; 95 | K_LE: '<='; 96 | K_BE: '>='; 97 | 98 | // Floating-point literals. Copied from github.com/antlr/grammars-v4/golang/Golang.g4. 99 | 100 | //float_lit = decimals "." [ decimals ] [ exponent ] | 101 | // decimals exponent | 102 | // "." decimals [ exponent ] . 103 | FLOAT_LIT 104 | : DECIMALS '.' DECIMALS? EXPONENT? 105 | | DECIMALS EXPONENT 106 | | '.' DECIMALS EXPONENT? 
107 | ; 108 | 109 | //decimals = decimal_digit { decimal_digit } . 110 | fragment DECIMALS 111 | : DECIMAL_DIGIT+ 112 | ; 113 | 114 | //exponent = ( "e" | "E" ) [ "+" | "-" ] decimals . 115 | fragment EXPONENT 116 | : ( 'e' | 'E' ) ( '+' | '-' )? DECIMALS 117 | ; 118 | 119 | //decimal_digit = "0" … "9" . 120 | fragment DECIMAL_DIGIT 121 | : [0-9] 122 | ; 123 | 124 | STRING 125 | : '"' (ESC | ~ ["\\])* '"' 126 | ; 127 | 128 | 129 | fragment ESC 130 | : '\\' (["\\/bfnrt] | UNICODE) 131 | ; 132 | 133 | 134 | fragment UNICODE 135 | : 'u' HEX HEX HEX HEX 136 | ; 137 | 138 | 139 | fragment HEX 140 | : [0-9a-fA-F] 141 | ; 142 | 143 | 144 | INT 145 | : '0' | [1-9] [0-9]* 146 | ; 147 | 148 | 149 | // no leading zeros 150 | 151 | fragment EXP 152 | : [Ee] [+\-]? INT 153 | ; 154 | 155 | 156 | IDENTIFIER 157 | : [a-zA-Z_]([a-zA-Z0-9_])* 158 | ; 159 | 160 | // \- since - means "range" inside [...] 161 | 162 | WS 163 | : [ \t\n\r] + -> skip 164 | ; 165 | 166 | -------------------------------------------------------------------------------- /wal/encoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The etcd Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package wal 16 | 17 | import ( 18 | "encoding/binary" 19 | "hash" 20 | "io" 21 | "os" 22 | "sync" 23 | 24 | "github.com/coreos/etcd/pkg/crc" 25 | "github.com/coreos/etcd/pkg/ioutil" 26 | "github.com/deepfabric/indexer/wal/walpb" 27 | "github.com/pkg/errors" 28 | ) 29 | 30 | // walPageBytes is the alignment for flushing records to the backing Writer. 31 | // It should be a multiple of the minimum sector size so that WAL can safely 32 | // distinguish between torn writes and ordinary data corruption. 33 | const walPageBytes = 8 * minSectorSize 34 | 35 | type encoder struct { 36 | mu sync.Mutex 37 | bw *ioutil.PageWriter 38 | 39 | crc hash.Hash32 40 | buf []byte 41 | uint64buf []byte 42 | curOff int64 //offset of under-layer io.File 43 | } 44 | 45 | func newEncoder(w io.Writer, prevCrc uint32, pageOffset int) *encoder { 46 | return &encoder{ 47 | bw: ioutil.NewPageWriter(w, walPageBytes, pageOffset), 48 | crc: crc.New(prevCrc, crcTable), 49 | // 1MB buffer 50 | buf: make([]byte, 1024*1024), 51 | uint64buf: make([]byte, 8), 52 | curOff: int64(pageOffset), 53 | } 54 | } 55 | 56 | // newFileEncoder creates a new encoder with current file offset for the page writer. 
57 | func newFileEncoder(f *os.File, prevCrc uint32) (*encoder, error) { 58 | offset, err := f.Seek(0, io.SeekCurrent) 59 | if err != nil { 60 | return nil, errors.Wrap(err, "") 61 | } 62 | return newEncoder(f, prevCrc, int(offset)), nil 63 | } 64 | 65 | func (e *encoder) encode(rec *walpb.Record) (err error) { 66 | e.mu.Lock() 67 | defer e.mu.Unlock() 68 | 69 | e.crc.Write(rec.Data) 70 | rec.Crc = e.crc.Sum32() 71 | var ( 72 | data []byte 73 | n int 74 | ) 75 | 76 | if rec.Size() > len(e.buf) { 77 | if data, err = rec.Marshal(); err != nil { 78 | err = errors.Wrap(err, "") 79 | return 80 | } 81 | } else { 82 | if n, err = rec.MarshalTo(e.buf); err != nil { 83 | err = errors.Wrap(err, "") 84 | return 85 | } 86 | data = e.buf[:n] 87 | } 88 | 89 | lenField, padBytes := encodeFrameSize(len(data)) 90 | if err = writeUint64(e.bw, lenField, e.uint64buf); err != nil { 91 | return 92 | } 93 | e.curOff += int64(8) 94 | 95 | if padBytes != 0 { 96 | data = append(data, make([]byte, padBytes)...) 97 | } 98 | if _, err = e.bw.Write(data); err != nil { 99 | err = errors.Wrap(err, "") 100 | return 101 | } 102 | e.curOff += int64(len(data)) 103 | return 104 | } 105 | 106 | func encodeFrameSize(dataBytes int) (lenField uint64, padBytes int) { 107 | lenField = uint64(dataBytes) 108 | // force 8 byte alignment so length never gets a torn write 109 | padBytes = (8 - (dataBytes % 8)) % 8 110 | if padBytes != 0 { 111 | lenField |= uint64(0x80|padBytes) << 56 112 | } 113 | return lenField, padBytes 114 | } 115 | 116 | func (e *encoder) flush() (err error) { 117 | e.mu.Lock() 118 | defer e.mu.Unlock() 119 | if err = e.bw.Flush(); err != nil { 120 | err = errors.Wrap(err, "") 121 | } 122 | return 123 | } 124 | 125 | func writeUint64(w io.Writer, n uint64, buf []byte) (err error) { 126 | // http://golang.org/src/encoding/binary/binary.go 127 | binary.LittleEndian.PutUint64(buf, n) 128 | if _, err = w.Write(buf); err != nil { 129 | err = errors.Wrap(err, "") 130 | } 131 | return 132 | } 133 
| -------------------------------------------------------------------------------- /terms.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | "sync" 10 | 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | //TermDict stores terms in a map. Note that the term dict is insertion-only. 15 | type TermDict struct { 16 | Dir string 17 | f *os.File 18 | terms map[string]uint64 19 | rwlock sync.RWMutex //concurrent access of TermDict 20 | } 21 | 22 | //NewTermDict creates and initializes a term dict 23 | func NewTermDict(directory string, overwrite bool) (td *TermDict, err error) { 24 | if overwrite { 25 | fp := filepath.Join(directory, "terms") 26 | if err = os.RemoveAll(fp); err != nil { 27 | err = errors.Wrap(err, "") 28 | return 29 | } 30 | } 31 | td = &TermDict{ 32 | Dir: directory, 33 | } 34 | err = td.Open() 35 | return 36 | } 37 | 38 | //Open opens an existing term dict 39 | func (td *TermDict) Open() (err error) { 40 | td.rwlock.Lock() 41 | defer td.rwlock.Unlock() 42 | if td.f != nil { 43 | //TODO: replace panic with log.Fatalf 44 | panic("td.f shall be nil") 45 | } 46 | if err = os.MkdirAll(td.Dir, 0700); err != nil { 47 | err = errors.Wrap(err, "") 48 | return 49 | } 50 | fp := filepath.Join(td.Dir, "terms") 51 | if td.f, err = os.OpenFile(fp, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0600); err != nil { 52 | err = errors.Wrap(err, "") 53 | return 54 | } 55 | td.terms = make(map[string]uint64) 56 | reader := bufio.NewReader(td.f) 57 | var line string 58 | var num uint64 59 | for { 60 | line, err = reader.ReadString('\n') 61 | if err == io.EOF { 62 | err = nil 63 | break 64 | } else if err != nil { 65 | err = errors.Wrap(err, "") 66 | return 67 | } 68 | tmpTerm := strings.TrimSpace(line) 69 | td.terms[tmpTerm] = num 70 | num++ 71 | } 72 | return 73 | } 74 | 75 | //Close clear the dictionary on memory and close file. 
76 | func (td *TermDict) Close() (err error) { 77 | td.rwlock.Lock() 78 | defer td.rwlock.Unlock() 79 | err = td.close() 80 | return 81 | } 82 | 83 | func (td *TermDict) close() (err error) { 84 | if err = td.f.Close(); err != nil { 85 | err = errors.Wrap(err, "") 86 | return 87 | } 88 | td.f = nil 89 | for term := range td.terms { 90 | delete(td.terms, term) 91 | } 92 | return 93 | } 94 | 95 | //Destroy clear the dictionary on memory and disk. 96 | func (td *TermDict) Destroy() (err error) { 97 | td.rwlock.Lock() 98 | defer td.rwlock.Unlock() 99 | if err = td.close(); err != nil { 100 | return 101 | } 102 | fp := filepath.Join(td.Dir, "terms") 103 | if err = os.Remove(fp); err != nil { 104 | err = errors.Wrap(err, "") 105 | return 106 | } 107 | return 108 | } 109 | 110 | //Sync synchronizes terms to disk 111 | func (td *TermDict) Sync() (err error) { 112 | return td.f.Sync() 113 | } 114 | 115 | //CreateTermIfNotExist get id of the given term, will insert the term implicitly if it is not in the dict. 116 | func (td *TermDict) CreateTermIfNotExist(term string) (id uint64, err error) { 117 | var found bool 118 | if id, found = td.GetTermID(term); found { 119 | return id, nil 120 | } 121 | td.rwlock.Lock() 122 | defer td.rwlock.Unlock() 123 | if id, found = td.terms[term]; found { 124 | return id, nil 125 | } 126 | id = uint64(len(td.terms)) 127 | td.terms[term] = id 128 | line := term + "\n" 129 | if _, err = td.f.WriteString(line); err != nil { 130 | err = errors.Wrap(err, "") 131 | return 132 | } 133 | return 134 | } 135 | 136 | //GetTermID get id of the given term. 
137 | func (td *TermDict) GetTermID(term string) (id uint64, found bool) { 138 | td.rwlock.RLock() 139 | id, found = td.terms[term] 140 | td.rwlock.RUnlock() 141 | return 142 | } 143 | 144 | //CreateTermsIfNotExist is bulk version of CreateTermIfNotExist 145 | func (td *TermDict) CreateTermsIfNotExist(terms []string) (ids []uint64, err error) { 146 | ids = make([]uint64, len(terms)) 147 | for i, term := range terms { 148 | if ids[i], err = td.CreateTermIfNotExist(term); err != nil { 149 | return 150 | } 151 | } 152 | return 153 | } 154 | 155 | //Count returns the count of terms 156 | func (td *TermDict) Count() (cnt uint64) { 157 | td.rwlock.RLock() 158 | cnt = uint64(len(td.terms)) 159 | td.rwlock.RUnlock() 160 | return 161 | } 162 | -------------------------------------------------------------------------------- /copy.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | import ( 4 | "io" 5 | "io/ioutil" 6 | "os" 7 | "path/filepath" 8 | 9 | "github.com/pkg/errors" 10 | ) 11 | 12 | // https://gist.github.com/r0l1/92462b38df26839a3ca324697c8cba04 13 | /* MIT License 14 | * 15 | * Copyright (c) 2017 Roland Singer [roland.singer@desertbit.com] 16 | * 17 | * Permission is hereby granted, free of charge, to any person obtaining a copy 18 | * of this software and associated documentation files (the "Software"), to deal 19 | * in the Software without restriction, including without limitation the rights 20 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 21 | * copies of the Software, and to permit persons to whom the Software is 22 | * furnished to do so, subject to the following conditions: 23 | * 24 | * The above copyright notice and this permission notice shall be included in all 25 | * copies or substantial portions of the Software. 
26 | * 27 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 28 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 29 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 30 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 31 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 32 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 | * SOFTWARE. 34 | */ 35 | 36 | // CopyFile copies the contents of the file named src to the file named 37 | // by dst. The file will be created if it does not already exist. If the 38 | // destination file exists, all it's contents will be replaced by the contents 39 | // of the source file. The file mode will be copied from the source and 40 | // the copied data is synced/flushed to stable storage. 41 | func CopyFile(src, dst string) (err error) { 42 | in, err := os.Open(src) 43 | if err != nil { 44 | err = errors.Wrap(err, "") 45 | return 46 | } 47 | defer in.Close() 48 | 49 | out, err := os.Create(dst) 50 | if err != nil { 51 | err = errors.Wrap(err, "") 52 | return 53 | } 54 | defer func() { 55 | if e := out.Close(); e != nil { 56 | err = e 57 | } 58 | }() 59 | 60 | if _, err = io.Copy(out, in); err != nil { 61 | err = errors.Wrap(err, "") 62 | return 63 | } 64 | 65 | if err = out.Sync(); err != nil { 66 | err = errors.Wrap(err, "") 67 | return 68 | } 69 | 70 | si, err := os.Stat(src) 71 | if err != nil { 72 | err = errors.Wrap(err, "") 73 | return 74 | } 75 | if err = os.Chmod(dst, si.Mode()); err != nil { 76 | err = errors.Wrap(err, "") 77 | return 78 | } 79 | 80 | return 81 | } 82 | 83 | // CopyDir recursively copies a directory tree, attempting to preserve permissions. 84 | // Source directory must exist, destination directory must *not* exist. 85 | // Symlinks are ignored and skipped. 
86 | func CopyDir(src string, dst string) (err error) { 87 | src = filepath.Clean(src) 88 | dst = filepath.Clean(dst) 89 | 90 | si, err := os.Stat(src) 91 | if err != nil { 92 | err = errors.Wrap(err, "") 93 | return err 94 | } 95 | if !si.IsDir() { 96 | return errors.Errorf("source %v is not a directory", src) 97 | } 98 | 99 | _, err = os.Stat(dst) 100 | if err != nil && !os.IsNotExist(err) { 101 | err = errors.Wrap(err, "") 102 | return 103 | } 104 | if err == nil { 105 | return errors.Errorf("destination %v already exists", dst) 106 | } 107 | 108 | if err = os.MkdirAll(dst, si.Mode()); err != nil { 109 | err = errors.Wrap(err, "") 110 | return 111 | } 112 | 113 | entries, err := ioutil.ReadDir(src) 114 | if err != nil { 115 | err = errors.Wrap(err, "") 116 | return 117 | } 118 | 119 | for _, entry := range entries { 120 | srcPath := filepath.Join(src, entry.Name()) 121 | dstPath := filepath.Join(dst, entry.Name()) 122 | 123 | if entry.IsDir() { 124 | err = CopyDir(srcPath, dstPath) 125 | if err != nil { 126 | return 127 | } 128 | } else { 129 | // Skip symlinks. 
130 | if entry.Mode()&os.ModeSymlink != 0 { 131 | continue 132 | } 133 | 134 | err = CopyFile(srcPath, dstPath) 135 | if err != nil { 136 | return 137 | } 138 | } 139 | } 140 | 141 | return 142 | } 143 | -------------------------------------------------------------------------------- /test/pilosa_range/benchmark.go: -------------------------------------------------------------------------------- 1 | /* 2 | https://www.pilosa.com/blog/range-encoded-bitmaps/ 3 | 4 | 测试 pilosa range-encoded-bitmaps 5 | */ 6 | 7 | package main 8 | 9 | import ( 10 | "flag" 11 | "fmt" 12 | "io" 13 | "io/ioutil" 14 | "log" 15 | "net/http" 16 | _ "net/http/pprof" 17 | "os" 18 | "path/filepath" 19 | "strconv" 20 | "time" 21 | 22 | "github.com/pilosa/pilosa" 23 | ) 24 | 25 | var ( 26 | pprof = flag.String("addr-pprof", "", "pprof http server address") 27 | logFile = flag.String("log-file", "", "pilosa fragment log file") 28 | ) 29 | 30 | func fragmentPath(sliceID int) string { 31 | return filepath.Join("/tmp/pilosa_range/fragments", strconv.FormatInt(int64(sliceID), 10)) 32 | } 33 | 34 | // GetLogWriter opens a file for logging, or a default io.Writer (such as stderr) for an empty path. 
35 | func GetLogWriter(path string, defaultWriter io.Writer) (io.Writer, error) { 36 | // This is split out so it can be used in NewServeCmd as well as SetupServer 37 | if path == "" { 38 | return defaultWriter, nil 39 | } else { 40 | logFile, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0600) 41 | if err != nil { 42 | return nil, err 43 | } 44 | return logFile, nil 45 | } 46 | } 47 | 48 | func main() { 49 | flag.Parse() 50 | N := 1000000 51 | Q := 17000 52 | R := 500 53 | S := 1000 54 | 55 | if "" != *pprof { 56 | log.Printf("bootstrap: start pprof at: %s", *pprof) 57 | go func() { 58 | log.Fatalf("bootstrap: start pprof failed, errors:\n%+v", 59 | http.ListenAndServe(*pprof, nil)) 60 | }() 61 | } 62 | 63 | // record time 64 | t0 := time.Now() 65 | 66 | fragments := make(map[int]*pilosa.Fragment) //map slice to Fragment 67 | var sliceID int 68 | var frag *pilosa.Fragment 69 | var ok bool 70 | var err error 71 | var logOutput io.Writer 72 | if logOutput, err = GetLogWriter(*logFile, ioutil.Discard); err != nil { 73 | log.Fatal(err) 74 | } 75 | 76 | if err = os.RemoveAll("/tmp/pilosa_range/fragments"); err != nil { 77 | log.Fatal(err) 78 | } 79 | if err = os.MkdirAll("/tmp/pilosa_range/fragments", 0700); err != nil { 80 | log.Fatal(err) 81 | } 82 | for i := 0; i < N; i++ { 83 | sliceID = i / pilosa.SliceWidth 84 | if frag, ok = fragments[sliceID]; !ok { 85 | fp := fragmentPath(sliceID) 86 | frag = pilosa.NewFragment(fp, "index", "frame", pilosa.ViewStandard, uint64(sliceID)) 87 | frag.MaxOpN = frag.MaxOpN * 100 88 | frag.CacheType = pilosa.CacheTypeNone 89 | frag.LogOutput = logOutput 90 | if err = frag.Open(); err != nil { 91 | log.Fatal(err) 92 | return 93 | } 94 | fragments[sliceID] = frag 95 | } 96 | _, err = frag.SetFieldValue(uint64(i), uint(32), uint64(i)) 97 | if err != nil { 98 | log.Fatalf("frag.SetFieldValue failed, i=%v, err: %+v", i, err) 99 | } 100 | } 101 | 102 | // record time, and calculate performance 103 | t1 := time.Now() 104 | 
log.Printf("duration %v", t1.Sub(t0)) 105 | log.Printf("insertion speed %f docs/s", float64(N)/t1.Sub(t0).Seconds()) 106 | fmt.Printf("duration %v\n", t1.Sub(t0)) 107 | fmt.Printf("insertion speed %f docs/s\n", float64(N)/t1.Sub(t0).Seconds()) 108 | 109 | var bs *pilosa.Bitmap 110 | vals := make([]uint64, 0, 1000) 111 | for i := N - 1; i >= N-Q; i-- { 112 | vals = vals[:0] 113 | sliceID = i / pilosa.SliceWidth 114 | if frag, ok = fragments[sliceID]; !ok { 115 | log.Fatalf("frag %v doesn't exist", sliceID) 116 | } 117 | bs, err = frag.FieldRangeBetween(uint(32), uint64(i-R), uint64(i+R)) 118 | if err != nil { 119 | log.Fatalf("frag.FieldRangeBetween failed, i=%v, err: %+v", i, err) 120 | } 121 | var val uint64 122 | var exists bool 123 | for _, docID := range bs.Bits() { 124 | if val, exists, err = frag.FieldValue(docID, uint(32)); err != nil { 125 | log.Fatalf("frag.FieldValue failed, i=%v, err: %+v", i, err) 126 | } 127 | if !exists { 128 | log.Fatalf("document %v doesn't exist", docID) 129 | } 130 | vals = append(vals, val) 131 | } 132 | //log.Printf("vals %v\n", vals) 133 | } 134 | 135 | // record time, and calculate performance 136 | t2 := time.Now() 137 | log.Printf("duration %v", t2.Sub(t1)) 138 | log.Printf("query speed %f queries/s", float64(Q)/t2.Sub(t1).Seconds()) 139 | log.Printf("bs: %v", bs.Bits()) 140 | 141 | _, _ = logOutput.Write([]byte("begin snapshot loop......\n")) 142 | for i := 0; i < S; i++ { 143 | for _, frag = range fragments { 144 | if err = frag.Snapshot(); err != nil { 145 | log.Fatal(err) 146 | } 147 | } 148 | } 149 | 150 | // record time, and calculate performance 151 | t3 := time.Now() 152 | log.Printf("duration %v", t3.Sub(t2)) 153 | log.Printf("snapshot speed %f fragment-snapshots/s", float64(S)/t3.Sub(t2).Seconds()) 154 | } 155 | -------------------------------------------------------------------------------- /wal/decoder.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The 
etcd Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package wal 16 | 17 | import ( 18 | "bufio" 19 | "encoding/binary" 20 | "hash" 21 | "io" 22 | "sync" 23 | 24 | "github.com/coreos/etcd/pkg/crc" 25 | "github.com/coreos/etcd/pkg/pbutil" 26 | "github.com/deepfabric/indexer/wal/walpb" 27 | "github.com/pkg/errors" 28 | ) 29 | 30 | const minSectorSize = 512 31 | 32 | // frameSizeBytes is frame size in bytes, including record size and padding size. 
const frameSizeBytes = 8

// decoder reads WAL records back from a sequence of readers, verifying the
// running CRC as it goes.
type decoder struct {
	mu  sync.Mutex      // serializes decode calls
	brs []*bufio.Reader // remaining inputs; brs[0] is the one being read

	// lastValidOff file offset following the last valid decoded record
	lastValidOff int64
	crc          hash.Hash32 // running checksum over the decoded record payloads
}

// newDecoder returns a decoder that consumes the given readers in order,
// each wrapped in a bufio.Reader, with the running CRC starting from 0.
func newDecoder(r ...io.Reader) *decoder {
	readers := make([]*bufio.Reader, len(r))
	for i := range r {
		readers[i] = bufio.NewReader(r[i])
	}
	return &decoder{
		brs: readers,
		crc: crc.New(0, crcTable),
	}
}

// decode resets rec and fills it with the next record. It returns a wrapped
// io.EOF once every reader is exhausted.
func (d *decoder) decode(rec *walpb.Record) error {
	rec.Reset()
	d.mu.Lock()
	defer d.mu.Unlock()
	return d.decodeRecord(rec)
}

// decodeRecord reads one frame (8-byte length field, record bytes, padding)
// from the current reader into rec. The caller must hold d.mu.
//
// A clean EOF or a zero length field ends the current reader and decoding
// continues with the next one. Unmarshal and CRC failures are reported as a
// wrapped io.ErrUnexpectedEOF when isTornEntry classifies the frame as a torn
// write, and as the wrapped underlying error otherwise. On success
// lastValidOff moves past the frame.
func (d *decoder) decodeRecord(rec *walpb.Record) error {
	if len(d.brs) == 0 {
		return errors.Wrap(io.EOF, "")
	}

	l, err := readInt64(d.brs[0])
	if errors.Cause(err) == io.EOF || (err == nil && l == 0) {
		// hit end of file or preallocated space
		d.brs = d.brs[1:]
		if len(d.brs) == 0 {
			return errors.Wrap(io.EOF, "")
		}
		// offsets are tracked per reader, so restart for the next one
		d.lastValidOff = 0
		return d.decodeRecord(rec)
	}
	if err != nil {
		return err
	}

	recBytes, padBytes := decodeFrameSize(l)

	data := make([]byte, recBytes+padBytes)
	if _, err = io.ReadFull(d.brs[0], data); err != nil {
		// ReadFull returns io.EOF only if no bytes were read
		// the decoder should treat this as an ErrUnexpectedEOF instead.
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return errors.Wrap(err, "")
	}
	// only the first recBytes are the record; the rest is padding
	if err := rec.Unmarshal(data[:recBytes]); err != nil {
		if d.isTornEntry(data) {
			return errors.Wrap(io.ErrUnexpectedEOF, "")
		}
		return errors.Wrap(err, "")
	}

	// skip crc checking if the record type is crcType
	if rec.Type != crcType {
		d.crc.Write(rec.Data)
		if err := rec.Validate(d.crc.Sum32()); err != nil {
			if d.isTornEntry(data) {
				return errors.Wrap(io.ErrUnexpectedEOF, "")
			}
			return errors.Wrap(err, "")
		}
	}
	// record decoded as valid; point last valid offset to end of record
	d.lastValidOff += frameSizeBytes + recBytes + padBytes
	return nil
}

// decodeFrameSize splits a frame length field into the record size (its lower
// 56 bits) and the number of padding bytes (0 unless the field is negative).
func decodeFrameSize(lenField int64) (recBytes int64, padBytes int64) {
	// the record size is stored in the lower 56 bits of the 64-bit length
	recBytes = int64(uint64(lenField) & ^(uint64(0xff) << 56))
	// non-zero padding is indicated by set MSb / a negative length
	if lenField < 0 {
		// padding is stored in lower 3 bits of length MSB
		padBytes = int64((uint64(lenField) >> 56) & 0x7)
	}
	return recBytes, padBytes
}

// isTornEntry determines whether the last entry of the WAL was partially written
// and corrupted because of a torn write.
127 | func (d *decoder) isTornEntry(data []byte) bool { 128 | if len(d.brs) != 1 { 129 | return false 130 | } 131 | 132 | fileOff := d.lastValidOff + frameSizeBytes 133 | curOff := 0 134 | chunks := [][]byte{} 135 | // split data on sector boundaries 136 | for curOff < len(data) { 137 | chunkLen := int(minSectorSize - (fileOff % minSectorSize)) 138 | if chunkLen > len(data)-curOff { 139 | chunkLen = len(data) - curOff 140 | } 141 | chunks = append(chunks, data[curOff:curOff+chunkLen]) 142 | fileOff += int64(chunkLen) 143 | curOff += chunkLen 144 | } 145 | 146 | // if any data for a sector chunk is all 0, it's a torn write 147 | for _, sect := range chunks { 148 | isZero := true 149 | for _, v := range sect { 150 | if v != 0 { 151 | isZero = false 152 | break 153 | } 154 | } 155 | if isZero { 156 | return true 157 | } 158 | } 159 | return false 160 | } 161 | 162 | func (d *decoder) updateCRC(prevCrc uint32) { 163 | d.crc = crc.New(prevCrc, crcTable) 164 | } 165 | 166 | func (d *decoder) lastCRC() uint32 { 167 | return d.crc.Sum32() 168 | } 169 | 170 | func (d *decoder) lastOffset() int64 { return d.lastValidOff } 171 | 172 | func mustUnmarshalEntry(d []byte) walpb.Entry { 173 | var e walpb.Entry 174 | pbutil.MustUnmarshal(&e, d) 175 | return e 176 | } 177 | 178 | func readInt64(r io.Reader) (n int64, err error) { 179 | if err = binary.Read(r, binary.LittleEndian, &n); err != nil { 180 | err = errors.Wrap(err, "") 181 | } 182 | return 183 | } 184 | -------------------------------------------------------------------------------- /cql/parser/cql_listener.go: -------------------------------------------------------------------------------- 1 | // Generated from /home/zhichyu/src/github.com/deepfabric/indexer/cql/parser/CQL.g4 by ANTLR 4.7. 2 | 3 | package parser // CQL 4 | 5 | import "github.com/antlr/antlr4/runtime/Go/antlr" 6 | 7 | // CQLListener is a complete listener for a parse tree produced by CQLParser. 
8 | type CQLListener interface { 9 | antlr.ParseTreeListener 10 | 11 | // EnterCql is called when entering the cql production. 12 | EnterCql(c *CqlContext) 13 | 14 | // EnterCreate is called when entering the create production. 15 | EnterCreate(c *CreateContext) 16 | 17 | // EnterDestroy is called when entering the destroy production. 18 | EnterDestroy(c *DestroyContext) 19 | 20 | // EnterInsert is called when entering the insert production. 21 | EnterInsert(c *InsertContext) 22 | 23 | // EnterDel is called when entering the del production. 24 | EnterDel(c *DelContext) 25 | 26 | // EnterQuery is called when entering the query production. 27 | EnterQuery(c *QueryContext) 28 | 29 | // EnterIndexName is called when entering the indexName production. 30 | EnterIndexName(c *IndexNameContext) 31 | 32 | // EnterDocument is called when entering the document production. 33 | EnterDocument(c *DocumentContext) 34 | 35 | // EnterUintPropDef is called when entering the uintPropDef production. 36 | EnterUintPropDef(c *UintPropDefContext) 37 | 38 | // EnterEnumPropDef is called when entering the enumPropDef production. 39 | EnterEnumPropDef(c *EnumPropDefContext) 40 | 41 | // EnterStrPropDef is called when entering the strPropDef production. 42 | EnterStrPropDef(c *StrPropDefContext) 43 | 44 | // EnterOrderLimit is called when entering the orderLimit production. 45 | EnterOrderLimit(c *OrderLimitContext) 46 | 47 | // EnterOrder is called when entering the order production. 48 | EnterOrder(c *OrderContext) 49 | 50 | // EnterProperty is called when entering the property production. 51 | EnterProperty(c *PropertyContext) 52 | 53 | // EnterUintType is called when entering the uintType production. 54 | EnterUintType(c *UintTypeContext) 55 | 56 | // EnterDocId is called when entering the docId production. 57 | EnterDocId(c *DocIdContext) 58 | 59 | // EnterValue is called when entering the value production. 
60 | EnterValue(c *ValueContext) 61 | 62 | // EnterUintPred is called when entering the uintPred production. 63 | EnterUintPred(c *UintPredContext) 64 | 65 | // EnterEnumPred is called when entering the enumPred production. 66 | EnterEnumPred(c *EnumPredContext) 67 | 68 | // EnterStrPred is called when entering the strPred production. 69 | EnterStrPred(c *StrPredContext) 70 | 71 | // EnterCompare is called when entering the compare production. 72 | EnterCompare(c *CompareContext) 73 | 74 | // EnterIntList is called when entering the intList production. 75 | EnterIntList(c *IntListContext) 76 | 77 | // EnterLimit is called when entering the limit production. 78 | EnterLimit(c *LimitContext) 79 | 80 | // ExitCql is called when exiting the cql production. 81 | ExitCql(c *CqlContext) 82 | 83 | // ExitCreate is called when exiting the create production. 84 | ExitCreate(c *CreateContext) 85 | 86 | // ExitDestroy is called when exiting the destroy production. 87 | ExitDestroy(c *DestroyContext) 88 | 89 | // ExitInsert is called when exiting the insert production. 90 | ExitInsert(c *InsertContext) 91 | 92 | // ExitDel is called when exiting the del production. 93 | ExitDel(c *DelContext) 94 | 95 | // ExitQuery is called when exiting the query production. 96 | ExitQuery(c *QueryContext) 97 | 98 | // ExitIndexName is called when exiting the indexName production. 99 | ExitIndexName(c *IndexNameContext) 100 | 101 | // ExitDocument is called when exiting the document production. 102 | ExitDocument(c *DocumentContext) 103 | 104 | // ExitUintPropDef is called when exiting the uintPropDef production. 105 | ExitUintPropDef(c *UintPropDefContext) 106 | 107 | // ExitEnumPropDef is called when exiting the enumPropDef production. 108 | ExitEnumPropDef(c *EnumPropDefContext) 109 | 110 | // ExitStrPropDef is called when exiting the strPropDef production. 111 | ExitStrPropDef(c *StrPropDefContext) 112 | 113 | // ExitOrderLimit is called when exiting the orderLimit production. 
114 | ExitOrderLimit(c *OrderLimitContext) 115 | 116 | // ExitOrder is called when exiting the order production. 117 | ExitOrder(c *OrderContext) 118 | 119 | // ExitProperty is called when exiting the property production. 120 | ExitProperty(c *PropertyContext) 121 | 122 | // ExitUintType is called when exiting the uintType production. 123 | ExitUintType(c *UintTypeContext) 124 | 125 | // ExitDocId is called when exiting the docId production. 126 | ExitDocId(c *DocIdContext) 127 | 128 | // ExitValue is called when exiting the value production. 129 | ExitValue(c *ValueContext) 130 | 131 | // ExitUintPred is called when exiting the uintPred production. 132 | ExitUintPred(c *UintPredContext) 133 | 134 | // ExitEnumPred is called when exiting the enumPred production. 135 | ExitEnumPred(c *EnumPredContext) 136 | 137 | // ExitStrPred is called when exiting the strPred production. 138 | ExitStrPred(c *StrPredContext) 139 | 140 | // ExitCompare is called when exiting the compare production. 141 | ExitCompare(c *CompareContext) 142 | 143 | // ExitIntList is called when exiting the intList production. 144 | ExitIntList(c *IntListContext) 145 | 146 | // ExitLimit is called when exiting the limit production. 
147 | ExitLimit(c *LimitContext) 148 | } 149 | -------------------------------------------------------------------------------- /cql/cql_test.go: -------------------------------------------------------------------------------- 1 | package cql 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestParseCql(t *testing.T) { 12 | var res interface{} 13 | var err error 14 | tcs := []string{ 15 | "IDX.CREATE orders SCHEMA type ENUM", 16 | "IDX.CREATE orders SCHEMA object UINT64 price UINT32 number UINT32 date UINT64", 17 | "IDX.CREATE orders SCHEMA object UINT64 price UINT32 number UINT32 date UINT64 type ENUM", 18 | "IDX.CREATE orders SCHEMA object UINT64 price UINT32 number UINT32 date UINT64 desc STRING", 19 | "IDX.CREATE orders SCHEMA object UINT64 price UINT32 number UINT32 date UINT64 type ENUM desc STRING", 20 | "IDX.INSERT orders 615 11 22 33 44 3 \"description\"", 21 | "IDX.DEL orders 615 11 22 33 44 3 \"description\"", 22 | "IDX.SELECT orders WHERE price>=30 price<40 date<2017 type IN [1,3] desc CONTAINS \"pen\" ORDERBY date", 23 | "IDX.SELECT orders WHERE price>=30 price<=40 date<2017 type IN [1,3] ORDERBY date LIMIT 30", 24 | "IDX.SELECT orders WHERE price>=30 price<=40 type IN [1,3]", 25 | "QUERY orders WHERE price>=30 price<=40 type IN [1,3]", 26 | "IDX.DESTROY orders", 27 | } 28 | docProts := make(map[string]*Document) 29 | for i, tc := range tcs { 30 | fmt.Println(tc) 31 | // Note that IDX.CREATE and IDX.DEL don't need docProts. 
32 | res, err = ParseCql(tc, docProts) 33 | require.NoErrorf(t, err, "case %d", i) 34 | switch r := res.(type) { 35 | case *CqlCreate: 36 | fmt.Printf("Create index %v\n", r) 37 | docProts[r.DocumentWithIdx.Index] = &r.DocumentWithIdx.Doc 38 | case *CqlDestroy: 39 | fmt.Printf("Destroy index %s\n", r.Index) 40 | delete(docProts, r.Index) 41 | case *CqlInsert: 42 | fmt.Printf("Insert %v\n", r) 43 | case *CqlDel: 44 | fmt.Printf("Del %v\n", r) 45 | case *CqlSelect: 46 | fmt.Printf("Select %v\n", r) 47 | default: 48 | //There shouldn't be any parsing error for above test cases. 49 | t.Fatalf("case %d, res %+v\n", i, res) 50 | } 51 | } 52 | } 53 | 54 | func TestParseCqlSelect(t *testing.T) { 55 | var res interface{} 56 | var err error 57 | var c *CqlCreate 58 | var q *CqlSelect 59 | var uintPred UintPred 60 | var enumPred EnumPred 61 | var strPred StrPred 62 | var ok bool 63 | //Prepare index 64 | docProts := make(map[string]*Document) 65 | res, err = ParseCql("IDX.CREATE orders SCHEMA object UINT64 price UINT32 priceF32 FLOAT32 priceF64 FLOAT64 number UINT32 date UINT64 type ENUM desc STRING", docProts) 66 | require.NoError(t, err) 67 | c = res.(*CqlCreate) 68 | docProts[c.DocumentWithIdx.Index] = &c.DocumentWithIdx.Doc 69 | 70 | //TESTCASE: multiple UintPred of the same property into one 71 | res, err = ParseCql("IDX.SELECT orders WHERE price>=30 price<=40 price<35 price>20", docProts) 72 | require.NoError(t, err) 73 | q = res.(*CqlSelect) 74 | uintPred, ok = q.UintPreds["price"] 75 | require.Equalf(t, true, ok, "UintPred price is gone") 76 | require.Equal(t, 30, uintPred.Low) 77 | require.Equal(t, 34, uintPred.High) 78 | 79 | //TESTCASE: FLOAT32 80 | valSs := []string{"30", "40.3"} 81 | vals := make([]uint64, len(valSs)) 82 | for i, valS := range valSs { 83 | var val uint64 84 | val, err = Float32ToSortableUint64(valS) 85 | require.NoError(t, err) 86 | vals[i] = val 87 | fmt.Printf("FLOAT32 %v\t%v\n", valS, val) 88 | } 89 | res, err = ParseCql("IDX.SELECT orders 
WHERE priceF32>=30 priceF32<=40.3", docProts) 90 | require.NoError(t, err) 91 | q = res.(*CqlSelect) 92 | uintPred, ok = q.UintPreds["priceF32"] 93 | require.Equalf(t, true, ok, "UintPred price is gone") 94 | require.Equal(t, vals[0], uintPred.Low) 95 | require.Equal(t, vals[1], uintPred.High) 96 | 97 | //TESTCASE: FLOAT64 98 | for i, valS := range valSs { 99 | var val uint64 100 | val, err = Float64ToSortableUint64(valS) 101 | require.NoError(t, err) 102 | vals[i] = val 103 | fmt.Printf("FLOAT64 %v\t%v\n", valS, val) 104 | } 105 | res, err = ParseCql("IDX.SELECT orders WHERE priceF64>=30 priceF64<=40.3", docProts) 106 | require.NoError(t, err) 107 | q = res.(*CqlSelect) 108 | uintPred, ok = q.UintPreds["priceF64"] 109 | require.Equalf(t, true, ok, "UintPred price is gone") 110 | require.Equal(t, vals[0], uintPred.Low) 111 | require.Equal(t, vals[1], uintPred.High) 112 | 113 | //TESTCASE: normal EnumPred 114 | res, err = ParseCql("IDX.SELECT orders WHERE type IN [1,3]", docProts) 115 | require.NoError(t, err) 116 | q = res.(*CqlSelect) 117 | enumPred, ok = q.EnumPreds["type"] 118 | require.Equalf(t, true, ok, "EnumPred type is gone") 119 | require.Equalf(t, []int{1, 3}, enumPred.InVals, "incorrect EnumPred type") 120 | 121 | //TESTCASE: invalid query due to multiple EnumPred of a property 122 | res, err = ParseCql("IDX.SELECT orders WHERE type IN [1,3] type IN [3,9]", docProts) 123 | require.Errorf(t, err, "incorrect EnumPred type %v, want error", res) 124 | 125 | //TESTCASE: normal StrPred 126 | res, err = ParseCql("IDX.SELECT orders WHERE desc CONTAINS \"pen\"", docProts) 127 | require.NoError(t, err) 128 | q = res.(*CqlSelect) 129 | strPred, ok = q.StrPreds["desc"] 130 | require.Equalf(t, true, ok, "StrPred desc is gone") 131 | require.Equal(t, "pen", strings.ToLower(strPred.ContWord)) 132 | 133 | tcs := []string{ 134 | //TESTCASE: invalid query due to multiple StrPred of a property 135 | "IDX.SELECT orders WHERE desc CONTAINS \"pen\" desc CONTAINS \"pencil\"", 
136 | //TESTCASE: invalid query due to OBDERBY property doesn't occur in WHERE 137 | "IDX.SELECT orders WHERE price>=30 price<=40 ORDERBY date", 138 | //TESTCASE: invalid query due to OBDERBY property doesn't occur as a UintPred 139 | "IDX.SELECT orders WHERE price>=30 price<=40 type IN [1,3] ORDERBY type", 140 | //TESTCASE: invalid query due to mismatching property name 141 | "IDX.SELECT orders WHERE prices>=20.2", 142 | } 143 | for _, tc := range tcs { 144 | res, err = ParseCql(tc, docProts) 145 | require.Errorf(t, err, "have %+v, want an error", res) 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /cql/parser/CQL.interp: -------------------------------------------------------------------------------- 1 | token literal names: 2 | null 3 | 'IDX.CREATE' 4 | 'SCHEMA' 5 | 'IDX.DESTROY' 6 | 'IDX.INSERT' 7 | 'IDX.DEL' 8 | 'IDX.SELECT' 9 | 'QUERY' 10 | 'WHERE' 11 | 'ORDERBY' 12 | 'LIMIT' 13 | '[' 14 | ',' 15 | ']' 16 | 'UINT8' 17 | 'UINT16' 18 | 'UINT32' 19 | 'UINT64' 20 | 'FLOAT32' 21 | 'FLOAT64' 22 | 'ENUM' 23 | 'STRING' 24 | 'IN' 25 | 'CONTAINS' 26 | '<' 27 | '>' 28 | '=' 29 | '<=' 30 | '>=' 31 | null 32 | null 33 | null 34 | null 35 | null 36 | 37 | token symbolic names: 38 | null 39 | null 40 | null 41 | null 42 | null 43 | null 44 | null 45 | null 46 | null 47 | null 48 | null 49 | null 50 | null 51 | null 52 | K_UINT8 53 | K_UINT16 54 | K_UINT32 55 | K_UINT64 56 | K_FLOAT32 57 | K_FLOAT64 58 | K_ENUM 59 | K_STRING 60 | K_IN 61 | K_CONTAINS 62 | K_LT 63 | K_BT 64 | K_EQ 65 | K_LE 66 | K_BE 67 | FLOAT_LIT 68 | STRING 69 | INT 70 | IDENTIFIER 71 | WS 72 | 73 | rule names: 74 | cql 75 | create 76 | destroy 77 | insert 78 | del 79 | query 80 | indexName 81 | document 82 | uintPropDef 83 | enumPropDef 84 | strPropDef 85 | orderLimit 86 | order 87 | property 88 | uintType 89 | docId 90 | value 91 | uintPred 92 | enumPred 93 | strPred 94 | compare 95 | intList 96 | limit 97 | 98 | 99 | atn: 100 | [3, 24715, 42794, 
33075, 47597, 16764, 15335, 30598, 22884, 3, 35, 181, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 14, 9, 14, 4, 15, 9, 15, 4, 16, 9, 16, 4, 17, 9, 17, 4, 18, 9, 18, 4, 19, 9, 19, 4, 20, 9, 20, 4, 21, 9, 21, 4, 22, 9, 22, 4, 23, 9, 23, 4, 24, 9, 24, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 64, 10, 2, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 70, 10, 3, 12, 3, 14, 3, 73, 11, 3, 3, 3, 7, 3, 76, 10, 3, 12, 3, 14, 3, 79, 11, 3, 3, 3, 7, 3, 82, 10, 3, 12, 3, 14, 3, 85, 11, 3, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 7, 3, 7, 3, 7, 3, 7, 7, 7, 100, 10, 7, 12, 7, 14, 7, 103, 11, 7, 3, 7, 7, 7, 106, 10, 7, 12, 7, 14, 7, 109, 11, 7, 3, 7, 7, 7, 112, 10, 7, 12, 7, 14, 7, 115, 11, 7, 3, 7, 5, 7, 118, 10, 7, 3, 8, 3, 8, 3, 9, 3, 9, 3, 9, 6, 9, 125, 10, 9, 13, 9, 14, 9, 126, 3, 10, 3, 10, 3, 10, 3, 11, 3, 11, 3, 11, 3, 12, 3, 12, 3, 12, 3, 13, 3, 13, 3, 13, 3, 13, 5, 13, 142, 10, 13, 3, 14, 3, 14, 3, 15, 3, 15, 3, 16, 3, 16, 3, 17, 3, 17, 3, 18, 3, 18, 3, 19, 3, 19, 3, 19, 3, 19, 3, 20, 3, 20, 3, 20, 3, 20, 3, 21, 3, 21, 3, 21, 3, 21, 3, 22, 3, 22, 3, 23, 3, 23, 3, 23, 3, 23, 7, 23, 172, 10, 23, 12, 23, 14, 23, 175, 11, 23, 3, 23, 3, 23, 3, 24, 3, 24, 3, 24, 2, 2, 25, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 2, 6, 3, 2, 8, 9, 3, 2, 16, 21, 3, 2, 31, 33, 3, 2, 26, 30, 2, 171, 2, 63, 3, 2, 2, 2, 4, 65, 3, 2, 2, 2, 6, 86, 3, 2, 2, 2, 8, 89, 3, 2, 2, 2, 10, 92, 3, 2, 2, 2, 12, 95, 3, 2, 2, 2, 14, 119, 3, 2, 2, 2, 16, 121, 3, 2, 2, 2, 18, 128, 3, 2, 2, 2, 20, 131, 3, 2, 2, 2, 22, 134, 3, 2, 2, 2, 24, 137, 3, 2, 2, 2, 26, 143, 3, 2, 2, 2, 28, 145, 3, 2, 2, 2, 30, 147, 3, 2, 2, 2, 32, 149, 3, 2, 2, 2, 34, 151, 3, 2, 2, 2, 36, 153, 3, 2, 2, 2, 38, 157, 3, 2, 2, 2, 40, 161, 3, 2, 2, 2, 42, 165, 3, 2, 2, 2, 44, 167, 3, 2, 2, 2, 46, 178, 3, 2, 2, 2, 48, 49, 5, 4, 3, 2, 49, 50, 
7, 2, 2, 3, 50, 64, 3, 2, 2, 2, 51, 52, 5, 6, 4, 2, 52, 53, 7, 2, 2, 3, 53, 64, 3, 2, 2, 2, 54, 55, 5, 8, 5, 2, 55, 56, 7, 2, 2, 3, 56, 64, 3, 2, 2, 2, 57, 58, 5, 10, 6, 2, 58, 59, 7, 2, 2, 3, 59, 64, 3, 2, 2, 2, 60, 61, 5, 12, 7, 2, 61, 62, 7, 2, 2, 3, 62, 64, 3, 2, 2, 2, 63, 48, 3, 2, 2, 2, 63, 51, 3, 2, 2, 2, 63, 54, 3, 2, 2, 2, 63, 57, 3, 2, 2, 2, 63, 60, 3, 2, 2, 2, 64, 3, 3, 2, 2, 2, 65, 66, 7, 3, 2, 2, 66, 67, 5, 14, 8, 2, 67, 71, 7, 4, 2, 2, 68, 70, 5, 18, 10, 2, 69, 68, 3, 2, 2, 2, 70, 73, 3, 2, 2, 2, 71, 69, 3, 2, 2, 2, 71, 72, 3, 2, 2, 2, 72, 77, 3, 2, 2, 2, 73, 71, 3, 2, 2, 2, 74, 76, 5, 20, 11, 2, 75, 74, 3, 2, 2, 2, 76, 79, 3, 2, 2, 2, 77, 75, 3, 2, 2, 2, 77, 78, 3, 2, 2, 2, 78, 83, 3, 2, 2, 2, 79, 77, 3, 2, 2, 2, 80, 82, 5, 22, 12, 2, 81, 80, 3, 2, 2, 2, 82, 85, 3, 2, 2, 2, 83, 81, 3, 2, 2, 2, 83, 84, 3, 2, 2, 2, 84, 5, 3, 2, 2, 2, 85, 83, 3, 2, 2, 2, 86, 87, 7, 5, 2, 2, 87, 88, 5, 14, 8, 2, 88, 7, 3, 2, 2, 2, 89, 90, 7, 6, 2, 2, 90, 91, 5, 16, 9, 2, 91, 9, 3, 2, 2, 2, 92, 93, 7, 7, 2, 2, 93, 94, 5, 16, 9, 2, 94, 11, 3, 2, 2, 2, 95, 96, 9, 2, 2, 2, 96, 97, 5, 14, 8, 2, 97, 101, 7, 10, 2, 2, 98, 100, 5, 36, 19, 2, 99, 98, 3, 2, 2, 2, 100, 103, 3, 2, 2, 2, 101, 99, 3, 2, 2, 2, 101, 102, 3, 2, 2, 2, 102, 107, 3, 2, 2, 2, 103, 101, 3, 2, 2, 2, 104, 106, 5, 38, 20, 2, 105, 104, 3, 2, 2, 2, 106, 109, 3, 2, 2, 2, 107, 105, 3, 2, 2, 2, 107, 108, 3, 2, 2, 2, 108, 113, 3, 2, 2, 2, 109, 107, 3, 2, 2, 2, 110, 112, 5, 40, 21, 2, 111, 110, 3, 2, 2, 2, 112, 115, 3, 2, 2, 2, 113, 111, 3, 2, 2, 2, 113, 114, 3, 2, 2, 2, 114, 117, 3, 2, 2, 2, 115, 113, 3, 2, 2, 2, 116, 118, 5, 24, 13, 2, 117, 116, 3, 2, 2, 2, 117, 118, 3, 2, 2, 2, 118, 13, 3, 2, 2, 2, 119, 120, 7, 34, 2, 2, 120, 15, 3, 2, 2, 2, 121, 122, 5, 14, 8, 2, 122, 124, 5, 32, 17, 2, 123, 125, 5, 34, 18, 2, 124, 123, 3, 2, 2, 2, 125, 126, 3, 2, 2, 2, 126, 124, 3, 2, 2, 2, 126, 127, 3, 2, 2, 2, 127, 17, 3, 2, 2, 2, 128, 129, 5, 28, 15, 2, 129, 130, 5, 30, 16, 2, 130, 19, 3, 2, 2, 2, 131, 132, 5, 28, 15, 2, 132, 
133, 7, 22, 2, 2, 133, 21, 3, 2, 2, 2, 134, 135, 5, 28, 15, 2, 135, 136, 7, 23, 2, 2, 136, 23, 3, 2, 2, 2, 137, 138, 7, 11, 2, 2, 138, 141, 5, 26, 14, 2, 139, 140, 7, 12, 2, 2, 140, 142, 5, 46, 24, 2, 141, 139, 3, 2, 2, 2, 141, 142, 3, 2, 2, 2, 142, 25, 3, 2, 2, 2, 143, 144, 5, 28, 15, 2, 144, 27, 3, 2, 2, 2, 145, 146, 7, 34, 2, 2, 146, 29, 3, 2, 2, 2, 147, 148, 9, 3, 2, 2, 148, 31, 3, 2, 2, 2, 149, 150, 7, 33, 2, 2, 150, 33, 3, 2, 2, 2, 151, 152, 9, 4, 2, 2, 152, 35, 3, 2, 2, 2, 153, 154, 5, 28, 15, 2, 154, 155, 5, 42, 22, 2, 155, 156, 5, 34, 18, 2, 156, 37, 3, 2, 2, 2, 157, 158, 5, 28, 15, 2, 158, 159, 7, 24, 2, 2, 159, 160, 5, 44, 23, 2, 160, 39, 3, 2, 2, 2, 161, 162, 5, 28, 15, 2, 162, 163, 7, 25, 2, 2, 163, 164, 7, 32, 2, 2, 164, 41, 3, 2, 2, 2, 165, 166, 9, 5, 2, 2, 166, 43, 3, 2, 2, 2, 167, 168, 7, 13, 2, 2, 168, 173, 7, 33, 2, 2, 169, 170, 7, 14, 2, 2, 170, 172, 7, 33, 2, 2, 171, 169, 3, 2, 2, 2, 172, 175, 3, 2, 2, 2, 173, 171, 3, 2, 2, 2, 173, 174, 3, 2, 2, 2, 174, 176, 3, 2, 2, 2, 175, 173, 3, 2, 2, 2, 176, 177, 7, 15, 2, 2, 177, 45, 3, 2, 2, 2, 178, 179, 7, 33, 2, 2, 179, 47, 3, 2, 2, 2, 13, 63, 71, 77, 83, 101, 107, 113, 117, 126, 141, 173] -------------------------------------------------------------------------------- /index_test.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | datastructures "github.com/deepfabric/go-datastructures" 8 | "github.com/deepfabric/indexer/cql" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | const ( 13 | NumDocs = 10000 14 | ) 15 | 16 | func newDocProt() *cql.DocumentWithIdx { 17 | return &cql.DocumentWithIdx{ 18 | Doc: cql.Document{ 19 | DocID: 0, 20 | UintProps: []*cql.UintProp{ 21 | &cql.UintProp{ 22 | Name: "object", 23 | ValLen: 8, 24 | Val: 0, 25 | }, 26 | &cql.UintProp{ 27 | Name: "price", 28 | ValLen: 4, 29 | Val: 0, 30 | }, 31 | &cql.UintProp{ 32 | Name: "priceF64", 33 | IsFloat: true, 34 | ValLen: 
8, 35 | Val: 0, 36 | }, 37 | &cql.UintProp{ 38 | Name: "number", 39 | ValLen: 4, 40 | Val: 0, 41 | }, 42 | &cql.UintProp{ 43 | Name: "date", 44 | ValLen: 8, 45 | Val: 0, 46 | }, 47 | }, 48 | StrProps: []*cql.StrProp{ 49 | &cql.StrProp{ 50 | Name: "description", 51 | Val: "", 52 | }, 53 | &cql.StrProp{ 54 | Name: "note", 55 | Val: "", 56 | }, 57 | }, 58 | }, 59 | Index: "orders", 60 | } 61 | } 62 | 63 | //TESTCASE: normal operation sequence: create, insert, del, destroy 64 | func TestIndexNormal(t *testing.T) { 65 | var err error 66 | var ind *Index 67 | var found bool 68 | var bits map[uint64][]uint64 69 | 70 | docProt := newDocProt() 71 | ind, err = NewIndex(docProt, "/tmp/index_test") 72 | require.NoError(t, err) 73 | require.Equal(t, docProt, ind.DocProt) 74 | for i := 0; i < NumDocs; i++ { 75 | doc := newDocProt() 76 | doc.Doc.DocID = uint64(i) 77 | for j := 0; j < len(doc.Doc.UintProps); j++ { 78 | val := uint64(i * (j + 1)) 79 | val, err = cql.ParseUintProp(doc.Doc.UintProps[j], fmt.Sprintf("%v", val)) 80 | require.NoError(t, err) 81 | doc.Doc.UintProps[j].Val = val 82 | } 83 | for j := 0; j < len(doc.Doc.StrProps); j++ { 84 | doc.Doc.StrProps[j].Val = fmt.Sprintf("%03d%03d and some random text", i, j) 85 | } 86 | err = ind.Insert(doc) 87 | require.NoError(t, err) 88 | } 89 | 90 | // query numerical(integer) range 91 | var qr *QueryResult 92 | var items []datastructures.Comparable 93 | low := uint64(30) 94 | high := uint64(600) 95 | cs := &cql.CqlSelect{ 96 | Index: docProt.Index, 97 | UintPreds: map[string]cql.UintPred{ 98 | "price": cql.UintPred{ 99 | Name: "price", 100 | Low: low, 101 | High: high, 102 | }, 103 | }, 104 | } 105 | qr, err = ind.Select(cs) 106 | require.NoError(t, err) 107 | fmt.Printf("query result: %v\n", qr.Bm.Bits()) 108 | // low <= 2*i <= high, (low+1)/2 <= i <= high/2 109 | want := uint64(high/2 - (low+1)/2 + 1) 110 | require.Equalf(t, want, qr.Bm.Count(), "incorrect number of matches") 111 | 112 | // query numerical range + order by + 
text 113 | cs.OrderBy = "price" 114 | cs.Limit = 20 115 | qr, err = ind.Select(cs) 116 | require.NoError(t, err) 117 | 118 | items = qr.Oa.Finalize() 119 | fmt.Printf("query result: %v\n", items) 120 | require.Equalf(t, cs.Limit, len(items), "incorrect number of matches") 121 | 122 | // dump bits 123 | for name, frame := range ind.txtFrames { 124 | var termID uint64 125 | if termID, found = frame.td.GetTermID("017001"); !found { 126 | continue 127 | } 128 | bits, err = frame.Bits() 129 | require.NoError(t, err) 130 | //fmt.Printf("frmae %v bits: %v\n", name, bits) 131 | fmt.Printf("frame %v bits[%v]: %v\n", name, termID, bits[termID]) 132 | } 133 | 134 | // query numerical range + text 135 | cs.StrPreds = map[string]cql.StrPred{ 136 | "note": cql.StrPred{ 137 | Name: "note", 138 | ContWord: "017001", 139 | // ContWord: "random", 140 | }, 141 | } 142 | cs.Limit = 20 143 | qr, err = ind.Select(cs) 144 | require.NoError(t, err) 145 | items = qr.Oa.Finalize() 146 | fmt.Printf("query result: %v\n", items) 147 | require.Equalf(t, 1, len(items), "incorrect number of matches") 148 | 149 | // query numerical(float) range 150 | valSs := []string{"30", "600"} 151 | vals := make([]uint64, len(valSs)) 152 | for i, valS := range valSs { 153 | var val uint64 154 | val, err = cql.Float64ToSortableUint64(valS) 155 | require.NoError(t, err) 156 | vals[i] = val 157 | fmt.Printf("FLOAT64 %v\t%v\n", valS, val) 158 | } 159 | low, high = vals[0], vals[1] 160 | cs = &cql.CqlSelect{ 161 | Index: docProt.Index, 162 | UintPreds: map[string]cql.UintPred{ 163 | "priceF64": cql.UintPred{ 164 | Name: "priceF64", 165 | Low: low, 166 | High: high, 167 | }, 168 | }, 169 | } 170 | qr, err = ind.Select(cs) 171 | require.NoError(t, err) 172 | fmt.Printf("query result: %v\n", qr.Bm.Bits()) 173 | // low <= 3*i <= high, (low+2)/3 <= i <= high/3 174 | want = uint64(600/3 - (30+2)/3 + 1) 175 | require.Equalf(t, want, qr.Bm.Count(), "incorrect number of matches") 176 | 177 | //delete docs 178 | for i := 0; 
i < NumDocs; i++ { 179 | doc := newDocProt() 180 | doc.Doc.DocID = uint64(i) 181 | for j := 0; j < len(doc.Doc.UintProps); j++ { 182 | doc.Doc.UintProps[j].Val = uint64(i * (j + 1)) 183 | } 184 | found, err = ind.Del(doc.Doc.DocID) 185 | require.NoError(t, err) 186 | require.Equalf(t, true, found, "document %v not found", doc) 187 | } 188 | } 189 | 190 | func TestIndexOpenClose(t *testing.T) { 191 | var err error 192 | var ind, ind2 *Index 193 | 194 | //create index 195 | docProt := newDocProt() 196 | ind, err = NewIndex(docProt, "/tmp/index_test") 197 | require.NoError(t, err) 198 | 199 | //insert documents 200 | for i := 0; i < NumDocs; i++ { 201 | doc := newDocProt() 202 | doc.Doc.DocID = uint64(i) 203 | for j := 0; j < len(doc.Doc.UintProps); j++ { 204 | doc.Doc.UintProps[j].Val = uint64(i * (j + 1)) 205 | } 206 | err = ind.Insert(doc) 207 | require.NoError(t, err) 208 | } 209 | 210 | //close index 211 | err = ind.Close() 212 | require.NoError(t, err) 213 | 214 | //open index 215 | err = ind.Open() 216 | require.NoError(t, err) 217 | 218 | //close index 219 | err = ind.Close() 220 | require.NoError(t, err) 221 | 222 | //open index with another Index object. This occurs when program restart. 
223 | ind2, err = NewIndexExt("/tmp/index_test", "orders") 224 | require.NoError(t, err) 225 | 226 | //verify DocProt keeps unchanged 227 | require.Equal(t, ind.DocProt, ind2.DocProt) 228 | 229 | //close index 230 | err = ind2.Close() 231 | require.NoError(t, err) 232 | 233 | //destroy index 234 | err = ind.Destroy() 235 | require.NoError(t, err) 236 | } 237 | -------------------------------------------------------------------------------- /text_frame_test.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "strings" 8 | "testing" 9 | 10 | "github.com/pilosa/pilosa" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestTextFrameParseWords(t *testing.T) { 15 | text := "Go's standard library does not have a function solely intended to check if a file exists or not (like Python's os.path.exists). What is the idiomatic way to do it? cindex为若干路径创建索引。索引是trigram倒排表。trigram是UTF-8文档中的连续3字节(可以是中英文混合)。posting list就是文档ID列表,将它们的delta以变长编码方式存放。整个索引存储在一个文件,在read时mmap到内存。所以索引尺寸受限于RAM。" 16 | expect := "go/s/standard/library/does/not/have/a/function/solely/intended/to/check/if/a/file/exists/or/not/like/python/s/os/path/exists/what/is/the/idiomatic/way/to/do/it/cindex/为/若/干/路/径/创/建/索/引/索/引/是/trigram/倒/排/表/trigram/是/utf/8/文/档/中/的/连/续/3/字/节/可/以/是/中/英/文/混/合/posting/list/就/是/文/档/id/列/表/将/它/们/的/delta/以/变/长/编/码/方/式/存/放/整/个/索/引/存/储/在/一/个/文/件/在/read/时/mmap/到/内/存/所/以/索/引/尺/寸/受/限/于/ram" 17 | words := ParseWords(text) 18 | fmt.Printf("text: %v\n", text) 19 | fmt.Printf("words: %v\n", strings.Join(words, "/")) 20 | require.Equal(t, expect, strings.Join(words, "/")) 21 | } 22 | 23 | func TestTextFrameDoIndex(t *testing.T) { 24 | var err error 25 | var found bool 26 | var f *TextFrame 27 | var terms []string 28 | 29 | //TESTCASE: query and insert term to an empty dict 30 | f, err = NewTextFrame("/tmp/text_frame_test", "i", "f", true) 31 | require.NoError(t, err) 32 | 
defer f.Close() 33 | 34 | text := "Go's standard library does not have a function solely intended to check if a file exists or not (like Python's os.path.exists). What is the idiomatic way to do it?" 35 | err = f.DoIndex(3, text) 36 | require.NoError(t, err) 37 | fmt.Printf("termdict size: %d\n", f.td.Count()) 38 | 39 | terms = []string{"go", "it"} 40 | for _, term := range terms { 41 | _, found = f.td.GetTermID(term) 42 | require.Equal(t, true, found, "Term %s not found", term) 43 | } 44 | 45 | terms = []string{"java", "php"} 46 | for _, term := range terms { 47 | _, found = f.td.GetTermID(term) 48 | require.Equal(t, false, found, "Term %s found", term) 49 | } 50 | } 51 | 52 | func TestTextFrameQuery(t *testing.T) { 53 | var err error 54 | var f *TextFrame 55 | var terms []string 56 | var bm *pilosa.Bitmap 57 | var bits map[uint64][]uint64 58 | 59 | //TESTCASE: query and insert term to an empty dict 60 | f, err = NewTextFrame("/tmp/text_frame_test", "i", "f", true) 61 | require.NoError(t, err) 62 | defer f.Close() 63 | 64 | docIDs := []uint64{1, 10} 65 | texts := []string{ 66 | "Go's standard library does not have a function solely intended to check if a file exists or not (like Python's os.path.exists). What is the idiomatic way to do it? 你好,世界", 67 | "This is a listing of successful results of all the various data storage and processing system benchmarks I've conducted using the dataset produced in the Billion Taxi Rides in Redshift blog post. 
The dataset itself has 1.1 billion records, 51 columns and takes up about 500 GB of disk space uncompressed.", 68 | } 69 | for i := 0; i < len(docIDs); i++ { 70 | err = f.DoIndex(docIDs[i], texts[i]) 71 | require.NoError(t, err) 72 | } 73 | fmt.Printf("termdict size after indexing: %d\n", f.td.Count()) 74 | bits, err = f.Bits() 75 | require.NoError(t, err) 76 | fmt.Printf("frame bits: %v\n", bits) 77 | 78 | terms = []string{"The", "disk", "standard function", "standard世界!", "你坏"} 79 | expDocIDs := [][]uint64{[]uint64{1, 10}, []uint64{10}, []uint64{1}, []uint64{1}, []uint64{}} 80 | for i, term := range terms { 81 | bm = f.Query(term) 82 | docIDs = bm.Bits() 83 | fmt.Printf("found term %s in documents: %v\n", term, docIDs) 84 | require.Equal(t, expDocIDs[i], docIDs) 85 | } 86 | } 87 | 88 | func TestTextFrameDestroy(t *testing.T) { 89 | var err error 90 | var f *TextFrame 91 | 92 | f, err = NewTextFrame("/tmp/text_frame_test", "i", "f", true) 93 | require.NoError(t, err) 94 | defer f.Close() 95 | 96 | text := "Go's standard library does not have a function solely intended to check if a file exists or not (like Python's os.path.exists). What is the idiomatic way to do it?" 
97 | err = f.DoIndex(3, text) 98 | require.NoError(t, err) 99 | fmt.Printf("termdict size: %d\n", f.td.Count()) 100 | 101 | err = f.Destroy() 102 | require.NoError(t, err) 103 | 104 | fps := []string{filepath.Join(f.path, "terms"), filepath.Join(f.path, "fragments")} 105 | for _, fp := range fps { 106 | if _, err := os.Stat(fp); err == nil || !os.IsNotExist(err) { 107 | t.Fatalf("path %s exists, want removed", fp) 108 | } 109 | } 110 | require.Equal(t, uint64(0), f.td.Count()) 111 | } 112 | 113 | func TestTextFrameGetFragList(t *testing.T) { 114 | var err error 115 | var f *TextFrame 116 | 117 | f, err = NewTextFrame("/tmp/text_frame_test", "i", "f", true) 118 | require.NoError(t, err) 119 | defer f.Close() 120 | 121 | text := "Go's standard library does not have a function solely intended to check if a file exists or not (like Python's os.path.exists). What is the idiomatic way to do it? 你好,世界" 122 | docIDs := []uint64{ 123 | 0, 124 | 1, 125 | pilosa.SliceWidth, 126 | pilosa.SliceWidth + 1, 127 | 9 * pilosa.SliceWidth} 128 | expFragLists := [][]uint64{ 129 | []uint64{0}, 130 | []uint64{0}, 131 | []uint64{0, 1}, 132 | []uint64{0, 1}, 133 | []uint64{0, 1, 9}} 134 | for i := 0; i < len(docIDs); i++ { 135 | err = f.DoIndex(docIDs[i], text) 136 | require.NoError(t, err) 137 | numList := f.GetFragList() 138 | require.Equal(t, expFragLists[i], numList) 139 | } 140 | 141 | //clearBit doesn't impact GetFragList 142 | docIDs = []uint64{ 143 | 0, 144 | 1, 145 | pilosa.SliceWidth, 146 | pilosa.SliceWidth + 1, 147 | 9 * pilosa.SliceWidth} 148 | expFragLists = [][]uint64{ 149 | []uint64{0, 1, 9}, 150 | []uint64{0, 1, 9}, 151 | []uint64{0, 1, 9}, 152 | []uint64{0, 1, 9}, 153 | []uint64{0, 1, 9}} 154 | for i := 0; i < len(docIDs); i++ { 155 | _, err = f.clearBit(0, docIDs[i]) 156 | require.NoError(t, err) 157 | numList := f.GetFragList() 158 | require.Equal(t, expFragLists[i], numList) 159 | } 160 | } 161 | 162 | func BenchmarkTextFrameDoIndex(b *testing.B) { 163 | var err error 
164 | var f *TextFrame 165 | f, err = NewTextFrame("/tmp/text_frame_test", "i", "f", true) 166 | require.NoError(b, err) 167 | defer f.Close() 168 | 169 | b.ResetTimer() 170 | text := "Go's standard library does not have a function solely intended to check if a file exists or not (like Python's os.path.exists). What is the idiomatic way to do it? cindex为若干路径创建索引。索引是trigram倒排表。trigram是UTF-8文档中的连续3字节(可以是中英文混合)。posting list就是文档ID列表,将它们的delta以变长编码方式存放。整个索引存储在一个文件,在read时mmap到内存。所以索引尺寸受限于RAM。" 171 | for i := 0; i < b.N; i++ { 172 | err = f.DoIndex(uint64(i), text) 173 | require.NoError(b, err) 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /int_frame.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "sort" 7 | "strconv" 8 | "sync" 9 | 10 | "github.com/pilosa/pilosa" 11 | "github.com/pilosa/pilosa/pql" 12 | "github.com/pkg/errors" 13 | ) 14 | 15 | const ( 16 | MaxUint = ^uint(0) 17 | MinUint = 0 18 | MaxInt = int(MaxUint >> 1) 19 | MinInt = -MaxInt - 1 20 | ) 21 | 22 | // IntFrame represents a string field of an index. Refers to pilosa.Frame and pilosa.View. 23 | type IntFrame struct { 24 | path string 25 | index string 26 | name string 27 | bitDepth uint 28 | rwlock sync.RWMutex //concurrent access of fragments 29 | fragments map[uint64]*pilosa.Fragment //map slice to Fragment 30 | } 31 | 32 | // NewIntFrame returns a new instance of frame, and initializes it. 
33 | func NewIntFrame(path, index, name string, bitDepth uint, overwrite bool) (f *IntFrame, err error) { 34 | if overwrite { 35 | if err = os.RemoveAll(filepath.Join(path, "fragments")); err != nil { 36 | err = errors.Wrap(err, "") 37 | return 38 | } 39 | } 40 | f = &IntFrame{ 41 | path: path, 42 | index: index, 43 | name: name, 44 | bitDepth: bitDepth, 45 | fragments: make(map[uint64]*pilosa.Fragment), 46 | } 47 | err = f.openFragments() 48 | return 49 | } 50 | 51 | //Open opens an existing frame 52 | func (f *IntFrame) Open() (err error) { 53 | if err = f.openFragments(); err != nil { 54 | return 55 | } 56 | return 57 | } 58 | 59 | func (f *IntFrame) openFragments() (err error) { 60 | var sliceList []uint64 61 | if sliceList, err = getSliceList(f.path); err != nil { 62 | return 63 | } 64 | for _, slice := range sliceList { 65 | fp := f.FragmentPath(slice) 66 | fragment := pilosa.NewFragment(fp, f.index, f.name, pilosa.ViewStandard, slice) 67 | fragment.MaxOpN = MaxInt 68 | fragment.CacheType = pilosa.CacheTypeNone 69 | if err = fragment.Open(); err != nil { 70 | err = errors.Wrap(err, "") 71 | return 72 | } 73 | f.rwlock.Lock() 74 | f.fragments[slice] = fragment 75 | f.rwlock.Unlock() 76 | } 77 | return 78 | } 79 | 80 | // Close closes all fragments without removing files on disk. 81 | // It's allowed to invoke Close multiple times. 82 | func (f *IntFrame) Close() (err error) { 83 | if err = f.closeFragments(); err != nil { 84 | return 85 | } 86 | return 87 | } 88 | 89 | // Destroy closes all fragments, removes all files on disk. 90 | // It's allowed to invoke Close before or after Destroy. 
91 | func (f *IntFrame) Destroy() (err error) { 92 | if err = f.closeFragments(); err != nil { 93 | return 94 | } 95 | if err = os.RemoveAll(filepath.Join(f.path, "fragments")); err != nil { 96 | err = errors.Wrap(err, "") 97 | return 98 | } 99 | return 100 | } 101 | 102 | func (f *IntFrame) closeFragments() (err error) { 103 | for _, fragment := range f.fragments { 104 | if err = fragment.Close(); err != nil { 105 | err = errors.Wrap(err, "") 106 | return 107 | } 108 | } 109 | f.rwlock.Lock() 110 | f.fragments = nil 111 | f.rwlock.Unlock() 112 | return 113 | } 114 | 115 | // Sync synchronizes storage bitmap to disk and reopens it. 116 | func (f *IntFrame) Sync() (err error) { 117 | f.rwlock.Lock() 118 | for _, frag := range f.fragments { 119 | if err = frag.Snapshot(); err != nil { 120 | f.rwlock.Unlock() 121 | err = errors.Wrap(err, "") 122 | return 123 | } 124 | } 125 | f.rwlock.Unlock() 126 | return 127 | } 128 | 129 | // FragmentPath returns the path to a fragment 130 | func (f *IntFrame) FragmentPath(slice uint64) string { 131 | return filepath.Join(f.path, "fragments", strconv.FormatUint(slice, 10)) 132 | } 133 | 134 | // Name returns the name the frame was initialized with. 135 | func (f *IntFrame) Name() string { return f.name } 136 | 137 | // Index returns the index name the frame was initialized with. 138 | func (f *IntFrame) Index() string { return f.index } 139 | 140 | // Path returns the path the frame was initialized with. 141 | func (f *IntFrame) Path() string { return f.path } 142 | 143 | // BitDepth returns the bit depth the frame was initialized with. 144 | func (f *IntFrame) BitDepth() uint { return f.bitDepth } 145 | 146 | // setValue sets value of a column within the frame, and expands fragments if necessary. 
147 | func (f *IntFrame) setValue(colID, val uint64) (changed bool, err error) { 148 | slice := colID / pilosa.SliceWidth 149 | f.rwlock.Lock() 150 | fragment, ok := f.fragments[slice] 151 | if !ok { 152 | fp := f.FragmentPath(slice) 153 | fragment = pilosa.NewFragment(fp, f.index, f.name, pilosa.ViewStandard, slice) 154 | fragment.MaxOpN = fragment.MaxOpN * 100 155 | fragment.CacheType = pilosa.CacheTypeNone 156 | if err = fragment.Open(); err != nil { 157 | err = errors.Wrap(err, "") 158 | f.rwlock.Unlock() 159 | return 160 | } 161 | f.fragments[slice] = fragment 162 | } 163 | f.rwlock.Unlock() 164 | changed, err = fragment.SetFieldValue(colID, f.bitDepth, val) 165 | return 166 | } 167 | 168 | // GetValue returns value of a column within the frame. 169 | func (f *IntFrame) GetValue(docID uint64) (val uint64, exists bool, err error) { 170 | slice := docID / pilosa.SliceWidth 171 | f.rwlock.RLock() 172 | fragment, ok := f.fragments[slice] 173 | f.rwlock.RUnlock() 174 | if !ok { 175 | return 176 | } 177 | val, exists, err = fragment.FieldValue(docID, f.bitDepth) 178 | return 179 | } 180 | 181 | // DoIndex parses and index a field. 182 | func (f *IntFrame) DoIndex(docID uint64, val uint64) (err error) { 183 | _, err = f.setValue(docID, val) 184 | return 185 | } 186 | 187 | //QueryRange query which documents' value is inside the given range. 188 | func (f *IntFrame) QueryRange(op pql.Token, predicate uint64) (bm *pilosa.Bitmap, err error) { 189 | var bm2 *pilosa.Bitmap 190 | bm = pilosa.NewBitmap() 191 | for _, frag := range f.fragments { 192 | bm2, err = frag.FieldRange(op, f.bitDepth, predicate) 193 | if err != nil { 194 | return 195 | } 196 | bm = bm.Union(bm2) 197 | } 198 | return 199 | } 200 | 201 | //QueryRangeBetween query which documents' value is inside the given range. 
202 | func (f *IntFrame) QueryRangeBetween(predicateMin, predicateMax uint64) (bm *pilosa.Bitmap, err error) { 203 | var bm2 *pilosa.Bitmap 204 | bm = pilosa.NewBitmap() 205 | for _, frag := range f.fragments { 206 | bm2, err = frag.FieldRangeBetween(f.bitDepth, predicateMin, predicateMax) 207 | if err != nil { 208 | return 209 | } 210 | bm = bm.Union(bm2) 211 | } 212 | return 213 | } 214 | 215 | // GetFragList returns fragments' numbers 216 | func (f *IntFrame) GetFragList() (numList []uint64) { 217 | numList = make([]uint64, len(f.fragments)) 218 | i := 0 219 | f.rwlock.RLock() 220 | for num := range f.fragments { 221 | numList[i] = num 222 | i++ 223 | } 224 | f.rwlock.RUnlock() 225 | sort.Slice(numList, func(i, j int) bool { return numList[i] < numList[j] }) 226 | return 227 | } 228 | -------------------------------------------------------------------------------- /cql/parser/cql_base_listener.go: -------------------------------------------------------------------------------- 1 | // Generated from /home/zhichyu/src/github.com/deepfabric/indexer/cql/parser/CQL.g4 by ANTLR 4.7. 2 | 3 | package parser // CQL 4 | 5 | import "github.com/antlr/antlr4/runtime/Go/antlr" 6 | 7 | // BaseCQLListener is a complete listener for a parse tree produced by CQLParser. 8 | type BaseCQLListener struct{} 9 | 10 | var _ CQLListener = &BaseCQLListener{} 11 | 12 | // VisitTerminal is called when a terminal node is visited. 13 | func (s *BaseCQLListener) VisitTerminal(node antlr.TerminalNode) {} 14 | 15 | // VisitErrorNode is called when an error node is visited. 16 | func (s *BaseCQLListener) VisitErrorNode(node antlr.ErrorNode) {} 17 | 18 | // EnterEveryRule is called when any rule is entered. 19 | func (s *BaseCQLListener) EnterEveryRule(ctx antlr.ParserRuleContext) {} 20 | 21 | // ExitEveryRule is called when any rule is exited. 22 | func (s *BaseCQLListener) ExitEveryRule(ctx antlr.ParserRuleContext) {} 23 | 24 | // EnterCql is called when production cql is entered. 
25 | func (s *BaseCQLListener) EnterCql(ctx *CqlContext) {} 26 | 27 | // ExitCql is called when production cql is exited. 28 | func (s *BaseCQLListener) ExitCql(ctx *CqlContext) {} 29 | 30 | // EnterCreate is called when production create is entered. 31 | func (s *BaseCQLListener) EnterCreate(ctx *CreateContext) {} 32 | 33 | // ExitCreate is called when production create is exited. 34 | func (s *BaseCQLListener) ExitCreate(ctx *CreateContext) {} 35 | 36 | // EnterDestroy is called when production destroy is entered. 37 | func (s *BaseCQLListener) EnterDestroy(ctx *DestroyContext) {} 38 | 39 | // ExitDestroy is called when production destroy is exited. 40 | func (s *BaseCQLListener) ExitDestroy(ctx *DestroyContext) {} 41 | 42 | // EnterInsert is called when production insert is entered. 43 | func (s *BaseCQLListener) EnterInsert(ctx *InsertContext) {} 44 | 45 | // ExitInsert is called when production insert is exited. 46 | func (s *BaseCQLListener) ExitInsert(ctx *InsertContext) {} 47 | 48 | // EnterDel is called when production del is entered. 49 | func (s *BaseCQLListener) EnterDel(ctx *DelContext) {} 50 | 51 | // ExitDel is called when production del is exited. 52 | func (s *BaseCQLListener) ExitDel(ctx *DelContext) {} 53 | 54 | // EnterQuery is called when production query is entered. 55 | func (s *BaseCQLListener) EnterQuery(ctx *QueryContext) {} 56 | 57 | // ExitQuery is called when production query is exited. 58 | func (s *BaseCQLListener) ExitQuery(ctx *QueryContext) {} 59 | 60 | // EnterIndexName is called when production indexName is entered. 61 | func (s *BaseCQLListener) EnterIndexName(ctx *IndexNameContext) {} 62 | 63 | // ExitIndexName is called when production indexName is exited. 64 | func (s *BaseCQLListener) ExitIndexName(ctx *IndexNameContext) {} 65 | 66 | // EnterDocument is called when production document is entered. 
67 | func (s *BaseCQLListener) EnterDocument(ctx *DocumentContext) {} 68 | 69 | // ExitDocument is called when production document is exited. 70 | func (s *BaseCQLListener) ExitDocument(ctx *DocumentContext) {} 71 | 72 | // EnterUintPropDef is called when production uintPropDef is entered. 73 | func (s *BaseCQLListener) EnterUintPropDef(ctx *UintPropDefContext) {} 74 | 75 | // ExitUintPropDef is called when production uintPropDef is exited. 76 | func (s *BaseCQLListener) ExitUintPropDef(ctx *UintPropDefContext) {} 77 | 78 | // EnterEnumPropDef is called when production enumPropDef is entered. 79 | func (s *BaseCQLListener) EnterEnumPropDef(ctx *EnumPropDefContext) {} 80 | 81 | // ExitEnumPropDef is called when production enumPropDef is exited. 82 | func (s *BaseCQLListener) ExitEnumPropDef(ctx *EnumPropDefContext) {} 83 | 84 | // EnterStrPropDef is called when production strPropDef is entered. 85 | func (s *BaseCQLListener) EnterStrPropDef(ctx *StrPropDefContext) {} 86 | 87 | // ExitStrPropDef is called when production strPropDef is exited. 88 | func (s *BaseCQLListener) ExitStrPropDef(ctx *StrPropDefContext) {} 89 | 90 | // EnterOrderLimit is called when production orderLimit is entered. 91 | func (s *BaseCQLListener) EnterOrderLimit(ctx *OrderLimitContext) {} 92 | 93 | // ExitOrderLimit is called when production orderLimit is exited. 94 | func (s *BaseCQLListener) ExitOrderLimit(ctx *OrderLimitContext) {} 95 | 96 | // EnterOrder is called when production order is entered. 97 | func (s *BaseCQLListener) EnterOrder(ctx *OrderContext) {} 98 | 99 | // ExitOrder is called when production order is exited. 100 | func (s *BaseCQLListener) ExitOrder(ctx *OrderContext) {} 101 | 102 | // EnterProperty is called when production property is entered. 103 | func (s *BaseCQLListener) EnterProperty(ctx *PropertyContext) {} 104 | 105 | // ExitProperty is called when production property is exited. 
106 | func (s *BaseCQLListener) ExitProperty(ctx *PropertyContext) {} 107 | 108 | // EnterUintType is called when production uintType is entered. 109 | func (s *BaseCQLListener) EnterUintType(ctx *UintTypeContext) {} 110 | 111 | // ExitUintType is called when production uintType is exited. 112 | func (s *BaseCQLListener) ExitUintType(ctx *UintTypeContext) {} 113 | 114 | // EnterDocId is called when production docId is entered. 115 | func (s *BaseCQLListener) EnterDocId(ctx *DocIdContext) {} 116 | 117 | // ExitDocId is called when production docId is exited. 118 | func (s *BaseCQLListener) ExitDocId(ctx *DocIdContext) {} 119 | 120 | // EnterValue is called when production value is entered. 121 | func (s *BaseCQLListener) EnterValue(ctx *ValueContext) {} 122 | 123 | // ExitValue is called when production value is exited. 124 | func (s *BaseCQLListener) ExitValue(ctx *ValueContext) {} 125 | 126 | // EnterUintPred is called when production uintPred is entered. 127 | func (s *BaseCQLListener) EnterUintPred(ctx *UintPredContext) {} 128 | 129 | // ExitUintPred is called when production uintPred is exited. 130 | func (s *BaseCQLListener) ExitUintPred(ctx *UintPredContext) {} 131 | 132 | // EnterEnumPred is called when production enumPred is entered. 133 | func (s *BaseCQLListener) EnterEnumPred(ctx *EnumPredContext) {} 134 | 135 | // ExitEnumPred is called when production enumPred is exited. 136 | func (s *BaseCQLListener) ExitEnumPred(ctx *EnumPredContext) {} 137 | 138 | // EnterStrPred is called when production strPred is entered. 139 | func (s *BaseCQLListener) EnterStrPred(ctx *StrPredContext) {} 140 | 141 | // ExitStrPred is called when production strPred is exited. 142 | func (s *BaseCQLListener) ExitStrPred(ctx *StrPredContext) {} 143 | 144 | // EnterCompare is called when production compare is entered. 145 | func (s *BaseCQLListener) EnterCompare(ctx *CompareContext) {} 146 | 147 | // ExitCompare is called when production compare is exited. 
148 | func (s *BaseCQLListener) ExitCompare(ctx *CompareContext) {} 149 | 150 | // EnterIntList is called when production intList is entered. 151 | func (s *BaseCQLListener) EnterIntList(ctx *IntListContext) {} 152 | 153 | // ExitIntList is called when production intList is exited. 154 | func (s *BaseCQLListener) ExitIntList(ctx *IntListContext) {} 155 | 156 | // EnterLimit is called when production limit is entered. 157 | func (s *BaseCQLListener) EnterLimit(ctx *LimitContext) {} 158 | 159 | // ExitLimit is called when production limit is exited. 160 | func (s *BaseCQLListener) ExitLimit(ctx *LimitContext) {} 161 | -------------------------------------------------------------------------------- /cql/parser/cql_antlr_test.go: -------------------------------------------------------------------------------- 1 | package parser // CQL 2 | import ( 3 | "fmt" 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | 8 | "github.com/antlr/antlr4/runtime/Go/antlr" 9 | "github.com/pkg/errors" 10 | ) 11 | 12 | type VerboseErrorListener struct { 13 | antlr.DefaultErrorListener 14 | err error 15 | } 16 | 17 | func (el *VerboseErrorListener) SyntaxError(recognizer antlr.Recognizer, offendingSymbol interface{}, line, column int, msg string, e antlr.RecognitionException) { 18 | parser := recognizer.(antlr.Parser) 19 | stack := parser.GetRuleInvocationStack(parser.GetParserRuleContext()) 20 | el.err = errors.Errorf("rule stack: %v, line %d:%d at %v: %s\n", stack, line, column, offendingSymbol, msg) 21 | } 22 | 23 | func (el *VerboseErrorListener) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs antlr.ATNConfigSet) { 24 | parser := recognizer.(antlr.Parser) 25 | stack := parser.GetRuleInvocationStack(parser.GetParserRuleContext()) 26 | el.err = errors.Errorf("rule stack: %v, ReportAmbiguity %v %v %v %v %v\n", stack, startIndex, stopIndex, exact, ambigAlts, configs) 27 | } 28 | 29 | func (el 
*VerboseErrorListener) ReportAttemptingFullContext(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, conflictingAlts *antlr.BitSet, configs antlr.ATNConfigSet) { 30 | parser := recognizer.(antlr.Parser) 31 | stack := parser.GetRuleInvocationStack(parser.GetParserRuleContext()) 32 | el.err = errors.Errorf("rule stack: %v, ReportAttemptingFullContext %v %v %v %v\n", stack, startIndex, stopIndex, conflictingAlts, configs) 33 | } 34 | 35 | func (el *VerboseErrorListener) ReportContextSensitivity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex, prediction int, configs antlr.ATNConfigSet) { 36 | parser := recognizer.(antlr.Parser) 37 | stack := parser.GetRuleInvocationStack(parser.GetParserRuleContext()) 38 | el.err = errors.Errorf("rule stack: %v, ReportContextSensitivity %v %v %v %v\n", stack, startIndex, stopIndex, prediction, configs) 39 | } 40 | 41 | type ParserCase struct { 42 | Input string 43 | ExpectError bool 44 | } 45 | 46 | func TestCqlParserError(t *testing.T) { 47 | fmt.Println("================TestCqlParserError================") 48 | tcs := []ParserCase{ 49 | //normal case 50 | {"IDX.CREATE orders SCHEMA object UINT64 price UINT32 number UINT32 date UINT64 type ENUM desc STRING", false}, 51 | //invalid token: "IDX" 52 | {"IDX orders SCHEMA object UINT64 price FLOAT number UINT32 date UINT64", true}, 53 | //invalid order of "desc STRING type ENUM" 54 | {"IDX.CREATE orders SCHEMA object UINT64 price UINT32 number UINT32 date UINT64 desc STRING type ENUM", true}, 55 | //invalid query due to LIMIT without ORDERBY 56 | {"IDX.SELECT orders WHERE price>=30 price<=40 type IN [1,3] LIMIT 30", true}, 57 | } 58 | for i, tc := range tcs { 59 | input := antlr.NewInputStream(tc.Input) 60 | lexer := NewCQLLexer(input) 61 | stream := antlr.NewCommonTokenStream(lexer, 0) 62 | parser := NewCQLParser(stream) 63 | 64 | el := new(VerboseErrorListener) 65 | parser.AddErrorListener(el) 66 | _ = parser.Cql() 67 | 68 | fmt.Println(input) 69 | if 
el.err != nil { 70 | fmt.Printf("parser raised exception %+v\n", el.err) 71 | } 72 | if (tc.ExpectError && el.err == nil) || (!tc.ExpectError && el.err != nil) { 73 | t.Fatalf("case %d failed. have %v, want %v", i, !tc.ExpectError, tc.ExpectError) 74 | } 75 | } 76 | } 77 | 78 | type CqlTestListener struct { 79 | BaseCQLListener 80 | } 81 | 82 | func NewCqlTestListener() *CqlTestListener { 83 | return new(CqlTestListener) 84 | } 85 | 86 | /*func (l *CqlTestListener) EnterEveryRule(ctx antlr.ParserRuleContext) { 87 | fmt.Println(ctx.GetText()) 88 | }*/ 89 | 90 | func (l *CqlTestListener) VisitTerminal(node antlr.TerminalNode) { 91 | fmt.Printf("VisitTerminal: %v, tokenType: %v\n", node.GetText(), node.GetSymbol().GetTokenType()) 92 | } 93 | 94 | func (l *CqlTestListener) EnterCql(ctx *CqlContext) { 95 | fmt.Printf("EnterCql: %v\n", ctx.GetText()) 96 | } 97 | 98 | func (l *CqlTestListener) ExitCreate(ctx *CreateContext) { 99 | fmt.Printf("ExitCreate: %v\n", ctx.GetText()) 100 | fmt.Printf("create indexName: %v\n", ctx.IndexName().GetText()) 101 | } 102 | 103 | //POC of listener. Printing every token's type helps to find grammer ambiguity. 
104 | func TestCqlListener(t *testing.T) { 105 | fmt.Println("================TestCqlListener================") 106 | tcs := []string{ 107 | "IDX.CREATE orders SCHEMA object UINT64 price FLOAT number UINT32 date UINT64 desc STRING", 108 | "IDX.INSERT orders 615 11 22 33 44 \"description\"", 109 | "IDX.SELECT orders WHERE price>=30 price<40 date<2017 type IN [1,3] desc CONTAINS \"pen\" ORDERBY date", 110 | } 111 | for _, tc := range tcs { 112 | input := antlr.NewInputStream(tc) 113 | lexer := NewCQLLexer(input) 114 | stream := antlr.NewCommonTokenStream(lexer, 0) 115 | parser := NewCQLParser(stream) 116 | //parser.AddErrorListener(antlr.NewDiagnosticErrorListener(true)) 117 | //parser.BuildParseTrees = true 118 | tree := parser.Cql() 119 | listener := NewCqlTestListener() 120 | antlr.ParseTreeWalkerDefault.Walk(listener, tree) 121 | } 122 | } 123 | 124 | type CqlTestVisitor struct { 125 | BaseCQLVisitor 126 | res interface{} //record the result of visitor 127 | } 128 | 129 | func (v *CqlTestVisitor) VisitCql(ctx *CqlContext) interface{} { 130 | fmt.Printf("VisitCql %v...\n", ctx) 131 | //If there are multiple subrules, then check one by one. 
132 | if create := ctx.Create(); create != nil { 133 | v.res = v.VisitCreate(create.(*CreateContext)) 134 | } else if destroy := ctx.Destroy(); destroy != nil { 135 | v.res = v.VisitDestroy(destroy.(*DestroyContext)) 136 | } 137 | return nil 138 | } 139 | 140 | type UintProp struct { 141 | Name string 142 | ValLen int //one of 1, 2, 4, 8 143 | Val uint64 144 | } 145 | 146 | type EnumProp struct { 147 | Name string 148 | Val int 149 | } 150 | 151 | type StrProp struct { 152 | Name string 153 | Val string 154 | } 155 | 156 | type Document struct { 157 | UintProps []UintProp 158 | EnumProps []EnumProp 159 | StrProps []StrProp 160 | } 161 | 162 | func (v *CqlTestVisitor) VisitCreate(ctx *CreateContext) interface{} { 163 | fmt.Println("VisitCreate...") 164 | indexName := ctx.IndexName().GetText() 165 | fmt.Printf("indexName: %s\n", indexName) 166 | var doc Document 167 | for _, popDef := range ctx.AllUintPropDef() { 168 | pop := v.VisitUintPropDef(popDef.(*UintPropDefContext)) 169 | if pop.(UintProp).ValLen == 0 { 170 | continue 171 | } 172 | doc.UintProps = append(doc.UintProps, pop.(UintProp)) 173 | } 174 | for _, popDef := range ctx.AllEnumPropDef() { 175 | pop := v.VisitEnumPropDef(popDef.(*EnumPropDefContext)) 176 | doc.EnumProps = append(doc.EnumProps, pop.(EnumProp)) 177 | } 178 | for _, popDef := range ctx.AllStrPropDef() { 179 | pop := v.VisitStrPropDef(popDef.(*StrPropDefContext)) 180 | doc.StrProps = append(doc.StrProps, pop.(StrProp)) 181 | } 182 | return fmt.Sprintf("Create index %s schema %v\n", indexName, doc) 183 | } 184 | 185 | func (v *CqlTestVisitor) VisitUintPropDef(ctx *UintPropDefContext) interface{} { 186 | fmt.Println("VisitUintPropDef...") 187 | pop := UintProp{} 188 | pop.Name = ctx.Property().GetText() 189 | uintType := ctx.UintType().(*UintTypeContext) 190 | if u8 := uintType.K_UINT8(); u8 != nil { 191 | pop.ValLen = 1 192 | } else if u16 := uintType.K_UINT16(); u16 != nil { 193 | pop.ValLen = 2 194 | } else if u32 := uintType.K_UINT32(); u32 
!= nil { 195 | pop.ValLen = 4 196 | } else if u64 := uintType.K_UINT64(); u64 != nil { 197 | pop.ValLen = 8 198 | } else { 199 | panic(fmt.Sprintf("invalid uintType: %v", ctx.UintType().GetText())) 200 | } 201 | return pop 202 | } 203 | 204 | func (v *CqlTestVisitor) VisitEnumPropDef(ctx *EnumPropDefContext) interface{} { 205 | fmt.Println("VisitEnumPropDef...") 206 | pop := EnumProp{} 207 | pop.Name = ctx.Property().GetText() 208 | return pop 209 | } 210 | 211 | func (v *CqlTestVisitor) VisitStrPropDef(ctx *StrPropDefContext) interface{} { 212 | fmt.Println("VisitStrPropDef...") 213 | pop := StrProp{} 214 | pop.Name = ctx.Property().GetText() 215 | return pop 216 | } 217 | 218 | func (v *CqlTestVisitor) VisitDestroy(ctx *DestroyContext) interface{} { 219 | fmt.Println("VisitDestroy...") 220 | indexName := ctx.IndexName().GetText() 221 | fmt.Printf("indexName: %s\n", indexName) 222 | return fmt.Sprintf("Destroy %s", indexName) 223 | } 224 | 225 | //POC of visitor 226 | func TestCqlVisitor(t *testing.T) { 227 | fmt.Println("================TestCqlVisitor================") 228 | 229 | input := antlr.NewInputStream("IDX.CREATE orders SCHEMA object UINT64 number UINT32 date UINT64 price UINT16 desc STRING") 230 | //input := antlr.NewInputStream("IDX.DESTROY orders") 231 | lexer := NewCQLLexer(input) 232 | stream := antlr.NewCommonTokenStream(lexer, 0) 233 | parser := NewCQLParser(stream) 234 | el := new(VerboseErrorListener) 235 | parser.AddErrorListener(el) 236 | //parser.BuildParseTrees = true 237 | 238 | tree := parser.Cql() 239 | require.NoErrorf(t, el.err, "parser raised exception") 240 | 241 | visitor := new(CqlTestVisitor) 242 | tree.Accept(visitor) 243 | fmt.Printf("the result of visitor: %v\n", visitor.res) 244 | } 245 | -------------------------------------------------------------------------------- /text_frame.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 
6 | "sort" 7 | "strconv" 8 | "strings" 9 | "sync" 10 | "unicode" 11 | "unicode/utf8" 12 | 13 | "github.com/deepfabric/bkdtree" 14 | "github.com/pilosa/pilosa" 15 | "github.com/pkg/errors" 16 | ) 17 | 18 | // TextFrame represents a string field of an index. Refers to pilosa.Frame and pilosa.View. 19 | type TextFrame struct { 20 | path string 21 | index string 22 | name string 23 | 24 | rwlock sync.RWMutex //concurrent access of fragments 25 | fragments map[uint64]*pilosa.Fragment //map slice to Fragment 26 | td *TermDict 27 | } 28 | 29 | // NewTextFrame returns a new instance of frame, and initializes it. 30 | func NewTextFrame(path, index, name string, overwrite bool) (f *TextFrame, err error) { 31 | var td *TermDict 32 | if td, err = NewTermDict(path, overwrite); err != nil { 33 | return 34 | } 35 | if overwrite { 36 | if err = os.RemoveAll(filepath.Join(path, "fragments")); err != nil { 37 | err = errors.Wrap(err, "") 38 | return 39 | } 40 | } 41 | f = &TextFrame{ 42 | path: path, 43 | index: index, 44 | name: name, 45 | td: td, 46 | fragments: make(map[uint64]*pilosa.Fragment), 47 | } 48 | err = f.openFragments() 49 | return 50 | } 51 | 52 | //Open opens an existing frame 53 | func (f *TextFrame) Open() (err error) { 54 | if err = f.openFragments(); err != nil { 55 | return 56 | } 57 | err = f.td.Open() 58 | return 59 | } 60 | 61 | func (f *TextFrame) openFragments() (err error) { 62 | var sliceList []uint64 63 | if sliceList, err = getSliceList(f.path); err != nil { 64 | return 65 | } 66 | for _, slice := range sliceList { 67 | fp := f.FragmentPath(slice) 68 | fragment := pilosa.NewFragment(fp, f.index, f.name, pilosa.ViewStandard, slice) 69 | fragment.MaxOpN = fragment.MaxOpN * 100 70 | fragment.CacheType = pilosa.CacheTypeNone 71 | if err = fragment.Open(); err != nil { 72 | err = errors.Wrap(err, "") 73 | return 74 | } 75 | f.rwlock.Lock() 76 | f.fragments[slice] = fragment 77 | f.rwlock.Unlock() 78 | } 79 | return 80 | } 81 | 82 | func getSliceList(dir 
string) (numList []uint64, err error) { 83 | var num uint64 84 | var matches [][]string 85 | fragDir := filepath.Join(dir, "fragments") 86 | if err = os.MkdirAll(fragDir, 0700); err != nil { 87 | err = errors.Wrap(err, "") 88 | return 89 | } 90 | if matches, err = bkdtree.FilepathGlob(fragDir, "^(?P[0-9]+)$"); err != nil { 91 | return 92 | } 93 | for _, match := range matches { 94 | num, err = strconv.ParseUint(match[1], 10, 64) 95 | if err != nil { 96 | err = errors.Wrap(err, "") 97 | return 98 | } 99 | numList = append(numList, num) 100 | } 101 | return 102 | } 103 | 104 | // Close closes all fragments without removing files on disk. 105 | // It's allowed to invoke Close multiple times. 106 | func (f *TextFrame) Close() (err error) { 107 | if err = f.closeFragments(); err != nil { 108 | return 109 | } 110 | err = f.td.Close() 111 | return 112 | } 113 | 114 | // Destroy closes all fragments, removes all files on disk. 115 | // It's allowed to invoke Close before or after Destroy. 116 | func (f *TextFrame) Destroy() (err error) { 117 | if err = f.closeFragments(); err != nil { 118 | return 119 | } 120 | if err = os.RemoveAll(filepath.Join(f.path, "fragments")); err != nil { 121 | err = errors.Wrap(err, "") 122 | return 123 | } 124 | err = f.td.Destroy() 125 | return 126 | } 127 | 128 | func (f *TextFrame) closeFragments() (err error) { 129 | for _, fragment := range f.fragments { 130 | if err = fragment.Close(); err != nil { 131 | err = errors.Wrap(err, "") 132 | return 133 | } 134 | } 135 | f.rwlock.Lock() 136 | f.fragments = nil 137 | f.rwlock.Unlock() 138 | return 139 | } 140 | 141 | // Sync synchronizes storage bitmap to disk and reopens it. 
142 | func (f *TextFrame) Sync() (err error) { 143 | f.rwlock.Lock() 144 | for _, frag := range f.fragments { 145 | if err = frag.Snapshot(); err != nil { 146 | f.rwlock.Unlock() 147 | err = errors.Wrap(err, "") 148 | return 149 | } 150 | } 151 | f.rwlock.Unlock() 152 | return 153 | } 154 | 155 | // FragmentPath returns the path to a fragment 156 | func (f *TextFrame) FragmentPath(slice uint64) string { 157 | return filepath.Join(f.path, "fragments", strconv.FormatUint(slice, 10)) 158 | } 159 | 160 | // Name returns the name the frame was initialized with. 161 | func (f *TextFrame) Name() string { return f.name } 162 | 163 | // Index returns the index name the frame was initialized with. 164 | func (f *TextFrame) Index() string { return f.index } 165 | 166 | // Path returns the path the frame was initialized with. 167 | func (f *TextFrame) Path() string { return f.path } 168 | 169 | // setBit sets a bit within the frame, and expands fragments if necessary. 170 | func (f *TextFrame) setBit(rowID, colID uint64) (changed bool, err error) { 171 | slice := colID / pilosa.SliceWidth 172 | f.rwlock.Lock() 173 | fragment, ok := f.fragments[slice] 174 | if !ok { 175 | fp := f.FragmentPath(slice) 176 | fragment = pilosa.NewFragment(fp, f.index, f.name, pilosa.ViewStandard, slice) 177 | fragment.MaxOpN = MaxInt 178 | fragment.CacheType = pilosa.CacheTypeNone 179 | if err = fragment.Open(); err != nil { 180 | err = errors.Wrap(err, "") 181 | f.rwlock.Unlock() 182 | return 183 | } 184 | f.fragments[slice] = fragment 185 | } 186 | f.rwlock.Unlock() 187 | changed, err = fragment.SetBit(rowID, colID) 188 | return 189 | } 190 | 191 | // clearBit clears a bit within the frame. 
192 | func (f *TextFrame) clearBit(rowID, colID uint64) (changed bool, err error) { 193 | slice := colID / pilosa.SliceWidth 194 | f.rwlock.RLock() 195 | fragment, ok := f.fragments[slice] 196 | f.rwlock.RUnlock() 197 | if !ok { 198 | return 199 | } 200 | changed, err = fragment.ClearBit(rowID, colID) 201 | return 202 | } 203 | 204 | //row returns the given row as a pilosa.Bitmap. 205 | func (f *TextFrame) row(rowID uint64) (bm *pilosa.Bitmap) { 206 | bm = pilosa.NewBitmap() 207 | f.rwlock.RLock() 208 | for _, fragment := range f.fragments { 209 | bm2 := fragment.Row(rowID) 210 | bm.Merge(bm2) 211 | } 212 | f.rwlock.RUnlock() 213 | return 214 | } 215 | 216 | // Bits returns bits set in frame. 217 | func (f *TextFrame) Bits() (bits map[uint64][]uint64, err error) { 218 | var ok bool 219 | bits = make(map[uint64][]uint64) 220 | var columns []uint64 221 | f.rwlock.RLock() 222 | defer f.rwlock.RUnlock() 223 | for _, fragment := range f.fragments { 224 | err = fragment.ForEachBit( 225 | func(rowID, columnID uint64) error { 226 | columns, ok = bits[rowID] 227 | if ok { 228 | columns = append(columns, columnID) 229 | } else { 230 | columns = []uint64{columnID} 231 | } 232 | bits[rowID] = columns 233 | return nil 234 | }, 235 | ) 236 | if err != nil { 237 | return 238 | } 239 | } 240 | return 241 | } 242 | 243 | // Count returns number of bits set in frame. 244 | func (f *TextFrame) Count() (cnt uint64, err error) { 245 | f.rwlock.RLock() 246 | defer f.rwlock.RUnlock() 247 | for _, fragment := range f.fragments { 248 | err = fragment.ForEachBit( 249 | func(rowID, columnID uint64) error { 250 | cnt++ 251 | return nil 252 | }, 253 | ) 254 | if err != nil { 255 | return 256 | } 257 | } 258 | return 259 | } 260 | 261 | // DoIndex parses and index a field. 
262 | func (f *TextFrame) DoIndex(docID uint64, text string) (err error) { 263 | //https://stackoverflow.com/questions/13737745/split-a-string-on-whitespace-in-go 264 | /*terms := strings.Fields(text) 265 | for i, term := range terms { 266 | terms[i] = strings.ToLower(term) 267 | }*/ 268 | terms := ParseWords(text) 269 | ids, err := f.td.CreateTermsIfNotExist(terms) 270 | if err != nil { 271 | return 272 | } 273 | for _, termID := range ids { 274 | if _, err = f.setBit(termID, docID); err != nil { 275 | return 276 | } 277 | } 278 | return 279 | } 280 | 281 | //Query query which documents contain the given term. 282 | func (f *TextFrame) Query(text string) (bm *pilosa.Bitmap) { 283 | words := ParseWords(text) 284 | var bm2 *pilosa.Bitmap 285 | for _, word := range words { 286 | termID, found := f.td.GetTermID(word) 287 | if !found { 288 | bm = pilosa.NewBitmap() 289 | return 290 | } 291 | bm2 = f.row(termID) 292 | if bm != nil { 293 | bm = bm.Intersect(bm2) 294 | } else { 295 | bm = bm2 296 | } 297 | } 298 | return 299 | } 300 | 301 | // GetFragList returns fragments' numbers 302 | func (f *TextFrame) GetFragList() (numList []uint64) { 303 | numList = make([]uint64, len(f.fragments)) 304 | i := 0 305 | f.rwlock.RLock() 306 | for num := range f.fragments { 307 | numList[i] = num 308 | i++ 309 | } 310 | f.rwlock.RUnlock() 311 | sort.Slice(numList, func(i, j int) bool { return numList[i] < numList[j] }) 312 | return 313 | } 314 | 315 | var asciiSpace = [128]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 316 | 317 | //ParseWords parses text(encoded in UTF-8) for words. 318 | //A word is a non-ascii-space lowered ASCII character sequence, or a non-ASCII non-unicode-space non-chinese-punctuate character. 319 | //Note: words are not de-duplicated. 
320 | func ParseWords(text string) (words []string) { 321 | lenText := len(text) 322 | words = make([]string, 0, lenText/3) 323 | i := 0 324 | for i < lenText { 325 | j := i 326 | var c byte 327 | for j < lenText { 328 | if c = text[j]; c < 0x80 && asciiSpace[c] == 0 && unicode.IsPrint(rune(c)) && !unicode.IsPunct(rune(c)) { 329 | j++ 330 | } else { 331 | break 332 | } 333 | } 334 | if i < j { 335 | // text[i:j] is a printable non-space ASCII character sequence. 336 | words = append(words, strings.ToLower(text[i:j])) 337 | i = j 338 | } else if c < 0x80 { 339 | // i==j, text[i] is an ascii space, non-printable or punctuation character. 340 | i++ 341 | } else { 342 | // i==j, text[i] is the begin of an non-ascii character 343 | r, w := utf8.DecodeRuneInString(text[i:]) 344 | if unicode.IsPrint(rune(c)) && !unicode.IsSpace(r) && !unicode.IsPunct(r) { 345 | words = append(words, text[i:i+w]) 346 | } 347 | i += w 348 | } 349 | } 350 | return 351 | } 352 | -------------------------------------------------------------------------------- /index.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "sync" 8 | 9 | "github.com/deepfabric/bkdtree" 10 | "github.com/deepfabric/go-datastructures" 11 | "github.com/deepfabric/indexer/cql" 12 | "github.com/pilosa/pilosa" 13 | "github.com/pkg/errors" 14 | ) 15 | 16 | const ( 17 | LiveDocs string = "__liveDocs" // the directory where stores Index.liveDocs 18 | ) 19 | 20 | var ( 21 | ErrUnknownProp = errors.New("unknown property") 22 | ErrDocExist = errors.New("document already exist") 23 | ) 24 | 25 | //Index is created by CqlCreate 26 | type Index struct { 27 | MainDir string 28 | DocProt *cql.DocumentWithIdx //document prototype. 
persisted to an index-specific file 29 | 30 | rwlock sync.RWMutex //concurrent access of frames, liveDocs 31 | intFrames map[string]*IntFrame 32 | txtFrames map[string]*TextFrame 33 | liveDocs *TextFrame //row 0 of this frame stores a bitmap of live docIDs. other rows are not used. 34 | dirty bool 35 | } 36 | 37 | // QueryResult is query result 38 | type QueryResult struct { 39 | Bm *pilosa.Bitmap // used when no OrderBy given 40 | Oa *datastructures.OrderedArray // used when OrderBy given 41 | } 42 | 43 | // Merge merges other (keep unchagned) into qr 44 | func (qr *QueryResult) Merge(other *QueryResult) { 45 | qr.Bm.Merge(other.Bm) 46 | qr.Oa.Merge(other.Oa) 47 | } 48 | 49 | // NewQueryResult creates an empty QueryResult 50 | func NewQueryResult(limit int) (qr *QueryResult) { 51 | qr = &QueryResult{ 52 | Bm: pilosa.NewBitmap(), 53 | Oa: datastructures.NewOrderedArray(limit), 54 | } 55 | return 56 | } 57 | 58 | //NewIndex creates index according to given conf, overwrites existing files. 
59 | func NewIndex(docProt *cql.DocumentWithIdx, mainDir string) (ind *Index, err error) { 60 | if err = indexWriteConf(mainDir, docProt); err != nil { 61 | return 62 | } 63 | // ensure per-index sub-directory exists 64 | indDir := filepath.Join(mainDir, docProt.Index) 65 | if err = os.MkdirAll(indDir, 0700); err != nil { 66 | return 67 | } 68 | ind = &Index{ 69 | MainDir: mainDir, 70 | DocProt: docProt, 71 | intFrames: make(map[string]*IntFrame), 72 | txtFrames: make(map[string]*TextFrame), 73 | } 74 | var ifm *IntFrame 75 | for _, uintProp := range docProt.Doc.UintProps { 76 | dir := filepath.Join(indDir, uintProp.Name) 77 | if ifm, err = NewIntFrame(dir, docProt.Index, uintProp.Name, uint(uintProp.ValLen*8), true); err != nil { 78 | return 79 | } 80 | ind.intFrames[uintProp.Name] = ifm 81 | } 82 | var tfm *TextFrame 83 | for _, strProp := range docProt.Doc.StrProps { 84 | dir := filepath.Join(indDir, strProp.Name) 85 | if tfm, err = NewTextFrame(dir, docProt.Index, strProp.Name, true); err != nil { 86 | return 87 | } 88 | ind.txtFrames[strProp.Name] = tfm 89 | } 90 | dir := filepath.Join(indDir, LiveDocs) 91 | if tfm, err = NewTextFrame(dir, docProt.Index, LiveDocs, true); err != nil { 92 | return 93 | } 94 | ind.liveDocs = tfm 95 | return 96 | } 97 | 98 | //indexWriteConf persists conf to given path. 
99 | func indexWriteConf(mainDir string, docProt *cql.DocumentWithIdx) (err error) { 100 | if err = os.MkdirAll(mainDir, 0700); err != nil { 101 | err = errors.Wrap(err, "") 102 | return 103 | } 104 | fp := filepath.Join(mainDir, fmt.Sprintf("index_%s.json", docProt.Index)) 105 | err = bkdtree.FileMarshal(fp, docProt) 106 | return 107 | } 108 | 109 | //indexReadConf parses conf 110 | func indexReadConf(mainDir string, name string, docProt *cql.DocumentWithIdx) (err error) { 111 | fp := filepath.Join(mainDir, fmt.Sprintf("index_%s.json", name)) 112 | err = bkdtree.FileUnmarshal(fp, docProt) 113 | return 114 | } 115 | 116 | //Destroy removes data and conf files on disk. 117 | func (ind *Index) Destroy() (err error) { 118 | ind.rwlock.Lock() 119 | defer ind.rwlock.Unlock() 120 | if ind.liveDocs != nil { 121 | for _, ifm := range ind.intFrames { 122 | if err = ifm.Destroy(); err != nil { 123 | return 124 | } 125 | } 126 | for _, tfm := range ind.txtFrames { 127 | if err = tfm.Destroy(); err != nil { 128 | return 129 | } 130 | } 131 | if err = ind.liveDocs.Destroy(); err != nil { 132 | return 133 | } 134 | ind.intFrames = nil 135 | ind.txtFrames = nil 136 | ind.liveDocs = nil 137 | } 138 | 139 | paths := make([]string, 0) 140 | for _, uintProp := range ind.DocProt.Doc.UintProps { 141 | paths = append(paths, filepath.Join(ind.MainDir, uintProp.Name)) 142 | } 143 | for _, strProp := range ind.DocProt.Doc.StrProps { 144 | paths = append(paths, filepath.Join(ind.MainDir, strProp.Name)) 145 | } 146 | paths = append(paths, filepath.Join(ind.MainDir, LiveDocs)) 147 | paths = append(paths, filepath.Join(ind.MainDir, fmt.Sprintf("index_%s.json", ind.DocProt.Index))) 148 | for _, fp := range paths { 149 | if err = os.RemoveAll(fp); err != nil { 150 | err = errors.Wrap(err, "") 151 | } 152 | } 153 | ind.dirty = false 154 | return 155 | } 156 | 157 | //NewIndexExt create index according to existing files. 
158 | func NewIndexExt(mainDir, name string) (ind *Index, err error) { 159 | docProt := &cql.DocumentWithIdx{} 160 | if err = indexReadConf(mainDir, name, docProt); err != nil { 161 | return 162 | } 163 | ind = &Index{ 164 | MainDir: mainDir, 165 | DocProt: docProt, 166 | } 167 | err = ind.Open() 168 | return 169 | } 170 | 171 | //Open opens existing index. Assumes MainDir and DocProt is already populated. 172 | func (ind *Index) Open() (err error) { 173 | ind.rwlock.Lock() 174 | defer ind.rwlock.Unlock() 175 | if ind.liveDocs != nil { 176 | //index is already open 177 | return 178 | } 179 | indDir := filepath.Join(ind.MainDir, ind.DocProt.Index) 180 | ind.intFrames = make(map[string]*IntFrame) 181 | var ifm *IntFrame 182 | for _, uintProp := range ind.DocProt.Doc.UintProps { 183 | dir := filepath.Join(indDir, uintProp.Name) 184 | if ifm, err = NewIntFrame(dir, ind.DocProt.Index, uintProp.Name, uint(uintProp.ValLen*8), false); err != nil { 185 | return 186 | } 187 | ind.intFrames[uintProp.Name] = ifm 188 | } 189 | ind.txtFrames = make(map[string]*TextFrame) 190 | var tfm *TextFrame 191 | for _, strProp := range ind.DocProt.Doc.StrProps { 192 | dir := filepath.Join(indDir, strProp.Name) 193 | if tfm, err = NewTextFrame(dir, ind.DocProt.Index, strProp.Name, false); err != nil { 194 | return 195 | } 196 | ind.txtFrames[strProp.Name] = tfm 197 | } 198 | dir := filepath.Join(indDir, LiveDocs) 199 | if tfm, err = NewTextFrame(dir, ind.DocProt.Index, LiveDocs, false); err != nil { 200 | return 201 | } 202 | ind.liveDocs = tfm 203 | ind.dirty = false 204 | return 205 | } 206 | 207 | //Close closes index 208 | func (ind *Index) Close() (err error) { 209 | ind.rwlock.Lock() 210 | defer ind.rwlock.Unlock() 211 | if ind.liveDocs == nil { 212 | //index is already closed 213 | return 214 | } 215 | for _, ifm := range ind.intFrames { 216 | if err = ifm.Close(); err != nil { 217 | return 218 | } 219 | } 220 | for _, tfm := range ind.txtFrames { 221 | if err = tfm.Close(); err != 
nil { 222 | return 223 | } 224 | } 225 | if err = ind.liveDocs.Close(); err != nil { 226 | return 227 | } 228 | ind.intFrames = nil 229 | ind.txtFrames = nil 230 | ind.liveDocs = nil 231 | ind.dirty = false 232 | return 233 | } 234 | 235 | // Sync synchronizes index to disk 236 | func (ind *Index) Sync() (err error) { 237 | ind.rwlock.Lock() 238 | defer ind.rwlock.Unlock() 239 | if !ind.dirty { 240 | return 241 | } 242 | for _, ifm := range ind.intFrames { 243 | if err = ifm.Sync(); err != nil { 244 | return 245 | } 246 | } 247 | for _, tfm := range ind.txtFrames { 248 | if err = tfm.Sync(); err != nil { 249 | return 250 | } 251 | } 252 | if err = ind.liveDocs.Sync(); err != nil { 253 | return 254 | } 255 | ind.dirty = false 256 | return 257 | } 258 | 259 | //Insert executes CqlInsert 260 | func (ind *Index) Insert(doc *cql.DocumentWithIdx) (err error) { 261 | var ifm *IntFrame 262 | var tfm *TextFrame 263 | var changed, ok bool 264 | ind.rwlock.RLock() 265 | defer ind.rwlock.RUnlock() 266 | //check if doc.DocID is already there before insertion. 
267 | if changed, err = ind.liveDocs.setBit(0, doc.Doc.DocID); err != nil { 268 | return 269 | } else if !changed { 270 | err = errors.Wrapf(ErrDocExist, "document %v is alaredy there before insertion", doc.Doc.DocID) 271 | return 272 | } 273 | for _, uintProp := range doc.Doc.UintProps { 274 | if ifm, ok = ind.intFrames[uintProp.Name]; !ok { 275 | err = errors.Wrapf(ErrUnknownProp, "property %v is missing at index spec, document %v, index spec %v", uintProp.Name, doc, ind.DocProt) 276 | return 277 | } 278 | if err = ifm.DoIndex(doc.Doc.DocID, uintProp.Val); err != nil { 279 | return 280 | } 281 | } 282 | for _, strProp := range doc.Doc.StrProps { 283 | if tfm, ok = ind.txtFrames[strProp.Name]; !ok { 284 | err = errors.Wrapf(ErrUnknownProp, "property %v is missing at index spec, document %v, index spec %v", strProp.Name, doc, ind.DocProt) 285 | return 286 | } 287 | if err = tfm.DoIndex(doc.Doc.DocID, strProp.Val); err != nil { 288 | return 289 | } 290 | } 291 | ind.dirty = true 292 | return 293 | } 294 | 295 | //Del executes CqlDel. Do mark-deletion only. The caller shall rebuild index in order to recycle disk space. 296 | func (ind *Index) Del(docID uint64) (found bool, err error) { 297 | var changed bool 298 | ind.rwlock.RLock() 299 | defer ind.rwlock.RUnlock() 300 | if changed, err = ind.liveDocs.clearBit(0, docID); err != nil { 301 | return 302 | } else if !changed { 303 | return 304 | } 305 | found = true 306 | ind.dirty = true 307 | return 308 | } 309 | 310 | //Select executes CqlSelect. 
311 | func (ind *Index) Select(q *cql.CqlSelect) (qr *QueryResult, err error) { 312 | qr = &QueryResult{ 313 | Bm: pilosa.NewBitmap(), 314 | Oa: datastructures.NewOrderedArray(q.Limit), 315 | } 316 | var ifm *IntFrame 317 | var tfm *TextFrame 318 | var ok bool 319 | var prevDocs, docs *pilosa.Bitmap 320 | 321 | ind.rwlock.RLock() 322 | defer ind.rwlock.RUnlock() 323 | prevDocs = ind.liveDocs.row(0) 324 | if prevDocs.Count() == 0 { 325 | return 326 | } 327 | if len(q.StrPreds) != 0 { 328 | for _, strPred := range q.StrPreds { 329 | if tfm, ok = ind.txtFrames[strPred.Name]; !ok { 330 | err = errors.Wrapf(ErrUnknownProp, "property %s not found in index spec", strPred.Name) 331 | return 332 | } 333 | docs = tfm.Query(strPred.ContWord) 334 | prevDocs = prevDocs.Intersect(docs) 335 | if prevDocs.Count() == 0 { 336 | return 337 | } 338 | } 339 | } 340 | 341 | if len(q.UintPreds) == 0 { 342 | qr.Bm = prevDocs 343 | return 344 | } 345 | 346 | var ifmOrder *IntFrame 347 | for _, uintPred := range q.UintPreds { 348 | if ifm, ok = ind.intFrames[uintPred.Name]; !ok { 349 | err = errors.Wrapf(ErrUnknownProp, "property %s not found in index spec", uintPred.Name) 350 | return 351 | } 352 | if q.OrderBy == uintPred.Name { 353 | ifmOrder = ifm 354 | } 355 | var bm *pilosa.Bitmap 356 | if bm, err = ifm.QueryRangeBetween(uintPred.Low, uintPred.High); err != nil { 357 | return 358 | } 359 | prevDocs = prevDocs.Intersect(bm) 360 | if prevDocs.Count() == 0 { 361 | return 362 | } 363 | } 364 | 365 | if ifmOrder == nil { 366 | qr.Bm = prevDocs 367 | } else { 368 | var val uint64 369 | var exists bool 370 | for _, docID := range prevDocs.Bits() { 371 | if val, exists, err = ifmOrder.GetValue(docID); err != nil { 372 | return 373 | } 374 | if exists { 375 | point := bkdtree.Point{ 376 | Vals: []uint64{val}, 377 | UserData: docID, 378 | } 379 | qr.Oa.Put(point) 380 | } 381 | } 382 | } 383 | 384 | return 385 | } 386 | 387 | //GetDocIDFragList returns DocID fragment list. 
Each fragment's size is pilosa.SliceWidth 388 | func (ind *Index) GetDocIDFragList() (numList []uint64) { 389 | return ind.liveDocs.GetFragList() 390 | } 391 | -------------------------------------------------------------------------------- /cql/parser/CQLLexer.interp: -------------------------------------------------------------------------------- 1 | token literal names: 2 | null 3 | 'IDX.CREATE' 4 | 'SCHEMA' 5 | 'IDX.DESTROY' 6 | 'IDX.INSERT' 7 | 'IDX.DEL' 8 | 'IDX.SELECT' 9 | 'QUERY' 10 | 'WHERE' 11 | 'ORDERBY' 12 | 'LIMIT' 13 | '[' 14 | ',' 15 | ']' 16 | 'UINT8' 17 | 'UINT16' 18 | 'UINT32' 19 | 'UINT64' 20 | 'FLOAT32' 21 | 'FLOAT64' 22 | 'ENUM' 23 | 'STRING' 24 | 'IN' 25 | 'CONTAINS' 26 | '<' 27 | '>' 28 | '=' 29 | '<=' 30 | '>=' 31 | null 32 | null 33 | null 34 | null 35 | null 36 | 37 | token symbolic names: 38 | null 39 | null 40 | null 41 | null 42 | null 43 | null 44 | null 45 | null 46 | null 47 | null 48 | null 49 | null 50 | null 51 | null 52 | K_UINT8 53 | K_UINT16 54 | K_UINT32 55 | K_UINT64 56 | K_FLOAT32 57 | K_FLOAT64 58 | K_ENUM 59 | K_STRING 60 | K_IN 61 | K_CONTAINS 62 | K_LT 63 | K_BT 64 | K_EQ 65 | K_LE 66 | K_BE 67 | FLOAT_LIT 68 | STRING 69 | INT 70 | IDENTIFIER 71 | WS 72 | 73 | rule names: 74 | T__0 75 | T__1 76 | T__2 77 | T__3 78 | T__4 79 | T__5 80 | T__6 81 | T__7 82 | T__8 83 | T__9 84 | T__10 85 | T__11 86 | T__12 87 | K_UINT8 88 | K_UINT16 89 | K_UINT32 90 | K_UINT64 91 | K_FLOAT32 92 | K_FLOAT64 93 | K_ENUM 94 | K_STRING 95 | K_IN 96 | K_CONTAINS 97 | K_LT 98 | K_BT 99 | K_EQ 100 | K_LE 101 | K_BE 102 | FLOAT_LIT 103 | DECIMALS 104 | EXPONENT 105 | DECIMAL_DIGIT 106 | STRING 107 | ESC 108 | UNICODE 109 | HEX 110 | INT 111 | EXP 112 | IDENTIFIER 113 | WS 114 | 115 | channel names: 116 | DEFAULT_TOKEN_CHANNEL 117 | HIDDEN 118 | 119 | mode names: 120 | DEFAULT_MODE 121 | 122 | atn: 123 | [3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 35, 338, 8, 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 
7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 14, 9, 14, 4, 15, 9, 15, 4, 16, 9, 16, 4, 17, 9, 17, 4, 18, 9, 18, 4, 19, 9, 19, 4, 20, 9, 20, 4, 21, 9, 21, 4, 22, 9, 22, 4, 23, 9, 23, 4, 24, 9, 24, 4, 25, 9, 25, 4, 26, 9, 26, 4, 27, 9, 27, 4, 28, 9, 28, 4, 29, 9, 29, 4, 30, 9, 30, 4, 31, 9, 31, 4, 32, 9, 32, 4, 33, 9, 33, 4, 34, 9, 34, 4, 35, 9, 35, 4, 36, 9, 36, 4, 37, 9, 37, 4, 38, 9, 38, 4, 39, 9, 39, 4, 40, 9, 40, 4, 41, 9, 41, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 12, 3, 12, 3, 13, 3, 13, 3, 14, 3, 14, 3, 15, 3, 15, 3, 15, 3, 15, 3, 15, 3, 15, 3, 16, 3, 16, 3, 16, 3, 16, 3, 16, 3, 16, 3, 16, 3, 17, 3, 17, 3, 17, 3, 17, 3, 17, 3, 17, 3, 17, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 21, 3, 21, 3, 21, 3, 21, 3, 21, 3, 22, 3, 22, 3, 22, 3, 22, 3, 22, 3, 22, 3, 22, 3, 23, 3, 23, 3, 23, 3, 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 25, 3, 25, 3, 26, 3, 26, 3, 27, 3, 27, 3, 28, 3, 28, 3, 28, 3, 29, 3, 29, 3, 29, 3, 30, 3, 30, 3, 30, 5, 30, 258, 10, 30, 3, 30, 5, 30, 261, 10, 30, 3, 30, 3, 30, 3, 30, 3, 30, 3, 30, 3, 30, 5, 30, 269, 10, 30, 5, 30, 271, 10, 30, 3, 31, 6, 31, 274, 10, 31, 13, 31, 14, 31, 275, 3, 32, 3, 32, 5, 32, 280, 10, 32, 3, 32, 3, 32, 3, 33, 3, 33, 3, 34, 3, 34, 3, 34, 7, 34, 289, 10, 34, 12, 34, 14, 34, 292, 11, 34, 3, 34, 3, 34, 3, 35, 3, 35, 3, 35, 5, 35, 299, 10, 35, 3, 36, 3, 36, 
3, 36, 3, 36, 3, 36, 3, 36, 3, 37, 3, 37, 3, 38, 3, 38, 3, 38, 7, 38, 312, 10, 38, 12, 38, 14, 38, 315, 11, 38, 5, 38, 317, 10, 38, 3, 39, 3, 39, 5, 39, 321, 10, 39, 3, 39, 3, 39, 3, 40, 3, 40, 7, 40, 327, 10, 40, 12, 40, 14, 40, 330, 11, 40, 3, 41, 6, 41, 333, 10, 41, 13, 41, 14, 41, 334, 3, 41, 3, 41, 2, 2, 42, 3, 3, 5, 4, 7, 5, 9, 6, 11, 7, 13, 8, 15, 9, 17, 10, 19, 11, 21, 12, 23, 13, 25, 14, 27, 15, 29, 16, 31, 17, 33, 18, 35, 19, 37, 20, 39, 21, 41, 22, 43, 23, 45, 24, 47, 25, 49, 26, 51, 27, 53, 28, 55, 29, 57, 30, 59, 31, 61, 2, 63, 2, 65, 2, 67, 32, 69, 2, 71, 2, 73, 2, 75, 33, 77, 2, 79, 34, 81, 35, 3, 2, 12, 4, 2, 71, 71, 103, 103, 4, 2, 45, 45, 47, 47, 3, 2, 50, 59, 4, 2, 36, 36, 94, 94, 10, 2, 36, 36, 49, 49, 94, 94, 100, 100, 104, 104, 112, 112, 116, 116, 118, 118, 5, 2, 50, 59, 67, 72, 99, 104, 3, 2, 51, 59, 5, 2, 67, 92, 97, 97, 99, 124, 6, 2, 50, 59, 67, 92, 97, 97, 99, 124, 5, 2, 11, 12, 15, 15, 34, 34, 2, 345, 2, 3, 3, 2, 2, 2, 2, 5, 3, 2, 2, 2, 2, 7, 3, 2, 2, 2, 2, 9, 3, 2, 2, 2, 2, 11, 3, 2, 2, 2, 2, 13, 3, 2, 2, 2, 2, 15, 3, 2, 2, 2, 2, 17, 3, 2, 2, 2, 2, 19, 3, 2, 2, 2, 2, 21, 3, 2, 2, 2, 2, 23, 3, 2, 2, 2, 2, 25, 3, 2, 2, 2, 2, 27, 3, 2, 2, 2, 2, 29, 3, 2, 2, 2, 2, 31, 3, 2, 2, 2, 2, 33, 3, 2, 2, 2, 2, 35, 3, 2, 2, 2, 2, 37, 3, 2, 2, 2, 2, 39, 3, 2, 2, 2, 2, 41, 3, 2, 2, 2, 2, 43, 3, 2, 2, 2, 2, 45, 3, 2, 2, 2, 2, 47, 3, 2, 2, 2, 2, 49, 3, 2, 2, 2, 2, 51, 3, 2, 2, 2, 2, 53, 3, 2, 2, 2, 2, 55, 3, 2, 2, 2, 2, 57, 3, 2, 2, 2, 2, 59, 3, 2, 2, 2, 2, 67, 3, 2, 2, 2, 2, 75, 3, 2, 2, 2, 2, 79, 3, 2, 2, 2, 2, 81, 3, 2, 2, 2, 3, 83, 3, 2, 2, 2, 5, 94, 3, 2, 2, 2, 7, 101, 3, 2, 2, 2, 9, 113, 3, 2, 2, 2, 11, 124, 3, 2, 2, 2, 13, 132, 3, 2, 2, 2, 15, 143, 3, 2, 2, 2, 17, 149, 3, 2, 2, 2, 19, 155, 3, 2, 2, 2, 21, 163, 3, 2, 2, 2, 23, 169, 3, 2, 2, 2, 25, 171, 3, 2, 2, 2, 27, 173, 3, 2, 2, 2, 29, 175, 3, 2, 2, 2, 31, 181, 3, 2, 2, 2, 33, 188, 3, 2, 2, 2, 35, 195, 3, 2, 2, 2, 37, 202, 3, 2, 2, 2, 39, 210, 3, 2, 2, 2, 41, 218, 3, 2, 2, 2, 43, 223, 3, 2, 2, 
2, 45, 230, 3, 2, 2, 2, 47, 233, 3, 2, 2, 2, 49, 242, 3, 2, 2, 2, 51, 244, 3, 2, 2, 2, 53, 246, 3, 2, 2, 2, 55, 248, 3, 2, 2, 2, 57, 251, 3, 2, 2, 2, 59, 270, 3, 2, 2, 2, 61, 273, 3, 2, 2, 2, 63, 277, 3, 2, 2, 2, 65, 283, 3, 2, 2, 2, 67, 285, 3, 2, 2, 2, 69, 295, 3, 2, 2, 2, 71, 300, 3, 2, 2, 2, 73, 306, 3, 2, 2, 2, 75, 316, 3, 2, 2, 2, 77, 318, 3, 2, 2, 2, 79, 324, 3, 2, 2, 2, 81, 332, 3, 2, 2, 2, 83, 84, 7, 75, 2, 2, 84, 85, 7, 70, 2, 2, 85, 86, 7, 90, 2, 2, 86, 87, 7, 48, 2, 2, 87, 88, 7, 69, 2, 2, 88, 89, 7, 84, 2, 2, 89, 90, 7, 71, 2, 2, 90, 91, 7, 67, 2, 2, 91, 92, 7, 86, 2, 2, 92, 93, 7, 71, 2, 2, 93, 4, 3, 2, 2, 2, 94, 95, 7, 85, 2, 2, 95, 96, 7, 69, 2, 2, 96, 97, 7, 74, 2, 2, 97, 98, 7, 71, 2, 2, 98, 99, 7, 79, 2, 2, 99, 100, 7, 67, 2, 2, 100, 6, 3, 2, 2, 2, 101, 102, 7, 75, 2, 2, 102, 103, 7, 70, 2, 2, 103, 104, 7, 90, 2, 2, 104, 105, 7, 48, 2, 2, 105, 106, 7, 70, 2, 2, 106, 107, 7, 71, 2, 2, 107, 108, 7, 85, 2, 2, 108, 109, 7, 86, 2, 2, 109, 110, 7, 84, 2, 2, 110, 111, 7, 81, 2, 2, 111, 112, 7, 91, 2, 2, 112, 8, 3, 2, 2, 2, 113, 114, 7, 75, 2, 2, 114, 115, 7, 70, 2, 2, 115, 116, 7, 90, 2, 2, 116, 117, 7, 48, 2, 2, 117, 118, 7, 75, 2, 2, 118, 119, 7, 80, 2, 2, 119, 120, 7, 85, 2, 2, 120, 121, 7, 71, 2, 2, 121, 122, 7, 84, 2, 2, 122, 123, 7, 86, 2, 2, 123, 10, 3, 2, 2, 2, 124, 125, 7, 75, 2, 2, 125, 126, 7, 70, 2, 2, 126, 127, 7, 90, 2, 2, 127, 128, 7, 48, 2, 2, 128, 129, 7, 70, 2, 2, 129, 130, 7, 71, 2, 2, 130, 131, 7, 78, 2, 2, 131, 12, 3, 2, 2, 2, 132, 133, 7, 75, 2, 2, 133, 134, 7, 70, 2, 2, 134, 135, 7, 90, 2, 2, 135, 136, 7, 48, 2, 2, 136, 137, 7, 85, 2, 2, 137, 138, 7, 71, 2, 2, 138, 139, 7, 78, 2, 2, 139, 140, 7, 71, 2, 2, 140, 141, 7, 69, 2, 2, 141, 142, 7, 86, 2, 2, 142, 14, 3, 2, 2, 2, 143, 144, 7, 83, 2, 2, 144, 145, 7, 87, 2, 2, 145, 146, 7, 71, 2, 2, 146, 147, 7, 84, 2, 2, 147, 148, 7, 91, 2, 2, 148, 16, 3, 2, 2, 2, 149, 150, 7, 89, 2, 2, 150, 151, 7, 74, 2, 2, 151, 152, 7, 71, 2, 2, 152, 153, 7, 84, 2, 2, 153, 154, 7, 71, 2, 2, 154, 18, 3, 
2, 2, 2, 155, 156, 7, 81, 2, 2, 156, 157, 7, 84, 2, 2, 157, 158, 7, 70, 2, 2, 158, 159, 7, 71, 2, 2, 159, 160, 7, 84, 2, 2, 160, 161, 7, 68, 2, 2, 161, 162, 7, 91, 2, 2, 162, 20, 3, 2, 2, 2, 163, 164, 7, 78, 2, 2, 164, 165, 7, 75, 2, 2, 165, 166, 7, 79, 2, 2, 166, 167, 7, 75, 2, 2, 167, 168, 7, 86, 2, 2, 168, 22, 3, 2, 2, 2, 169, 170, 7, 93, 2, 2, 170, 24, 3, 2, 2, 2, 171, 172, 7, 46, 2, 2, 172, 26, 3, 2, 2, 2, 173, 174, 7, 95, 2, 2, 174, 28, 3, 2, 2, 2, 175, 176, 7, 87, 2, 2, 176, 177, 7, 75, 2, 2, 177, 178, 7, 80, 2, 2, 178, 179, 7, 86, 2, 2, 179, 180, 7, 58, 2, 2, 180, 30, 3, 2, 2, 2, 181, 182, 7, 87, 2, 2, 182, 183, 7, 75, 2, 2, 183, 184, 7, 80, 2, 2, 184, 185, 7, 86, 2, 2, 185, 186, 7, 51, 2, 2, 186, 187, 7, 56, 2, 2, 187, 32, 3, 2, 2, 2, 188, 189, 7, 87, 2, 2, 189, 190, 7, 75, 2, 2, 190, 191, 7, 80, 2, 2, 191, 192, 7, 86, 2, 2, 192, 193, 7, 53, 2, 2, 193, 194, 7, 52, 2, 2, 194, 34, 3, 2, 2, 2, 195, 196, 7, 87, 2, 2, 196, 197, 7, 75, 2, 2, 197, 198, 7, 80, 2, 2, 198, 199, 7, 86, 2, 2, 199, 200, 7, 56, 2, 2, 200, 201, 7, 54, 2, 2, 201, 36, 3, 2, 2, 2, 202, 203, 7, 72, 2, 2, 203, 204, 7, 78, 2, 2, 204, 205, 7, 81, 2, 2, 205, 206, 7, 67, 2, 2, 206, 207, 7, 86, 2, 2, 207, 208, 7, 53, 2, 2, 208, 209, 7, 52, 2, 2, 209, 38, 3, 2, 2, 2, 210, 211, 7, 72, 2, 2, 211, 212, 7, 78, 2, 2, 212, 213, 7, 81, 2, 2, 213, 214, 7, 67, 2, 2, 214, 215, 7, 86, 2, 2, 215, 216, 7, 56, 2, 2, 216, 217, 7, 54, 2, 2, 217, 40, 3, 2, 2, 2, 218, 219, 7, 71, 2, 2, 219, 220, 7, 80, 2, 2, 220, 221, 7, 87, 2, 2, 221, 222, 7, 79, 2, 2, 222, 42, 3, 2, 2, 2, 223, 224, 7, 85, 2, 2, 224, 225, 7, 86, 2, 2, 225, 226, 7, 84, 2, 2, 226, 227, 7, 75, 2, 2, 227, 228, 7, 80, 2, 2, 228, 229, 7, 73, 2, 2, 229, 44, 3, 2, 2, 2, 230, 231, 7, 75, 2, 2, 231, 232, 7, 80, 2, 2, 232, 46, 3, 2, 2, 2, 233, 234, 7, 69, 2, 2, 234, 235, 7, 81, 2, 2, 235, 236, 7, 80, 2, 2, 236, 237, 7, 86, 2, 2, 237, 238, 7, 67, 2, 2, 238, 239, 7, 75, 2, 2, 239, 240, 7, 80, 2, 2, 240, 241, 7, 85, 2, 2, 241, 48, 3, 2, 2, 2, 242, 243, 7, 62, 2, 
2, 243, 50, 3, 2, 2, 2, 244, 245, 7, 64, 2, 2, 245, 52, 3, 2, 2, 2, 246, 247, 7, 63, 2, 2, 247, 54, 3, 2, 2, 2, 248, 249, 7, 62, 2, 2, 249, 250, 7, 63, 2, 2, 250, 56, 3, 2, 2, 2, 251, 252, 7, 64, 2, 2, 252, 253, 7, 63, 2, 2, 253, 58, 3, 2, 2, 2, 254, 255, 5, 61, 31, 2, 255, 257, 7, 48, 2, 2, 256, 258, 5, 61, 31, 2, 257, 256, 3, 2, 2, 2, 257, 258, 3, 2, 2, 2, 258, 260, 3, 2, 2, 2, 259, 261, 5, 63, 32, 2, 260, 259, 3, 2, 2, 2, 260, 261, 3, 2, 2, 2, 261, 271, 3, 2, 2, 2, 262, 263, 5, 61, 31, 2, 263, 264, 5, 63, 32, 2, 264, 271, 3, 2, 2, 2, 265, 266, 7, 48, 2, 2, 266, 268, 5, 61, 31, 2, 267, 269, 5, 63, 32, 2, 268, 267, 3, 2, 2, 2, 268, 269, 3, 2, 2, 2, 269, 271, 3, 2, 2, 2, 270, 254, 3, 2, 2, 2, 270, 262, 3, 2, 2, 2, 270, 265, 3, 2, 2, 2, 271, 60, 3, 2, 2, 2, 272, 274, 5, 65, 33, 2, 273, 272, 3, 2, 2, 2, 274, 275, 3, 2, 2, 2, 275, 273, 3, 2, 2, 2, 275, 276, 3, 2, 2, 2, 276, 62, 3, 2, 2, 2, 277, 279, 9, 2, 2, 2, 278, 280, 9, 3, 2, 2, 279, 278, 3, 2, 2, 2, 279, 280, 3, 2, 2, 2, 280, 281, 3, 2, 2, 2, 281, 282, 5, 61, 31, 2, 282, 64, 3, 2, 2, 2, 283, 284, 9, 4, 2, 2, 284, 66, 3, 2, 2, 2, 285, 290, 7, 36, 2, 2, 286, 289, 5, 69, 35, 2, 287, 289, 10, 5, 2, 2, 288, 286, 3, 2, 2, 2, 288, 287, 3, 2, 2, 2, 289, 292, 3, 2, 2, 2, 290, 288, 3, 2, 2, 2, 290, 291, 3, 2, 2, 2, 291, 293, 3, 2, 2, 2, 292, 290, 3, 2, 2, 2, 293, 294, 7, 36, 2, 2, 294, 68, 3, 2, 2, 2, 295, 298, 7, 94, 2, 2, 296, 299, 9, 6, 2, 2, 297, 299, 5, 71, 36, 2, 298, 296, 3, 2, 2, 2, 298, 297, 3, 2, 2, 2, 299, 70, 3, 2, 2, 2, 300, 301, 7, 119, 2, 2, 301, 302, 5, 73, 37, 2, 302, 303, 5, 73, 37, 2, 303, 304, 5, 73, 37, 2, 304, 305, 5, 73, 37, 2, 305, 72, 3, 2, 2, 2, 306, 307, 9, 7, 2, 2, 307, 74, 3, 2, 2, 2, 308, 317, 7, 50, 2, 2, 309, 313, 9, 8, 2, 2, 310, 312, 9, 4, 2, 2, 311, 310, 3, 2, 2, 2, 312, 315, 3, 2, 2, 2, 313, 311, 3, 2, 2, 2, 313, 314, 3, 2, 2, 2, 314, 317, 3, 2, 2, 2, 315, 313, 3, 2, 2, 2, 316, 308, 3, 2, 2, 2, 316, 309, 3, 2, 2, 2, 317, 76, 3, 2, 2, 2, 318, 320, 9, 2, 2, 2, 319, 321, 9, 3, 2, 2, 320, 
319, 3, 2, 2, 2, 320, 321, 3, 2, 2, 2, 321, 322, 3, 2, 2, 2, 322, 323, 5, 75, 38, 2, 323, 78, 3, 2, 2, 2, 324, 328, 9, 9, 2, 2, 325, 327, 9, 10, 2, 2, 326, 325, 3, 2, 2, 2, 327, 330, 3, 2, 2, 2, 328, 326, 3, 2, 2, 2, 328, 329, 3, 2, 2, 2, 329, 80, 3, 2, 2, 2, 330, 328, 3, 2, 2, 2, 331, 333, 9, 11, 2, 2, 332, 331, 3, 2, 2, 2, 333, 334, 3, 2, 2, 2, 334, 332, 3, 2, 2, 2, 334, 335, 3, 2, 2, 2, 335, 336, 3, 2, 2, 2, 336, 337, 8, 41, 2, 2, 337, 82, 3, 2, 2, 2, 17, 2, 257, 260, 268, 270, 275, 279, 288, 290, 298, 313, 316, 320, 328, 334, 3, 8, 2, 2] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /wal/wal_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The etcd Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package wal 16 | 17 | import ( 18 | "bytes" 19 | "io" 20 | "io/ioutil" 21 | "os" 22 | "path/filepath" 23 | "testing" 24 | 25 | "github.com/pkg/errors" 26 | "github.com/stretchr/testify/require" 27 | 28 | "github.com/coreos/etcd/pkg/fileutil" 29 | "github.com/deepfabric/indexer/wal/walpb" 30 | ) 31 | 32 | func TestNew(t *testing.T) { 33 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 34 | require.NoError(t, err) 35 | defer os.RemoveAll(p) 36 | 37 | w, err := Create(p) 38 | require.NoError(t, err) 39 | if g := filepath.Base(w.tail.Name()); g != walName(0, 0) { 40 | t.Errorf("name = %+v, want %+v", g, walName(0, 0)) 41 | } 42 | defer w.Close(false) 43 | 44 | // file is preallocated to segment size; only read data written by wal 45 | off, err := w.tail.Seek(0, io.SeekCurrent) 46 | require.NoError(t, err) 47 | gd := make([]byte, off) 48 | f, err := os.Open(filepath.Join(p, filepath.Base(w.tail.Name()))) 49 | require.NoError(t, err) 50 | defer f.Close() 51 | _, err = io.ReadFull(f, gd) 52 | require.NoError(t, err) 53 | 54 | var wb bytes.Buffer 55 | e := newEncoder(&wb, 0, 0) 56 | e.flush() 57 | if !bytes.Equal(gd, wb.Bytes()) { 58 | t.Errorf("data = %v, want %v", gd, wb.Bytes()) 59 | } 60 | } 61 | 62 | func TestNewForInitedDir(t *testing.T) { 63 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 64 | require.NoError(t, err) 65 | defer os.RemoveAll(p) 66 | 67 | os.Create(filepath.Join(p, walName(0, 0))) 68 | if _, err = Create(p); err == nil || err != os.ErrExist { 69 | t.Errorf("err = %v, want %v", err, os.ErrExist) 70 | } 71 | } 72 | 73 | func TestOpenAtIndex(t *testing.T) { 74 | dir, err := ioutil.TempDir(os.TempDir(), "waltest") 75 | require.NoError(t, err) 76 | defer os.RemoveAll(dir) 77 | 78 | f, err := os.Create(filepath.Join(dir, walName(0, 0))) 79 | require.NoError(t, err) 80 | f.Close() 81 | 82 | w, err := Open(dir, walpb.Snapshot{}) 83 | require.NoError(t, err) 84 | if g := filepath.Base(w.walNames[len(w.walNames)-1]); g != walName(0, 0) { 85 | 
require.Equal(t, walName(0, 0), g) 86 | } 87 | require.Equal(t, uint64(0), w.seq()) 88 | w.Close(false) 89 | 90 | wname := walName(2, 10) 91 | f, err = os.Create(filepath.Join(dir, wname)) 92 | require.NoError(t, err) 93 | f.Close() 94 | 95 | w, err = Open(dir, walpb.Snapshot{Index: 5}) 96 | require.NoError(t, err) 97 | if g := filepath.Base(w.walNames[len(w.walNames)-1]); g != wname { 98 | require.Equal(t, wname, g) 99 | } 100 | require.Equal(t, uint64(2), w.seq()) 101 | w.Close(false) 102 | 103 | emptydir, err := ioutil.TempDir(os.TempDir(), "waltestempty") 104 | require.NoError(t, err) 105 | defer os.RemoveAll(emptydir) 106 | _, err = Open(emptydir, walpb.Snapshot{}) 107 | require.Equal(t, ErrFileNotFound, errors.Cause(err)) 108 | } 109 | 110 | // TODO: split it into smaller tests for better readability 111 | func TestCut(t *testing.T) { 112 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 113 | require.NoError(t, err) 114 | defer os.RemoveAll(p) 115 | 116 | w, err := Create(p) 117 | require.NoError(t, err) 118 | defer w.Close(false) 119 | 120 | err = w.cut() 121 | require.NoError(t, err) 122 | wname := walName(1, 1) 123 | if g := filepath.Base(w.tail.Name()); g != wname { 124 | t.Errorf("name = %s, want %s", g, wname) 125 | } 126 | 127 | es := []walpb.Entry{{Index: 1, Term: 1, Data: []byte{1}}} 128 | err = w.Save(es) 129 | require.NoError(t, err) 130 | err = w.cut() 131 | require.NoError(t, err) 132 | wname = walName(2, 2) 133 | if g := filepath.Base(w.tail.Name()); g != wname { 134 | t.Errorf("name = %s, want %s", g, wname) 135 | } 136 | 137 | // check the state in the last WAL 138 | // We do check before closing the WAL to ensure that Cut syncs the data 139 | // into the disk. 
140 | f, err := os.Open(filepath.Join(p, wname)) 141 | require.NoError(t, err) 142 | defer f.Close() 143 | nw := &WAL{ 144 | decoder: newDecoder(f), 145 | } 146 | _, err = nw.ReadAll() 147 | require.NoError(t, err) 148 | } 149 | 150 | func TestRecover(t *testing.T) { 151 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 152 | require.NoError(t, err) 153 | defer os.RemoveAll(p) 154 | 155 | w, err := Create(p) 156 | require.NoError(t, err) 157 | ents := []walpb.Entry{{Index: 1, Term: 1, Data: []byte{1}}, {Index: 2, Term: 2, Data: []byte{2}}} 158 | err = w.Save(ents) 159 | require.NoError(t, err) 160 | w.Close(false) 161 | 162 | w, err = Open(p, walpb.Snapshot{}) 163 | require.NoError(t, err) 164 | entries, err := w.ReadAll() 165 | require.NoError(t, err) 166 | 167 | require.Equal(t, ents, entries) 168 | w.Close(false) 169 | } 170 | 171 | func TestSearchIndex(t *testing.T) { 172 | tests := []struct { 173 | names []string 174 | index uint64 175 | widx int 176 | wok bool 177 | }{ 178 | { 179 | []string{ 180 | "0000000000000000-0000000000000000.wal", 181 | "0000000000000001-0000000000001000.wal", 182 | "0000000000000002-0000000000002000.wal", 183 | }, 184 | 0x1000, 1, true, 185 | }, 186 | { 187 | []string{ 188 | "0000000000000001-0000000000004000.wal", 189 | "0000000000000002-0000000000003000.wal", 190 | "0000000000000003-0000000000005000.wal", 191 | }, 192 | 0x4000, 1, true, 193 | }, 194 | { 195 | []string{ 196 | "0000000000000001-0000000000002000.wal", 197 | "0000000000000002-0000000000003000.wal", 198 | "0000000000000003-0000000000005000.wal", 199 | }, 200 | 0x1000, -1, false, 201 | }, 202 | } 203 | for i, tt := range tests { 204 | idx, ok := searchIndex(tt.names, tt.index) 205 | if idx != tt.widx { 206 | t.Errorf("#%d: idx = %d, want %d", i, idx, tt.widx) 207 | } 208 | if ok != tt.wok { 209 | t.Errorf("#%d: ok = %v, want %v", i, ok, tt.wok) 210 | } 211 | } 212 | } 213 | 214 | func TestScanWalName(t *testing.T) { 215 | tests := []struct { 216 | str string 217 | 
wseq, windex uint64 218 | wok bool 219 | }{ 220 | {"0000000000000000-0000000000000000.wal", 0, 0, true}, 221 | {"0000000000000000.wal", 0, 0, false}, 222 | {"0000000000000000-0000000000000000.snap", 0, 0, false}, 223 | } 224 | for i, tt := range tests { 225 | s, index, err := parseWalName(tt.str) 226 | if g := err == nil; g != tt.wok { 227 | t.Errorf("#%d: ok = %v, want %v", i, g, tt.wok) 228 | } 229 | if s != tt.wseq { 230 | t.Errorf("#%d: seq = %d, want %d", i, s, tt.wseq) 231 | } 232 | if index != tt.windex { 233 | t.Errorf("#%d: index = %d, want %d", i, index, tt.windex) 234 | } 235 | } 236 | } 237 | 238 | func TestRecoverAfterCut(t *testing.T) { 239 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 240 | require.NoError(t, err) 241 | defer os.RemoveAll(p) 242 | 243 | md, err := Create(p) 244 | require.NoError(t, err) 245 | for i := 0; i < 10; i++ { 246 | es := []walpb.Entry{{Index: uint64(i)}} 247 | err = md.Save(es) 248 | require.NoError(t, err) 249 | err = md.cut() 250 | require.NoError(t, err) 251 | } 252 | md.Close(false) 253 | 254 | err = os.Remove(filepath.Join(p, walName(4, 4))) 255 | require.NoError(t, err) 256 | 257 | for i := 0; i < 10; i++ { 258 | w, err := Open(p, walpb.Snapshot{Index: uint64(i)}) 259 | if err != nil { 260 | if i <= 4 { 261 | if err != ErrFileNotFound { 262 | t.Errorf("#%d: err = %v, want %v", i, err, ErrFileNotFound) 263 | } 264 | } else { 265 | t.Errorf("#%d: err = %v, want nil", i, err) 266 | } 267 | continue 268 | } 269 | entries, err := w.ReadAll() 270 | if err != nil { 271 | t.Errorf("#%d: err = %v, want nil", i, err) 272 | continue 273 | } 274 | for j, e := range entries { 275 | if e.Index != uint64(j+i+1) { 276 | t.Errorf("#%d: ents[%d].Index = %+v, want %+v", i, j, e.Index, j+i+1) 277 | } 278 | } 279 | w.Close(false) 280 | } 281 | } 282 | 283 | func TestOpenAtUncommittedIndex(t *testing.T) { 284 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 285 | require.NoError(t, err) 286 | defer os.RemoveAll(p) 287 | 288 | w, 
err := Create(p) 289 | require.NoError(t, err) 290 | err = w.Save([]walpb.Entry{{Index: 0}}) 291 | require.NoError(t, err) 292 | w.Close(false) 293 | 294 | w, err = Open(p, walpb.Snapshot{}) 295 | require.NoError(t, err) 296 | // commit up to index 0, try to read index 1 297 | _, err = w.ReadAll() 298 | require.NoError(t, err) 299 | w.Close(false) 300 | } 301 | 302 | // TestOpenForRead tests that OpenForRead can load all files. 303 | // The test creates a WAL directory and cuts out multiple WAL files. Then 304 | // it releases the lock on part of the data, and expects that OpenForRead 305 | // can read out all files even if some are locked for write. 306 | func TestOpenForRead(t *testing.T) { 307 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 308 | require.NoError(t, err) 309 | defer os.RemoveAll(p) 310 | // create WAL 311 | w, err := Create(p) 312 | require.NoError(t, err) 313 | defer w.Close(false) 314 | // make 10 separate files 315 | for i := 0; i < 10; i++ { 316 | es := []walpb.Entry{{Index: uint64(i)}} 317 | err = w.Save(es) 318 | require.NoError(t, err) 319 | err = w.cut() 320 | require.NoError(t, err) 321 | } 322 | 323 | // All are available for read 324 | w2, err := OpenForRead(p, walpb.Snapshot{}) 325 | require.NoError(t, err) 326 | defer w2.Close(false) 327 | ents, err := w2.ReadAll() 328 | require.NoError(t, err) 329 | g := ents[len(ents)-1].Index 330 | require.Equal(t, uint64(9), g) 331 | } 332 | 333 | // TestTailWriteNoSlackSpace ensures that tail writes append if there's no preallocated space. 
334 | func TestTailWriteNoSlackSpace(t *testing.T) { 335 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 336 | require.NoError(t, err) 337 | defer os.RemoveAll(p) 338 | 339 | // create initial WAL 340 | w, err := Create(p) 341 | require.NoError(t, err) 342 | // write some entries 343 | for i := 1; i <= 5; i++ { 344 | es := []walpb.Entry{{Index: uint64(i), Term: 1, Data: []byte{byte(i)}}} 345 | err = w.Save(es) 346 | require.NoError(t, err) 347 | } 348 | // get rid of slack space by truncating file 349 | off, serr := w.tail.Seek(0, io.SeekCurrent) 350 | require.NoError(t, serr) 351 | terr := w.tail.Truncate(off) 352 | require.NoError(t, terr) 353 | w.Close(false) 354 | 355 | // open, write more 356 | w, err = Open(p, walpb.Snapshot{}) 357 | require.NoError(t, err) 358 | ents, rerr := w.ReadAll() 359 | require.NoError(t, rerr) 360 | require.Equal(t, 5, len(ents)) 361 | // write more entries 362 | for i := 6; i <= 10; i++ { 363 | es := []walpb.Entry{{Index: uint64(i), Term: 1, Data: []byte{byte(i)}}} 364 | err = w.Save(es) 365 | require.NoError(t, err) 366 | } 367 | w.Close(false) 368 | 369 | // confirm all writes 370 | w, err = Open(p, walpb.Snapshot{}) 371 | require.NoError(t, err) 372 | ents, rerr = w.ReadAll() 373 | require.NoError(t, rerr) 374 | require.Equal(t, 10, len(ents)) 375 | w.Close(false) 376 | } 377 | 378 | // TestRestartCreateWal ensures that an interrupted WAL initialization is clobbered on restart 379 | func TestRestartCreateWal(t *testing.T) { 380 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 381 | require.NoError(t, err) 382 | defer os.RemoveAll(p) 383 | 384 | // make temporary directory so it looks like initialization is interrupted 385 | tmpdir := filepath.Clean(p) + ".tmp" 386 | err = os.Mkdir(tmpdir, fileutil.PrivateDirMode) 387 | require.NoError(t, err) 388 | _, err = os.OpenFile(filepath.Join(tmpdir, "test"), os.O_WRONLY|os.O_CREATE, fileutil.PrivateFileMode) 389 | require.NoError(t, err) 390 | 391 | w, werr := Create(p) 392 | 
require.NoError(t, werr) 393 | w.Close(false) 394 | if Exist(tmpdir) { 395 | t.Fatalf("got %q exists, expected it to not exist", tmpdir) 396 | } 397 | 398 | w, err = OpenForRead(p, walpb.Snapshot{}) 399 | require.NoError(t, err) 400 | defer w.Close(false) 401 | 402 | _, rerr := w.ReadAll() 403 | require.NoError(t, rerr) 404 | } 405 | 406 | // TestOpenOnTornWrite ensures that entries past the torn write are truncated. 407 | func TestOpenOnTornWrite(t *testing.T) { 408 | maxEntries := 40 409 | clobberIdx := 20 410 | overwriteEntries := 5 411 | 412 | p, err := ioutil.TempDir(os.TempDir(), "waltest") 413 | require.NoError(t, err) 414 | defer os.RemoveAll(p) 415 | w, err := Create(p) 416 | defer func() { 417 | if err = w.Close(false); err != nil && err != os.ErrInvalid { 418 | t.Fatal(err) 419 | } 420 | }() 421 | require.NoError(t, err) 422 | 423 | // get offset of end of each saved entry 424 | offsets := make([]int64, maxEntries) 425 | for i := range offsets { 426 | es := []walpb.Entry{{Index: uint64(i)}} 427 | err = w.Save(es) 428 | require.NoError(t, err) 429 | offsets[i], err = w.tail.Seek(0, io.SeekCurrent) 430 | require.NoError(t, err) 431 | } 432 | 433 | fn := filepath.Join(p, filepath.Base(w.tail.Name())) 434 | w.Close(false) 435 | 436 | // clobber some entry with 0's to simulate a torn write 437 | f, ferr := os.OpenFile(fn, os.O_WRONLY, fileutil.PrivateFileMode) 438 | require.NoError(t, ferr) 439 | defer f.Close() 440 | _, err = f.Seek(offsets[clobberIdx], io.SeekStart) 441 | require.NoError(t, err) 442 | zeros := make([]byte, offsets[clobberIdx+1]-offsets[clobberIdx]) 443 | _, err = f.Write(zeros) 444 | require.NoError(t, err) 445 | f.Close() 446 | 447 | w, err = Open(p, walpb.Snapshot{}) 448 | require.NoError(t, err) 449 | // seek up to clobbered entry 450 | _, err = w.ReadAll() 451 | require.NoError(t, err) 452 | 453 | // write a few entries past the clobbered entry 454 | for i := 0; i < overwriteEntries; i++ { 455 | // Index is different from old, truncated 
entries 456 | es := []walpb.Entry{{Index: uint64(i + clobberIdx), Data: []byte("new")}} 457 | err = w.Save(es) 458 | require.NoError(t, err) 459 | } 460 | w.Close(false) 461 | 462 | // read back the entries, confirm number of entries matches expectation 463 | w, err = OpenForRead(p, walpb.Snapshot{}) 464 | require.NoError(t, err) 465 | 466 | ents, rerr := w.ReadAll() 467 | // CRC error? the old entries were likely never truncated away 468 | require.NoError(t, rerr) 469 | wEntries := (clobberIdx - 1) + overwriteEntries 470 | require.Equal(t, wEntries, len(ents)) 471 | } 472 | -------------------------------------------------------------------------------- /cql/parser/cql_lexer.go: -------------------------------------------------------------------------------- 1 | // Generated from /home/zhichyu/src/github.com/deepfabric/indexer/cql/parser/CQL.g4 by ANTLR 4.7. 2 | 3 | package parser 4 | 5 | import ( 6 | "fmt" 7 | "unicode" 8 | 9 | "github.com/antlr/antlr4/runtime/Go/antlr" 10 | ) 11 | 12 | // Suppress unused import error 13 | var _ = fmt.Printf 14 | var _ = unicode.IsLetter 15 | 16 | var serializedLexerAtn = []uint16{ 17 | 3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 35, 338, 18 | 8, 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 19 | 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 20 | 4, 13, 9, 13, 4, 14, 9, 14, 4, 15, 9, 15, 4, 16, 9, 16, 4, 17, 9, 17, 4, 21 | 18, 9, 18, 4, 19, 9, 19, 4, 20, 9, 20, 4, 21, 9, 21, 4, 22, 9, 22, 4, 23, 22 | 9, 23, 4, 24, 9, 24, 4, 25, 9, 25, 4, 26, 9, 26, 4, 27, 9, 27, 4, 28, 9, 23 | 28, 4, 29, 9, 29, 4, 30, 9, 30, 4, 31, 9, 31, 4, 32, 9, 32, 4, 33, 9, 33, 24 | 4, 34, 9, 34, 4, 35, 9, 35, 4, 36, 9, 36, 4, 37, 9, 37, 4, 38, 9, 38, 4, 25 | 39, 9, 39, 4, 40, 9, 40, 4, 41, 9, 41, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 26 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 27 | 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 28 | 4, 3, 
5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 5, 3, 29 | 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 30 | 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 31 | 8, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 32 | 3, 10, 3, 10, 3, 10, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 12, 3, 33 | 12, 3, 13, 3, 13, 3, 14, 3, 14, 3, 15, 3, 15, 3, 15, 3, 15, 3, 15, 3, 15, 34 | 3, 16, 3, 16, 3, 16, 3, 16, 3, 16, 3, 16, 3, 16, 3, 17, 3, 17, 3, 17, 3, 35 | 17, 3, 17, 3, 17, 3, 17, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 36 | 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 20, 3, 20, 3, 37 | 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 21, 3, 21, 3, 21, 3, 21, 3, 21, 38 | 3, 22, 3, 22, 3, 22, 3, 22, 3, 22, 3, 22, 3, 22, 3, 23, 3, 23, 3, 23, 3, 39 | 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 24, 3, 25, 3, 25, 40 | 3, 26, 3, 26, 3, 27, 3, 27, 3, 28, 3, 28, 3, 28, 3, 29, 3, 29, 3, 29, 3, 41 | 30, 3, 30, 3, 30, 5, 30, 258, 10, 30, 3, 30, 5, 30, 261, 10, 30, 3, 30, 42 | 3, 30, 3, 30, 3, 30, 3, 30, 3, 30, 5, 30, 269, 10, 30, 5, 30, 271, 10, 43 | 30, 3, 31, 6, 31, 274, 10, 31, 13, 31, 14, 31, 275, 3, 32, 3, 32, 5, 32, 44 | 280, 10, 32, 3, 32, 3, 32, 3, 33, 3, 33, 3, 34, 3, 34, 3, 34, 7, 34, 289, 45 | 10, 34, 12, 34, 14, 34, 292, 11, 34, 3, 34, 3, 34, 3, 35, 3, 35, 3, 35, 46 | 5, 35, 299, 10, 35, 3, 36, 3, 36, 3, 36, 3, 36, 3, 36, 3, 36, 3, 37, 3, 47 | 37, 3, 38, 3, 38, 3, 38, 7, 38, 312, 10, 38, 12, 38, 14, 38, 315, 11, 38, 48 | 5, 38, 317, 10, 38, 3, 39, 3, 39, 5, 39, 321, 10, 39, 3, 39, 3, 39, 3, 49 | 40, 3, 40, 7, 40, 327, 10, 40, 12, 40, 14, 40, 330, 11, 40, 3, 41, 6, 41, 50 | 333, 10, 41, 13, 41, 14, 41, 334, 3, 41, 3, 41, 2, 2, 42, 3, 3, 5, 4, 7, 51 | 5, 9, 6, 11, 7, 13, 8, 15, 9, 17, 10, 19, 11, 21, 12, 23, 13, 25, 14, 27, 52 | 15, 29, 16, 31, 17, 33, 18, 35, 19, 37, 20, 39, 21, 41, 22, 43, 23, 45, 53 | 24, 47, 25, 49, 26, 51, 27, 53, 28, 55, 29, 57, 30, 59, 
31, 61, 2, 63, 54 | 2, 65, 2, 67, 32, 69, 2, 71, 2, 73, 2, 75, 33, 77, 2, 79, 34, 81, 35, 3, 55 | 2, 12, 4, 2, 71, 71, 103, 103, 4, 2, 45, 45, 47, 47, 3, 2, 50, 59, 4, 2, 56 | 36, 36, 94, 94, 10, 2, 36, 36, 49, 49, 94, 94, 100, 100, 104, 104, 112, 57 | 112, 116, 116, 118, 118, 5, 2, 50, 59, 67, 72, 99, 104, 3, 2, 51, 59, 5, 58 | 2, 67, 92, 97, 97, 99, 124, 6, 2, 50, 59, 67, 92, 97, 97, 99, 124, 5, 2, 59 | 11, 12, 15, 15, 34, 34, 2, 345, 2, 3, 3, 2, 2, 2, 2, 5, 3, 2, 2, 2, 2, 60 | 7, 3, 2, 2, 2, 2, 9, 3, 2, 2, 2, 2, 11, 3, 2, 2, 2, 2, 13, 3, 2, 2, 2, 61 | 2, 15, 3, 2, 2, 2, 2, 17, 3, 2, 2, 2, 2, 19, 3, 2, 2, 2, 2, 21, 3, 2, 2, 62 | 2, 2, 23, 3, 2, 2, 2, 2, 25, 3, 2, 2, 2, 2, 27, 3, 2, 2, 2, 2, 29, 3, 2, 63 | 2, 2, 2, 31, 3, 2, 2, 2, 2, 33, 3, 2, 2, 2, 2, 35, 3, 2, 2, 2, 2, 37, 3, 64 | 2, 2, 2, 2, 39, 3, 2, 2, 2, 2, 41, 3, 2, 2, 2, 2, 43, 3, 2, 2, 2, 2, 45, 65 | 3, 2, 2, 2, 2, 47, 3, 2, 2, 2, 2, 49, 3, 2, 2, 2, 2, 51, 3, 2, 2, 2, 2, 66 | 53, 3, 2, 2, 2, 2, 55, 3, 2, 2, 2, 2, 57, 3, 2, 2, 2, 2, 59, 3, 2, 2, 2, 67 | 2, 67, 3, 2, 2, 2, 2, 75, 3, 2, 2, 2, 2, 79, 3, 2, 2, 2, 2, 81, 3, 2, 2, 68 | 2, 3, 83, 3, 2, 2, 2, 5, 94, 3, 2, 2, 2, 7, 101, 3, 2, 2, 2, 9, 113, 3, 69 | 2, 2, 2, 11, 124, 3, 2, 2, 2, 13, 132, 3, 2, 2, 2, 15, 143, 3, 2, 2, 2, 70 | 17, 149, 3, 2, 2, 2, 19, 155, 3, 2, 2, 2, 21, 163, 3, 2, 2, 2, 23, 169, 71 | 3, 2, 2, 2, 25, 171, 3, 2, 2, 2, 27, 173, 3, 2, 2, 2, 29, 175, 3, 2, 2, 72 | 2, 31, 181, 3, 2, 2, 2, 33, 188, 3, 2, 2, 2, 35, 195, 3, 2, 2, 2, 37, 202, 73 | 3, 2, 2, 2, 39, 210, 3, 2, 2, 2, 41, 218, 3, 2, 2, 2, 43, 223, 3, 2, 2, 74 | 2, 45, 230, 3, 2, 2, 2, 47, 233, 3, 2, 2, 2, 49, 242, 3, 2, 2, 2, 51, 244, 75 | 3, 2, 2, 2, 53, 246, 3, 2, 2, 2, 55, 248, 3, 2, 2, 2, 57, 251, 3, 2, 2, 76 | 2, 59, 270, 3, 2, 2, 2, 61, 273, 3, 2, 2, 2, 63, 277, 3, 2, 2, 2, 65, 283, 77 | 3, 2, 2, 2, 67, 285, 3, 2, 2, 2, 69, 295, 3, 2, 2, 2, 71, 300, 3, 2, 2, 78 | 2, 73, 306, 3, 2, 2, 2, 75, 316, 3, 2, 2, 2, 77, 318, 3, 2, 2, 2, 79, 324, 79 | 3, 2, 2, 2, 81, 332, 3, 2, 2, 2, 
83, 84, 7, 75, 2, 2, 84, 85, 7, 70, 2, 80 | 2, 85, 86, 7, 90, 2, 2, 86, 87, 7, 48, 2, 2, 87, 88, 7, 69, 2, 2, 88, 89, 81 | 7, 84, 2, 2, 89, 90, 7, 71, 2, 2, 90, 91, 7, 67, 2, 2, 91, 92, 7, 86, 2, 82 | 2, 92, 93, 7, 71, 2, 2, 93, 4, 3, 2, 2, 2, 94, 95, 7, 85, 2, 2, 95, 96, 83 | 7, 69, 2, 2, 96, 97, 7, 74, 2, 2, 97, 98, 7, 71, 2, 2, 98, 99, 7, 79, 2, 84 | 2, 99, 100, 7, 67, 2, 2, 100, 6, 3, 2, 2, 2, 101, 102, 7, 75, 2, 2, 102, 85 | 103, 7, 70, 2, 2, 103, 104, 7, 90, 2, 2, 104, 105, 7, 48, 2, 2, 105, 106, 86 | 7, 70, 2, 2, 106, 107, 7, 71, 2, 2, 107, 108, 7, 85, 2, 2, 108, 109, 7, 87 | 86, 2, 2, 109, 110, 7, 84, 2, 2, 110, 111, 7, 81, 2, 2, 111, 112, 7, 91, 88 | 2, 2, 112, 8, 3, 2, 2, 2, 113, 114, 7, 75, 2, 2, 114, 115, 7, 70, 2, 2, 89 | 115, 116, 7, 90, 2, 2, 116, 117, 7, 48, 2, 2, 117, 118, 7, 75, 2, 2, 118, 90 | 119, 7, 80, 2, 2, 119, 120, 7, 85, 2, 2, 120, 121, 7, 71, 2, 2, 121, 122, 91 | 7, 84, 2, 2, 122, 123, 7, 86, 2, 2, 123, 10, 3, 2, 2, 2, 124, 125, 7, 75, 92 | 2, 2, 125, 126, 7, 70, 2, 2, 126, 127, 7, 90, 2, 2, 127, 128, 7, 48, 2, 93 | 2, 128, 129, 7, 70, 2, 2, 129, 130, 7, 71, 2, 2, 130, 131, 7, 78, 2, 2, 94 | 131, 12, 3, 2, 2, 2, 132, 133, 7, 75, 2, 2, 133, 134, 7, 70, 2, 2, 134, 95 | 135, 7, 90, 2, 2, 135, 136, 7, 48, 2, 2, 136, 137, 7, 85, 2, 2, 137, 138, 96 | 7, 71, 2, 2, 138, 139, 7, 78, 2, 2, 139, 140, 7, 71, 2, 2, 140, 141, 7, 97 | 69, 2, 2, 141, 142, 7, 86, 2, 2, 142, 14, 3, 2, 2, 2, 143, 144, 7, 83, 98 | 2, 2, 144, 145, 7, 87, 2, 2, 145, 146, 7, 71, 2, 2, 146, 147, 7, 84, 2, 99 | 2, 147, 148, 7, 91, 2, 2, 148, 16, 3, 2, 2, 2, 149, 150, 7, 89, 2, 2, 150, 100 | 151, 7, 74, 2, 2, 151, 152, 7, 71, 2, 2, 152, 153, 7, 84, 2, 2, 153, 154, 101 | 7, 71, 2, 2, 154, 18, 3, 2, 2, 2, 155, 156, 7, 81, 2, 2, 156, 157, 7, 84, 102 | 2, 2, 157, 158, 7, 70, 2, 2, 158, 159, 7, 71, 2, 2, 159, 160, 7, 84, 2, 103 | 2, 160, 161, 7, 68, 2, 2, 161, 162, 7, 91, 2, 2, 162, 20, 3, 2, 2, 2, 163, 104 | 164, 7, 78, 2, 2, 164, 165, 7, 75, 2, 2, 165, 166, 7, 79, 2, 2, 166, 167, 105 
| 7, 75, 2, 2, 167, 168, 7, 86, 2, 2, 168, 22, 3, 2, 2, 2, 169, 170, 7, 93, 106 | 2, 2, 170, 24, 3, 2, 2, 2, 171, 172, 7, 46, 2, 2, 172, 26, 3, 2, 2, 2, 107 | 173, 174, 7, 95, 2, 2, 174, 28, 3, 2, 2, 2, 175, 176, 7, 87, 2, 2, 176, 108 | 177, 7, 75, 2, 2, 177, 178, 7, 80, 2, 2, 178, 179, 7, 86, 2, 2, 179, 180, 109 | 7, 58, 2, 2, 180, 30, 3, 2, 2, 2, 181, 182, 7, 87, 2, 2, 182, 183, 7, 75, 110 | 2, 2, 183, 184, 7, 80, 2, 2, 184, 185, 7, 86, 2, 2, 185, 186, 7, 51, 2, 111 | 2, 186, 187, 7, 56, 2, 2, 187, 32, 3, 2, 2, 2, 188, 189, 7, 87, 2, 2, 189, 112 | 190, 7, 75, 2, 2, 190, 191, 7, 80, 2, 2, 191, 192, 7, 86, 2, 2, 192, 193, 113 | 7, 53, 2, 2, 193, 194, 7, 52, 2, 2, 194, 34, 3, 2, 2, 2, 195, 196, 7, 87, 114 | 2, 2, 196, 197, 7, 75, 2, 2, 197, 198, 7, 80, 2, 2, 198, 199, 7, 86, 2, 115 | 2, 199, 200, 7, 56, 2, 2, 200, 201, 7, 54, 2, 2, 201, 36, 3, 2, 2, 2, 202, 116 | 203, 7, 72, 2, 2, 203, 204, 7, 78, 2, 2, 204, 205, 7, 81, 2, 2, 205, 206, 117 | 7, 67, 2, 2, 206, 207, 7, 86, 2, 2, 207, 208, 7, 53, 2, 2, 208, 209, 7, 118 | 52, 2, 2, 209, 38, 3, 2, 2, 2, 210, 211, 7, 72, 2, 2, 211, 212, 7, 78, 119 | 2, 2, 212, 213, 7, 81, 2, 2, 213, 214, 7, 67, 2, 2, 214, 215, 7, 86, 2, 120 | 2, 215, 216, 7, 56, 2, 2, 216, 217, 7, 54, 2, 2, 217, 40, 3, 2, 2, 2, 218, 121 | 219, 7, 71, 2, 2, 219, 220, 7, 80, 2, 2, 220, 221, 7, 87, 2, 2, 221, 222, 122 | 7, 79, 2, 2, 222, 42, 3, 2, 2, 2, 223, 224, 7, 85, 2, 2, 224, 225, 7, 86, 123 | 2, 2, 225, 226, 7, 84, 2, 2, 226, 227, 7, 75, 2, 2, 227, 228, 7, 80, 2, 124 | 2, 228, 229, 7, 73, 2, 2, 229, 44, 3, 2, 2, 2, 230, 231, 7, 75, 2, 2, 231, 125 | 232, 7, 80, 2, 2, 232, 46, 3, 2, 2, 2, 233, 234, 7, 69, 2, 2, 234, 235, 126 | 7, 81, 2, 2, 235, 236, 7, 80, 2, 2, 236, 237, 7, 86, 2, 2, 237, 238, 7, 127 | 67, 2, 2, 238, 239, 7, 75, 2, 2, 239, 240, 7, 80, 2, 2, 240, 241, 7, 85, 128 | 2, 2, 241, 48, 3, 2, 2, 2, 242, 243, 7, 62, 2, 2, 243, 50, 3, 2, 2, 2, 129 | 244, 245, 7, 64, 2, 2, 245, 52, 3, 2, 2, 2, 246, 247, 7, 63, 2, 2, 247, 130 | 54, 3, 2, 2, 2, 248, 
249, 7, 62, 2, 2, 249, 250, 7, 63, 2, 2, 250, 56, 131 | 3, 2, 2, 2, 251, 252, 7, 64, 2, 2, 252, 253, 7, 63, 2, 2, 253, 58, 3, 2, 132 | 2, 2, 254, 255, 5, 61, 31, 2, 255, 257, 7, 48, 2, 2, 256, 258, 5, 61, 31, 133 | 2, 257, 256, 3, 2, 2, 2, 257, 258, 3, 2, 2, 2, 258, 260, 3, 2, 2, 2, 259, 134 | 261, 5, 63, 32, 2, 260, 259, 3, 2, 2, 2, 260, 261, 3, 2, 2, 2, 261, 271, 135 | 3, 2, 2, 2, 262, 263, 5, 61, 31, 2, 263, 264, 5, 63, 32, 2, 264, 271, 3, 136 | 2, 2, 2, 265, 266, 7, 48, 2, 2, 266, 268, 5, 61, 31, 2, 267, 269, 5, 63, 137 | 32, 2, 268, 267, 3, 2, 2, 2, 268, 269, 3, 2, 2, 2, 269, 271, 3, 2, 2, 2, 138 | 270, 254, 3, 2, 2, 2, 270, 262, 3, 2, 2, 2, 270, 265, 3, 2, 2, 2, 271, 139 | 60, 3, 2, 2, 2, 272, 274, 5, 65, 33, 2, 273, 272, 3, 2, 2, 2, 274, 275, 140 | 3, 2, 2, 2, 275, 273, 3, 2, 2, 2, 275, 276, 3, 2, 2, 2, 276, 62, 3, 2, 141 | 2, 2, 277, 279, 9, 2, 2, 2, 278, 280, 9, 3, 2, 2, 279, 278, 3, 2, 2, 2, 142 | 279, 280, 3, 2, 2, 2, 280, 281, 3, 2, 2, 2, 281, 282, 5, 61, 31, 2, 282, 143 | 64, 3, 2, 2, 2, 283, 284, 9, 4, 2, 2, 284, 66, 3, 2, 2, 2, 285, 290, 7, 144 | 36, 2, 2, 286, 289, 5, 69, 35, 2, 287, 289, 10, 5, 2, 2, 288, 286, 3, 2, 145 | 2, 2, 288, 287, 3, 2, 2, 2, 289, 292, 3, 2, 2, 2, 290, 288, 3, 2, 2, 2, 146 | 290, 291, 3, 2, 2, 2, 291, 293, 3, 2, 2, 2, 292, 290, 3, 2, 2, 2, 293, 147 | 294, 7, 36, 2, 2, 294, 68, 3, 2, 2, 2, 295, 298, 7, 94, 2, 2, 296, 299, 148 | 9, 6, 2, 2, 297, 299, 5, 71, 36, 2, 298, 296, 3, 2, 2, 2, 298, 297, 3, 149 | 2, 2, 2, 299, 70, 3, 2, 2, 2, 300, 301, 7, 119, 2, 2, 301, 302, 5, 73, 150 | 37, 2, 302, 303, 5, 73, 37, 2, 303, 304, 5, 73, 37, 2, 304, 305, 5, 73, 151 | 37, 2, 305, 72, 3, 2, 2, 2, 306, 307, 9, 7, 2, 2, 307, 74, 3, 2, 2, 2, 152 | 308, 317, 7, 50, 2, 2, 309, 313, 9, 8, 2, 2, 310, 312, 9, 4, 2, 2, 311, 153 | 310, 3, 2, 2, 2, 312, 315, 3, 2, 2, 2, 313, 311, 3, 2, 2, 2, 313, 314, 154 | 3, 2, 2, 2, 314, 317, 3, 2, 2, 2, 315, 313, 3, 2, 2, 2, 316, 308, 3, 2, 155 | 2, 2, 316, 309, 3, 2, 2, 2, 317, 76, 3, 2, 2, 2, 318, 320, 9, 2, 2, 
2, 156 | 319, 321, 9, 3, 2, 2, 320, 319, 3, 2, 2, 2, 320, 321, 3, 2, 2, 2, 321, 157 | 322, 3, 2, 2, 2, 322, 323, 5, 75, 38, 2, 323, 78, 3, 2, 2, 2, 324, 328, 158 | 9, 9, 2, 2, 325, 327, 9, 10, 2, 2, 326, 325, 3, 2, 2, 2, 327, 330, 3, 2, 159 | 2, 2, 328, 326, 3, 2, 2, 2, 328, 329, 3, 2, 2, 2, 329, 80, 3, 2, 2, 2, 160 | 330, 328, 3, 2, 2, 2, 331, 333, 9, 11, 2, 2, 332, 331, 3, 2, 2, 2, 333, 161 | 334, 3, 2, 2, 2, 334, 332, 3, 2, 2, 2, 334, 335, 3, 2, 2, 2, 335, 336, 162 | 3, 2, 2, 2, 336, 337, 8, 41, 2, 2, 337, 82, 3, 2, 2, 2, 17, 2, 257, 260, 163 | 268, 270, 275, 279, 288, 290, 298, 313, 316, 320, 328, 334, 3, 8, 2, 2, 164 | } 165 | 166 | var lexerDeserializer = antlr.NewATNDeserializer(nil) 167 | var lexerAtn = lexerDeserializer.DeserializeFromUInt16(serializedLexerAtn) 168 | 169 | var lexerChannelNames = []string{ 170 | "DEFAULT_TOKEN_CHANNEL", "HIDDEN", 171 | } 172 | 173 | var lexerModeNames = []string{ 174 | "DEFAULT_MODE", 175 | } 176 | 177 | var lexerLiteralNames = []string{ 178 | "", "'IDX.CREATE'", "'SCHEMA'", "'IDX.DESTROY'", "'IDX.INSERT'", "'IDX.DEL'", 179 | "'IDX.SELECT'", "'QUERY'", "'WHERE'", "'ORDERBY'", "'LIMIT'", "'['", "','", 180 | "']'", "'UINT8'", "'UINT16'", "'UINT32'", "'UINT64'", "'FLOAT32'", "'FLOAT64'", 181 | "'ENUM'", "'STRING'", "'IN'", "'CONTAINS'", "'<'", "'>'", "'='", "'<='", 182 | "'>='", 183 | } 184 | 185 | var lexerSymbolicNames = []string{ 186 | "", "", "", "", "", "", "", "", "", "", "", "", "", "", "K_UINT8", "K_UINT16", 187 | "K_UINT32", "K_UINT64", "K_FLOAT32", "K_FLOAT64", "K_ENUM", "K_STRING", 188 | "K_IN", "K_CONTAINS", "K_LT", "K_BT", "K_EQ", "K_LE", "K_BE", "FLOAT_LIT", 189 | "STRING", "INT", "IDENTIFIER", "WS", 190 | } 191 | 192 | var lexerRuleNames = []string{ 193 | "T__0", "T__1", "T__2", "T__3", "T__4", "T__5", "T__6", "T__7", "T__8", 194 | "T__9", "T__10", "T__11", "T__12", "K_UINT8", "K_UINT16", "K_UINT32", "K_UINT64", 195 | "K_FLOAT32", "K_FLOAT64", "K_ENUM", "K_STRING", "K_IN", "K_CONTAINS", "K_LT", 196 | "K_BT", 
"K_EQ", "K_LE", "K_BE", "FLOAT_LIT", "DECIMALS", "EXPONENT", "DECIMAL_DIGIT", 197 | "STRING", "ESC", "UNICODE", "HEX", "INT", "EXP", "IDENTIFIER", "WS", 198 | } 199 | 200 | type CQLLexer struct { 201 | *antlr.BaseLexer 202 | channelNames []string 203 | modeNames []string 204 | // TODO: EOF string 205 | } 206 | 207 | var lexerDecisionToDFA = make([]*antlr.DFA, len(lexerAtn.DecisionToState)) 208 | 209 | func init() { 210 | for index, ds := range lexerAtn.DecisionToState { 211 | lexerDecisionToDFA[index] = antlr.NewDFA(ds, index) 212 | } 213 | } 214 | 215 | func NewCQLLexer(input antlr.CharStream) *CQLLexer { 216 | 217 | l := new(CQLLexer) 218 | 219 | l.BaseLexer = antlr.NewBaseLexer(input) 220 | l.Interpreter = antlr.NewLexerATNSimulator(l, lexerAtn, lexerDecisionToDFA, antlr.NewPredictionContextCache()) 221 | 222 | l.channelNames = lexerChannelNames 223 | l.modeNames = lexerModeNames 224 | l.RuleNames = lexerRuleNames 225 | l.LiteralNames = lexerLiteralNames 226 | l.SymbolicNames = lexerSymbolicNames 227 | l.GrammarFileName = "CQL.g4" 228 | // TODO: l.EOF = antlr.TokenEOF 229 | 230 | return l 231 | } 232 | 233 | // CQLLexer tokens. 
234 | const ( 235 | CQLLexerT__0 = 1 236 | CQLLexerT__1 = 2 237 | CQLLexerT__2 = 3 238 | CQLLexerT__3 = 4 239 | CQLLexerT__4 = 5 240 | CQLLexerT__5 = 6 241 | CQLLexerT__6 = 7 242 | CQLLexerT__7 = 8 243 | CQLLexerT__8 = 9 244 | CQLLexerT__9 = 10 245 | CQLLexerT__10 = 11 246 | CQLLexerT__11 = 12 247 | CQLLexerT__12 = 13 248 | CQLLexerK_UINT8 = 14 249 | CQLLexerK_UINT16 = 15 250 | CQLLexerK_UINT32 = 16 251 | CQLLexerK_UINT64 = 17 252 | CQLLexerK_FLOAT32 = 18 253 | CQLLexerK_FLOAT64 = 19 254 | CQLLexerK_ENUM = 20 255 | CQLLexerK_STRING = 21 256 | CQLLexerK_IN = 22 257 | CQLLexerK_CONTAINS = 23 258 | CQLLexerK_LT = 24 259 | CQLLexerK_BT = 25 260 | CQLLexerK_EQ = 26 261 | CQLLexerK_LE = 27 262 | CQLLexerK_BE = 28 263 | CQLLexerFLOAT_LIT = 29 264 | CQLLexerSTRING = 30 265 | CQLLexerINT = 31 266 | CQLLexerIDENTIFIER = 32 267 | CQLLexerWS = 33 268 | ) 269 | -------------------------------------------------------------------------------- /indexer.go: -------------------------------------------------------------------------------- 1 | package indexer 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "sort" 8 | "sync" 9 | "sync/atomic" 10 | 11 | "github.com/davecgh/go-spew/spew" 12 | "github.com/deepfabric/bkdtree" 13 | "github.com/deepfabric/indexer/cql" 14 | "github.com/deepfabric/indexer/wal" 15 | "github.com/deepfabric/indexer/wal/walpb" 16 | "github.com/pkg/errors" 17 | log "github.com/sirupsen/logrus" 18 | ) 19 | 20 | const ( 21 | // DefaultIndexerMaxOpN is the default value for Indexer.MaxOpN. 22 | DefaultIndexerMaxOpN = uint64(1000000) 23 | ) 24 | 25 | var ( 26 | ErrIdxExist = errors.New("index already exist") 27 | ErrIdxNotExist = errors.New("index not exist") 28 | ) 29 | 30 | //Indexer shall be singleton 31 | type Indexer struct { 32 | MainDir string //the main directory where stores all indices 33 | // Number of operations performed before performing a snapshot. 
34 | MaxOpN uint64 35 | 36 | rwlock sync.RWMutex //concurrent access of docProts, indices 37 | docProts map[string]*cql.DocumentWithIdx //index meta, need to persist 38 | indices map[string]*Index //index data, need to persist 39 | w *wal.WAL //WAL 40 | opN uint64 41 | entIndex uint64 42 | } 43 | 44 | //NewIndexer creates an Indexer. 45 | func NewIndexer(mainDir string, overwirte bool, enableWal bool) (ir *Indexer, err error) { 46 | ir = &Indexer{ 47 | MainDir: mainDir, 48 | MaxOpN: DefaultIndexerMaxOpN, 49 | entIndex: uint64(0), 50 | } 51 | if err = os.MkdirAll(mainDir, 0700); err != nil { 52 | err = errors.Wrap(err, "") 53 | return 54 | } 55 | if overwirte { 56 | ir.docProts = make(map[string]*cql.DocumentWithIdx) 57 | ir.indices = make(map[string]*Index) 58 | if err = ir.removeIndices(); err != nil { 59 | return 60 | } 61 | } else { 62 | if err = ir.Open(); err != nil { 63 | return 64 | } 65 | } 66 | if enableWal { 67 | walDir := filepath.Join(mainDir, "wal") 68 | if overwirte { 69 | if err = os.RemoveAll(walDir); err != nil { 70 | err = errors.Wrap(err, "") 71 | return 72 | } 73 | if ir.w, err = wal.Create(walDir); err != nil { 74 | return 75 | } 76 | } else { 77 | if err = ir.replayWal(); err != nil { 78 | return 79 | } 80 | } 81 | } 82 | return 83 | } 84 | 85 | //Destroy close and remove index files 86 | func (ir *Indexer) Destroy() (err error) { 87 | ir.rwlock.Lock() 88 | defer ir.rwlock.Unlock() 89 | if err = ir.close(); err != nil { 90 | return 91 | } 92 | if err = ir.removeIndices(); err != nil { 93 | return 94 | } 95 | return 96 | } 97 | 98 | //Open opens all indices. Assumes ir.MainDir is already populated. 
99 | func (ir *Indexer) Open() (err error) { 100 | ir.rwlock.Lock() 101 | err = ir.open() 102 | ir.rwlock.Unlock() 103 | return 104 | } 105 | 106 | //Open opens all indices without holding the lock 107 | func (ir *Indexer) open() (err error) { 108 | if ir.indices != nil || ir.docProts != nil { 109 | panic("indexer already open") 110 | } 111 | ir.docProts = make(map[string]*cql.DocumentWithIdx) 112 | ir.indices = make(map[string]*Index) 113 | if err = ir.readMeta(); err != nil { 114 | return 115 | } 116 | var ind *Index 117 | for name, docProt := range ir.docProts { 118 | if ind, err = NewIndexExt(ir.MainDir, docProt.Index); err != nil { 119 | return 120 | } 121 | ir.indices[name] = ind 122 | } 123 | if ir.w != nil { 124 | if err = ir.replayWal(); err != nil { 125 | return 126 | } 127 | } 128 | return 129 | } 130 | 131 | // Close close indexer 132 | func (ir *Indexer) Close() (err error) { 133 | ir.rwlock.Lock() 134 | err = ir.close() 135 | ir.rwlock.Unlock() 136 | return 137 | } 138 | 139 | // Close close indexer without holding the lock 140 | func (ir *Indexer) close() (err error) { 141 | for _, ind := range ir.indices { 142 | if err = ind.Close(); err != nil { 143 | return 144 | } 145 | } 146 | if ir.w != nil { 147 | if err = ir.w.Close(true); err != nil { 148 | return 149 | } 150 | } 151 | ir.indices = nil 152 | ir.docProts = nil 153 | return 154 | } 155 | 156 | // Sync synchronizes index to disk 157 | func (ir *Indexer) Sync() (err error) { 158 | ir.rwlock.Lock() 159 | err = ir.sync() 160 | ir.rwlock.Unlock() 161 | return 162 | } 163 | 164 | // sync synchronizes index to disk without holding the lock 165 | func (ir *Indexer) sync() (err error) { 166 | for _, ind := range ir.indices { 167 | if err = ind.Sync(); err != nil { 168 | return 169 | } 170 | } 171 | if ir.w != nil { 172 | if err = ir.w.CompactAll(); err != nil { 173 | return 174 | } 175 | } 176 | return 177 | } 178 | 179 | func (ir *Indexer) replayWal() (err error) { 180 | var w *wal.WAL 181 | walDir := 
filepath.Join(ir.MainDir, "wal") 182 | _, err = os.Stat(walDir) 183 | if err != nil { 184 | if !os.IsNotExist(err) { 185 | err = errors.Wrap(err, "") 186 | return 187 | } 188 | // wal directory doesn't exist 189 | if w, err = wal.Create(walDir); err != nil { 190 | return 191 | } 192 | ir.w = w 193 | return 194 | } 195 | //replay wal records 196 | if w, err = wal.OpenAtBeginning(walDir); err != nil { 197 | return 198 | } 199 | var ents []walpb.Entry 200 | if ents, err = w.ReadAll(); err != nil { 201 | return 202 | } 203 | doc := &cql.DocumentWithIdx{} 204 | dd := &cql.DocumentDel{} 205 | for _, ent := range ents { 206 | switch ent.Type { 207 | case 0: 208 | if err = doc.Unmarshal(ent.Data); err != nil { 209 | err = errors.Wrap(err, "") 210 | return 211 | } 212 | if err = ir.Insert(doc); err != nil { 213 | return 214 | } 215 | default: 216 | if err = dd.Unmarshal(ent.Data); err != nil { 217 | err = errors.Wrap(err, "") 218 | return 219 | } 220 | if _, err = ir.Del(dd.Index, dd.DocID); err != nil { 221 | return 222 | } 223 | } 224 | } 225 | log.Infof("replayed %v entries in %v", len(ents), walDir) 226 | ir.w = w 227 | if err = ir.sync(); err != nil { 228 | return 229 | } 230 | return 231 | } 232 | 233 | // GetDocProts dumps docProts 234 | func (ir *Indexer) GetDocProts() (sdump string) { 235 | ir.rwlock.RLock() 236 | sdump = spew.Sdump(ir.docProts) 237 | ir.rwlock.RUnlock() 238 | return 239 | } 240 | 241 | // GetDocProt returns docProt of given index 242 | func (ir *Indexer) GetDocProt(name string) (docProt *cql.DocumentWithIdx) { 243 | ir.rwlock.RLock() 244 | docProt, _ = ir.docProts[name] 245 | ir.rwlock.RUnlock() 246 | return 247 | } 248 | 249 | // CreateIndex creates index 250 | func (ir *Indexer) CreateIndex(docProt *cql.DocumentWithIdx) (err error) { 251 | ir.rwlock.Lock() 252 | err = ir.createIndex(docProt) 253 | ir.rwlock.Unlock() 254 | return 255 | } 256 | 257 | //DestroyIndex destroy given index 258 | func (ir *Indexer) DestroyIndex(name string) (err error) 
{ 259 | ir.rwlock.Lock() 260 | delete(ir.indices, name) 261 | delete(ir.docProts, name) 262 | err = ir.removeIndex(name) 263 | ir.rwlock.Unlock() 264 | return 265 | } 266 | 267 | //Insert executes CqlInsert 268 | func (ir *Indexer) Insert(doc *cql.DocumentWithIdx) (err error) { 269 | var ind *Index 270 | var found bool 271 | ir.rwlock.RLock() 272 | if ind, found = ir.indices[doc.Index]; !found { 273 | ir.rwlock.RUnlock() 274 | err = errors.Wrapf(ErrIdxNotExist, "index %v doesn't exist", doc.Index) 275 | return 276 | } 277 | if err = ind.Insert(doc); err != nil { 278 | ir.rwlock.RUnlock() 279 | return 280 | } 281 | if ir.w != nil { 282 | var data []byte 283 | if data, err = doc.Marshal(); err != nil { 284 | ir.rwlock.RUnlock() 285 | err = errors.Wrap(err, "") 286 | return 287 | } 288 | entIndex := atomic.AddUint64(&ir.entIndex, uint64(1)) 289 | e := &walpb.Entry{Index: entIndex, Data: data} 290 | if err = ir.w.SaveEntry(e); err != nil { 291 | ir.rwlock.RUnlock() 292 | return 293 | } 294 | } 295 | ir.rwlock.RUnlock() 296 | if err = ir._IncrementOpN(); err != nil { 297 | return 298 | } 299 | return 300 | } 301 | 302 | //Del executes CqlDel. It's allowed that the given index doesn't exist. 
303 | func (ir *Indexer) Del(idxName string, docID uint64) (found bool, err error) { 304 | var ind *Index 305 | var fnd bool 306 | ir.rwlock.RLock() 307 | if ind, fnd = ir.indices[idxName]; !fnd { 308 | ir.rwlock.RUnlock() 309 | err = errors.Wrapf(ErrIdxNotExist, "index %v doesn't exist", idxName) 310 | return 311 | } 312 | if found, err = ind.Del(docID); err != nil { 313 | ir.rwlock.RUnlock() 314 | return 315 | } 316 | if ir.w != nil { 317 | dd := cql.DocumentDel{ 318 | Index: idxName, 319 | DocID: docID, 320 | } 321 | var data []byte 322 | if data, err = dd.Marshal(); err != nil { 323 | ir.rwlock.RUnlock() 324 | err = errors.Wrap(err, "") 325 | return 326 | } 327 | entIndex := atomic.AddUint64(&ir.entIndex, uint64(1)) 328 | e := &walpb.Entry{Index: entIndex, Type: walpb.EntryType(1), Data: data} 329 | if err = ir.w.SaveEntry(e); err != nil { 330 | ir.rwlock.RUnlock() 331 | return 332 | } 333 | } 334 | ir.rwlock.RUnlock() 335 | if err = ir._IncrementOpN(); err != nil { 336 | return 337 | } 338 | return 339 | } 340 | 341 | // _IncrementOpN increase the operation count by one. 342 | // If the count exceeds the maximum allowed then a snapshot is performed. 343 | func (ir *Indexer) _IncrementOpN() (err error) { 344 | opN := atomic.AddUint64(&ir.opN, uint64(1)) 345 | if opN <= ir.MaxOpN { 346 | return 347 | } 348 | atomic.StoreUint64(&ir.opN, 0) 349 | err = ir.Sync() 350 | return 351 | } 352 | 353 | //Select executes CqlSelect. 354 | func (ir *Indexer) Select(q *cql.CqlSelect) (qr *QueryResult, err error) { 355 | var ind *Index 356 | var found bool 357 | ir.rwlock.RLock() 358 | if ind, found = ir.indices[q.Index]; !found { 359 | err = errors.Wrap(ErrIdxNotExist, q.Index) 360 | ir.rwlock.RUnlock() 361 | return 362 | } 363 | ir.rwlock.RUnlock() 364 | qr, err = ind.Select(q) 365 | return 366 | } 367 | 368 | //Summary returns a summary of all indices. 
369 | func (ir *Indexer) Summary() (sum string, err error) { 370 | var ind *Index 371 | var name string 372 | var cnt uint64 373 | ir.rwlock.RLock() 374 | defer ir.rwlock.RUnlock() 375 | for name, ind = range ir.indices { 376 | if cnt, err = ind.liveDocs.Count(); err != nil { 377 | return 378 | } 379 | sum += fmt.Sprintf("index %s contains %d documents\n", name, cnt) 380 | } 381 | return 382 | } 383 | 384 | // createIndex creates index without holding the lock 385 | func (ir *Indexer) createIndex(docProt *cql.DocumentWithIdx) (err error) { 386 | if curDocProt, found := ir.docProts[docProt.Index]; found { 387 | if isSameSchema(curDocProt, docProt) { 388 | return 389 | } 390 | //TODO: on line schema change 391 | log.Infof("indexer %v createIndex with the different schema, current one %+v, new one %+v", ir.MainDir, curDocProt, docProt) 392 | if err = ir.removeIndex(docProt.Index); err != nil { 393 | return 394 | } 395 | } 396 | if err = indexWriteConf(ir.MainDir, docProt); err != nil { 397 | return 398 | } 399 | var ind *Index 400 | if ind, err = NewIndex(docProt, ir.MainDir); err != nil { 401 | return 402 | } 403 | ir.indices[docProt.Index] = ind 404 | ir.docProts[docProt.Index] = docProt 405 | return 406 | } 407 | 408 | //WriteMeta persists Conf and DocProts to files. 409 | func (ir *Indexer) WriteMeta() (err error) { 410 | ir.rwlock.RLock() 411 | for _, docProt := range ir.docProts { 412 | if err = indexWriteConf(ir.MainDir, docProt); err != nil { 413 | ir.rwlock.RUnlock() 414 | return 415 | } 416 | } 417 | ir.rwlock.RUnlock() 418 | return 419 | } 420 | 421 | //readMeta parses Conf and DocProts from files. 
422 | func (ir *Indexer) readMeta() (err error) { 423 | var matches [][]string 424 | patt := `^index_(?P[^.]+)\.json$` 425 | if matches, err = bkdtree.FilepathGlob(ir.MainDir, patt); err != nil { 426 | return 427 | } 428 | for _, match := range matches { 429 | var doc cql.DocumentWithIdx 430 | if err = indexReadConf(ir.MainDir, match[1], &doc); err != nil { 431 | return 432 | } 433 | ir.docProts[match[1]] = &doc 434 | } 435 | return 436 | } 437 | 438 | func (ir *Indexer) removeIndices() (err error) { 439 | var matches [][]string 440 | patt := `^index_(?P[^.]+)\.json$` 441 | if matches, err = bkdtree.FilepathGlob(ir.MainDir, patt); err != nil { 442 | return 443 | } 444 | for _, match := range matches { 445 | if err = ir.removeIndex(match[1]); err != nil { 446 | return 447 | } 448 | } 449 | return 450 | } 451 | 452 | func (ir *Indexer) removeIndex(name string) (err error) { 453 | var fp string 454 | fp = filepath.Join(ir.MainDir, fmt.Sprintf("index_%s.json", name)) 455 | if err = os.Remove(fp); err != nil { 456 | err = errors.Wrap(err, "") 457 | return 458 | } 459 | fp = filepath.Join(ir.MainDir, name) 460 | if err = os.RemoveAll(fp); err != nil { 461 | err = errors.Wrap(err, "") 462 | } 463 | return 464 | } 465 | 466 | func (ir *Indexer) GetDocIDFragList() (numList []uint64) { 467 | ir.rwlock.RLock() 468 | numList = ir.getDocIDFragList() 469 | ir.rwlock.RUnlock() 470 | return 471 | } 472 | 473 | func (ir *Indexer) getDocIDFragList() (numList []uint64) { 474 | numList = []uint64{} 475 | seen := map[uint64]int{} 476 | for _, ind := range ir.indices { 477 | numList2 := ind.GetDocIDFragList() 478 | for _, num := range numList2 { 479 | seen[num] = 1 480 | } 481 | } 482 | for num := range seen { 483 | numList = append(numList, num) 484 | } 485 | sort.Slice(numList, func(i, j int) bool { return numList[i] < numList[j] }) 486 | return 487 | } 488 | 489 | func (ir *Indexer) CreateSnapshot(snapDir string) (numList []uint64, err error) { 490 | ir.rwlock.Lock() 491 | defer 
ir.rwlock.Unlock() 492 | if err = ir.sync(); err != nil { 493 | return 494 | } 495 | src := ir.MainDir 496 | dst := filepath.Join(snapDir, "index") 497 | if err = os.RemoveAll(dst); err != nil { 498 | return 499 | } 500 | if err = CopyDir(src, dst); err != nil { 501 | return 502 | } 503 | numList = ir.getDocIDFragList() 504 | return 505 | } 506 | 507 | func (ir *Indexer) ApplySnapshot(snapDir string) (err error) { 508 | ir.rwlock.Lock() 509 | defer ir.rwlock.Unlock() 510 | if err = ir.close(); err != nil { 511 | return 512 | } 513 | if err = os.RemoveAll(ir.MainDir); err != nil { 514 | return 515 | } 516 | src := filepath.Join(snapDir, "index") 517 | dst := ir.MainDir 518 | _, err = os.Stat(src) 519 | if os.IsNotExist(err) { 520 | log.Infof("snapshot source directory %v doesn't exist, treating it as an empty one", src) 521 | if err = os.MkdirAll(dst, 0700); err != nil { 522 | err = errors.Wrap(err, "") 523 | return 524 | } 525 | } else { 526 | if err = CopyDir(src, dst); err != nil { 527 | return 528 | } 529 | } 530 | if err = ir.open(); err != nil { 531 | return 532 | } 533 | log.Infof("applied snapshot %v, docProts %+v", src, ir.docProts) 534 | return 535 | } 536 | 537 | func isSameSchema(docProt1, docProt2 *cql.DocumentWithIdx) bool { 538 | if docProt1.Index != docProt2.Index || 539 | len(docProt1.Doc.UintProps) != len(docProt2.Doc.UintProps) || 540 | len(docProt1.Doc.EnumProps) != len(docProt2.Doc.EnumProps) || 541 | len(docProt1.Doc.StrProps) != len(docProt2.Doc.StrProps) { 542 | return false 543 | } 544 | for i := 0; i < len(docProt1.Doc.UintProps); i++ { 545 | uintProt1 := docProt1.Doc.UintProps[i] 546 | uintProt2 := docProt2.Doc.UintProps[i] 547 | if uintProt1.Name != uintProt2.Name || 548 | uintProt1.IsFloat != uintProt2.IsFloat || 549 | uintProt1.ValLen != uintProt2.ValLen { 550 | return false 551 | } 552 | } 553 | for i := 0; i < len(docProt1.Doc.EnumProps); i++ { 554 | enumProt1 := docProt1.Doc.EnumProps[i] 555 | enumProt2 := docProt2.Doc.EnumProps[i] 
556 | if enumProt1.Name != enumProt2.Name { 557 | return false 558 | } 559 | } 560 | for i := 0; i < len(docProt1.Doc.StrProps); i++ { 561 | strProt1 := docProt1.Doc.StrProps[i] 562 | strProt2 := docProt2.Doc.StrProps[i] 563 | if strProt1.Name != strProt2.Name { 564 | return false 565 | } 566 | } 567 | return true 568 | } 569 | -------------------------------------------------------------------------------- /wal/wal.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The etcd Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package wal 16 | 17 | import ( 18 | "hash/crc32" 19 | "io" 20 | "os" 21 | "path/filepath" 22 | "sync" 23 | "time" 24 | 25 | "github.com/coreos/etcd/pkg/fileutil" 26 | "github.com/coreos/etcd/pkg/pbutil" 27 | "github.com/deepfabric/indexer/wal/walpb" 28 | 29 | "github.com/pkg/errors" 30 | log "github.com/sirupsen/logrus" 31 | ) 32 | 33 | const ( 34 | metadataType int64 = iota + 1 35 | entryType 36 | stateType 37 | crcType 38 | snapshotType 39 | 40 | // warnSyncDuration is the amount of time allotted to an fsync before 41 | // logging a warning 42 | warnSyncDuration = time.Second 43 | ) 44 | 45 | var ( 46 | // SegmentSizeBytes is the preallocated size of each wal segment file. 47 | // The actual size might be larger than this. 
In general, the default 48 | // value should be used, but this is defined as an exported variable 49 | // so that tests can set a different segment size. 50 | SegmentSizeBytes int64 = 64 * 1000 * 1000 // 64MB 51 | 52 | ErrFileNotFound = errors.New("wal: file not found") 53 | ErrCRCMismatch = errors.New("wal: crc mismatch") 54 | crcTable = crc32.MakeTable(crc32.Castagnoli) 55 | ) 56 | 57 | // WAL is a logical representation of the stable storage. 58 | // WAL is either in read mode or append mode but not both. 59 | // A newly created WAL is in append mode, and ready for appending records. 60 | // A just opened WAL is in read mode, and ready for reading records. 61 | // The WAL will be ready for appending after reading out all the previous records. 62 | type WAL struct { 63 | dir string // the living directory of the underlay files 64 | 65 | // dirFile is a fd for the wal directory for syncing on Rename 66 | dirFile *os.File 67 | 68 | start walpb.Snapshot // snapshot to start reading 69 | decoder *decoder // decoder to decode records 70 | readClose func() error // closer for decode reader 71 | 72 | mu sync.Mutex 73 | enti uint64 // index of the last entry saved to the wal 74 | encoder *encoder // encoder to encode records 75 | 76 | tail *os.File //the tail segment 77 | walNames []string // the segment files the WAL holds (the name is increasing) 78 | fp *filePipeline 79 | } 80 | 81 | // Create creates a WAL ready for appending records. 
82 | func Create(dirpath string) (*WAL, error) { 83 | if Exist(dirpath) { 84 | return nil, os.ErrExist 85 | } 86 | 87 | // keep temporary wal directory so WAL initialization appears atomic 88 | tmpdirpath := filepath.Clean(dirpath) + ".tmp" 89 | if fileutil.Exist(tmpdirpath) { 90 | if err := os.RemoveAll(tmpdirpath); err != nil { 91 | return nil, err 92 | } 93 | } 94 | if err := fileutil.CreateDirAll(tmpdirpath); err != nil { 95 | return nil, err 96 | } 97 | 98 | p := filepath.Join(tmpdirpath, walName(0, 0)) 99 | f, err := fileutil.LockFile(p, os.O_WRONLY|os.O_CREATE, fileutil.PrivateFileMode) 100 | if err != nil { 101 | return nil, err 102 | } 103 | if _, err = f.Seek(0, io.SeekEnd); err != nil { 104 | return nil, err 105 | } 106 | if err = fileutil.Preallocate(f.File, SegmentSizeBytes, true); err != nil { 107 | return nil, err 108 | } 109 | 110 | w := &WAL{ 111 | dir: dirpath, 112 | walNames: make([]string, 0), 113 | } 114 | if w, err = w.renameWal(tmpdirpath); err != nil { 115 | return nil, err 116 | } 117 | 118 | // directory was renamed; sync parent dir to persist rename 119 | pdir, perr := fileutil.OpenDir(filepath.Dir(w.dir)) 120 | if perr != nil { 121 | return nil, perr 122 | } 123 | if perr = fileutil.Fsync(pdir); perr != nil { 124 | return nil, perr 125 | } 126 | if perr = pdir.Close(); err != nil { 127 | return nil, perr 128 | } 129 | 130 | w.encoder, err = newFileEncoder(f.File, 0) 131 | if err != nil { 132 | return nil, err 133 | } 134 | w.tail = f.File 135 | w.walNames = append(w.walNames, filepath.Join(dirpath, walName(0, 0))) 136 | if err = w.saveCrc(0); err != nil { 137 | return nil, err 138 | } 139 | 140 | return w, nil 141 | } 142 | 143 | func (w *WAL) renameWal(tmpdirpath string) (*WAL, error) { 144 | if err := os.RemoveAll(w.dir); err != nil { 145 | return nil, err 146 | } 147 | // On non-Windows platforms, hold the lock while renaming. 
Releasing 148 | // the lock and trying to reacquire it quickly can be flaky because 149 | // it's possible the process will fork to spawn a process while this is 150 | // happening. The fds are set up as close-on-exec by the Go runtime, 151 | // but there is a window between the fork and the exec where another 152 | // process holds the lock. 153 | if err := os.Rename(tmpdirpath, w.dir); err != nil { 154 | if _, ok := err.(*os.LinkError); ok { 155 | return w.renameWalUnlock(tmpdirpath) 156 | } 157 | return nil, err 158 | } 159 | w.fp = newFilePipeline(w.dir, SegmentSizeBytes) 160 | df, err := fileutil.OpenDir(w.dir) 161 | w.dirFile = df 162 | return w, err 163 | } 164 | 165 | func (w *WAL) renameWalUnlock(tmpdirpath string) (*WAL, error) { 166 | // rename of directory with locked files doesn't work on windows/cifs; 167 | // close the WAL to release the locks so the directory can be renamed. 168 | log.Infof("releasing file lock to rename %q to %q", tmpdirpath, w.dir) 169 | w.Close(false) 170 | if err := os.Rename(tmpdirpath, w.dir); err != nil { 171 | return nil, err 172 | } 173 | // reopen and relock 174 | newWAL, oerr := Open(w.dir, walpb.Snapshot{}) 175 | if oerr != nil { 176 | return nil, oerr 177 | } 178 | if _, err := newWAL.ReadAll(); err != nil { 179 | newWAL.Close(false) 180 | return nil, err 181 | } 182 | return newWAL, nil 183 | } 184 | 185 | // Open opens the WAL at the given snap. 186 | // The snap SHOULD have been previously saved to the WAL, or the following 187 | // ReadAll will fail. 188 | // The returned WAL is ready to read and the first record will be the one after 189 | // the given snap. The WAL cannot be appended to before reading out all of its 190 | // previous records. 
191 | func Open(dirpath string, snap walpb.Snapshot) (*WAL, error) { 192 | w, err := openAtIndex(dirpath, snap, true) 193 | if err != nil { 194 | return nil, err 195 | } 196 | if w.dirFile, err = fileutil.OpenDir(w.dir); err != nil { 197 | return nil, err 198 | } 199 | return w, nil 200 | } 201 | 202 | // OpenForRead only opens the wal files for read. 203 | // Write on a read only wal panics. 204 | func OpenForRead(dirpath string, snap walpb.Snapshot) (*WAL, error) { 205 | return openAtIndex(dirpath, snap, false) 206 | } 207 | 208 | func openAtIndex(dirpath string, snap walpb.Snapshot, write bool) (*WAL, error) { 209 | names, err := readWalNames(dirpath) 210 | if err != nil { 211 | return nil, err 212 | } 213 | 214 | nameIndex, ok := searchIndex(names, snap.Index) 215 | if !ok || !isValidSeq(names[nameIndex:]) { 216 | return nil, ErrFileNotFound 217 | } 218 | 219 | // open the wal files 220 | rcs := make([]io.ReadCloser, 0) 221 | rs := make([]io.Reader, 0) 222 | walNames := make([]string, 0) 223 | for _, name := range names[nameIndex:] { 224 | p := filepath.Join(dirpath, name) 225 | if write { 226 | l, err := fileutil.TryLockFile(p, os.O_RDWR, fileutil.PrivateFileMode) 227 | if err != nil { 228 | closeAll(rcs...) 229 | return nil, err 230 | } 231 | rcs = append(rcs, l) 232 | } else { 233 | rf, err := os.OpenFile(p, os.O_RDONLY, fileutil.PrivateFileMode) 234 | if err != nil { 235 | closeAll(rcs...) 236 | return nil, err 237 | } 238 | rcs = append(rcs, rf) 239 | } 240 | rs = append(rs, rcs[len(rcs)-1]) 241 | walNames = append(walNames, p) 242 | } 243 | 244 | closer := func() error { return closeAll(rcs...) 
} 245 | 246 | // create a WAL ready for reading 247 | w := &WAL{ 248 | dir: dirpath, 249 | start: snap, 250 | decoder: newDecoder(rs...), 251 | readClose: closer, 252 | walNames: walNames, 253 | } 254 | 255 | if write { 256 | w.fp = newFilePipeline(w.dir, SegmentSizeBytes) 257 | } 258 | 259 | return w, nil 260 | } 261 | 262 | // OpenAtBeginning opens the WAL at the beginning. 263 | // The WAL cannot be appended to before reading out all of its 264 | // previous records. 265 | func OpenAtBeginning(dirpath string) (*WAL, error) { 266 | names, err := readWalNames(dirpath) 267 | if err != nil && errors.Cause(err) != ErrFileNotFound { 268 | return nil, err 269 | } 270 | nameIndex := 0 271 | 272 | // open the wal files 273 | rcs := make([]io.ReadCloser, 0) 274 | rs := make([]io.Reader, 0) 275 | walNames := make([]string, 0) 276 | for _, name := range names[nameIndex:] { 277 | p := filepath.Join(dirpath, name) 278 | l, err := fileutil.TryLockFile(p, os.O_RDWR, fileutil.PrivateFileMode) 279 | if err != nil { 280 | closeAll(rcs...) 281 | return nil, errors.Wrap(err, "") 282 | } 283 | rcs = append(rcs, l) 284 | rs = append(rs, rcs[len(rcs)-1]) 285 | walNames = append(walNames, p) 286 | } 287 | 288 | closer := func() error { return closeAll(rcs...) } 289 | 290 | // create a WAL ready for reading 291 | w := &WAL{ 292 | dir: dirpath, 293 | start: walpb.Snapshot{}, 294 | decoder: newDecoder(rs...), 295 | readClose: closer, 296 | walNames: walNames, 297 | } 298 | 299 | w.fp = newFilePipeline(w.dir, SegmentSizeBytes) 300 | if w.dirFile, err = fileutil.OpenDir(w.dir); err != nil { 301 | return nil, errors.Wrap(err, "") 302 | } 303 | return w, nil 304 | } 305 | 306 | // ReadAll reads out records of the current WAL. 307 | // If opened in write mode, it must read out all records until EOF. Or an error 308 | // will be returned. 309 | // If opened in read mode, it will try to read all records if possible. 
310 | // If it cannot read out the expected snap, it will return ErrSnapshotNotFound. 311 | // If loaded snap doesn't match with the expected one, it will return 312 | // all the records and error ErrSnapshotMismatch. 313 | // TODO: detect not-last-snap error. 314 | // TODO: maybe loose the checking of match. 315 | // After ReadAll, the WAL will be ready for appending new records. 316 | func (w *WAL) ReadAll() (ents []walpb.Entry, err error) { 317 | w.mu.Lock() 318 | defer w.mu.Unlock() 319 | 320 | rec := &walpb.Record{} 321 | decoder := w.decoder 322 | 323 | for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) { 324 | switch rec.Type { 325 | case entryType: 326 | e := mustUnmarshalEntry(rec.Data) 327 | if e.Index > w.start.Index { 328 | ents = append(ents[:e.Index-w.start.Index-1], e) 329 | } 330 | w.enti = e.Index 331 | case crcType: 332 | crc := decoder.crc.Sum32() 333 | // current crc of decoder must match the crc of the record. 334 | // do no need to match 0 crc, since the decoder is a new one at this case. 335 | if crc != 0 && rec.Validate(crc) != nil { 336 | err = errors.Wrap(ErrCRCMismatch, "") 337 | return 338 | } 339 | decoder.updateCRC(rec.Crc) 340 | default: 341 | } 342 | } 343 | cause := errors.Cause(err) 344 | switch w.fp { 345 | case nil: 346 | // We do not have to read out all entries in read mode. 347 | // The last record maybe a partial written one, so 348 | // ErrunexpectedEOF might be returned. 349 | if cause != io.EOF && cause != io.ErrUnexpectedEOF { 350 | return 351 | } 352 | default: 353 | // We must read all of the entries if WAL is opened in write mode. 
354 | if cause != io.EOF { 355 | return 356 | } 357 | } 358 | err = nil 359 | 360 | if w.fp != nil { 361 | if err = w.advance(w.decoder.lastCRC()); err != nil { 362 | return 363 | } 364 | } 365 | // close decoder, disable reading 366 | if w.readClose != nil { 367 | w.readClose() 368 | w.readClose = nil 369 | } 370 | w.decoder = nil 371 | return 372 | } 373 | 374 | // cut closes current file written and creates a new one ready to append. 375 | // cut first creates a temp wal file and writes necessary headers into it. 376 | // Then cut atomically rename temp wal file to a wal file. 377 | func (w *WAL) cut() error { 378 | // close old wal file; truncate to avoid wasting space if an early cut 379 | off, serr := w.tail.Seek(0, io.SeekCurrent) 380 | if serr != nil { 381 | return serr 382 | } 383 | if err := w.tail.Truncate(off); err != nil { 384 | return err 385 | } 386 | if err := w.Sync(); err != nil { 387 | return err 388 | } 389 | 390 | return w.advance(w.encoder.crc.Sum32()) 391 | } 392 | 393 | // CompactAll remove all entries. 394 | func (w *WAL) CompactAll() (err error) { 395 | w.mu.Lock() 396 | defer w.mu.Unlock() 397 | if err = w.clean(); err != nil { 398 | return 399 | } 400 | 401 | err = w.advance(w.encoder.crc.Sum32()) 402 | return 403 | } 404 | 405 | // CompactAll remove all entries. 
406 | func (w *WAL) clean() (err error) { 407 | for _, name := range w.walNames { 408 | if err = os.Remove(name); err != nil { 409 | err = errors.Wrap(err, "") 410 | return 411 | } 412 | } 413 | w.walNames = w.walNames[0:0] 414 | return 415 | } 416 | 417 | func (w *WAL) advance(prevCrc uint32) (err error) { 418 | if w.tail != nil { 419 | if err = w.tail.Close(); err != nil { 420 | err = errors.Wrap(err, "") 421 | return 422 | } 423 | w.tail = nil 424 | } 425 | fpath := filepath.Join(w.dir, walName(w.seq()+1, w.enti+1)) 426 | 427 | // create a temp wal file with name sequence + 1, or truncate the existing one 428 | newTail, err := w.fp.Open() 429 | if err != nil { 430 | return err 431 | } 432 | 433 | // update writer and save the previous crc 434 | w.tail = newTail.File 435 | w.encoder, err = newFileEncoder(w.tail, prevCrc) 436 | if err != nil { 437 | return err 438 | } 439 | if err = w.saveCrc(prevCrc); err != nil { 440 | return err 441 | } 442 | // atomically move temp wal file to wal file 443 | if err = w.Sync(); err != nil { 444 | return err 445 | } 446 | 447 | var off int64 448 | off, err = newTail.Seek(0, io.SeekCurrent) 449 | if err != nil { 450 | return errors.Wrap(err, "") 451 | } 452 | 453 | if err = os.Rename(newTail.Name(), fpath); err != nil { 454 | return errors.Wrap(err, "") 455 | } 456 | if err = fileutil.Fsync(w.dirFile); err != nil { 457 | return errors.Wrap(err, "") 458 | } 459 | 460 | // reopen newTail with its new path so calls to Name() match the wal filename format 461 | newTail.Close() 462 | 463 | if newTail, err = fileutil.LockFile(fpath, os.O_WRONLY, fileutil.PrivateFileMode); err != nil { 464 | return errors.Wrap(err, "") 465 | } 466 | if _, err = newTail.Seek(off, io.SeekStart); err != nil { 467 | return errors.Wrap(err, "") 468 | } 469 | 470 | w.tail = newTail.File 471 | w.walNames = append(w.walNames, newTail.Name()) 472 | 473 | w.encoder, err = newFileEncoder(w.tail, prevCrc) 474 | if err != nil { 475 | return errors.Wrap(err, "") 476 
| } 477 | 478 | log.Infof("segmented wal file %v is created", fpath) 479 | return nil 480 | } 481 | 482 | func (w *WAL) Sync() error { 483 | if w.encoder != nil { 484 | if err := w.encoder.flush(); err != nil { 485 | return err 486 | } 487 | } 488 | err := fileutil.Fdatasync(w.tail) 489 | return err 490 | } 491 | 492 | func (w *WAL) Close(clean bool) (err error) { 493 | w.mu.Lock() 494 | defer w.mu.Unlock() 495 | 496 | if w.fp != nil { 497 | w.fp.Close() 498 | w.fp = nil 499 | } 500 | 501 | if w.readClose != nil { 502 | if err := w.readClose(); err != nil { 503 | return err 504 | } 505 | w.readClose = nil 506 | } 507 | 508 | if w.tail != nil { 509 | if err := w.Sync(); err != nil { 510 | return err 511 | } 512 | if err := w.tail.Close(); err != nil { 513 | return err 514 | } 515 | w.tail = nil 516 | } 517 | 518 | if err = w.dirFile.Close(); err != nil { 519 | return 520 | } 521 | 522 | if clean { 523 | if err = w.clean(); err != nil { 524 | return 525 | } 526 | } 527 | return 528 | } 529 | 530 | // SaveEntry saves an entry, and always sync the wal 531 | func (w *WAL) SaveEntry(e *walpb.Entry) (err error) { 532 | w.mu.Lock() 533 | defer w.mu.Unlock() 534 | if err = w.saveEntry(e); err != nil { 535 | return 536 | } 537 | if w.encoder.curOff >= SegmentSizeBytes { 538 | if err = w.cut(); err != nil { 539 | return 540 | } 541 | } else { 542 | if err = w.Sync(); err != nil { 543 | return 544 | } 545 | } 546 | return 547 | } 548 | 549 | func (w *WAL) saveEntry(e *walpb.Entry) (err error) { 550 | // TODO: add MustMarshalTo to reduce one allocation. 
551 | b := pbutil.MustMarshal(e) 552 | rec := &walpb.Record{Type: entryType, Data: b} 553 | if err = w.encoder.encode(rec); err != nil { 554 | return 555 | } 556 | w.enti = e.Index 557 | return 558 | } 559 | 560 | func (w *WAL) Save(ents []walpb.Entry) (err error) { 561 | w.mu.Lock() 562 | defer w.mu.Unlock() 563 | 564 | // short cut, do not call sync 565 | if len(ents) == 0 { 566 | return 567 | } 568 | 569 | // TODO(xiangli): no more reference operator 570 | for i := range ents { 571 | if err = w.saveEntry(&ents[i]); err != nil { 572 | return 573 | } 574 | } 575 | 576 | if w.encoder.curOff >= SegmentSizeBytes { 577 | if err = w.cut(); err != nil { 578 | return 579 | } 580 | } else { 581 | if err = w.Sync(); err != nil { 582 | return 583 | } 584 | } 585 | return 586 | } 587 | 588 | func (w *WAL) saveCrc(prevCrc uint32) (err error) { 589 | err = w.encoder.encode(&walpb.Record{Type: crcType, Crc: prevCrc}) 590 | return 591 | } 592 | 593 | func (w *WAL) seq() uint64 { 594 | num := len(w.walNames) 595 | if num == 0 { 596 | return 0 597 | } 598 | seq, _, err := parseWalName(filepath.Base(w.walNames[num-1])) 599 | if err != nil { 600 | log.Fatalf("bad wal name %s (%v)", w.walNames[num-1], err) 601 | } 602 | return seq 603 | } 604 | 605 | func closeAll(rcs ...io.ReadCloser) error { 606 | for _, f := range rcs { 607 | if err := f.Close(); err != nil { 608 | return err 609 | } 610 | } 611 | return nil 612 | } 613 | --------------------------------------------------------------------------------