├── core ├── util │ ├── automaton │ │ ├── README.md │ │ ├── levenshtein.go │ │ ├── slices.go │ │ ├── operationsrun.go │ │ ├── operationsrun_test.go │ │ ├── bitmixer.go │ │ ├── limitedfinitestringsiterator.go │ │ ├── operations_test.go │ │ ├── finitestringsiterator_test.go │ │ ├── transition.go │ │ ├── byterunautomaton.go │ │ ├── regexp_test.go │ │ └── intsrefbuilder.go │ ├── bytesref │ │ ├── array_test.go │ │ ├── allocator_test.go │ │ ├── blockpool_test.go │ │ ├── builder_test.go │ │ ├── bytesref_test.go │ │ └── startarray_test.go │ ├── packed │ │ ├── format_test.go │ │ ├── mutable_test.go │ │ ├── writer_test.go │ │ ├── bulkoperation_test.go │ │ ├── directreader_test.go │ │ ├── packedwriter_test.go │ │ ├── pagedmutable_test.go │ │ ├── directpackedreader_test.go │ │ ├── fixsizepagedmutable_test.go │ │ ├── abstractblockpackedwriter_test.go │ │ ├── directpacked64singleblockreader_test.go │ │ ├── bulkoperation │ │ │ ├── bulkoperationpackedsingleblock_test.go │ │ │ ├── bulkoperationpacked3_test.go │ │ │ ├── bulkoperationpacked4_test.go │ │ │ ├── bulkoperationpacked5_test.go │ │ │ ├── bulkoperationpacked6_test.go │ │ │ ├── bulkoperationpacked7_test.go │ │ │ ├── bulkoperationpacked8_test.go │ │ │ ├── bulkoperationpacked9_test.go │ │ │ ├── bulkoperationpacked10_test.go │ │ │ ├── bulkoperationpacked11_test.go │ │ │ ├── bulkoperationpacked12_test.go │ │ │ ├── bulkoperationpacked13_test.go │ │ │ ├── bulkoperationpacked14_test.go │ │ │ ├── bulkoperationpacked15_test.go │ │ │ ├── bulkoperationpacked16_test.go │ │ │ ├── bulkoperationpacked17_test.go │ │ │ ├── bulkoperationpacked18_test.go │ │ │ ├── bulkoperationpacked19_test.go │ │ │ ├── bulkoperationpacked20_test.go │ │ │ ├── bulkoperationpacked21_test.go │ │ │ ├── bulkoperationpacked22_test.go │ │ │ ├── bulkoperationpacked23_test.go │ │ │ ├── bulkoperationpacked24_test.go │ │ │ ├── bulkoperationpacked2_test.go │ │ │ ├── bulkoperationpacked8.go │ │ │ ├── bulkoperationpacked4.go │ │ │ ├── bulkoperationpacked16.go │ │ │ ├── 
bulkoperationpacked2.go │ │ │ ├── bulkoperationpacked1.go │ │ │ └── bulkoperationpacked24.go │ │ ├── reader.go │ │ ├── packed64singleblock_test.go │ │ ├── packedlongvalues_test.go │ │ ├── deltapackedlongvalues_test.go │ │ ├── monotoniclongvalues_test.go │ │ └── packedreaderiterator.go │ ├── bkd │ │ ├── test │ │ │ └── 1d.bkd │ │ ├── README.md │ │ ├── heappointwriter_test.go │ │ ├── intersect_state.go │ │ └── docidswriter_test.go │ ├── selector │ │ ├── selector.go │ │ └── mock_test.go │ ├── bitmixer │ │ └── bitmixer.go │ ├── fst │ │ ├── errors.go │ │ ├── fstenum.go │ │ ├── fst_utils_test.go │ │ └── utils.go │ ├── structure │ │ ├── box.go │ │ ├── iterator.go │ │ ├── priorityqueue_test.go │ │ ├── iterator_test.go │ │ └── hashmap.go │ ├── sorter │ │ ├── sorter.go │ │ ├── pdqsort_test.go │ │ ├── pdqsort.go │ │ └── radixsort_test.go │ ├── stringhelper_test.go │ ├── closer.go │ ├── hash │ │ └── hash.go │ ├── attribute │ │ ├── bytes_test.go │ │ ├── factory_test.go │ │ ├── attribute.go │ │ ├── factory.go │ │ └── source.go │ ├── compress │ │ └── lz4.go │ ├── bits.go │ ├── version │ │ └── option.go │ ├── selector.go │ ├── zigzag │ │ └── zigzag.go │ ├── array │ │ └── util.go │ ├── ints │ │ └── allocator.go │ ├── refcount.go │ ├── smallfloat.go │ └── numeric │ │ └── numeric_utils_test.go ├── store │ ├── directory_test.go │ ├── flushinfo_test.go │ ├── fakehash32.go │ ├── indexio_test.go │ ├── nolockfactory_test.go │ ├── file_windows.go │ ├── file_linux.go │ ├── file_darwin.go │ ├── bytesref_test.go │ ├── flushinfo.go │ ├── mergeinfo.go │ ├── nolockfactory.go │ ├── utils_test.go │ ├── utils.go │ ├── inputstream_test.go │ ├── outputstream_test.go │ ├── outputstream.go │ ├── ramoutputstream_test.go │ ├── bytesref.go │ ├── bytes_test.go │ ├── mmap.go │ ├── inputstream.go │ └── ramfile.go ├── codecs │ ├── perfieldpostingsformat.go │ ├── compressing │ │ ├── matchingreaders.go │ │ ├── interface.go │ │ └── termvectorsformat.go │ ├── perfieldmergestate.go │ ├── blocktree │ │ ├── stats.go 
│ │ └── compressionalgorithm.go │ ├── perfielddocvaluesformat.go │ ├── lucene50 │ │ └── termvectorsformat.go │ ├── lucene80 │ │ ├── normsproducer.go │ │ ├── normsformat.go │ │ └── normsconsumer.go │ ├── lucene86 │ │ └── pointsformat.go │ └── types │ │ └── postingsreader.go ├── search │ ├── prefixquery.go │ ├── termrangequery.go │ ├── disipriorityqueue.go │ ├── scoreanddoc.go │ ├── scorer.go │ ├── bitdocidset.go │ ├── spans │ │ └── spancollector.go │ ├── topfielddocs.go │ ├── collectormanager.go │ ├── blockmaxdisi.go │ ├── disjunctionscorer.go │ ├── bulkscorer.go │ ├── docvaluesfieldexistsquery.go │ ├── matchnodocsquery.go │ ├── disjunctionsumscorer.go │ ├── builder │ │ └── binary.go │ ├── leafcollector.go │ ├── namedmatches.go │ ├── scoremode.go │ ├── booleanclause.go │ ├── scorable.go │ ├── filterscorer.go │ ├── scoredoc.go │ ├── constantscorequery.go │ ├── scorerutil.go │ ├── termmatchesiterator.go │ └── matches.go ├── index │ ├── sortedsetdocvalues.go │ ├── nomergescheduler.go │ ├── postingsenum.go │ ├── sort.go │ ├── sortedsetdocvalueswriter.go │ ├── sortednumericdocvalues.go │ ├── docswithfieldset_test.go │ ├── similarity.go │ ├── fieldtermiterator.go │ ├── sortedsetselector.go │ ├── leafmetadata.go │ ├── readerutil.go │ ├── filterleafreader.go │ ├── eventqueue.go │ ├── documentswriterperthreadpool.go │ ├── checkindex.go │ ├── segmentmerger.go │ ├── mergetrigger.go │ ├── indexreadercontext.go │ ├── impact.go │ ├── keeponlylastcommitdeletionpolicy.go │ ├── docswithfieldset.go │ ├── singletermsenum.go │ ├── fields.go │ ├── bitsetiterator.go │ └── multisorter.go ├── query │ ├── utils.go │ ├── intrange.go │ ├── longrange.go │ ├── doublerange.go │ ├── floatrange.go │ ├── longrangeslowrangequery.go │ └── rangefieldquery.go ├── types │ ├── interfaces.go │ ├── values.go │ └── docvaluesiterator.go ├── document │ ├── consts.go │ ├── sortedfield.go │ ├── value.go │ ├── error.go │ ├── storedfield.go │ ├── textfield.go │ ├── doublefield_test.go │ ├── numericfield.go │ ├── 
stringfield.go │ └── latlon.go ├── analysis │ ├── whitespacetokenizer.go │ ├── graphtokenfilter.go │ ├── stopwordanalyzer.go │ ├── stopfilter.go │ ├── lowercasefilter.go │ ├── tokenfilter.go │ ├── wordlistloader.go │ ├── chartokenizer_test.go │ ├── charfilter.go │ └── standard │ │ ├── tokenizer_test.go │ │ └── analyzer.go └── interface │ └── index │ ├── sorter.go │ ├── bits.go │ ├── postingsenum.go │ ├── segmentinfo.go │ ├── topdocs.go │ ├── document.go │ ├── readercontext.go │ ├── codecreader.go │ ├── merge.go │ ├── notifications.go │ └── docidset.go ├── memory ├── producer_test.go ├── terms_test.go ├── indexreader_test.go ├── indexutils_test.go ├── postingsenum_test.go ├── producer.go ├── fields_test.go ├── collector.go ├── pointvalues_test.go ├── collector_test.go ├── bytestartarray_test.go └── bytestartarray.go ├── CHANGELOG.md ├── codecs ├── postingswriterbase.go ├── fieldsproducer.go ├── fieldsconsumer.go └── simpletext │ ├── storedfieldsformat.go │ ├── termvectorsformat.go │ ├── pointsformat.go │ └── postingsformat.go ├── .gitignore ├── go.mod └── .github └── workflows └── go.yml /core/util/automaton/README.md: -------------------------------------------------------------------------------- 1 | # automaton -------------------------------------------------------------------------------- /memory/producer_test.go: -------------------------------------------------------------------------------- 1 | package memory 2 | -------------------------------------------------------------------------------- /memory/terms_test.go: -------------------------------------------------------------------------------- 1 | package memory 2 | -------------------------------------------------------------------------------- /core/store/directory_test.go: -------------------------------------------------------------------------------- 1 | package store 2 | -------------------------------------------------------------------------------- /core/store/flushinfo_test.go: 
-------------------------------------------------------------------------------- 1 | package store 2 | -------------------------------------------------------------------------------- /memory/indexreader_test.go: -------------------------------------------------------------------------------- 1 | package memory 2 | -------------------------------------------------------------------------------- /memory/indexutils_test.go: -------------------------------------------------------------------------------- 1 | package memory 2 | -------------------------------------------------------------------------------- /memory/postingsenum_test.go: -------------------------------------------------------------------------------- 1 | package memory 2 | -------------------------------------------------------------------------------- /core/util/bytesref/array_test.go: -------------------------------------------------------------------------------- 1 | package bytesref 2 | -------------------------------------------------------------------------------- /core/util/packed/format_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/util/packed/mutable_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/util/packed/writer_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/codecs/perfieldpostingsformat.go: -------------------------------------------------------------------------------- 1 | package codecs 2 | -------------------------------------------------------------------------------- /core/util/automaton/levenshtein.go: 
-------------------------------------------------------------------------------- 1 | package automaton 2 | -------------------------------------------------------------------------------- /core/util/bytesref/allocator_test.go: -------------------------------------------------------------------------------- 1 | package bytesref 2 | -------------------------------------------------------------------------------- /core/util/bytesref/blockpool_test.go: -------------------------------------------------------------------------------- 1 | package bytesref 2 | -------------------------------------------------------------------------------- /core/util/bytesref/builder_test.go: -------------------------------------------------------------------------------- 1 | package bytesref 2 | -------------------------------------------------------------------------------- /core/util/bytesref/bytesref_test.go: -------------------------------------------------------------------------------- 1 | package bytesref 2 | -------------------------------------------------------------------------------- /core/util/bytesref/startarray_test.go: -------------------------------------------------------------------------------- 1 | package bytesref 2 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/util/packed/directreader_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/util/packed/packedwriter_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- 
/core/util/packed/pagedmutable_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/util/packed/directpackedreader_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/util/packed/fixsizepagedmutable_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/codecs/compressing/matchingreaders.go: -------------------------------------------------------------------------------- 1 | package compressing 2 | -------------------------------------------------------------------------------- /core/util/packed/abstractblockpackedwriter_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/util/packed/directpacked64singleblockreader_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | -------------------------------------------------------------------------------- /core/search/prefixquery.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | type PrefixQuery struct { 4 | } 5 | -------------------------------------------------------------------------------- /core/search/termrangequery.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | type TermRangeQuery struct { 4 | } 5 | -------------------------------------------------------------------------------- /core/index/sortedsetdocvalues.go: 
-------------------------------------------------------------------------------- 1 | package index 2 | 3 | const ( 4 | NO_MORE_ORDS = -1 5 | ) 6 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpackedsingleblock_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | -------------------------------------------------------------------------------- /core/codecs/perfieldmergestate.go: -------------------------------------------------------------------------------- 1 | package codecs 2 | 3 | type PerFieldMergeState struct { 4 | } 5 | -------------------------------------------------------------------------------- /core/util/bkd/test/1d.bkd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geange/lucene-go/HEAD/core/util/bkd/test/1d.bkd -------------------------------------------------------------------------------- /core/util/selector/selector.go: -------------------------------------------------------------------------------- 1 | package selector 2 | 3 | type Selector interface { 4 | SelectK(from, to, k int) 5 | } 6 | -------------------------------------------------------------------------------- /core/query/utils.go: -------------------------------------------------------------------------------- 1 | package query 2 | 3 | import "cmp" 4 | 5 | func IsNaN[T cmp.Ordered](f T) bool { 6 | return f != f 7 | } 8 | -------------------------------------------------------------------------------- /core/types/interfaces.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | // DocMap A map of doc IDs. 
4 | type DocMap interface { 5 | Get(docId int) int 6 | } 7 | -------------------------------------------------------------------------------- /core/document/consts.go: -------------------------------------------------------------------------------- 1 | package document 2 | 3 | const ( 4 | FLOAT_BYTES = 4 5 | DOUBLE_BYTES = 8 6 | INTEGER_BYTES = 4 7 | LONG_BYTES = 8 8 | ) 9 | -------------------------------------------------------------------------------- /core/codecs/blocktree/stats.go: -------------------------------------------------------------------------------- 1 | package blocktree 2 | 3 | // Stats 4 | // BlockTree statistics for a single field returned by FieldReader.getStats(). 5 | type Stats struct { 6 | } 7 | -------------------------------------------------------------------------------- /core/types/values.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | // LongValues Abstraction over an array of longs. 4 | // lucene.internal 5 | type LongValues interface { 6 | Get(index int) (uint64, error) 7 | } 8 | -------------------------------------------------------------------------------- /core/util/bkd/README.md: -------------------------------------------------------------------------------- 1 | # BKD Tree 2 | 3 | Block KD-tree, implementing the generic spatial data structure described 4 | in [this paper](https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf). 
-------------------------------------------------------------------------------- /core/codecs/perfielddocvaluesformat.go: -------------------------------------------------------------------------------- 1 | package codecs 2 | 3 | import "github.com/geange/lucene-go/core/interface/index" 4 | 5 | type PerFieldDocValuesFormat interface { 6 | index.DocValuesFormat 7 | } 8 | -------------------------------------------------------------------------------- /core/document/sortedfield.go: -------------------------------------------------------------------------------- 1 | package document 2 | 3 | type SortedSetDocValuesField Field[[]byte] 4 | 5 | type SortedNumericDocValuesField Field[[]byte] 6 | 7 | type SortedDocValuesField Field[[]byte] 8 | -------------------------------------------------------------------------------- /core/util/bitmixer/bitmixer.go: -------------------------------------------------------------------------------- 1 | package bitmixer 2 | 3 | func Mix32(v int) int { 4 | k := uint32(v) 5 | k = (k ^ (k >> 16)) * 0x85ebca6b 6 | k = (k ^ (k >> 13)) * 0xc2b2ae35 7 | return int(k ^ (k >> 16)) 8 | } 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## [0.0.2] 4 | 5 | ### Features 6 | 7 | - fst支持泛型 8 | 9 | ## [0.0.1] - 2024-12-23 10 | 11 | ### Features 12 | 13 | * 🎉 Successfully wrote to the index and queried for the first time! 
14 | -------------------------------------------------------------------------------- /core/util/fst/errors.go: -------------------------------------------------------------------------------- 1 | package fst 2 | 3 | import "github.com/pkg/errors" 4 | 5 | var ( 6 | ErrByteStoreBasic = errors.New("bytestore basic error") 7 | ErrItemNotFound = errors.Wrap(ErrByteStoreBasic, "item not found") 8 | ) 9 | -------------------------------------------------------------------------------- /core/util/structure/box.go: -------------------------------------------------------------------------------- 1 | package structure 2 | 3 | type Box[T any] struct { 4 | value T 5 | } 6 | 7 | func NewBox[T any](v T) *Box[T] { 8 | return &Box[T]{value: v} 9 | } 10 | 11 | func (b *Box[T]) Value() T { 12 | return b.value 13 | } 14 | -------------------------------------------------------------------------------- /codecs/postingswriterbase.go: -------------------------------------------------------------------------------- 1 | package codecs 2 | 3 | // PostingsWriterBase 4 | // Class that plugs into term dictionaries, such as BlockTreeTermsWriter, 5 | // and handles writing postings. 
6 | // See Also: PostingsReaderBase 7 | type PostingsWriterBase interface { 8 | } 9 | -------------------------------------------------------------------------------- /core/util/structure/iterator.go: -------------------------------------------------------------------------------- 1 | package structure 2 | 3 | import "context" 4 | 5 | type Iterator[T any] interface { 6 | HasNext() bool 7 | Next(context.Context) (T, error) 8 | } 9 | 10 | type Iterable[T any] interface { 11 | Iterator() Iterator[T] 12 | } 13 | -------------------------------------------------------------------------------- /core/analysis/whitespacetokenizer.go: -------------------------------------------------------------------------------- 1 | package analysis 2 | 3 | import ( 4 | "unicode" 5 | ) 6 | 7 | type WhitespaceTokenizer struct { 8 | CharTokenizerBase 9 | } 10 | 11 | func (w *WhitespaceTokenizer) IsTokenChar(r rune) bool { 12 | return !unicode.IsSpace(r) 13 | } 14 | -------------------------------------------------------------------------------- /core/util/automaton/slices.go: -------------------------------------------------------------------------------- 1 | package automaton 2 | 3 | func grow[T any](s []T, size int) []T { 4 | if len(s) >= size { 5 | return s 6 | } 7 | var empty T 8 | add := size - len(s) 9 | for i := 0; i < add; i++ { 10 | s = append(s, empty) 11 | } 12 | return s 13 | } 14 | -------------------------------------------------------------------------------- /core/util/sorter/sorter.go: -------------------------------------------------------------------------------- 1 | package sorter 2 | 3 | type Sorter interface { 4 | Sort(from, to int) 5 | } 6 | 7 | const ( 8 | // INSERTION_SORT_THRESHOLD 9 | // Below this size threshold, the sub-range is sorted using Insertion sort. 
10 | INSERTION_SORT_THRESHOLD = 16 11 | ) 12 | -------------------------------------------------------------------------------- /codecs/fieldsproducer.go: -------------------------------------------------------------------------------- 1 | package codecs 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | // FieldsProducer Abstract API that produces terms, doc, freq, prox, offset and payloads postings. 8 | type FieldsProducer interface { 9 | index.Fields 10 | } 11 | -------------------------------------------------------------------------------- /core/util/automaton/operationsrun.go: -------------------------------------------------------------------------------- 1 | package automaton 2 | 3 | func Run(a *Automaton, s string) bool { 4 | state := 0 5 | for _, v := range s { 6 | nextState := a.Step(state, int(v)) 7 | if nextState == -1 { 8 | return false 9 | } 10 | state = nextState 11 | } 12 | return a.IsAccept(state) 13 | } 14 | -------------------------------------------------------------------------------- /codecs/fieldsconsumer.go: -------------------------------------------------------------------------------- 1 | package codecs 2 | 3 | // FieldsConsumer Abstract API that consumes terms, doc, freq, prox, offset and payloads postings. 4 | // Concrete implementations of this actually do "something" with the postings (write it into the 5 | // index in a specific format). 
6 | type FieldsConsumer interface { 7 | } 8 | -------------------------------------------------------------------------------- /core/util/stringhelper_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | //func TestRandomId(t *testing.T) { 4 | // id := big.NewInt(int64(math.MaxInt64)) 5 | // fmt.Println(id.String()) 6 | // fmt.Println(len(id.NewBytes())) 7 | // id.Add(id, one) 8 | // fmt.Println(id.String()) 9 | // fmt.Println(len(id.NewBytes())) 10 | // 11 | //} 12 | -------------------------------------------------------------------------------- /core/document/value.go: -------------------------------------------------------------------------------- 1 | package document 2 | 3 | const ( 4 | // MaxNumBytes Maximum number of bytes for each dimension 5 | MaxNumBytes = 16 6 | 7 | // MaxDimensions Maximum number of dimensions 8 | MaxDimensions = 16 9 | 10 | // MaxIndexDimensions Maximum number of index dimensions 11 | MaxIndexDimensions = 8 12 | ) 13 | -------------------------------------------------------------------------------- /core/search/disipriorityqueue.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | // DisiPriorityQueue 4 | // A priority queue of DocIdSetIterators that orders by current doc ID. This specialization is needed over PriorityQueue because the pluggable comparison function makes the rebalancing quite slow. 
5 | // lucene.internal 6 | type DisiPriorityQueue struct { 7 | } 8 | -------------------------------------------------------------------------------- /core/util/closer.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | ) 7 | 8 | func Close(closers ...io.Closer) error { 9 | errs := make([]error, 0) 10 | for _, closer := range closers { 11 | if err := closer.Close(); err != nil { 12 | errs = append(errs, err) 13 | } 14 | } 15 | return errors.Join(errs...) 16 | } 17 | -------------------------------------------------------------------------------- /core/store/fakehash32.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import "hash" 4 | 5 | type fakeHash32 struct { 6 | hash.Hash32 7 | } 8 | 9 | func NewFakeHash32() hash.Hash32 { 10 | return fakeHash32{} 11 | } 12 | 13 | func (fakeHash32) Write(p []byte) (int, error) { return len(p), nil } 14 | func (fakeHash32) Sum32() uint32 { return 0 } 15 | -------------------------------------------------------------------------------- /core/document/error.go: -------------------------------------------------------------------------------- 1 | package document 2 | 3 | import "errors" 4 | 5 | var ( 6 | ErrFieldNotFound = errors.New("field not found") 7 | ErrFieldValueTypeNotFit = errors.New("field value types not fit") 8 | ErrIllegalOperation = errors.New("illegal operation") 9 | ErrFieldValueType = errors.New("field value type not fit") 10 | ) 11 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked3_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked3_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 3, 100, NewPacked3()) 7 | } 8 | 9 | func 
TestBulkOperationPacked3_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 3, 100, NewPacked3()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked4_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked4_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 4, 100, NewPacked4()) 7 | } 8 | 9 | func TestBulkOperationPacked4_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 4, 100, NewPacked4()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked5_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked5_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 5, 100, NewPacked5()) 7 | } 8 | 9 | func TestBulkOperationPacked5_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 5, 100, NewPacked5()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked6_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked6_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 6, 100, NewPacked6()) 7 | } 8 | 9 | func TestBulkOperationPacked6_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 6, 100, NewPacked6()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked7_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func 
TestBulkOperationPacked7_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 7, 100, NewPacked7()) 7 | } 8 | 9 | func TestBulkOperationPacked7_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 7, 100, NewPacked7()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked8_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked8_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 8, 100, NewPacked8()) 7 | } 8 | 9 | func TestBulkOperationPacked8_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 8, 100, NewPacked8()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked9_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked9_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 9, 100, NewPacked9()) 7 | } 8 | 9 | func TestBulkOperationPacked9_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 9, 100, NewPacked9()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked10_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked10_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 10, 100, NewPacked10()) 7 | } 8 | 9 | func TestBulkOperationPacked10_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 10, 100, NewPacked10()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked11_test.go: 
-------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked11_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 11, 100, NewPacked11()) 7 | } 8 | 9 | func TestBulkOperationPacked11_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 11, 100, NewPacked11()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked12_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked12_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 12, 100, NewPacked12()) 7 | } 8 | 9 | func TestBulkOperationPacked12_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 12, 100, NewPacked12()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked13_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked13_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 13, 100, NewPacked13()) 7 | } 8 | 9 | func TestBulkOperationPacked13_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 13, 100, NewPacked13()) 11 | } 12 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked14_test.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | import "testing" 4 | 5 | func TestBulkOperationPacked14_DecodeUint64(t *testing.T) { 6 | testDecodeUint64(t, 64, 14, 100, NewPacked14()) 7 | } 8 | 9 | func TestBulkOperationPacked14_DecodeBytes(t *testing.T) { 10 | testDecodeBytes(t, 8, 14, 100, NewPacked14()) 11 | } 12 | 
// TestBulkOperationPacked15_DecodeUint64 runs the shared testDecodeUint64
// helper against the bulk operation returned by NewPacked15.
// NOTE(review): the literals 64, 15 and 100 are presumably the block width in
// bits, the bits per value and the number of iterations; confirm against the
// signature of testDecodeUint64.
func TestBulkOperationPacked15_DecodeUint64(t *testing.T) {
	testDecodeUint64(t, 64, 15, 100, NewPacked15())
}
// TestBulkOperationPacked18_DecodeBytes runs the shared testDecodeBytes helper
// against the bulk operation returned by NewPacked18.
// NOTE(review): the literals 8, 18 and 100 are presumably the block width in
// bits, the bits per value and the number of iterations; confirm against the
// signature of testDecodeBytes.
func TestBulkOperationPacked18_DecodeBytes(t *testing.T) {
	testDecodeBytes(t, 8, 18, 100, NewPacked18())
}
// Sorter is the base abstraction for sorting algorithm implementations.
// lucene.internal
type Sorter interface {
	// Compare compares the entries found in slots i and j.
	// The contract for the returned value is the same as cmp.CompareFn(Object, Object).
	Compare(i, j int) int

	// Swap operates on the entries in slots i and j.
	// NOTE(review): presumably it exchanges the two entries. The meaning of
	// the int result is not visible from this declaration; confirm with the
	// implementations before relying on it.
	Swap(i, j int) int
}
// FSTEnum bundles the state kept while enumerating an FST[T]: the FST itself,
// a slice of arcs, a slice of outputs, a noOutput value, a BytesReader and two
// integer cursors.
// NOTE(review): the exact role of each field (e.g. whether arcs/output are
// indexed by upto, and what targetLength bounds) is not visible in this
// declaration; confirm against the methods that use them.
type FSTEnum[T any] struct {
	fst          *FST[T]
	arcs         []*Arc[T]
	output       []T
	noOutput     T
	fstReader    BytesReader
	upto         int
	targetLength int
}
// FileTime extracts three timestamps from info on Linux.
//
// info.Sys() must hold a *syscall.Stat_t; any other value makes the type
// assertion panic. The results are read from the Atim, Ctim and Mtim fields
// respectively. Note that create is taken from Ctim, which stat(2) defines as
// the time of the last status change rather than a true creation time.
func FileTime(info os.FileInfo) (access, create, modify time.Time) {
	st := info.Sys().(*syscall.Stat_t)
	access = timespecToTime(st.Atim)
	create = timespecToTime(st.Ctim)
	modify = timespecToTime(st.Mtim)
	return access, create, modify
}

// timespecToTime converts a syscall.Timespec (seconds plus nanoseconds since
// the Unix epoch) into a time.Time.
func timespecToTime(ts syscall.Timespec) time.Time {
	sec, nsec := ts.Unix()
	return time.Unix(sec, nsec)
}
// Compare orders a and b by the bytes of their gob encodings.
//
// It returns -1, 0 or +1 with the same meaning as bytes.Compare. Two values
// that gob encodes identically compare as equal. The ordering is arbitrary
// but deterministic; it is not the natural ordering of T.
func Compare[T any](a, b T) int {
	return bytes.Compare(hash(a), hash(b))
}

// localPool recycles the scratch buffers used by hash.
var localPool = &sync.Pool{
	New: func() any {
		return new(bytes.Buffer)
	},
}

// hash returns the gob encoding of s as a freshly allocated slice.
//
// The scratch buffer comes from localPool and is reset before use, so bytes
// left behind by a previous call never leak into the result. The encoded
// bytes are copied out before the buffer goes back to the pool, because a
// slice returned by Buffer.Bytes would be overwritten by the next user of the
// same buffer.
//
// If s cannot be gob-encoded the result is nil, so all unencodable values
// compare as equal to each other.
func hash(s any) []byte {
	b := localPool.Get().(*bytes.Buffer)
	b.Reset()
	defer localPool.Put(b)

	if err := gob.NewEncoder(b).Encode(s); err != nil {
		return nil
	}
	return bytes.Clone(b.Bytes())
}
{ 21 | r := lz4.NewReader(in) 22 | _, err := r.Read(out) 23 | return err 24 | } 25 | -------------------------------------------------------------------------------- /core/search/scoreanddoc.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | var _ index.Scorable = &ScoreAndDoc{} 8 | 9 | type ScoreAndDoc struct { 10 | *BaseScorable 11 | 12 | score float64 13 | doc int 14 | } 15 | 16 | func NewScoreAndDoc() *ScoreAndDoc { 17 | return &ScoreAndDoc{BaseScorable: &BaseScorable{}} 18 | } 19 | 20 | func (s *ScoreAndDoc) Score() (float64, error) { 21 | return s.score, nil 22 | } 23 | 24 | func (s *ScoreAndDoc) DocID() int { 25 | return s.doc 26 | } 27 | -------------------------------------------------------------------------------- /core/codecs/lucene50/termvectorsformat.go: -------------------------------------------------------------------------------- 1 | package lucene50 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/codecs/compressing" 5 | "github.com/geange/lucene-go/core/interface/index" 6 | ) 7 | 8 | var _ index.TermVectorsFormat = &TermVectorsFormat{} 9 | 10 | type TermVectorsFormat struct { 11 | *compressing.TermVectorsFormat 12 | } 13 | 14 | func NewTermVectorsFormat() *TermVectorsFormat { 15 | return &TermVectorsFormat{ 16 | compressing.NewTermVectorsFormat("Lucene50TermVectorsData", "", compressing.FAST, 1<<12, 128, 10), 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /core/interface/index/bits.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | // Bits Interface for Bitset-like structures. 4 | type Bits interface { 5 | 6 | // Test 7 | // Returns the value of the bit with the specified index. 8 | // index: index, should be non-negative and < length(). 
The result of passing negative or out of bounds 9 | // values is undefined by this interface, just don't do it! 10 | // Returns: true if the bit is set, false otherwise. 11 | Test(index uint) bool 12 | 13 | // Len 14 | // Returns the number of bits in this set 15 | Len() uint 16 | } 17 | -------------------------------------------------------------------------------- /core/store/bytesref_test.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestBytesRef(t *testing.T) { 10 | bs := []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} 11 | 12 | bytesRef := NewBytesRef(bs) 13 | err := bytesRef.Set(1, 2) 14 | assert.Nil(t, err, nil) 15 | assert.Equal(t, bytesRef.Bytes(), []byte{1, 2}) 16 | 17 | err = bytesRef.Set(8, 2) 18 | assert.Nil(t, err, nil) 19 | assert.Equal(t, bytesRef.Bytes(), []byte{8, 9}) 20 | 21 | err = bytesRef.Set(8, 3) 22 | assert.NotNil(t, err) 23 | } 24 | -------------------------------------------------------------------------------- /core/util/automaton/operationsrun_test.go: -------------------------------------------------------------------------------- 1 | package automaton 2 | 3 | import ( 4 | "github.com/stretchr/testify/assert" 5 | "testing" 6 | ) 7 | 8 | func TestRun(t *testing.T) { 9 | type args struct { 10 | a *Automaton 11 | s string 12 | } 13 | tests := []struct { 14 | name string 15 | args args 16 | want bool 17 | }{ 18 | // TODO: Add test cases. 19 | } 20 | for _, tt := range tests { 21 | t.Run(tt.name, func(t *testing.T) { 22 | assert.Equalf(t, tt.want, Run(tt.args.a, tt.args.s), "Run(%v, %v)", tt.args.a, tt.args.s) 23 | }) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /core/util/bits.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | // Bits Interface for Bitset-like structures. 
// mix scrambles the bits of key by delegating to mix32.
func mix(key int) int {
	return mix32(key)
}

// mix32 applies the 32-bit finalization mix step of the MurmurHash3 algorithm.
//
// v is truncated to its low 32 bits before mixing, and the mixed 32-bit value
// is converted back to int for the result.
func mix32(v int) int {
	h := uint32(v)
	h ^= h >> 16
	h *= 0x85ebca6b
	h ^= h >> 13
	h *= 0xc2b2ae35
	h ^= h >> 16
	return int(h)
}
// Reader
// A read-only random access array of positive integers.
// lucene.internal
type Reader interface {
	// Get returns the value at the given index, or an error.
	// Behavior is undefined for out-of-range indices.
	Get(index int) (uint64, error)

	// GetBulk is a bulk get: it reads values starting from index into arr and
	// returns the actual number of values that have been read.
	// NOTE(review): the original comment described separate off/len
	// parameters that this signature does not have; presumably at least one
	// and at most len(arr) values are read. Confirm with the implementations.
	GetBulk(index int, arr []uint64) int

	// Size returns the number of values.
	Size() int
}
// FeatureRequested reports whether every bit set in feature is also set in
// flags, i.e. whether the given feature is requested in the flags.
func FeatureRequested(flags, feature int) bool {
	// Bits of feature that flags lacks; none missing means the feature is
	// fully requested.
	missing := feature &^ flags
	return missing == 0
}
// GraphTokenFilter is an abstract TokenFilter that exposes its input stream as a graph.
//
// Call incrementBaseToken() to move the root of the graph to the next position in the
// TokenStream, incrementGraphToken() to move along the current graph, and incrementGraph()
// to reset to the next graph based at the current root.
//
// For example, given the stream 'a b/c:2 d e', with the base token at 'a',
// incrementGraphToken() will produce the stream 'a b d e', and then after calling
// incrementGraph() will produce the stream 'a c e'.
//
// NOTE(review): the struct is currently empty and none of the methods named above are
// defined in this block.
type GraphTokenFilter struct {
}
// FlushInfo carries the information required for a CONTEXT_FLUSH context.
// It is used as part of an IOContext when the context is CONTEXT_FLUSH.
type FlushInfo struct {
	NumDocs              int
	EstimatedSegmentSize int64
}

// NewFlushInfo builds a FlushInfo from the values required for a
// CONTEXT_FLUSH IOContext context.
// The values are only estimates and are not the actual values.
func NewFlushInfo(numDocs int, estimatedSegmentSize int64) *FlushInfo {
	info := new(FlushInfo)
	info.NumDocs = numDocs
	info.EstimatedSegmentSize = estimatedSegmentSize
	return info
}
index.DocValuesConsumer) error { 14 | //TODO implement me 15 | panic("implement me") 16 | } 17 | 18 | func (s *SortedNumericDocValuesWriter) GetDocValues() types.DocIdSetIterator { 19 | //TODO implement me 20 | panic("implement me") 21 | } 22 | -------------------------------------------------------------------------------- /core/search/scorer.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | "github.com/geange/lucene-go/core/types" 6 | ) 7 | 8 | type BaseScorer struct { 9 | *BaseScorable 10 | 11 | weight index.Weight 12 | } 13 | 14 | func NewScorer(weight index.Weight) *BaseScorer { 15 | return &BaseScorer{weight: weight} 16 | } 17 | 18 | func (s *BaseScorer) GetWeight() index.Weight { 19 | return s.weight 20 | } 21 | 22 | func (s *BaseScorer) TwoPhaseIterator() index.TwoPhaseIterator { 23 | return nil 24 | } 25 | 26 | func (s *BaseScorer) AdvanceShallow(target int) (int, error) { 27 | return types.NO_MORE_DOCS, nil 28 | } 29 | -------------------------------------------------------------------------------- /core/index/docswithfieldset_test.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestDocsWithFieldSet(t *testing.T) { 11 | fieldSet := NewDocsWithFieldSet() 12 | 13 | for i := 0; i < 100; i++ { 14 | err := fieldSet.Add(i) 15 | assert.Nil(t, err) 16 | } 17 | 18 | iterator, err := fieldSet.Iterator() 19 | assert.Nil(t, err) 20 | 21 | for i := 0; i < 100; i++ { 22 | docId, err := iterator.NextDoc(context.Background()) 23 | assert.Nil(t, err) 24 | assert.EqualValues(t, i, docId) 25 | 26 | curDocID := iterator.DocID() 27 | assert.EqualValues(t, i, curDocID) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- 
// MergeInfo carries the information required for a CONTEXT_MERGE context.
// It is used as part of an IOContext when the context is CONTEXT_MERGE.
type MergeInfo struct {
	TotalMaxDoc         int
	EstimatedMergeBytes int
	IsExternal          bool
	MergeMaxNumSegments int
}

// NewMergeInfo builds a MergeInfo whose fields are set, one for one, from the
// arguments of the same name.
func NewMergeInfo(totalMaxDoc int, estimatedMergeBytes int, isExternal bool, mergeMaxNumSegments int) *MergeInfo {
	info := new(MergeInfo)
	info.TotalMaxDoc = totalMaxDoc
	info.EstimatedMergeBytes = estimatedMergeBytes
	info.IsExternal = isExternal
	info.MergeMaxNumSegments = mergeMaxNumSegments
	return info
}
SimScorerSPI 15 | } 16 | 17 | func NewBaseSimScorer(simScorerSPI SimScorerSPI) *BaseSimScorer { 18 | return &BaseSimScorer{SimScorerSPI: simScorerSPI} 19 | } 20 | 21 | func (s *BaseSimScorer) Explain(freq types.Explanation, norm int64) (types.Explanation, error) { 22 | return types.ExplanationMatch( 23 | s.Score(freq.GetValue().(float64), norm), 24 | fmt.Sprintf(`score(freq="%v"), with freq of:`, freq.GetValue()), 25 | freq), nil 26 | } 27 | -------------------------------------------------------------------------------- /core/search/bitdocidset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/bits-and-blooms/bitset" 5 | 6 | "github.com/geange/lucene-go/core/index" 7 | "github.com/geange/lucene-go/core/types" 8 | "github.com/geange/lucene-go/core/util" 9 | ) 10 | 11 | var _ DocIdSet = &BitDocIdSet{} 12 | 13 | type BitDocIdSet struct { 14 | set *bitset.BitSet 15 | cost int64 16 | } 17 | 18 | func (b BitDocIdSet) Iterator() types.DocIdSetIterator { 19 | return index.NewBitSetIterator(b.set, b.cost) 20 | } 21 | 22 | func (b BitDocIdSet) Bits() util.Bits { 23 | return b.set 24 | } 25 | 26 | func NewBitDocIdSet(set *bitset.BitSet, cost int64) *BitDocIdSet { 27 | return &BitDocIdSet{set: set, cost: cost} 28 | } 29 | -------------------------------------------------------------------------------- /core/util/attribute/factory_test.go: -------------------------------------------------------------------------------- 1 | package attribute 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestDefaultAttributeFactory_CreateAttributeInstance(t *testing.T) { 10 | classes := []string{ 11 | ClassBytesTerm, 12 | ClassCharTerm, 13 | ClassOffset, 14 | ClassPositionIncrement, 15 | ClassPayload, 16 | ClassPositionLength, 17 | ClassTermFrequency, 18 | ClassTermToBytesRef, 19 | } 20 | 21 | for _, class := range classes { 22 | _, err := 
DEFAULT_ATTRIBUTE_FACTORY.CreateAttributeInstance(class) 23 | assert.Nil(t, err) 24 | } 25 | 26 | _, err := DEFAULT_ATTRIBUTE_FACTORY.CreateAttributeInstance("") 27 | assert.NotNil(t, err) 28 | } 29 | -------------------------------------------------------------------------------- /core/util/automaton/operations_test.go: -------------------------------------------------------------------------------- 1 | package automaton 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_concatenate(t *testing.T) { 10 | automata := NewAutomata() 11 | 12 | a1 := automata.MakeString("m") 13 | a2 := automata.MakeAnyString() 14 | a3 := automata.MakeString("n") 15 | a4 := automata.MakeAnyString() 16 | 17 | a, err := concatenate(a1, a2, a3, a4) 18 | assert.Nil(t, err) 19 | a, err = determinize(a, 10000) 20 | assert.Nil(t, err) 21 | 22 | if !assert.True(t, Run(a, "mn")) { 23 | t.Skip() 24 | } 25 | if !assert.True(t, Run(a, "mone")) { 26 | t.Skip() 27 | } 28 | if !assert.False(t, Run(a, "m")) { 29 | t.Skip() 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /core/index/fieldtermiterator.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import "github.com/geange/lucene-go/core/util/bytesref" 4 | 5 | // FieldTermIterator 6 | // Iterates over terms in across multiple fields. 7 | // The caller must check field after each next to see if the field changed, 8 | // but == can be used since the iterator implementation ensures it will use the same String instance for a given field. 9 | type FieldTermIterator interface { 10 | bytesref.BytesIterator 11 | 12 | // Field 13 | // Returns current field. 14 | // This method should not be called after iteration is done. 15 | // Note that you may use == to detect a change in field. 16 | Field() string 17 | 18 | // DelGen Del gen of the current term. 
// SortedSetSelector selects an item from the document's set to use as the
// representative item.
type SortedSetSelector struct{}

// SortedSetSelectorType identifies which item of the set is selected.
type SortedSetSelectorType int

const (
	// MIN selects the minimum item in the set.
	MIN SortedSetSelectorType = iota
	// MAX selects the maximum item in the set.
	MAX
	// MIDDLE_MIN selects the middle item in the set. If the set has an even
	// number of values, the lower of the middle two is chosen.
	MIDDLE_MIN
	// MIDDLE_MAX selects the middle item in the set. If the set has an even
	// number of values, the higher of the middle two is chosen.
	MIDDLE_MAX
)
9 | type leafMetaData struct { 10 | createdVersionMajor int 11 | minVersion *version.Version 12 | sort index.Sort 13 | } 14 | 15 | func NewLeafMetaData(createdVersionMajor int, minVersion *version.Version, sort index.Sort) index.LeafMetaData { 16 | return &leafMetaData{ 17 | createdVersionMajor: createdVersionMajor, 18 | minVersion: minVersion, 19 | sort: sort, 20 | } 21 | } 22 | 23 | func (l *leafMetaData) GetSort() index.Sort { 24 | return l.sort 25 | } 26 | -------------------------------------------------------------------------------- /core/search/spans/spancollector.go: -------------------------------------------------------------------------------- 1 | package spans 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | "github.com/geange/lucene-go/core/types" 6 | ) 7 | 8 | // SpanCollector 9 | // An interface defining the collection of postings information from the leaves of a Spans 10 | // lucene.experimental 11 | type SpanCollector interface { 12 | // CollectLeaf 13 | // Collect information from postings 14 | // postings: a PostingsEnum 15 | // position: – the position of the PostingsEnum 16 | // term: – the Term for this postings list 17 | CollectLeaf(postings index.PostingsEnum, position int, term *types.Term) error 18 | 19 | // Reset 20 | // Call to indicate that the driving Spans has moved to a new position 21 | Reset() 22 | } 23 | -------------------------------------------------------------------------------- /core/util/packed/packed64singleblock_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestPacked64SingleBlock_Set(t *testing.T) { 12 | bits := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32} 13 | 14 | for _, bit := range bits { 15 | block, err := NewPacked64SingleBlock(1000, bit) 16 | assert.Nil(t, err) 17 | 18 | for i := 0; i < 10; i++ 
{ 19 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 20 | idx := r.Intn(1000) 21 | value := uint64(r.Intn(1 << bit)) 22 | block.Set(idx, value) 23 | 24 | getNum, err := block.Get(idx) 25 | assert.Nil(t, err) 26 | assert.EqualValues(t, value, getNum) 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /core/util/automaton/finitestringsiterator_test.go: -------------------------------------------------------------------------------- 1 | package automaton 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestFiniteStringsIterator_Next(t *testing.T) { 11 | a, err := Union(NewAutomata().MakeString("dog"), NewAutomata().MakeString("duck")) 12 | assert.Nil(t, err) 13 | a, err = Minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT) 14 | assert.Nil(t, err) 15 | 16 | iterator := NewFiniteStringsIteratorBuilder(a).New() 17 | 18 | values := make([][]int, 0) 19 | for { 20 | value, err := iterator.Next() 21 | assert.Nil(t, err) 22 | if value == nil { 23 | break 24 | } 25 | values = append(values, value) 26 | } 27 | 28 | for _, v := range values { 29 | fmt.Println(v) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /core/analysis/stopwordanalyzer.go: -------------------------------------------------------------------------------- 1 | package analysis 2 | 3 | // StopWordAnalyzer Base class for Analyzers that need to make use of stopword sets. 
// SubIndex
// Returns the index of the searcher/reader for document n in the array used to
// construct this searcher/reader.
//
// docStarts is binary-searched for n. On an exact match the index of the last
// entry equal to n is returned; otherwise the result is the index of the last
// entry smaller than n, which is -1 when n is below every entry or docStarts
// is empty.
func SubIndex(n int, docStarts []int) int {
	count := len(docStarts)
	low, high := 0, count-1

	for low <= high {
		middle := (low + high) >> 1
		start := docStarts[middle]

		switch {
		case n < start:
			high = middle - 1
		case n > start:
			low = middle + 1
		default:
			// Exact match: step forward over equal neighbours to the last one.
			last := middle
			for last+1 < count && docStarts[last+1] == start {
				last++
			}
			return last
		}
	}
	return high
}
18 | func (f *FilterFields) Iterator() iter.Seq[string] { 19 | return f.in.Iterator() 20 | } 21 | 22 | func (f *FilterFields) Names() []string { 23 | res := make([]string, 0) 24 | for v := range f.in.Iterator() { 25 | res = append(res, v) 26 | } 27 | return res 28 | } 29 | 30 | func (f *FilterFields) Terms(field string) (index.Terms, error) { 31 | return f.in.Terms(field) 32 | } 33 | 34 | func (f *FilterFields) Size() int { 35 | return f.in.Size() 36 | } 37 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/geange/lucene-go 2 | 3 | go 1.24.3 4 | 5 | require ( 6 | github.com/bits-and-blooms/bitset v1.22.0 7 | github.com/geange/gods-generic v0.0.0-20250111143821-76f09d5dd7e0 8 | github.com/google/uuid v1.6.0 9 | github.com/matishsiao/goInfo v0.0.0-20241216093258-66a9250504d6 10 | github.com/pierrec/lz4/v4 v4.1.22 11 | github.com/pkg/errors v0.9.1 12 | github.com/samber/lo v1.51.0 13 | github.com/spaolacci/murmur3 v1.1.0 14 | github.com/stretchr/testify v1.10.0 15 | golang.org/x/exp v0.0.0-20250718183923-645b1fa84792 16 | ) 17 | 18 | require ( 19 | github.com/davecgh/go-spew v1.1.1 // indirect 20 | github.com/pmezard/go-difflib v1.0.0 // indirect 21 | golang.org/x/text v0.27.0 // indirect 22 | gopkg.in/yaml.v3 v3.0.1 // indirect 23 | ) 24 | -------------------------------------------------------------------------------- /core/search/topfielddocs.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | type TopFieldDocs struct { 8 | *BaseTopDocs 9 | 10 | fields []index.SortField 11 | } 12 | 13 | // NewTopFieldDocs 14 | // Creates one of these objects. 15 | // totalHits – Total number of hits for the query. 16 | // scoreDocs – The top hits for the query. 
17 | // fields – The sort criteria used to find the top hits. 18 | func NewTopFieldDocs(totalHits *index.TotalHits, scoreDocs []index.ScoreDoc, fields []index.SortField) *TopFieldDocs { 19 | return &TopFieldDocs{ 20 | BaseTopDocs: NewTopDocs(totalHits, scoreDocs), 21 | fields: fields, 22 | } 23 | } 24 | 25 | func (t *TopFieldDocs) GetFields() []index.SortField { 26 | return t.fields 27 | } 28 | -------------------------------------------------------------------------------- /core/document/storedfield.go: -------------------------------------------------------------------------------- 1 | package document 2 | 3 | type StoredFieldType interface { 4 | int32 | int64 | float32 | float64 | string | []byte 5 | } 6 | 7 | type StoredField[T StoredFieldType] struct { 8 | *Field[T] 9 | } 10 | 11 | var STORED_ONLY = newStoreFieldType() 12 | 13 | func newStoreFieldType() *FieldType { 14 | fieldType := NewFieldType() 15 | _ = fieldType.SetStored(true) 16 | fieldType.Freeze() 17 | return fieldType 18 | } 19 | 20 | func NewStoredField[T StoredFieldType](name string, value T) *StoredField[T] { 21 | return &StoredField[T]{NewField(name, value, STORED_ONLY)} 22 | } 23 | 24 | func NewStoredFieldWithType[T StoredFieldType](name string, value T, fieldType IndexableFieldType) *StoredField[T] { 25 | return &StoredField[T]{NewField(name, value, fieldType)} 26 | } 27 | -------------------------------------------------------------------------------- /core/util/automaton/transition.go: -------------------------------------------------------------------------------- 1 | package automaton 2 | 3 | // Transition Holds one transition from an Automaton. This is typically used temporarily when iterating 4 | // through transitions by invoking Automaton.initTransition and Automaton.getNextTransition. 5 | type Transition struct { 6 | // Source state. 7 | Source int 8 | 9 | // Destination state. 10 | Dest int 11 | 12 | // Minimum accepted label (inclusive). 
13 | Min int 14 | 15 | // Maximum accepted label (inclusive). 16 | Max int 17 | 18 | // Remembers where we are in the iteration; init to -1 to provoke exception if nextTransition is 19 | // called without first initTransition. 20 | TransitionUpto int 21 | } 22 | 23 | func NewTransition() *Transition { 24 | return &Transition{ 25 | TransitionUpto: -1, 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /core/util/fst/fst_utils_test.go: -------------------------------------------------------------------------------- 1 | package fst 2 | 3 | import "testing" 4 | 5 | func Test_getNumPresenceBytes(t *testing.T) { 6 | type args struct { 7 | labelRange int 8 | } 9 | tests := []struct { 10 | name string 11 | args args 12 | want int 13 | }{ 14 | { 15 | args: args{labelRange: 10}, 16 | want: 2, 17 | }, 18 | { 19 | args: args{labelRange: 7}, 20 | want: 1, 21 | }, 22 | { 23 | args: args{labelRange: 16}, 24 | want: 2, 25 | }, 26 | { 27 | args: args{labelRange: 17}, 28 | want: 3, 29 | }, 30 | } 31 | for _, tt := range tests { 32 | t.Run(tt.name, func(t *testing.T) { 33 | if got := getNumPresenceBytes(tt.args.labelRange); got != tt.want { 34 | t.Errorf("getNumPresenceBytes() = %v, want %v", got, tt.want) 35 | } 36 | }) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /core/analysis/stopfilter.go: -------------------------------------------------------------------------------- 1 | package analysis 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/util/attribute" 5 | ) 6 | 7 | // StopFilter 8 | // Removes stop words from a token stream. 
9 | type StopFilter struct { 10 | *BaseFilteringTokenFilter 11 | 12 | stopWords *CharArraySet 13 | termAtt attribute.CharTermAttr 14 | } 15 | 16 | func (r *StopFilter) Accept() (bool, error) { 17 | bytes := []byte(r.termAtt.GetString()) 18 | 19 | return !r.stopWords.Contain(bytes), nil 20 | } 21 | 22 | func NewStopFilter(in TokenStream, stopWords *CharArraySet) *StopFilter { 23 | stopFilter := &StopFilter{ 24 | stopWords: stopWords, 25 | termAtt: in.AttributeSource().CharTerm(), 26 | } 27 | stopFilter.BaseFilteringTokenFilter = NewFilteringTokenFilter(stopFilter, in) 28 | 29 | return stopFilter 30 | } 31 | -------------------------------------------------------------------------------- /core/index/eventqueue.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | type EventQueue struct { 4 | closed bool 5 | queue chan Event 6 | writer *IndexWriter 7 | } 8 | 9 | func NewEventQueue(writer *IndexWriter) *EventQueue { 10 | return &EventQueue{ 11 | closed: false, 12 | queue: make(chan Event, 10), 13 | writer: writer, 14 | } 15 | } 16 | 17 | type Event func(writer *IndexWriter) error 18 | 19 | func (e *EventQueue) Add(event Event) bool { 20 | select { 21 | case e.queue <- event: 22 | return true 23 | default: 24 | return false 25 | } 26 | } 27 | 28 | func (e *EventQueue) processEvents() error { 29 | OUT: 30 | for { 31 | select { 32 | case fn := <-e.queue: 33 | if err := fn(e.writer); err != nil { 34 | return err 35 | } 36 | default: 37 | break OUT 38 | } 39 | } 40 | return nil 41 | } 42 | -------------------------------------------------------------------------------- /core/store/nolockfactory.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | var _ LockFactory = &NoLockFactory{} 4 | 5 | var NoLockFactoryInstance = NewNoLockFactory() 6 | 7 | // NoLockFactory Use this LockFactory to disable locking entirely. This is a singleton, you have to use INSTANCE. 
// numericDocValuesProducer keeps a slice of values together with the number
// of leading entries that are in use.
type numericDocValuesProducer struct {
	dvLongValues []int
	count        int
}

// newNumericDocValuesProducer returns an empty producer (nil slice, zero count).
func newNumericDocValuesProducer() *numericDocValuesProducer {
	return new(numericDocValuesProducer)
}

// prepareForUsage sorts the first count entries of dvLongValues in ascending
// order, in place. Entries at index count and beyond are left untouched.
func (r *numericDocValuesProducer) prepareForUsage() {
	used := r.dvLongValues[:r.count]
	sort.Ints(used)
}
10000) 16 | doTestPdqSort(t, 100000) 17 | //doTestPdqSort(t, 1000000) 18 | } 19 | 20 | func doTestPdqSort(t *testing.T, size int) { 21 | values := make([]int, 0) 22 | 23 | for i := 0; i < size; i++ { 24 | n := rand.New(rand.NewSource(time.Now().UnixNano())).Int() 25 | values = append(values, n) 26 | } 27 | 28 | expects := slices.Clone(values) 29 | 30 | mock := NewMockInt(values...) 31 | NewPdqSorter(mock).Sort(0, size) 32 | 33 | slices.Sort(expects) 34 | 35 | assert.Equal(t, expects, mock.values) 36 | } 37 | -------------------------------------------------------------------------------- /core/search/collectormanager.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | // CollectorManager 8 | // A manager of collectors. This class is useful to parallelize execution of search requests and has two main methods: 9 | // - NewCollector() which must return a NEW collector which will be used to collect a certain set of leaves. 10 | // - Reduce(Collection) which will be used to reduce the results of individual collections into a meaningful result. 11 | // This method is only called after all leaves have been fully collected. 12 | // 13 | // See Also: IndexSearcher.search(Query, CollectorManager) 14 | // lucene.experimental 15 | type CollectorManager interface { 16 | NewCollector() (index.Collector, error) 17 | Reduce(collectors []index.Collector) (any, error) 18 | } 19 | -------------------------------------------------------------------------------- /core/index/documentswriterperthreadpool.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | // DocumentsWriterPerThreadPool controls DocumentsWriterPerThread instances and their thread assignments 4 | // during indexing. 
Each DocumentsWriterPerThread is once a obtained from the pool exclusively used for 5 | // indexing a single document or list of documents by the obtaining thread. Each indexing thread must 6 | // obtain such a DocumentsWriterPerThread to make progress. Depending on the DocumentsWriterPerThreadPool 7 | // implementation DocumentsWriterPerThread assignments might differ from document to document. 8 | // Once a DocumentsWriterPerThread is selected for Flush the DocumentsWriterPerThread will be checked out 9 | // of the thread pool and won't be reused for indexing. See checkout(DocumentsWriterPerThread). 10 | type DocumentsWriterPerThreadPool struct { 11 | } 12 | -------------------------------------------------------------------------------- /core/store/utils_test.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "github.com/stretchr/testify/assert" 5 | "testing" 6 | ) 7 | 8 | func TestSegmentFileName(t *testing.T) { 9 | items := []struct { 10 | segmentName, segmentSuffix, ext, result string 11 | }{ 12 | { 13 | segmentName: "a", 14 | segmentSuffix: "", 15 | ext: "ext", 16 | result: "a.ext", 17 | }, 18 | { 19 | segmentName: "a", 20 | segmentSuffix: "b", 21 | ext: "ext", 22 | result: "a_b.ext", 23 | }, 24 | { 25 | segmentName: "aaaaaaaa", 26 | segmentSuffix: "b", 27 | ext: "ext", 28 | result: "aaaaaaaa_b.ext", 29 | }, 30 | } 31 | 32 | for _, v := range items { 33 | name := SegmentFileName(v.segmentName, v.segmentSuffix, v.ext) 34 | assert.Equal(t, v.result, name) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /core/util/attribute/attribute.go: -------------------------------------------------------------------------------- 1 | package attribute 2 | 3 | const ( 4 | ClassBytesTerm = "BytesTerm" 5 | ClassCharTerm = "CharTerm" 6 | ClassOffset = "Offset" 7 | ClassPositionIncrement = "PositionIncrement" 8 | ClassPayload = "Payload" 9 | 
// Attribute
// Base interface for attributes that can be added to an AttributeSourceV2.
// Attributes are used to add data in a dynamic, yet type-safe way to a source
// of usually streamed objects.
type Attribute interface {
	// Interfaces returns a list of names.
	// NOTE(review): presumably the Class* identifiers declared in this
	// package that the attribute implements — confirm against implementations.
	Interfaces() []string

	// Reset reports an error on failure.
	// NOTE(review): presumably restores the attribute to its default state — confirm.
	Reset() error

	// CopyTo takes another Attribute as target and reports an error on failure.
	// NOTE(review): presumably copies this attribute's state into target — confirm.
	CopyTo(target Attribute) error

	// Clone returns an Attribute.
	// NOTE(review): presumably an independent copy of the receiver — confirm.
	Clone() Attribute
}
// SelectorCheckArgs validates the arguments of a selection call and panics
// when k lies outside the half-open range [from, to). The lower bound is
// checked first, so "k must be >= from" wins when both bounds are violated.
func SelectorCheckArgs(from, to, k int) {
	switch {
	case k < from:
		panic("k must be >= from")
	case k >= to:
		panic("k must be < to")
	}
}

// SelectK sorts data completely with sort.Sort. The k argument is accepted
// but not consulted.
func SelectK(k int, data sort.Interface) {
	sort.Sort(data)
}
// DisjunctionScorer
// Base class for Scorers that score disjunctions.
type DisjunctionScorer struct {
	*BaseScorer

	needsScores bool

	// subScorers is a DisiPriorityQueue.
	// NOTE(review): presumably holds the sub-scorers of the disjunction — confirm.
	subScorers *DisiPriorityQueue

	approximation types.DocIdSetIterator

	twoPhase *TwoPhase
}

// Compile-time check that *TwoPhase satisfies index.TwoPhaseIterator.
var _ index.TwoPhaseIterator = &TwoPhase{}

// TwoPhase is a placeholder index.TwoPhaseIterator: every method below is
// unimplemented and panics with "implement me" when called.
type TwoPhase struct {
}

// Approximation is not implemented yet; calling it panics.
func (t *TwoPhase) Approximation() types.DocIdSetIterator {
	//TODO implement me
	panic("implement me")
}

// Matches is not implemented yet; calling it panics.
func (t *TwoPhase) Matches() (bool, error) {
	//TODO implement me
	panic("implement me")
}

// MatchCost is not implemented yet; calling it panics.
func (t *TwoPhase) MatchCost() float64 {
	//TODO implement me
	panic("implement me")
}
textFieldStored.Freeze() 14 | 15 | textFieldNotStored = NewFieldType() 16 | _ = textFieldNotStored.SetIndexOptions(INDEX_OPTIONS_DOCS_AND_FREQS_AND_POSITIONS) 17 | _ = textFieldNotStored.SetTokenized(true) 18 | textFieldNotStored.Freeze() 19 | } 20 | 21 | type TextField struct { 22 | *Field[string] 23 | } 24 | 25 | func NewTextField(name string, value string, stored bool) *TextField { 26 | fieldType := textFieldStored 27 | if !stored { 28 | fieldType = textFieldNotStored 29 | } 30 | return &TextField{NewField(name, value, fieldType)} 31 | } 32 | -------------------------------------------------------------------------------- /core/util/fst/utils.go: -------------------------------------------------------------------------------- 1 | package fst 2 | 3 | import "context" 4 | 5 | func binarySearch[T any](ctx context.Context, fst *FST[T], arc *Arc[T], targetLabel int) (int, error) { 6 | in, err := fst.GetBytesReader() 7 | if err != nil { 8 | return 0, err 9 | } 10 | 11 | low := arc.ArcIdx() 12 | mid := 0 13 | high := arc.NumArcs() - 1 14 | 15 | for low <= high { 16 | mid = (low + high) >> 1 17 | if err := in.SetPosition(arc.PosArcsStart()); err != nil { 18 | return 0, err 19 | } 20 | if err := in.SkipBytes(ctx, arc.BytesPerArc()*mid+1); err != nil { 21 | return 0, err 22 | } 23 | midLabel, err := fst.ReadLabel(ctx, in) 24 | if err != nil { 25 | return 0, err 26 | } 27 | cmp := midLabel - targetLabel 28 | if cmp == 0 { 29 | return mid, nil 30 | } 31 | 32 | if cmp < 0 { 33 | low = mid + 1 34 | } else { 35 | high = mid - 1 36 | } 37 | } 38 | 39 | return -1 - low, nil 40 | } 41 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Go 5 | 6 | on: 7 | push: 8 | 
branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | 14 | build: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v4 21 | with: 22 | go-version: '1.24' 23 | 24 | - name: Build 25 | run: go build -v ./... 26 | 27 | - name: Test and run coverage 28 | run: go test -v -race -coverprofile=coverage.txt -covermode=atomic ./... 29 | 30 | - name: Upload coverage reports to Codecov 31 | uses: codecov/codecov-action@v4.0.1 32 | with: 33 | token: ${{ secrets.CODECOV_TOKEN }} 34 | slug: geange/lucene-go 35 | -------------------------------------------------------------------------------- /core/interface/index/segmentinfo.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/store" 5 | "github.com/geange/lucene-go/core/util/version" 6 | ) 7 | 8 | type SegmentInfo interface { 9 | GetID() []byte 10 | GetId() []byte 11 | Name() string 12 | Dir() store.Directory 13 | Files() map[string]struct{} 14 | FilesNum() int 15 | MaxDoc() (int, error) 16 | SetMaxDoc(maxDoc int) error 17 | SetFiles(files map[string]struct{}) 18 | AddFile(file string) error 19 | GetVersion() *version.Version 20 | GetMinVersion() *version.Version 21 | SetUseCompoundFile(isCompoundFile bool) 22 | GetUseCompoundFile() bool 23 | SetDiagnostics(diagnostics map[string]string) 24 | GetDiagnostics() map[string]string 25 | PutAttribute(key, value string) string 26 | GetAttributes() map[string]string 27 | GetIndexSort() Sort 28 | NamedForThisSegment(file string) string 29 | GetCodec() Codec 30 | SetCodec(codec Codec) 31 | } 32 | -------------------------------------------------------------------------------- /core/search/bulkscorer.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "math" 5 | 6 | 
"github.com/geange/lucene-go/core/interface/index" 7 | "github.com/geange/lucene-go/core/util" 8 | ) 9 | 10 | type ScoreRange func(collector index.LeafCollector, acceptDocs util.Bits, from, to int) (int, error) 11 | type ScoreCost func() int64 12 | 13 | type BulkScorer interface { 14 | GetScorer() (ScoreRange, ScoreCost) 15 | } 16 | 17 | type BaseBulkScorer struct { 18 | FnScoreRange func(collector index.LeafCollector, acceptDocs util.Bits, min, max int) (int, error) 19 | FnCost func() int64 20 | } 21 | 22 | func (b *BaseBulkScorer) Score(collector index.LeafCollector, acceptDocs util.Bits, minDoc, maxDoc int) (int, error) { 23 | if minDoc < 0 && maxDoc < 0 { 24 | minDoc = 0 25 | maxDoc = math.MaxInt32 26 | } 27 | return b.FnScoreRange(collector, acceptDocs, minDoc, maxDoc) 28 | } 29 | 30 | func (b *BaseBulkScorer) Cost() int64 { 31 | return b.FnCost() 32 | } 33 | -------------------------------------------------------------------------------- /core/util/zigzag/zigzag.go: -------------------------------------------------------------------------------- 1 | package zigzag 2 | 3 | // Decode decodes a zig-zag-encoded uint64 as an int64. 4 | // 5 | // Input: {…, 5, 3, 1, 0, 2, 4, 6, …} 6 | // Output: {…, -3, -2, -1, 0, +1, +2, +3, …} 7 | func Decode(x uint64) int64 { 8 | return int64(x>>1) ^ int64(x)<<63>>63 9 | } 10 | 11 | // Encode encodes an int64 as a zig-zag-encoded uint64. 12 | // 13 | // Input: {…, -3, -2, -1, 0, +1, +2, +3, …} 14 | // Output: {…, 5, 3, 1, 0, 2, 4, 6, …} 15 | func Encode(x int64) uint64 { 16 | return uint64(x<<1) ^ uint64(x>>63) 17 | } 18 | 19 | // DecodeBool decodes a uint64 as a bool. 20 | // 21 | // Input: { 0, 1, 2, …} 22 | // Output: {false, true, true, …} 23 | func DecodeBool(x uint64) bool { 24 | return x != 0 25 | } 26 | 27 | // EncodeBool encodes a bool as a uint64. 
// EncodeBool maps a bool onto its uint64 representation.
//
//	Input:  {false, true}
//	Output: {    0,    1}
func EncodeBool(x bool) uint64 {
	var encoded uint64 // the zero value already stands for false
	if x {
		encoded = 1
	}
	return encoded
}
index.SegmentInfo, fieldInfos index.FieldInfos, ioContext *store.IOContext) (index.TermVectorsReader, error) { 20 | return NewTermVectorsReader(ctx, dir, segmentInfo, ioContext) 21 | } 22 | 23 | func (s *TermVectorsFormat) VectorsWriter(ctx context.Context, dir store.Directory, segmentInfo index.SegmentInfo, ioContext *store.IOContext) (index.TermVectorsWriter, error) { 24 | return NewTermVectorsWriter(ctx, dir, segmentInfo.Name(), ioContext) 25 | } 26 | -------------------------------------------------------------------------------- /core/codecs/compressing/interface.go: -------------------------------------------------------------------------------- 1 | package compressing 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "io" 7 | 8 | "github.com/geange/lucene-go/core/store" 9 | ) 10 | 11 | type FieldsIndex interface { 12 | io.Closer 13 | 14 | // GetStartPointer 15 | // Get the start pointer for the block that contains the given docID. 16 | GetStartPointer(docId int) (int64, error) 17 | 18 | // CheckIntegrity 19 | // Check the integrity of the index. 20 | CheckIntegrity() error 21 | 22 | Clone() (FieldsIndex, error) 23 | } 24 | 25 | type Compressor interface { 26 | Compress(ctx context.Context, bytes []byte, out store.DataOutput) error 27 | } 28 | 29 | type Decompressor interface { 30 | io.Closer 31 | 32 | Decompress(ctx context.Context, in store.DataInput, offset int64, length int64, buf *bytes.Buffer) error 33 | 34 | Clone() Decompressor 35 | } 36 | 37 | type CompressionMode interface { 38 | NewCompressor() Compressor 39 | NewDecompressor() Decompressor 40 | } 41 | -------------------------------------------------------------------------------- /core/util/automaton/byterunautomaton.go: -------------------------------------------------------------------------------- 1 | package automaton 2 | 3 | // ByteRunAutomaton Automaton representation for matching UTF-8 byte[]. 
// NewByteRunAutomaton builds a ByteRunAutomaton by passing an automaton, the
// constant 256 and determinizeWorkLimit to NewRunAutomaton.
//
// When isBinary is true, a is handed to NewRunAutomaton unchanged.
//
// NOTE(review): when isBinary is false the else branch is empty, so auto stays
// nil and NewRunAutomaton is called with a nil automaton. Presumably a
// non-binary automaton is meant to be converted to a byte-level (UTF-8) one
// here before being wrapped — TODO confirm and implement the conversion.
func NewByteRunAutomaton(a *Automaton, isBinary bool, determinizeWorkLimit int) *ByteRunAutomaton {
	var auto *Automaton

	if isBinary {
		auto = a
	} else {

	}

	return &ByteRunAutomaton{
		NewRunAutomaton(auto, 256, determinizeWorkLimit),
	}
}
-------------------------------------------------------------------------------- /core/query/intrange.go: -------------------------------------------------------------------------------- 1 | package query 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | 7 | "github.com/geange/lucene-go/core/document" 8 | ) 9 | 10 | func encodeInt32(val int32, dst []byte, offset int) { 11 | value := uint32(val) ^ 0x80000000 12 | binary.BigEndian.PutUint32(dst[offset:], value) 13 | } 14 | 15 | func verifyAndEncodeInt32(minNums, maxNums []int32, dst []byte) error { 16 | for d, i, j := 0, 0, len(minNums)*document.INTEGER_BYTES; d < len(minNums); { 17 | 18 | if IsNaN(minNums[d]) { 19 | return errors.New("invalid min value") 20 | } 21 | 22 | if IsNaN(maxNums[d]) { 23 | return errors.New("invalid max value") 24 | } 25 | 26 | if minNums[d] > maxNums[d] { 27 | return errors.New("min value is greater than max value") 28 | } 29 | 30 | encodeInt32(minNums[d], dst, i) 31 | encodeInt32(maxNums[d], dst, j) 32 | 33 | d++ 34 | i += document.INTEGER_BYTES 35 | j += document.INTEGER_BYTES 36 | } 37 | 38 | return nil 39 | } 40 | -------------------------------------------------------------------------------- /core/query/longrange.go: -------------------------------------------------------------------------------- 1 | package query 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | 7 | "github.com/geange/lucene-go/core/document" 8 | ) 9 | 10 | func encodeInt64(val int64, dst []byte, offset int) { 11 | value := uint64(val) ^ 0x8000000000000000 12 | binary.BigEndian.PutUint64(dst[offset:], value) 13 | } 14 | 15 | func verifyAndEncodeInt64(minNums, maxNums []int64, dst []byte) error { 16 | for d, i, j := 0, 0, len(minNums)*document.LONG_BYTES; d < len(minNums); { 17 | 18 | if IsNaN(minNums[d]) { 19 | return errors.New("invalid min value") 20 | } 21 | 22 | if IsNaN(maxNums[d]) { 23 | return errors.New("invalid max value") 24 | } 25 | 26 | if minNums[d] > maxNums[d] { 27 | return errors.New("min 
// SegmentFileName returns a file name built from the given segment name, an
// optional custom suffix and an optional extension. The format of the file
// name is:
//
//	<segmentName>(_<segmentSuffix>)(.<ext>)
//
// NOTE: ".<ext>" is added to the result only if ext is not empty.
// NOTE: "_<segmentSuffix>" is added to the result only if segmentSuffix is not empty.
// NOTE: all custom files should be named using this method, or otherwise some structures may fail
// to handle them properly (such as if they are added to compound files).
func SegmentFileName(segmentName, segmentSuffix, ext string) string {
	var buf bytes.Buffer

	// Reserve the final size once: the three parts plus at most two
	// single-byte separators.
	buf.Grow(len(segmentName) + len(segmentSuffix) + len(ext) + 2)

	buf.WriteString(segmentName)
	if segmentSuffix != "" {
		buf.WriteString("_")
		buf.WriteString(segmentSuffix)
	}
	if ext != "" {
		buf.WriteString(".")
		buf.WriteString(ext)
	}
	return buf.String()
}
// Oversize returns a size that is at least minTargetSize, rounded up to the
// next multiple of bytesPerElement.
//
// The result is never smaller than minTargetSize. If minTargetSize or
// bytesPerElement is not positive, or if rounding up would overflow T,
// minTargetSize is returned unchanged.
//
// NOTE(review): rounding to a multiple of bytesPerElement is the apparent
// intent of the original formula; confirm against callers if a different
// growth policy (e.g. proportional over-allocation) is expected.
func Oversize[T int | int64](minTargetSize, bytesPerElement T) T {
	if minTargetSize <= 0 || bytesPerElement <= 0 {
		return minTargetSize
	}

	rem := minTargetSize % bytesPerElement
	if rem == 0 {
		return minTargetSize
	}

	rounded := minTargetSize + (bytesPerElement - rem)
	if rounded < minTargetSize {
		// Overflow: keep the requested size rather than wrapping negative.
		return minTargetSize
	}
	return rounded
}
// encodeFloat32 writes val as 4 big-endian bytes starting at dst[offset], in a
// form whose unsigned byte-wise order matches the numeric order of the floats.
//
// The IEEE-754 bits are first made sortable as a signed integer: for negative
// values the 31 non-sign bits are inverted, because a larger magnitude means a
// smaller number. The sign bit is then flipped so that negatives sort before
// positives when the bytes are compared as unsigned values.
//
// Flipping only the sign bit is not enough: it leaves negative values in
// reverse order (-2 would encode above -1).
//
// NOTE(review): the indexing side must encode float points with the same
// scheme; encodeFloat64 in this package uses the sign-bit-only form as well
// and needs the same treatment.
func encodeFloat32(val float32, dst []byte, offset int) {
	bits := int32(math.Float32bits(val))

	// bits>>31 is all ones for negative values and zero otherwise, so this
	// inverts the magnitude bits of negative values only.
	bits ^= (bits >> 31) & 0x7fffffff

	binary.BigEndian.PutUint32(dst[offset:], uint32(bits)^0x80000000)
}
assert.Nil(t, err) 38 | assert.EqualValues(t, 2, n) 39 | assert.Equal(t, []byte{2, 3}, bs) 40 | 41 | _, err = input.Read(bs) 42 | assert.NotNil(t, err) 43 | 44 | _, err = input.Read(bs) 45 | assert.NotNil(t, err) 46 | } 47 | -------------------------------------------------------------------------------- /core/document/doublefield_test.go: -------------------------------------------------------------------------------- 1 | package document 2 | 3 | import ( 4 | "iter" 5 | "math" 6 | "slices" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestFloat64Point(t *testing.T) { 13 | doc := NewDocument() 14 | 15 | type KV struct { 16 | Name string 17 | Value []float64 18 | } 19 | 20 | kvs := []KV{ 21 | {"f1", []float64{1.1, 1.2, 1.4}}, 22 | {"f2", []float64{2.1, 2, 2.5}}, 23 | {"f3", []float64{-1, 2.1, 2.5}}, 24 | } 25 | 26 | next, stop := iter.Pull(slices.Values(kvs)) 27 | defer stop() 28 | 29 | for _, kv := range kvs { 30 | field, err := NewFloat64Point(kv.Name, kv.Value...) 
31 | assert.Nil(t, err) 32 | doc.Add(field) 33 | } 34 | 35 | for field := range doc.GetFields() { 36 | kv, ok := next() 37 | assert.True(t, ok) 38 | 39 | points := field.(*Float64Point).Points() 40 | 41 | assert.Equal(t, len(kv.Value), len(points)) 42 | 43 | for i, num := range kv.Value { 44 | assert.Less(t, math.Abs(num-points[i]), 0.000001) 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /core/codecs/lucene80/normsproducer.go: -------------------------------------------------------------------------------- 1 | package lucene80 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/geange/lucene-go/core/document" 7 | "github.com/geange/lucene-go/core/interface/index" 8 | ) 9 | 10 | var _ index.NormsProducer = &NormsProducer{} 11 | 12 | type NormsProducer struct { 13 | } 14 | 15 | func NewNormsProducer(ctx context.Context, state *index.SegmentReadState, 16 | dataCodec, dataExtension, metaCodec, metaExtension string) (*NormsProducer, error) { 17 | panic("implement me") 18 | } 19 | 20 | func (n *NormsProducer) Close() error { 21 | //TODO implement me 22 | panic("implement me") 23 | } 24 | 25 | func (n *NormsProducer) GetNorms(field *document.FieldInfo) (index.NumericDocValues, error) { 26 | //TODO implement me 27 | panic("implement me") 28 | } 29 | 30 | func (n *NormsProducer) CheckIntegrity() error { 31 | //TODO implement me 32 | panic("implement me") 33 | } 34 | 35 | func (n *NormsProducer) GetMergeInstance() index.NormsProducer { 36 | //TODO implement me 37 | panic("implement me") 38 | } 39 | -------------------------------------------------------------------------------- /core/search/docvaluesfieldexistsquery.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/geange/lucene-go/core/interface/index" 4 | 5 | var _ index.Query = &DocValuesFieldExistsQuery{} 6 | 7 | // DocValuesFieldExistsQuery 8 | // A Query that matches documents 
that have a value for a given field as reported by doc values iterators. 9 | type DocValuesFieldExistsQuery struct { 10 | } 11 | 12 | func (d *DocValuesFieldExistsQuery) CreateWeight(searcher index.IndexSearcher, scoreMode index.ScoreMode, boost float64) (index.Weight, error) { 13 | //TODO implement me 14 | panic("implement me") 15 | } 16 | 17 | func (d *DocValuesFieldExistsQuery) Rewrite(reader index.IndexReader) (index.Query, error) { 18 | //TODO implement me 19 | panic("implement me") 20 | } 21 | 22 | func (d *DocValuesFieldExistsQuery) Visit(visitor index.QueryVisitor) error { 23 | //TODO implement me 24 | panic("implement me") 25 | } 26 | 27 | func (d *DocValuesFieldExistsQuery) String(field string) string { 28 | //TODO implement me 29 | panic("implement me") 30 | } 31 | -------------------------------------------------------------------------------- /core/util/sorter/pdqsort.go: -------------------------------------------------------------------------------- 1 | package sorter 2 | 3 | import ( 4 | "sort" 5 | ) 6 | 7 | const ( 8 | // SINGLE_MEDIAN_THRESHOLD 9 | // Below this size threshold, the partition selection is simplified to a single median. 
// Compile-time check that the range view satisfies sort.Interface.
var _ sort.Interface = (*pdqSorterRange)(nil)

// pdqSorterRange presents the window [from, to) of data as a zero-based
// sort.Interface: index i of the view refers to index from+i of data.
type pdqSorterRange struct {
	from int
	to   int
	data sort.Interface
}

// Len reports the number of elements in the window.
func (s *pdqSorterRange) Len() int {
	width := s.to - s.from
	return width
}

// Less compares view elements i and j by delegating to the underlying data at
// the shifted positions.
func (s *pdqSorterRange) Less(i, j int) bool {
	left, right := s.from+i, s.from+j
	return s.data.Less(left, right)
}

// Swap exchanges view elements i and j in the underlying data.
func (s *pdqSorterRange) Swap(i, j int) {
	left, right := s.from+i, s.from+j
	s.data.Swap(left, right)
}
35 | NewMsbRadixSorter(size, radix).Sort(0, size) 36 | 37 | assert.Equal(t, expects, radix.values) 38 | } 39 | -------------------------------------------------------------------------------- /core/index/checkindex.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/geange/lucene-go/core/interface/index" 7 | ) 8 | 9 | func TestLiveDocs(reader index.CodecReader) error { 10 | numDocs := reader.NumDocs() 11 | if reader.HasDeletions() { 12 | liveDocs := reader.GetLiveDocs() 13 | if liveDocs == nil { 14 | return errors.New("segment should have deletions, but liveDocs is null") 15 | } 16 | 17 | numLive := 0 18 | size := int(liveDocs.Len()) 19 | for i := 0; i < size; i++ { 20 | if liveDocs.Test(uint(i)) { 21 | numLive++ 22 | } 23 | } 24 | if numLive != numDocs { 25 | return errors.New("liveDocs count mismatch") 26 | } 27 | return nil 28 | } 29 | 30 | liveDocs := reader.GetLiveDocs() 31 | if liveDocs != nil { 32 | // it's ok for it to be non-null here, as long as none are set right? 33 | size := int(liveDocs.Len()) 34 | for i := 0; i < size; i++ { 35 | if !liveDocs.Test(uint(i)) { 36 | return errors.New("liveDocs mismatch") 37 | } 38 | } 39 | } 40 | return nil 41 | } 42 | -------------------------------------------------------------------------------- /core/search/matchnodocsquery.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | var _ index.Query = &MatchNoDocsQuery{} 8 | 9 | // MatchNoDocsQuery 10 | // A query that matches no documents. 
11 | type MatchNoDocsQuery struct { 12 | reason string 13 | } 14 | 15 | func NewMatchNoDocsQuery(reason string) *MatchNoDocsQuery { 16 | return &MatchNoDocsQuery{reason: reason} 17 | } 18 | 19 | func (m *MatchNoDocsQuery) String(field string) string { 20 | //TODO implement me 21 | panic("implement me") 22 | } 23 | 24 | func (m *MatchNoDocsQuery) CreateWeight(searcher index.IndexSearcher, scoreMode index.ScoreMode, boost float64) (index.Weight, error) { 25 | //TODO implement me 26 | panic("implement me") 27 | } 28 | 29 | func (m *MatchNoDocsQuery) Rewrite(reader index.IndexReader) (index.Query, error) { 30 | //TODO implement me 31 | panic("implement me") 32 | } 33 | 34 | func (m *MatchNoDocsQuery) Visit(visitor index.QueryVisitor) (err error) { 35 | //TODO implement me 36 | panic("implement me") 37 | } 38 | -------------------------------------------------------------------------------- /core/util/selector/mock_test.go: -------------------------------------------------------------------------------- 1 | package selector 2 | 3 | import "bytes" 4 | 5 | var ( 6 | _ RadixSelector = &MockRadix{} 7 | _ IntroSelector = &MockRadix{} 8 | ) 9 | 10 | type MockRadix struct { 11 | values [][]byte 12 | } 13 | 14 | func NewMockRadix(values ...[]byte) *MockRadix { 15 | radix := &MockRadix{ 16 | values: make([][]byte, 0), 17 | } 18 | radix.values = append(radix.values, values...) 
19 | return radix 20 | } 21 | 22 | func (m *MockRadix) Add(bs []byte) { 23 | m.values = append(m.values, bs) 24 | } 25 | 26 | func (m *MockRadix) Swap(i, j int) { 27 | m.values[i], m.values[j] = m.values[j], m.values[i] 28 | } 29 | 30 | func (m *MockRadix) ByteAt(i int, k int) int { 31 | if i >= len(m.values) { 32 | return -1 33 | } 34 | 35 | if k >= len(m.values[i]) { 36 | return -1 37 | } 38 | 39 | b := m.values[i][k] 40 | return int(b) 41 | } 42 | 43 | func (m *MockRadix) Value(i int) []byte { 44 | return m.values[i] 45 | } 46 | 47 | func (m *MockRadix) Compare(i, j int) int { 48 | return bytes.Compare(m.values[i], m.values[j]) 49 | } 50 | -------------------------------------------------------------------------------- /core/index/segmentmerger.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | "github.com/geange/lucene-go/core/store" 6 | ) 7 | 8 | // The SegmentMerger class combines two or more Segments, represented by an IndexReader, 9 | // into a single Segment. Call the merge method to combine the segments. 
// MergeTrigger identifies the event that caused a merge to be triggered.
type MergeTrigger int

const (
	// MERGE_TRIGGER_SEGMENT_FLUSH
	// Merge was triggered by a segment Flush.
	// Triggered by the flush of a single segment.
	MERGE_TRIGGER_SEGMENT_FLUSH = MergeTrigger(iota)

	// MERGE_TRIGGER_FULL_FLUSH
	// Merge was triggered by a full Flush. Full flushes can be caused by a commit,
	// NRT reader reopen or a close call on the index writer.
	MERGE_TRIGGER_FULL_FLUSH

	// MERGE_TRIGGER_EXPLICIT
	// Merge has been triggered explicitly by the user.
	MERGE_TRIGGER_EXPLICIT

	// MERGE_TRIGGER_MERGE_FINISHED
	// Merge was triggered by a successfully finished merge.
	MERGE_TRIGGER_MERGE_FINISHED

	// MERGE_TRIGGER_CLOSING
	// Merge was triggered by a closing IndexWriter.
	MERGE_TRIGGER_CLOSING

	// MERGE_TRIGGER_COMMIT
	// Merge was triggered on commit.
	MERGE_TRIGGER_COMMIT

	// MERGE_TRIGGER_GET_READER
	// Merge was triggered on opening NRT readers.
	MERGE_TRIGGER_GET_READER
)
9 | // See Also: TokenStream 10 | type TokenFilter interface { 11 | TokenStream 12 | 13 | End() error 14 | Reset() error 15 | Close() error 16 | } 17 | 18 | type BaseTokenFilter struct { 19 | source *attribute.Source 20 | input TokenStream 21 | } 22 | 23 | func NewBaseTokenFilter(input TokenStream) *BaseTokenFilter { 24 | return &BaseTokenFilter{ 25 | source: input.AttributeSource(), 26 | input: input, 27 | } 28 | } 29 | 30 | func (t *BaseTokenFilter) AttributeSource() *attribute.Source { 31 | return t.input.AttributeSource() 32 | } 33 | 34 | func (t *BaseTokenFilter) End() error { 35 | return t.input.End() 36 | } 37 | 38 | func (t *BaseTokenFilter) Reset() error { 39 | return t.input.Reset() 40 | } 41 | 42 | func (t *BaseTokenFilter) Close() error { 43 | return t.input.Close() 44 | } 45 | -------------------------------------------------------------------------------- /core/index/indexreadercontext.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import "github.com/google/uuid" 4 | 5 | type BaseIndexReaderContext struct { 6 | // The reader context for this reader's immediate parent, or null if none 7 | parent *CompositeReaderContext 8 | 9 | // true if this context struct represents the top level reader within the hierarchical context 10 | isTopLevel bool 11 | 12 | // the doc base for this reader in the parent, 0 if parent is null 13 | docBaseInParent int 14 | 15 | // the ord for this reader in the parent, 0 if parent is null 16 | ordInParent int 17 | 18 | identity string 19 | } 20 | 21 | func NewBaseIndexReaderContext(parent *CompositeReaderContext, ordInParent, docBaseInParent int) *BaseIndexReaderContext { 22 | isTop := parent == nil 23 | return &BaseIndexReaderContext{ 24 | parent: parent, 25 | isTopLevel: isTop, 26 | docBaseInParent: docBaseInParent, 27 | ordInParent: ordInParent, 28 | identity: uuid.New().String(), 29 | } 30 | } 31 | 32 | func (r *BaseIndexReaderContext) Identity() string { 33 | 
return r.identity 34 | } 35 | -------------------------------------------------------------------------------- /core/search/disjunctionsumscorer.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | "github.com/geange/lucene-go/core/types" 6 | ) 7 | 8 | var _ index.Scorer = &DisjunctionSumScorer{} 9 | 10 | // DisjunctionSumScorer 11 | // A Scorer for OR like queries, counterpart of ConjunctionScorer. 12 | type DisjunctionSumScorer struct { 13 | *DisjunctionScorer 14 | } 15 | 16 | func newDisjunctionScorer(weight index.Weight, subScorers []index.Scorer, scoreMode index.ScoreMode) (*DisjunctionSumScorer, error) { 17 | panic("") 18 | } 19 | 20 | func (d *DisjunctionSumScorer) Score() (float64, error) { 21 | //TODO implement me 22 | panic("implement me") 23 | } 24 | 25 | func (d *DisjunctionSumScorer) DocID() int { 26 | //TODO implement me 27 | panic("implement me") 28 | } 29 | 30 | func (d *DisjunctionSumScorer) Iterator() types.DocIdSetIterator { 31 | //TODO implement me 32 | panic("implement me") 33 | } 34 | 35 | func (d *DisjunctionSumScorer) GetMaxScore(upTo int) (float64, error) { 36 | //TODO implement me 37 | panic("implement me") 38 | } 39 | -------------------------------------------------------------------------------- /core/store/outputstream_test.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "bytes" 5 | "hash/crc32" 6 | "io" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | var _ io.WriteCloser = &mockWriter{} 13 | 14 | type mockWriter struct { 15 | *bytes.Buffer 16 | } 17 | 18 | func newMockWriter() *mockWriter { 19 | return &mockWriter{Buffer: new(bytes.Buffer)} 20 | } 21 | 22 | func (m *mockWriter) Close() error { 23 | return nil 24 | } 25 | 26 | func TestOutputStreamIndexOutput(t *testing.T) { 27 | ieee := crc32.NewIEEE() 28 | 
_, err := ieee.Write([]byte{1, 2, 3, 4}) 29 | assert.Nil(t, err) 30 | 31 | w := newMockWriter() 32 | output := NewOutputStream("x", w) 33 | defer output.Close() 34 | 35 | err = output.WriteByte(1) 36 | assert.Nil(t, err) 37 | 38 | n, err := output.Write([]byte{2, 3, 4}) 39 | assert.Nil(t, err) 40 | assert.EqualValues(t, 3, n) 41 | 42 | checksum, err := output.GetChecksum() 43 | assert.Nil(t, err) 44 | 45 | assert.Equal(t, ieee.Sum32(), checksum) 46 | 47 | pointer := output.GetFilePointer() 48 | assert.EqualValues(t, 4, pointer) 49 | } 50 | -------------------------------------------------------------------------------- /core/index/impact.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "github.com/geange/gods-generic/utils" 5 | "github.com/geange/lucene-go/core/interface/index" 6 | ) 7 | 8 | var _ index.Impact = &impact{} 9 | 10 | // Impact 11 | // Per-document scoring factors. 12 | type impact struct { 13 | 14 | // Term frequency of the term in the document. 15 | Freq int 16 | 17 | // Norm factor of the document. 
18 | Norm int64 19 | } 20 | 21 | func (i *impact) GetFreq() int { 22 | return i.Freq 23 | } 24 | 25 | func (i *impact) GetNorm() int64 { 26 | return i.Norm 27 | } 28 | 29 | func (i *impact) SetFreq(freq int) { 30 | i.Freq = freq 31 | } 32 | 33 | func ( i *impact) SetNorm(norm int64) { 34 | i.Norm = norm 35 | } 36 | 37 | func NewImpact(freq int, norm int64) index.Impact { 38 | return &impact{Freq: freq, Norm: norm} 39 | } 40 | 41 | func ImpactComparator(c1, c2 index.Impact) int { 42 | //c1 := a.(Impact) 43 | //c2 := b.(Impact) 44 | 45 | cmp := utils.IntComparator(c1.GetFreq(), c2.GetFreq()) 46 | if cmp == 0 { 47 | return utils.Int64Comparator(c1.GetNorm(), c2.GetNorm()) 48 | } 49 | return cmp 50 | } 51 | -------------------------------------------------------------------------------- /core/search/builder/binary.go: -------------------------------------------------------------------------------- 1 | package builder 2 | 3 | import ( 4 | "bytes" 5 | 6 | "github.com/geange/lucene-go/core/interface/index" 7 | "github.com/geange/lucene-go/core/search" 8 | ) 9 | 10 | type Binary struct{} 11 | 12 | // NewExactQuery 13 | // Create a query for matching an exact binary value. 14 | // This is for simple one-dimension points, for multidimensional points use 15 | // NewRangeQuery(String, []byte, []byte) instead. 16 | // field: field name. must not be null. 
17 | // value: binary value 18 | func (b *Binary) NewExactQuery(field string, value []byte) (index.Query, error) { 19 | return b.NewRangeQuery(field, value, value) 20 | } 21 | 22 | func (b *Binary) NewRangeQuery(field string, lower, upper []byte) (index.Query, error) { 23 | return b.NewRangeQueryNDim(field, [][]byte{lower}, [][]byte{upper}) 24 | } 25 | 26 | func (b *Binary) NewRangeQueryNDim(field string, lower, upper [][]byte) (index.Query, error) { 27 | packLower := bytes.Join(lower, []byte{}) 28 | packUpper := bytes.Join(upper, []byte{}) 29 | return search.NewPointRangeQuery(field, packLower, packUpper, len(lower)) 30 | } 31 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked2.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | type Packed2 struct { 4 | *BulkOperationPacked 5 | } 6 | 7 | func NewPacked2() *Packed2 { 8 | return &Packed2{NewPacked(2)} 9 | } 10 | 11 | func (b *Packed2) DecodeUint64(blocks []uint64, values []uint64, iterations int) { 12 | blocksOffset, valuesOffset := 0, 0 13 | for i := 0; i < iterations; i++ { 14 | block := blocks[blocksOffset] 15 | blocksOffset++ 16 | for shift := 62; shift >= 0; shift -= 2 { 17 | values[valuesOffset] = uint64((block >> shift) & 3) 18 | valuesOffset++ 19 | } 20 | } 21 | } 22 | 23 | func (b *Packed2) DecodeBytes(blocks []byte, values []uint64, iterations int) { 24 | blocksOffset, valuesOffset := 0, 0 25 | for i := 0; i < iterations; i++ { 26 | block := uint64(blocks[blocksOffset]) 27 | blocksOffset++ 28 | values[valuesOffset] = (block >> 6) & 3 29 | valuesOffset++ 30 | values[valuesOffset] = (block >> 4) & 3 31 | valuesOffset++ 32 | values[valuesOffset] = (block >> 2) & 3 33 | valuesOffset++ 34 | values[valuesOffset] = block & 3 35 | valuesOffset++ 36 | } 37 | } 38 | -------------------------------------------------------------------------------- 
/core/search/leafcollector.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/geange/lucene-go/core/interface/index" 7 | "github.com/geange/lucene-go/core/types" 8 | ) 9 | 10 | type baseLeafCollector struct { 11 | } 12 | 13 | func (*baseLeafCollector) CompetitiveIterator() (types.DocIdSetIterator, error) { 14 | return nil, nil 15 | } 16 | 17 | type FilterLeafCollector struct { 18 | in index.LeafCollector 19 | } 20 | 21 | var _ index.LeafCollector = &LeafCollectorAnon{} 22 | 23 | type LeafCollectorAnon struct { 24 | FnSetScorer func(scorer index.Scorable) error 25 | FnCollect func(ctx context.Context, doc int) error 26 | FnCompetitiveIterator func() (types.DocIdSetIterator, error) 27 | } 28 | 29 | func (l *LeafCollectorAnon) SetScorer(scorer index.Scorable) error { 30 | return l.FnSetScorer(scorer) 31 | } 32 | 33 | func (l *LeafCollectorAnon) Collect(ctx context.Context, doc int) error { 34 | return l.FnCollect(ctx, doc) 35 | } 36 | 37 | func (l *LeafCollectorAnon) CompetitiveIterator() (types.DocIdSetIterator, error) { 38 | return l.FnCompetitiveIterator() 39 | } 40 | -------------------------------------------------------------------------------- /core/index/keeponlylastcommitdeletionpolicy.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | var _ IndexDeletionPolicy = &KeepOnlyLastCommitDeletionPolicy{} 4 | 5 | // KeepOnlyLastCommitDeletionPolicy 6 | // This IndexDeletionPolicy implementation that keeps only the most recent commit and immediately removes all 7 | // prior commits after a new commit is done. This is the default deletion policy. 
8 | type KeepOnlyLastCommitDeletionPolicy struct { 9 | } 10 | 11 | func NewKeepOnlyLastCommitDeletionPolicy() *KeepOnlyLastCommitDeletionPolicy { 12 | return &KeepOnlyLastCommitDeletionPolicy{} 13 | } 14 | 15 | func (k *KeepOnlyLastCommitDeletionPolicy) OnInit(commits []IndexCommit) error { 16 | // Note that commits.size() should normally be 1: 17 | return k.OnCommit(commits) 18 | } 19 | 20 | func (k *KeepOnlyLastCommitDeletionPolicy) OnCommit(commits []IndexCommit) error { 21 | // Note that commits.size() should normally be 2 (if not 22 | // called by onInit above): 23 | size := len(commits) 24 | for i := 0; i < size-1; i++ { 25 | commit := commits[i] 26 | if err := commit.Delete(); err != nil { 27 | return err 28 | } 29 | } 30 | return nil 31 | } 32 | -------------------------------------------------------------------------------- /core/search/namedmatches.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | var _ index.Matches = &NamedMatches{} 8 | 9 | // NamedMatches 10 | // Utility class to help extract the set of sub queries that have matched from a larger query. 
11 | // Individual subqueries may be wrapped using wrapQuery(String, Query), and the matching queries for a 12 | // particular document can then be pulled from the parent Query's Matches object by calling findNamedMatches(Matches) 13 | type NamedMatches struct { 14 | in index.Matches 15 | name string 16 | } 17 | 18 | func NewNamedMatches(in index.Matches, name string) *NamedMatches { 19 | return &NamedMatches{in: in, name: name} 20 | } 21 | 22 | func (n *NamedMatches) GetName() string { 23 | return n.name 24 | } 25 | 26 | func (n *NamedMatches) Strings() []string { 27 | return n.in.Strings() 28 | } 29 | 30 | func (n *NamedMatches) GetMatches(field string) (index.MatchesIterator, error) { 31 | return n.in.GetMatches(field) 32 | } 33 | 34 | func (n *NamedMatches) GetSubMatches() []index.Matches { 35 | return []index.Matches{n.in} 36 | } 37 | -------------------------------------------------------------------------------- /core/util/packed/packedlongvalues_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | 3 | import ( 4 | "github.com/stretchr/testify/assert" 5 | "math/rand" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | func TestPackedLongValues(t *testing.T) { 11 | shift := 6 + rand.Intn(15) 12 | acceptableOverheadRatio := 1.0 13 | longValuesBuilder := NewPackedLongValuesBuilder(1<= docID { 41 | return errors.New("out of order doc ids") 42 | } 43 | d.lastDocId = docID 44 | d.set.Set(uint(docID)) 45 | return nil 46 | } 47 | -------------------------------------------------------------------------------- /core/interface/index/topdocs.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | type TopDocs interface { 4 | GetTotalHits() *TotalHits 5 | GetScoreDocs() []ScoreDoc 6 | } 7 | 8 | // TotalHits 9 | // Description of the total number of hits of a query. 
// TotalHits describes the total number of hits of a query.
//
// Counting every match is costly for queries that match many documents, and
// a lower bound ("more than 1000 hits") is often enough, so Value has to be
// read together with Relation.
type TotalHits struct {
	Value    int64
	Relation TotalHitsRelation
}

// NewTotalHits returns a TotalHits holding the given value and relation.
func NewTotalHits(value int64, relation TotalHitsRelation) *TotalHits {
	hits := new(TotalHits)
	hits.Value = value
	hits.Relation = relation
	return hits
}

// TotalHitsRelation tells how TotalHits.Value should be interpreted.
type TotalHitsRelation int

const (
	// EQUAL_TO means the total hit count is equal to the value.
	EQUAL_TO TotalHitsRelation = iota
	// GREATER_THAN_OR_EQUAL_TO means the total hit count is greater than or
	// equal to the value.
	GREATER_THAN_OR_EQUAL_TO
)
14 | type SingleTermsEnum struct { 15 | *FilteredTermsEnumBase 16 | 17 | singleRef []byte 18 | } 19 | 20 | func NewSingleTermsEnum(tenum index.TermsEnum, termText []byte) *SingleTermsEnum { 21 | enum := &SingleTermsEnum{ 22 | singleRef: termText, 23 | } 24 | enum.FilteredTermsEnumBase = NewFilteredTermsEnumDefault(&FilteredTermsEnumDefaultConfig{ 25 | Accept: enum.Accept, 26 | NextSeekTerm: nil, 27 | Tenum: tenum, 28 | StartWithSeek: true, 29 | }) 30 | enum.setInitialSeekTerm(termText) 31 | return enum 32 | } 33 | 34 | func (s *SingleTermsEnum) Accept(term []byte) (AcceptStatus, error) { 35 | if bytes.Equal(term, s.singleRef) { 36 | return ACCEPT_STATUS_YES, nil 37 | } 38 | return ACCEPT_STATUS_END, nil 39 | } 40 | -------------------------------------------------------------------------------- /core/util/packed/monotoniclongvalues_test.go: -------------------------------------------------------------------------------- 1 | package packed 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestMonotonicLongValues(t *testing.T) { 12 | 13 | for shift := 6; shift <= 20; shift++ { 14 | acceptableOverheadRatio := 1.0 15 | longValuesBuilder := NewMonotonicLongValuesBuilder(1<= len(i.num) { 23 | return 0, io.EOF 24 | } 25 | 26 | v := i.num[i.i] 27 | i.i++ 28 | return v, nil 29 | } 30 | 31 | func TestIterator(t *testing.T) { 32 | it := iterator[int]{ 33 | i: 0, 34 | num: []int{1, 2, 3, 4, 5}, 35 | } 36 | 37 | v, err := it.Next(context.TODO()) 38 | assert.Nil(t, err) 39 | assert.Equal(t, 1, v) 40 | 41 | v, err = it.Next(context.TODO()) 42 | assert.Nil(t, err) 43 | assert.Equal(t, 2, v) 44 | 45 | v, err = it.Next(context.TODO()) 46 | assert.Nil(t, err) 47 | assert.Equal(t, 3, v) 48 | 49 | v, err = it.Next(context.TODO()) 50 | assert.Nil(t, err) 51 | assert.Equal(t, 4, v) 52 | 53 | v, err = it.Next(context.TODO()) 54 | assert.Nil(t, err) 55 | assert.Equal(t, 5, v) 56 | 57 | _, err = 
it.Next(context.TODO()) 58 | assert.Error(t, err) 59 | } 60 | -------------------------------------------------------------------------------- /core/analysis/chartokenizer_test.go: -------------------------------------------------------------------------------- 1 | package analysis 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestCharTokenizerImpl_IncrementToken(t *testing.T) { 11 | text := "a b ccc dddd" 12 | 13 | tokenizer := NewCharTokenizerImpl(&ext{}, bytes.NewReader([]byte(text))) 14 | 15 | ok, err := tokenizer.IncrementToken() 16 | assert.Equal(t, err, nil) 17 | assert.Equal(t, ok, true) 18 | assert.Equal(t, "a", tokenizer.termAtt.GetString()) 19 | tokenizer.termAtt.Reset() 20 | 21 | ok, err = tokenizer.IncrementToken() 22 | assert.Equal(t, err, nil) 23 | assert.Equal(t, ok, true) 24 | assert.Equal(t, "b", tokenizer.termAtt.GetString()) 25 | tokenizer.termAtt.Reset() 26 | 27 | ok, err = tokenizer.IncrementToken() 28 | assert.Equal(t, err, nil) 29 | assert.Equal(t, ok, true) 30 | assert.Equal(t, "ccc", tokenizer.termAtt.GetString()) 31 | tokenizer.termAtt.Reset() 32 | 33 | ok, err = tokenizer.IncrementToken() 34 | assert.Equal(t, err, nil) 35 | assert.Equal(t, ok, true) 36 | assert.Equal(t, "dddd", tokenizer.termAtt.GetString()) 37 | tokenizer.termAtt.Reset() 38 | } 39 | 40 | type ext struct { 41 | } 42 | 43 | func (e *ext) IsTokenChar(r rune) bool { 44 | return r != ' ' 45 | } 46 | -------------------------------------------------------------------------------- /memory/collector.go: -------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/geange/lucene-go/core/interface/index" 7 | "github.com/geange/lucene-go/core/search" 8 | ) 9 | 10 | var _ search.SimpleCollector = &simpleCollector{} 11 | 12 | type simpleCollector struct { 13 | *search.BaseSimpleCollector 14 | 15 | scorer index.Scorable 16 | scores 
[]float64 17 | } 18 | 19 | func newSimpleCollector(scores []float64) *simpleCollector { 20 | collector := &simpleCollector{ 21 | BaseSimpleCollector: nil, 22 | scorer: nil, 23 | scores: scores, 24 | } 25 | collector.BaseSimpleCollector = search.NewSimpleCollector(collector) 26 | return collector 27 | } 28 | 29 | func (s *simpleCollector) ScoreMode() index.ScoreMode { 30 | return search.COMPLETE 31 | } 32 | 33 | func (s *simpleCollector) Collect(ctx context.Context, doc int) error { 34 | var err error 35 | score, err := s.scorer.Score() 36 | if err != nil { 37 | return err 38 | } 39 | s.scores[0] = score 40 | return err 41 | } 42 | 43 | func (s *simpleCollector) DoSetNextReader(_ index.LeafReaderContext) error { 44 | return nil 45 | } 46 | 47 | func (s *simpleCollector) SetScorer(scorer index.Scorable) error { 48 | s.scorer = scorer 49 | return nil 50 | } 51 | -------------------------------------------------------------------------------- /core/search/scorable.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | type BaseScorable struct { 8 | } 9 | 10 | func (*BaseScorable) SmoothingScore(docId int) (float64, error) { 11 | return 0, nil 12 | } 13 | 14 | func (*BaseScorable) SetMinCompetitiveScore(minScore float64) error { 15 | return nil 16 | } 17 | 18 | func (*BaseScorable) GetChildren() ([]index.ChildScorable, error) { 19 | return []index.ChildScorable{}, nil 20 | } 21 | 22 | // ChildScorable 23 | // A child Scorer and its relationship to its parent. the meaning of the relationship 24 | // depends upon the parent query. 25 | type childScorable struct { 26 | 27 | // Child Scorer. (note this is typically a direct child, and may itself also have children). 28 | Child index.Scorable 29 | 30 | // An arbitrary string relating this scorer to the parent. 
// BaseFieldsConsumer is an empty base type for fields consumers. Both of its
// would-be hooks (Write and Closer) are commented out below, so it currently
// carries no state.
type BaseFieldsConsumer struct {

	// Merges in the fields from the readers in mergeState.
	// The default implementation skips and maps around deleted documents,
	// and calls write(Fields, NormsProducer). Implementations can override
	// this method for more sophisticated merging (bulk-byte copying, etc).
	// Write func(ctx context.Context, fields Fields, norms NormsProducer) error

	// NOTE: strange but necessary so javadocs linting is happy:
	// Closer func() error
}

// Merge is currently a placeholder: it ignores all of its arguments and
// returns nil without merging anything.
func (f *BaseFieldsConsumer) Merge(ctx context.Context, mergeState *MergeState, norms index.NormsProducer) error {
	return nil
}
// IndexReaderContext
// represents a hierarchical relationship between IndexReader instances.
type IndexReaderContext interface {

	// Reader returns the IndexReader this context represents.
	Reader() IndexReader

	// Leaves
	// returns the context's leaves if this context is a top-level context.
	// For convenience, a leaf context returns itself as the only leaf.
	// Note: this is a convenience method, since leaves can always be obtained
	// by walking the context tree using Children().
	// An error (documented upstream as ErrUnsupportedOperation) is returned
	// if this is not a top-level context.
	// See also: Children()
	Leaves() ([]LeafReaderContext, error)

	// Children returns the context's children if this context is a composite
	// context, otherwise nil.
	Children() []IndexReaderContext

	// Identity returns a string identifying this context.
	// NOTE(review): the exact format is not visible here — confirm against
	// the implementations.
	Identity() string
}
27 | type LeafReaderContext interface { 28 | Reader() IndexReader 29 | Leaves() ([]LeafReaderContext, error) 30 | Children() []IndexReaderContext 31 | Identity() string 32 | 33 | Ord() int 34 | DocBase() int 35 | LeafReader() LeafReader 36 | } 37 | -------------------------------------------------------------------------------- /core/search/filterscorer.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | "github.com/geange/lucene-go/core/types" 6 | ) 7 | 8 | // A FilterScorer contains another Scorer, which it uses as its basic source of data, 9 | // possibly transforming the data along the way or providing additional functionality. 10 | // The class FilterScorer itself simply implements all abstract methods of Scorer with versions 11 | // that pass all requests to the contained scorer. Subclasses of FilterScorer may further 12 | // override some of these methods and may also provide additional methods and fields. 
13 | type FilterScorer struct { 14 | *BaseScorer 15 | 16 | in index.Scorer 17 | } 18 | 19 | func newFilterScorer(in index.Scorer) *FilterScorer { 20 | return &FilterScorer{ 21 | BaseScorer: NewScorer(in.GetWeight()), 22 | in: in, 23 | } 24 | } 25 | 26 | func (f *FilterScorer) Score() (float64, error) { 27 | return f.in.Score() 28 | } 29 | 30 | func (f *FilterScorer) DocID() int { 31 | return f.in.DocID() 32 | } 33 | 34 | func (f *FilterScorer) Iterator() types.DocIdSetIterator { 35 | return f.in.Iterator() 36 | } 37 | 38 | func (f *FilterScorer) TwoPhaseIterator() index.TwoPhaseIterator { 39 | return f.in.TwoPhaseIterator() 40 | } 41 | -------------------------------------------------------------------------------- /core/store/outputstream.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | ) 7 | 8 | var _ IndexOutput = &OutputStream{} 9 | 10 | // OutputStream 11 | // Implementation class for buffered IndexOutput that writes to an OutputStream. 
12 | type OutputStream struct { 13 | *BaseIndexOutput 14 | 15 | out *bufio.Writer 16 | closer io.Closer 17 | bytesWritten int64 18 | crc Hash 19 | } 20 | 21 | func (o *OutputStream) GetChecksum() (uint32, error) { 22 | if err := o.out.Flush(); err != nil { 23 | return 0, err 24 | } 25 | return o.crc.Sum(), nil 26 | } 27 | 28 | func NewOutputStream(name string, out io.WriteCloser) *OutputStream { 29 | output := &OutputStream{ 30 | out: bufio.NewWriter(out), 31 | closer: out, 32 | crc: NewHash(), 33 | } 34 | output.BaseIndexOutput = NewBaseIndexOutput(name, output) 35 | return output 36 | } 37 | 38 | func (o *OutputStream) Write(b []byte) (int, error) { 39 | o.crc.Write(b) 40 | 41 | o.bytesWritten += int64(len(b)) 42 | return o.out.Write(b) 43 | } 44 | 45 | func (o *OutputStream) Close() error { 46 | if err := o.out.Flush(); err != nil { 47 | return err 48 | } 49 | return o.closer.Close() 50 | } 51 | 52 | func (o *OutputStream) GetFilePointer() int64 { 53 | return o.bytesWritten 54 | } 55 | -------------------------------------------------------------------------------- /memory/pointvalues_test.go: -------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestNewMemoryIndexPointValues(t *testing.T) { 8 | //set := analysis.NewCharArraySet() 9 | //set.Add(" ") 10 | //set.Add("\n") 11 | //set.Add("\t") 12 | //analyzer := standard.NewAnalyzer(set) 13 | // 14 | //memIndex, err := NewIndex(WithStorePayloads(true)) 15 | //assert.Nil(t, err) 16 | // 17 | //points1, err := document.NewBinaryPoint("dim1", []byte{0, 0, 1}, []byte{0, 0, 2}, []byte{0, 0, 4}) 18 | //err = memIndex.AddIndexAbleField(points1, nil) 19 | // 20 | //points2, err := document.NewBinaryPoint("dim2", []byte{0, 0, 1}, []byte{0, 0, 2}, []byte{0, 0, 5}) 21 | //err = memIndex.AddIndexAbleField(points2, nil) 22 | //memIndex.Freeze() 23 | // 24 | //query, err := search.NewPointInSetQuery("dim1", 3, 3, [][]byte{{0, 0, 1}, 
// baseScoreDoc holds one hit: a document number, its score and the shard it
// came from.
type baseScoreDoc struct {
	// score is the score of this document for the query.
	score float64

	// doc is the hit document's number.
	doc int

	// shardIndex is -1 unless set explicitly.
	shardIndex int
}

// GetScore returns the stored score.
func (sd *baseScoreDoc) GetScore() float64 {
	return sd.score
}

// SetScore overwrites the stored score.
func (sd *baseScoreDoc) SetScore(score float64) {
	sd.score = score
}

// GetDoc returns the stored document number.
func (sd *baseScoreDoc) GetDoc() int {
	return sd.doc
}

// SetDoc overwrites the stored document number.
func (sd *baseScoreDoc) SetDoc(doc int) {
	sd.doc = doc
}

// GetShardIndex returns the stored shard index.
func (sd *baseScoreDoc) GetShardIndex() int {
	return sd.shardIndex
}

// SetShardIndex overwrites the stored shard index.
func (sd *baseScoreDoc) SetShardIndex(shardIndex int) {
	sd.shardIndex = shardIndex
}

// newScoreDoc builds a hit with shard index -1.
// Note the argument order: doc first, then score. This is the opposite of
// newScoreDocWIthShard.
func newScoreDoc(doc int, score float64) *baseScoreDoc {
	return newScoreDocWIthShard(score, doc, -1)
}

// newScoreDocWIthShard builds a hit with an explicit shard index.
// Note the argument order: score first, then doc.
func newScoreDocWIthShard(score float64, doc int, shardIndex int) *baseScoreDoc {
	hit := new(baseScoreDoc)
	hit.score = score
	hit.doc = doc
	hit.shardIndex = shardIndex
	return hit
}
-------------------------------------------------------------------------------- /core/index/bitsetiterator.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "context" 5 | "io" 6 | 7 | "github.com/bits-and-blooms/bitset" 8 | 9 | "github.com/geange/lucene-go/core/types" 10 | ) 11 | 12 | var _ types.DocIdSetIterator = &BitSetIterator{} 13 | 14 | type BitSetIterator struct { 15 | bits *bitset.BitSet 16 | cost int64 17 | doc int 18 | } 19 | 20 | func NewBitSetIterator(bits *bitset.BitSet, cost int64) *BitSetIterator { 21 | it := &BitSetIterator{ 22 | bits: bits, 23 | cost: cost, 24 | doc: -1, 25 | } 26 | 27 | return it 28 | } 29 | 30 | func (b *BitSetIterator) GetBitSet() *bitset.BitSet { 31 | return b.bits 32 | } 33 | 34 | func (b *BitSetIterator) DocID() int { 35 | return b.doc 36 | } 37 | 38 | func (b *BitSetIterator) NextDoc(ctx context.Context) (int, error) { 39 | return b.Advance(ctx, b.doc+1) 40 | } 41 | 42 | func (b *BitSetIterator) Advance(ctx context.Context, target int) (int, error) { 43 | value, ok := b.bits.NextSet(uint(target)) 44 | if !ok { 45 | return 0, io.EOF 46 | } 47 | 48 | b.doc = int(value) 49 | return b.doc, nil 50 | } 51 | 52 | func (b *BitSetIterator) SlowAdvance(ctx context.Context, target int) (int, error) { 53 | return types.SlowAdvanceWithContext(ctx, b, target) 54 | } 55 | 56 | func (b *BitSetIterator) Cost() int64 { 57 | return b.cost 58 | } 59 | -------------------------------------------------------------------------------- /core/analysis/charfilter.go: -------------------------------------------------------------------------------- 1 | package analysis 2 | 3 | import "io" 4 | 5 | type CharFilter interface { 6 | CharFilterExt 7 | 8 | io.ReadCloser 9 | 10 | // CorrectOffset Chains the corrected offset through the input CharFilter(s). 
11 | CorrectOffset(currentOff int) int 12 | } 13 | 14 | type CharFilterExt interface { 15 | // Correct Subclasses override to correct the current offset. 16 | // Params: currentOff – current offset 17 | // Returns: corrected offset 18 | Correct(currentOff int) int 19 | } 20 | 21 | func NewBaseCharFilter(ext CharFilterExt, input io.ReadCloser) *BaseCharFilter { 22 | return &BaseCharFilter{ 23 | ext: ext, 24 | input: input, 25 | } 26 | } 27 | 28 | type BaseCharFilter struct { 29 | ext CharFilterExt 30 | input io.ReadCloser 31 | } 32 | 33 | func (c *BaseCharFilter) Correct(currentOff int) int { 34 | return c.ext.Correct(currentOff) 35 | } 36 | 37 | func (c *BaseCharFilter) Close() error { 38 | return c.input.Close() 39 | } 40 | 41 | func (c *BaseCharFilter) Read(p []byte) (n int, err error) { 42 | return c.input.Read(p) 43 | } 44 | 45 | func (c *BaseCharFilter) CorrectOffset(currentOff int) int { 46 | corrected := c.ext.Correct(currentOff) 47 | if charFilter, ok := c.input.(CharFilter); ok { 48 | return charFilter.CorrectOffset(corrected) 49 | } 50 | return corrected 51 | } 52 | -------------------------------------------------------------------------------- /core/search/constantscorequery.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | var _ index.Query = &ConstantScoreQuery{} 8 | 9 | // ConstantScoreQuery 10 | // A query that wraps another query and simply returns a constant score equal to 1 for every document 11 | // that matches the query. It therefore simply strips of all scores and always returns 1. 
12 | type ConstantScoreQuery struct { 13 | query index.Query 14 | } 15 | 16 | func NewConstantScoreQuery(query index.Query) *ConstantScoreQuery { 17 | return &ConstantScoreQuery{query: query} 18 | } 19 | 20 | func (c *ConstantScoreQuery) String(field string) string { 21 | //TODO implement me 22 | panic("implement me") 23 | } 24 | 25 | func (c *ConstantScoreQuery) CreateWeight(searcher index.IndexSearcher, scoreMode index.ScoreMode, boost float64) (index.Weight, error) { 26 | //TODO implement me 27 | panic("implement me") 28 | } 29 | 30 | func (c *ConstantScoreQuery) Rewrite(reader index.IndexReader) (index.Query, error) { 31 | //TODO implement me 32 | panic("implement me") 33 | } 34 | 35 | func (c *ConstantScoreQuery) Visit(visitor index.QueryVisitor) (err error) { 36 | //TODO implement me 37 | panic("implement me") 38 | } 39 | 40 | func (c *ConstantScoreQuery) GetQuery() index.Query { 41 | return c.query 42 | } 43 | -------------------------------------------------------------------------------- /memory/collector_test.go: -------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "context" 5 | "github.com/geange/lucene-go/core/interface/index" 6 | "math/rand" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestNewSimpleCollector(t *testing.T) { 13 | scores := make([]float64, 1) 14 | collector := newSimpleCollector(scores) 15 | 16 | score := rand.Float64() 17 | err := collector.SetScorer(&mockScorable{score: score}) 18 | assert.Nil(t, err) 19 | 20 | err = collector.Collect(context.Background(), 1) 21 | assert.Nil(t, err) 22 | 23 | assert.InDelta(t, score, scores[0], 0.0000001) 24 | } 25 | 26 | var _ index.Scorable = &mockScorable{} 27 | 28 | type mockScorable struct { 29 | score float64 30 | } 31 | 32 | func (m *mockScorable) Score() (float64, error) { 33 | return m.score, nil 34 | } 35 | 36 | func (m *mockScorable) SmoothingScore(docId int) (float64, error) { 37 | //TODO 
// CodecReaderSPI lists the per-format readers and metadata accessors of a
// codec-backed reader.
type CodecReaderSPI interface {
	GetFieldsReader() StoredFieldsReader
	GetTermVectorsReader() TermVectorsReader
	GetPostingsReader() FieldsProducer
	GetFieldInfos() FieldInfos
	MaxDoc() int
	GetDocValuesReader() DocValuesProducer
	GetNormsReader() NormsProducer
	GetPointsReader() PointsReader
}

// CodecReader is a LeafReader that additionally exposes its underlying
// per-format readers. All accessors below are expert/internal API.
type CodecReader interface {
	LeafReader

	// GetFieldsReader
	// Expert: retrieves the thread-private StoredFieldsReader.
	GetFieldsReader() StoredFieldsReader

	// GetTermVectorsReader
	// Expert: retrieves the thread-private TermVectorsReader.
	GetTermVectorsReader() TermVectorsReader

	// GetNormsReader
	// Expert: retrieves the underlying NormsProducer.
	GetNormsReader() NormsProducer

	// GetDocValuesReader
	// Expert: retrieves the underlying DocValuesProducer.
	GetDocValuesReader() DocValuesProducer

	// GetPostingsReader
	// Expert: retrieves the underlying FieldsProducer.
	GetPostingsReader() FieldsProducer

	// GetPointsReader
	// Expert: retrieves the underlying PointsReader.
	GetPointsReader() PointsReader
}
-------------------------------------------------------------------------------- /memory/bytestartarray_test.go: -------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestNewSliceByteStartArray(t *testing.T) { 10 | startArray := newSliceByteStartArray(10) 11 | startArray.Init() 12 | 13 | assert.Equal(t, 10, len(startArray.bytesStart)) 14 | assert.Equal(t, 12, len(startArray.start)) 15 | assert.Equal(t, 12, len(startArray.end)) 16 | assert.Equal(t, 12, len(startArray.freq)) 17 | 18 | startArray.Grow() 19 | assert.Equal(t, 11, len(startArray.bytesStart)) 20 | assert.Equal(t, 12, len(startArray.start)) 21 | assert.Equal(t, 12, len(startArray.end)) 22 | assert.Equal(t, 12, len(startArray.freq)) 23 | 24 | startArray.Grow() 25 | assert.Equal(t, 12, len(startArray.bytesStart)) 26 | assert.Equal(t, 12, len(startArray.start)) 27 | assert.Equal(t, 12, len(startArray.end)) 28 | assert.Equal(t, 12, len(startArray.freq)) 29 | 30 | startArray.Grow() 31 | assert.Equal(t, 13, len(startArray.bytesStart)) 32 | assert.Equal(t, 13, len(startArray.start)) 33 | assert.Equal(t, 13, len(startArray.end)) 34 | assert.Equal(t, 13, len(startArray.freq)) 35 | 36 | startArray.Clear() 37 | assert.Equal(t, 0, len(startArray.bytesStart)) 38 | assert.Equal(t, 0, len(startArray.start)) 39 | assert.Equal(t, 0, len(startArray.end)) 40 | assert.Equal(t, 0, len(startArray.freq)) 41 | } 42 | -------------------------------------------------------------------------------- /core/store/ramoutputstream_test.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "iter" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestRAMOutputStream(t *testing.T) { 11 | testRAMOutputStream(t, 64, 100) 12 | testRAMOutputStream(t, 64, 10) 13 | testRAMOutputStream(t, 1, 1) 14 | 
testRAMOutputStream(t, 200, 2080) 15 | testRAMOutputStream(t, 200, 3000) 16 | } 17 | 18 | func testRAMOutputStream(t *testing.T, outLoopSize, inLoopSize int) { 19 | directory := NewRAMDirectory() 20 | ramFile := NewRAMFile(directory) 21 | output := NewRAMOutputStream("test_001", ramFile, true) 22 | 23 | for i := 0; i < outLoopSize; i++ { 24 | n := i % 10 25 | 26 | char := '0' + byte(n) 27 | 28 | data := make([]byte, 0) 29 | for j := 0; j < inLoopSize; j++ { 30 | data = append(data, char) 31 | } 32 | _, err := output.Write(data) 33 | assert.Nil(t, err) 34 | } 35 | 36 | err := output.Close() 37 | assert.Nil(t, err) 38 | 39 | assert.Equal(t, output.GetFilePointer(), int64(outLoopSize*inLoopSize)) 40 | 41 | next, stop := iter.Pull(ramFile.Iterator()) 42 | defer stop() 43 | 44 | for i := 0; i < outLoopSize; i++ { 45 | n := i % 10 46 | 47 | for j := 0; j < inLoopSize; j++ { 48 | expect := '0' + byte(n) 49 | char, ok := next() 50 | assert.True(t, ok) 51 | assert.Equal(t, expect, char) 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /core/util/automaton/intsrefbuilder.go: -------------------------------------------------------------------------------- 1 | package automaton 2 | 3 | import "github.com/geange/lucene-go/core/util/array" 4 | 5 | type IntsRefBuilder[T any] struct { 6 | ref *IntsRef[T] 7 | } 8 | 9 | func NewIntsRefBuilder[T any]() *IntsRefBuilder[T] { 10 | return &IntsRefBuilder[T]{ref: NewIntsRef[T]()} 11 | } 12 | 13 | type IntsRef[T any] struct { 14 | ints []T 15 | offset int 16 | length int 17 | } 18 | 19 | func NewIntsRef[T any]() *IntsRef[T] { 20 | return &IntsRef[T]{ 21 | ints: make([]T, 0), 22 | } 23 | } 24 | 25 | func (i *IntsRefBuilder[T]) SetLength(length int) { 26 | i.ref.length = length 27 | } 28 | 29 | func (i *IntsRefBuilder[T]) Length() int { 30 | return i.ref.length 31 | } 32 | 33 | func (i *IntsRefBuilder[T]) Clear() { 34 | i.SetLength(0) 35 | } 36 | 37 | func (i *IntsRefBuilder[T]) 
At(offset int) T { 38 | return i.ref.ints[offset] 39 | } 40 | 41 | func (i *IntsRefBuilder[T]) Set(offset int, value T) { 42 | i.ref.ints[offset] = value 43 | } 44 | 45 | func (i *IntsRefBuilder[T]) Append(value T) { 46 | if i.ref.offset+i.ref.length >= len(i.ref.ints) { 47 | i.ref.ints = append(i.ref.ints, value) 48 | } 49 | i.ref.length++ 50 | } 51 | 52 | func (i *IntsRefBuilder[T]) Grow(depth int) { 53 | i.ref.ints = array.Grow(i.ref.ints, depth) 54 | } 55 | 56 | func (i *IntsRefBuilder[T]) Get() []T { 57 | return i.ref.ints 58 | } 59 | -------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked1.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | type Packed1 struct { 4 | *BulkOperationPacked 5 | } 6 | 7 | func NewPacked1() *Packed1 { 8 | return &Packed1{NewPacked(1)} 9 | } 10 | 11 | func (b *Packed1) DecodeUint64(blocks []uint64, values []uint64, iterations int) { 12 | blocksOffset, valuesOffset := 0, 0 13 | for i := 0; i < iterations; i++ { 14 | block := blocks[blocksOffset] 15 | blocksOffset++ 16 | for shift := 63; shift >= 0; shift -= 1 { 17 | values[valuesOffset] = (block >> shift) & 1 18 | valuesOffset++ 19 | } 20 | } 21 | } 22 | 23 | func (b *Packed1) DecodeBytes(blocks []byte, values []uint64, iterations int) { 24 | blocksOffset, valuesOffset := 0, 0 25 | for i := 0; i < iterations; i++ { 26 | block := uint64(blocks[blocksOffset]) 27 | blocksOffset++ 28 | values[valuesOffset] = (block >> 7) & 1 29 | valuesOffset++ 30 | values[valuesOffset] = (block >> 6) & 1 31 | valuesOffset++ 32 | values[valuesOffset] = (block >> 5) & 1 33 | valuesOffset++ 34 | values[valuesOffset] = (block >> 4) & 1 35 | valuesOffset++ 36 | values[valuesOffset] = (block >> 3) & 1 37 | valuesOffset++ 38 | values[valuesOffset] = (block >> 2) & 1 39 | valuesOffset++ 40 | values[valuesOffset] = (block >> 1) & 1 41 | valuesOffset++ 42 | 
// PostingsReader is the reading-side contract for postings: it is initialised
// from a terms dictionary input, creates and decodes per-term state, and
// returns postings and impacts enumerations for a term.
type PostingsReader interface {
	io.Closer

	// Init
	// Performs any initialization, such as reading and verifying the header from the provided terms dictionary IndexInput.
	Init(ctx context.Context, termsIn store.IndexInput, state *index.SegmentReadState) error

	// NewTermState
	// Return a newly created empty TermState
	NewTermState() (BlockTermState, error)

	// DecodeTerm
	// Actually decode metadata for next term
	DecodeTerm(ctx context.Context, in store.DataInput, fieldInfo *document.FieldInfo, state BlockTermState, absolute bool) error

	// Postings
	// Must fully consume state, since after this call that TermState may be reused.
	Postings(ctx context.Context, fieldInfo *document.FieldInfo, state BlockTermState, reuse index.PostingsEnum, flags int) (index.PostingsEnum, error)

	// Impacts
	// Return an ImpactsEnum that computes impacts with scorer.
	Impacts(ctx context.Context, fieldInfo *document.FieldInfo, state BlockTermState, flags int) (index.ImpactsEnum, error)

	// CheckIntegrity
	// NOTE(review): presumably verifies the underlying postings data (e.g. checksums) - confirm against implementations.
	CheckIntegrity() error
}
// PostingsFormat For debugging, curiosity, transparency only!! Do not use this codec in production.
// This codec stores all postings data in a single human-readable text file (_N.pst). You can view this in any text editor, and even edit it to alter your index.
// lucene.experimental
type PostingsFormat struct {
	name string // format name reported by GetName
}

// NewPostingsFormat returns a PostingsFormat named "SimpleText".
func NewPostingsFormat() *PostingsFormat {
	return &PostingsFormat{name: "SimpleText"}
}

// GetName returns the name of this postings format.
func (s *PostingsFormat) GetName() string {
	return s.name
}

// FieldsConsumer returns the writer for this format, created by NewFieldsWriter.
func (s *PostingsFormat) FieldsConsumer(ctx context.Context, state *index.SegmentWriteState) (index.FieldsConsumer, error) {
	return NewFieldsWriter(ctx, state)
}

// FieldsProducer returns the reader for this format, created by
// NewSimpleTextFieldsReader. Note that ctx is not forwarded to the reader.
func (s *PostingsFormat) FieldsProducer(ctx context.Context, state *index.SegmentReadState) (index.FieldsProducer, error) {
	return NewSimpleTextFieldsReader(state)
}

// getPostingsFileName builds the postings file name for a segment from the
// segment name, the segment suffix and POSTINGS_EXTENSION.
func getPostingsFileName(segment, segmentSuffix string) string {
	return store.SegmentFileName(segment, segmentSuffix, POSTINGS_EXTENSION)
}
17 | // name: field name 18 | // value: String value 19 | // stored: true if the content should also be stored 20 | func NewStringField(name string, value string, stored bool) *StringField { 21 | stringFieldTypeOnce.Do(func() { 22 | stringFieldTypeNotStored = NewFieldType() 23 | _ = stringFieldTypeNotStored.SetOmitNorms(true) 24 | _ = stringFieldTypeNotStored.SetIndexOptions(INDEX_OPTIONS_DOCS) 25 | _ = stringFieldTypeNotStored.SetTokenized(false) 26 | stringFieldTypeNotStored.Freeze() 27 | 28 | stringFieldTypeStored = NewFieldType() 29 | _ = stringFieldTypeStored.SetOmitNorms(true) 30 | _ = stringFieldTypeStored.SetIndexOptions(INDEX_OPTIONS_DOCS) 31 | _ = stringFieldTypeStored.SetStored(true) 32 | _ = stringFieldTypeStored.SetTokenized(false) 33 | stringFieldTypeStored.Freeze() 34 | }) 35 | 36 | fieldType := stringFieldTypeStored 37 | if !stored { 38 | fieldType = stringFieldTypeNotStored 39 | } 40 | return &StringField{NewField(name, value, fieldType)} 41 | } 42 | -------------------------------------------------------------------------------- /core/util/packed/packedreaderiterator.go: -------------------------------------------------------------------------------- 1 | package packed 2 | 3 | import ( 4 | "iter" 5 | 6 | "github.com/geange/lucene-go/core/store" 7 | "github.com/geange/lucene-go/core/util/packed/bulkoperation" 8 | ) 9 | 10 | type PackedReaderIterator struct { 11 | in store.DataInput 12 | format Format 13 | valueCount int 14 | bitsPerValue int 15 | mem int 16 | } 17 | 18 | func NewPackedReaderIterator(in store.DataInput, format Format, 19 | valueCount int, bitsPerValue int, mem int) *PackedReaderIterator { 20 | return &PackedReaderIterator{in: in, format: format, valueCount: valueCount, bitsPerValue: bitsPerValue, mem: mem} 21 | } 22 | 23 | func (p *PackedReaderIterator) Iterator() (iter.Seq[uint64], error) { 24 | bulkOperation, err := Of(p.format, p.bitsPerValue) 25 | if err != nil { 26 | return nil, err 27 | } 28 | iterations := 
// MergeStateDocMap wraps a function that maps one document ID to another.
// MergeState.DocMaps uses it to map document IDs from old segments to
// document IDs in the new segment.
type MergeStateDocMap struct {
	// Get returns the mapped document ID for docId.
	Get func(docId int) int
}
51 | 52 | // Indicates if the index needs to be sorted 53 | NeedsIndexSort bool 54 | } 55 | -------------------------------------------------------------------------------- /core/analysis/standard/tokenizer_test.go: -------------------------------------------------------------------------------- 1 | package standard 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestTokenizer_IncrementToken(t *testing.T) { 11 | as := assert.New(t) 12 | 13 | text := "aaaa bbbb cccc dddd eeee" 14 | 15 | tokenizer := NewTokenizer() 16 | 17 | err := tokenizer.SetReader(bytes.NewReader([]byte(text))) 18 | assert.Nil(t, err) 19 | 20 | ok, err := tokenizer.IncrementToken() 21 | assert.Nil(t, err) 22 | assert.True(t, ok) 23 | as.Equal("aaaa", string(tokenizer.AttributeSource().CharTerm().GetString())) 24 | 25 | ok, err = tokenizer.IncrementToken() 26 | assert.Nil(t, err) 27 | assert.True(t, ok) 28 | as.Equal("bbbb", string(tokenizer.AttributeSource().CharTerm().GetString())) 29 | 30 | ok, err = tokenizer.IncrementToken() 31 | assert.Nil(t, err) 32 | assert.True(t, ok) 33 | as.Equal("cccc", string(tokenizer.AttributeSource().CharTerm().GetString())) 34 | 35 | ok, err = tokenizer.IncrementToken() 36 | assert.Nil(t, err) 37 | assert.True(t, ok) 38 | as.Equal("dddd", string(tokenizer.AttributeSource().CharTerm().GetString())) 39 | 40 | ok, err = tokenizer.IncrementToken() 41 | assert.Nil(t, err) 42 | assert.True(t, ok) 43 | as.Equal("eeee", string(tokenizer.AttributeSource().CharTerm().GetString())) 44 | 45 | ok, err = tokenizer.IncrementToken() 46 | assert.Nil(t, err) 47 | assert.False(t, ok) 48 | } 49 | -------------------------------------------------------------------------------- /core/util/smallfloat.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "math" 5 | "math/bits" 6 | ) 7 | 8 | var ( 9 | MaxInt4 = LongToInt4(math.MaxInt32) 10 | NumFreeValues = 255 
- MaxInt4 11 | ) 12 | 13 | func IntToByte4(i int) byte { 14 | if i < NumFreeValues { 15 | return byte(i) 16 | } else { 17 | return (byte)(NumFreeValues + LongToInt4(int64(i-NumFreeValues))) 18 | } 19 | } 20 | 21 | func Int4ToLong(i int) int64 { 22 | b := i & 0x07 23 | shift := (i >> 3) - 1 24 | decoded := 0 25 | if shift == -1 { 26 | // subnormal value 27 | decoded = b 28 | } else { 29 | // normal value 30 | decoded = (b | 0x08) << shift 31 | } 32 | return int64(decoded) 33 | } 34 | 35 | func LongToInt4(i int64) int { 36 | numBits := 64 - bits.LeadingZeros64(uint64(i)) 37 | if numBits < 4 { 38 | // subnormal value 39 | return int(i) 40 | } else { 41 | // normal value 42 | shift := int64(numBits - 4) 43 | // only keep the 5 most significant bits 44 | encoded := i >> shift 45 | // clear the most significant bit, which is implicit 46 | encoded &= 0x07 47 | // encode the shift, adding 1 because 0 is reserved for subnormal values 48 | encoded = encoded | ((shift + 1) << 3) 49 | return int(encoded) 50 | } 51 | } 52 | 53 | func Byte4ToInt(b byte) int { 54 | i := int(b) 55 | if i < NumFreeValues { 56 | return i 57 | } else { 58 | return NumFreeValues + int(Int4ToLong(i-NumFreeValues)) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /memory/bytestartarray.go: -------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/util/array" 5 | ) 6 | 7 | type sliceByteStartArray struct { 8 | initSize int 9 | bytesStart []uint32 10 | start []int // the start offset in the IntBlockPool per term 11 | end []int // the end pointer in the IntBlockPool for the postings slice per term 12 | freq []int // the term frequency 13 | } 14 | 15 | func newSliceByteStartArray(initSize int) *sliceByteStartArray { 16 | return &sliceByteStartArray{ 17 | initSize: initSize, 18 | } 19 | } 20 | 21 | func (s *sliceByteStartArray) Init() []uint32 { 22 | s.bytesStart 
// Errors returned by the BytesRef setters when the requested window does not
// fit inside the backing slice.
var (
	errBytesRefOffset = errors.New("bytesref: offset out of range")
	errBytesRefLength = errors.New("bytesref: length out of range")
)

// BytesRef is a window (offset, length) over a backing byte slice. The
// backing slice is shared with the caller, not copied.
type BytesRef struct {
	bs     []byte
	offset int
	length int
}

// NewBytesRef wraps bs with offset 0 and length 0, so Bytes() is empty until
// SetLength or Set is called.
func NewBytesRef(bs []byte) *BytesRef {
	return &BytesRef{
		bs:     bs,
		offset: 0,
		length: 0,
	}
}

// NewMustBytesRef wraps bs with the given offset and length. The values are
// not validated here; an invalid window makes Bytes() panic.
func NewMustBytesRef(bs []byte, offset, length int) *BytesRef {
	return &BytesRef{
		bs:     bs,
		offset: offset,
		length: length,
	}
}

// checkBytesRefWindow reports whether [offset, offset+length) lies inside a
// slice of the given size.
func checkBytesRefWindow(size, offset, length int) error {
	if offset < 0 || offset > size {
		return errBytesRefOffset
	}
	if length < 0 || length > size-offset {
		return errBytesRefLength
	}
	return nil
}

// SetOffset moves the start of the window. It returns an error and leaves the
// reference unchanged when offset is outside [0, len(backing slice)]. The
// current length is not re-validated, so callers can adjust it afterwards.
func (b *BytesRef) SetOffset(offset int) error {
	if offset < 0 || offset > len(b.bs) {
		return errBytesRefOffset
	}
	b.offset = offset
	return nil
}

// SetLength changes the window length. It returns an error and leaves the
// reference unchanged when length is negative or exceeds the bytes available
// after the current offset.
func (b *BytesRef) SetLength(length int) error {
	if err := checkBytesRefWindow(len(b.bs), b.offset, length); err != nil {
		return err
	}
	b.length = length
	return nil
}

// Set changes offset and length together. Both are validated first; on error
// the reference is left unchanged.
func (b *BytesRef) Set(offset, length int) error {
	if err := checkBytesRefWindow(len(b.bs), offset, length); err != nil {
		return err
	}

	b.offset = offset
	b.length = length

	return nil
}

// Bytes returns the bytes inside the current window. The result aliases the
// backing slice.
func (b *BytesRef) Bytes() []byte {
	return b.bs[b.offset : b.offset+b.length]
}

// RawBytes returns the whole backing slice, ignoring offset and length.
func (b *BytesRef) RawBytes() []byte {
	return b.bs
}

// Offset returns the start of the window.
func (b *BytesRef) Offset() int {
	return b.offset
}

// Len returns the length of the window.
func (b *BytesRef) Len() int {
	return b.length
}
// Source bundles the token attributes used by an analysis chain: one packed
// token attribute that serves most attribute views, plus separate byte
// attributes for the term bytes and the payload.
type Source struct {
	packed   *packedTokenAttr // backs Type, CharTerm, Offset, PositionIncrement, PositionLength, TermFrequency
	termAttr *bytesAttr       // backs BytesTerm and Term2Bytes
	payload  *bytesAttr       // backs Payload
}

// NewSource creates a Source with freshly allocated attributes.
func NewSource() *Source {
	return &Source{
		packed:   newPackedTokenAttr(),
		termAttr: newBytesAttr(ClassBytesTerm, ClassTermToBytesRef),
		payload:  newBytesAttr(ClassPayload),
	}
}

// Type returns the type attribute view of the packed token attribute.
func (r *Source) Type() TypeAttr {
	return r.packed
}

// PackedTokenAttribute returns the packed token attribute itself.
func (r *Source) PackedTokenAttribute() PackedTokenAttr {
	return r.packed
}

// BytesTerm returns the term bytes attribute.
func (r *Source) BytesTerm() BytesTermAttr {
	return r.termAttr
}

// Payload returns the payload attribute.
func (r *Source) Payload() PayloadAttr {
	return r.payload
}

// CharTerm returns the char term attribute, which is the bytesAttr held
// inside the packed token attribute (not termAttr).
func (r *Source) CharTerm() CharTermAttr {
	return r.packed.bytesAttr
}

// Offset returns the offset attribute view of the packed token attribute.
func (r *Source) Offset() OffsetAttr {
	return r.packed
}

// PositionIncrement returns the position increment view of the packed token attribute.
func (r *Source) PositionIncrement() PositionIncrAttr {
	return r.packed
}

// PositionLength returns the position length view of the packed token attribute.
func (r *Source) PositionLength() PositionLengthAttr {
	return r.packed
}

// TermFrequency returns the term frequency view of the packed token attribute.
func (r *Source) TermFrequency() TermFreqAttr {
	return r.packed
}

// Term2Bytes returns the term-to-bytes view, backed by the same attribute as BytesTerm.
func (r *Source) Term2Bytes() Term2BytesAttr {
	return r.termAttr
}

// Reset resets the packed attribute, the term attribute and the payload, in
// that order, and stops at the first error.
func (r *Source) Reset() error {
	if err := r.packed.Reset(); err != nil {
		return err
	}

	if err := r.termAttr.Reset(); err != nil {
		return err
	}

	return r.payload.Reset()
}
"github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestBytesInput(t *testing.T) { 10 | input := NewByteArrayDataInput([]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) 11 | 12 | v, err := input.ReadByte() 13 | assert.Nil(t, err) 14 | assert.Equal(t, byte(0), v) 15 | 16 | b10 := make([]byte, 10) 17 | n, err := input.Read(b10) 18 | assert.Nil(t, err) 19 | assert.Equal(t, 9, n) 20 | } 21 | 22 | func TestBytesOutput(t *testing.T) { 23 | bs := make([]byte, 10) 24 | output := NewByteArrayDataOutput(bs) 25 | n, err := output.Write([]byte("ab")) 26 | assert.Nil(t, err) 27 | assert.Equal(t, 2, n) 28 | 29 | assert.Equal(t, []byte("ab"), output.bytes[:2]) 30 | assert.Equal(t, 2, output.pos) 31 | 32 | n, err = output.Write(make([]byte, 8)) 33 | assert.Nil(t, err) 34 | assert.Equal(t, 8, n) 35 | assert.Equal(t, 10, output.pos) 36 | 37 | _, err = output.Write(make([]byte, 8)) 38 | assert.NotNil(t, err) 39 | 40 | } 41 | 42 | func TestBytesOutputReset(t *testing.T) { 43 | bs := make([]byte, 10) 44 | output := NewByteArrayDataOutput(bs) 45 | n, err := output.Write([]byte("ab")) 46 | assert.Nil(t, err) 47 | assert.Equal(t, 2, n) 48 | 49 | err = output.Reset(make([]byte, 30)) 50 | assert.Nil(t, err) 51 | 52 | n, err = output.Write(make([]byte, 30)) 53 | assert.Nil(t, err) 54 | assert.Equal(t, 30, n) 55 | assert.Equal(t, 30, output.GetPosition()) 56 | 57 | _, err = output.Write(make([]byte, 8)) 58 | assert.NotNil(t, err) 59 | } 60 | -------------------------------------------------------------------------------- /core/store/mmap.go: -------------------------------------------------------------------------------- 1 | //go:build linux || darwin 2 | 3 | package store 4 | 5 | import ( 6 | "io" 7 | 8 | "golang.org/x/exp/mmap" 9 | ) 10 | 11 | var ( 12 | _ DataInput = &MmapDataInput{} 13 | ) 14 | 15 | type MmapDataInput struct { 16 | *BaseDataInput 17 | 18 | pos int64 19 | reader *mmap.ReaderAt 20 | isEOF bool 21 | } 22 | 23 | func NewMmapDataInput(file string) (*MmapDataInput, error) { 24 | 
reader, err := mmap.Open(file) 25 | if err != nil { 26 | return nil, err 27 | } 28 | 29 | input := &MmapDataInput{ 30 | pos: 0, 31 | reader: reader, 32 | isEOF: false, 33 | } 34 | input.BaseDataInput = NewBaseDataInput(input) 35 | return input, nil 36 | } 37 | 38 | func (m *MmapDataInput) Read(p []byte) (n int, err error) { 39 | if m.isEOF { 40 | return 0, io.EOF 41 | } 42 | 43 | less := m.reader.Len() - int(m.pos) 44 | 45 | if len(p) > less { 46 | size, err := m.reader.ReadAt(p[:less], m.pos) 47 | if err != nil { 48 | return 0, err 49 | } 50 | m.pos += int64(size) 51 | m.isEOF = true 52 | return size, nil 53 | } 54 | 55 | size, err := m.reader.ReadAt(p, m.pos) 56 | if err != nil { 57 | return 0, err 58 | } 59 | m.pos += int64(size) 60 | return size, nil 61 | } 62 | 63 | func (m *MmapDataInput) Clone() CloneReader { 64 | input := &MmapDataInput{ 65 | pos: m.pos, 66 | reader: m.reader, 67 | isEOF: m.isEOF, 68 | } 69 | input.BaseDataInput = NewBaseDataInput(input) 70 | return input 71 | } 72 | 73 | func (m *MmapDataInput) Close() error { 74 | return m.reader.Close() 75 | } 76 | -------------------------------------------------------------------------------- /core/interface/index/docidset.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/types" 5 | ) 6 | 7 | // A DocIdSet contains a set of doc ids. Implementing classes must only implement iterator to provide access to the set. 8 | type DocIdSet interface { 9 | // Iterator 10 | // DVFUIterator Provides a DocIdSetIterator to access the set. 11 | // This implementation can return null if there are no docs that match. 12 | Iterator() (types.DocIdSetIterator, error) 13 | 14 | // Bits 15 | // TODO: somehow this class should express the cost of 16 | // iteration vs the cost of random access Bits; for 17 | // expensive Filters (e.g. 
distance < 1 km) we should use 18 | // bits() after all other Query/Filters have matched, but 19 | // this is the opposite of what bits() is for now 20 | // (down-low filtering using e.g. FixedBitSet) 21 | // Optionally provides a Bits interface for random access to matching documents. 22 | // Returns: null, if this DocIdSet does not support random access. In contrast to iterator(), 23 | // a return item of null does not imply that no documents match the filter! 24 | // The default implementation does not provide random access, 25 | // so you only need to implement this method if your DocIdSet can guarantee random 26 | // access to every docid in O(1) time without external disk access 27 | // (as Bits interface cannot throw IOException). This is generally true for bit sets 28 | // like org.apache.lucene.util.FixedBitSet, which return itself if they are used as DocIdSet. 29 | Bits() (Bits, error) 30 | } 31 | -------------------------------------------------------------------------------- /core/search/scorerutil.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/geange/lucene-go/core/util/structure" 4 | 5 | func costWithMinShouldMatch(costs []int64, numScorers, minShouldMatch int) int64 { 6 | // the idea here is the following: a boolean query c1,c2,...cn with minShouldMatch=m 7 | // could be rewritten to: 8 | // (c1 AND (c2..cn|msm=m-1)) OR (!c1 AND (c2..cn|msm=m)) 9 | // if we assume that clauses come in ascending cost, then 10 | // the cost of the first part is the cost of c1 (because the cost of a conjunction is 11 | // the cost of the least costly clause) 12 | // the cost of the second part is the cost of finding m matches among the c2...cn 13 | // remaining clauses 14 | // since it is a disjunction overall, the total cost is the sum of the costs of these 15 | // two parts 16 | 17 | // 这里的想法如下:一个布尔查询c1,c2,。。。minShouldMatch=m的cn可以重写为: 18 | //(c1 AND(c2..cn|msm=m-1))OR(!c1 
AND(c2..cn|msm=m)), 19 | // 如果我们假设子句以升序出现,那么第一部分的成本是c1的成本(因为连词的成本是成本最低的子句的成本) 20 | // 第二部分的成本就是在剩余的c2…cn子句中找到m个匹配项的成本,因为它总体上是一个析取,所以总成本是这两个部分的成本之和 21 | 22 | // If we recurse infinitely, we find out that the cost of a msm query is the sum of the 23 | // costs of the num_scorers - minShouldMatch + 1 least costly scorers 24 | maxSize := numScorers - minShouldMatch + 1 25 | pq := structure.NewPriorityQueue[int64](maxSize, func(a, b int64) bool { 26 | return a > b 27 | }) 28 | 29 | for _, cost := range costs { 30 | pq.InsertWithOverflow(cost) 31 | } 32 | 33 | sum := int64(0) 34 | for v := range pq.Iterator() { 35 | sum += v 36 | } 37 | return sum 38 | } 39 | -------------------------------------------------------------------------------- /core/document/latlon.go: -------------------------------------------------------------------------------- 1 | package document 2 | 3 | import "sync" 4 | 5 | var ( 6 | latLonPointTypeOnce sync.Once 7 | latLonPointType *FieldType 8 | ) 9 | 10 | type LatLonPoint struct { 11 | *Field[*LatLon] 12 | } 13 | 14 | type LatLon struct { 15 | Latitude float64 16 | Longitude float64 17 | } 18 | 19 | func NewLatLonPoint(name string, latitude, longitude float64) LatLonPoint { 20 | latLonPointTypeOnce.Do(func() { 21 | latLonPointType = NewFieldType() 22 | _ = latLonPointType.SetDimensions(2, INTEGER_BYTES) 23 | }) 24 | 25 | value := &LatLon{ 26 | Latitude: latitude, 27 | Longitude: longitude, 28 | } 29 | return LatLonPoint{NewField(name, value, latLonPointType)} 30 | } 31 | 32 | // LatLonDocValuesField 33 | // An per-document location field. 34 | // Sorting by distance is efficient. Multiple values for the same field in one document is allowed. 35 | // This field defines static factory methods for common operations: 36 | // newDistanceSort() for ordering documents by distance from a specified location. 37 | // If you also need query operations, you should add a separate LatLonPoint instance. 
If you also need to store 38 | // the value, you should add a separate StoredField instance. 39 | // WARNING: Values are indexed with some loss of precision from the original double values (4.190951585769653E-8 40 | // for the latitude component and 8.381903171539307E-8 for longitude). 41 | // See Also: LatLonPoint 42 | type LatLonDocValuesField Field[LatLon] 43 | 44 | type LatLonPointSortField struct { 45 | } 46 | 47 | type LatLonShape struct { 48 | } 49 | -------------------------------------------------------------------------------- /core/store/inputstream.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | ) 7 | 8 | var _ IndexInput = &InputStream{} 9 | 10 | // InputStream A DataInput wrapping a plain InputStream. 11 | type InputStream struct { 12 | *BaseDataInput 13 | 14 | eof bool 15 | is io.Reader 16 | } 17 | 18 | func NewInputStream(is io.Reader) *InputStream { 19 | input := &InputStream{is: is} 20 | input.BaseDataInput = NewBaseDataInput(input) 21 | return input 22 | } 23 | 24 | func (i *InputStream) Seek(offset int64, whence int) (int64, error) { 25 | return 0, errors.New("unsupported operate") 26 | } 27 | 28 | func (i *InputStream) GetFilePointer() int64 { 29 | return -1 30 | } 31 | 32 | func (i *InputStream) Slice(sliceDescription string, offset, length int64) (IndexInput, error) { 33 | return nil, errors.New("unsupported operate") 34 | } 35 | 36 | func (i *InputStream) Length() int64 { 37 | return 0 38 | } 39 | 40 | func (i *InputStream) RandomAccessSlice(offset int64, length int64) (RandomAccessInput, error) { 41 | return nil, errors.New("unsupported operate") 42 | } 43 | 44 | func (i *InputStream) Clone() CloneReader { 45 | return i 46 | } 47 | 48 | func (i *InputStream) ReadByte() (byte, error) { 49 | bs := [1]byte{} 50 | if _, err := i.Read(bs[:]); err != nil { 51 | return 0, err 52 | } 53 | return bs[0], nil 54 | } 55 | 56 | func (i *InputStream) Read(b 
// Hash is implemented by key types that can be stored in a Map.
type Hash interface {
	// Hash returns the code under which the key is stored and looked up.
	Hash() int64
}

// Map is a map addressed by the hash code of its keys.
//
// Entries are identified solely by K.Hash(): two keys that return the same
// code refer to the same entry, and the later Put overwrites the earlier one.
// Entries live in a slice; slots freed by Remove are recorded and reused by
// later Put calls. No synchronization is performed.
type Map[K Hash, V any] struct {
	mp     map[int64]int     // hash code -> index into values
	values []*MapEntry[K, V] // entry storage; slots listed in rmIdx are vacant
	rmIdx  []int             // indexes of vacant slots available for reuse
}

// NewMap returns an empty Map.
func NewMap[K Hash, V any]() *Map[K, V] {
	return &Map[K, V]{
		mp:     map[int64]int{},
		values: make([]*MapEntry[K, V], 0),
		rmIdx:  make([]int, 0),
	}
}

// Put stores value under key, replacing any entry with the same hash code.
func (m *Map[K, V]) Put(key K, value V) {
	code := key.Hash()
	if idx, ok := m.mp[code]; ok {
		m.values[idx].Key = key
		m.values[idx].Value = value
		return
	}

	// No vacant slot to recycle: grow the entry slice.
	if len(m.rmIdx) == 0 {
		m.values = append(m.values, &MapEntry[K, V]{Key: key, Value: value})
		m.mp[code] = len(m.values) - 1
		return
	}

	// Recycle the most recently vacated slot.
	last := len(m.rmIdx) - 1
	idx := m.rmIdx[last]
	m.rmIdx = m.rmIdx[:last]
	m.mp[code] = idx
	m.values[idx].Key = key
	m.values[idx].Value = value
}

// Get returns the value stored under key's hash code. ok is false, and v is
// the zero value, when no such entry exists.
func (m *Map[K, V]) Get(key K) (v V, ok bool) {
	idx, found := m.mp[key.Hash()]
	if !found {
		return v, false
	}
	return m.values[idx].Value, true
}

// Remove deletes the entry stored under key's hash code and reports whether
// an entry was present.
func (m *Map[K, V]) Remove(key K) bool {
	code := key.Hash()
	idx, ok := m.mp[code]
	if !ok {
		return false
	}

	delete(m.mp, code)
	// Reset the vacated slot so the removed key and value are no longer
	// reachable through the map while the slot waits to be reused.
	*m.values[idx] = MapEntry[K, V]{}
	m.rmIdx = append(m.rmIdx, idx)
	return true
}

// Clear removes every entry while keeping the allocated storage for reuse.
func (m *Map[K, V]) Clear() {
	// Drop the entry pointers first: truncating alone would leave them alive
	// in the retained backing array.
	clear(m.values)
	m.values = m.values[:0]
	m.rmIdx = m.rmIdx[:0]
	clear(m.mp)
}

// MapEntry is a key/value pair held by a Map.
type MapEntry[K Hash, V any] struct {
	Key   K
	Value V
}
-------------------------------------------------------------------------------- /core/codecs/compressing/termvectorsformat.go: -------------------------------------------------------------------------------- 1 | package compressing 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/geange/lucene-go/core/interface/index" 7 | "github.com/geange/lucene-go/core/store" 8 | ) 9 | 10 | var _ index.TermVectorsFormat = &TermVectorsFormat{} 11 | 12 | type TermVectorsFormat struct { 13 | formatName string 14 | segmentSuffix string 15 | compressionMode CompressionMode 16 | chunkSize int 17 | blockSize int 18 | maxDocsPerChunk int 19 | } 20 | 21 | func NewTermVectorsFormat(formatName, segmentSuffix string, compressionMode CompressionMode, 22 | chunkSize, maxDocsPerChunk, blockSize int) *TermVectorsFormat { 23 | return &TermVectorsFormat{formatName: formatName, segmentSuffix: segmentSuffix, compressionMode: compressionMode, chunkSize: chunkSize, blockSize: blockSize, maxDocsPerChunk: maxDocsPerChunk} 24 | } 25 | 26 | func (f *TermVectorsFormat) VectorsReader(ctx context.Context, directory store.Directory, 27 | segmentInfo index.SegmentInfo, fieldInfos index.FieldInfos, 28 | ioContext *store.IOContext) (index.TermVectorsReader, error) { 29 | return NewTermVectorsReader(ctx, directory, segmentInfo, f.segmentSuffix, 30 | fieldInfos, nil, f.formatName, f.compressionMode) 31 | } 32 | 33 | func (f *TermVectorsFormat) VectorsWriter(ctx context.Context, directory store.Directory, 34 | segmentInfo index.SegmentInfo, ioContext *store.IOContext) (index.TermVectorsWriter, error) { 35 | return NewTermVectorsWriter(ctx, directory, segmentInfo, f.segmentSuffix, nil, 36 | f.formatName, f.compressionMode, f.chunkSize, f.maxDocsPerChunk, f.blockSize) 37 | } 38 | -------------------------------------------------------------------------------- /core/search/termmatchesiterator.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 
| "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | var _ index.MatchesIterator = &TermMatchesIterator{} 8 | 9 | // TermMatchesIterator 10 | // A MatchesIterator over a single term's postings list 11 | type TermMatchesIterator struct { 12 | upto int 13 | pos int 14 | pe index.PostingsEnum 15 | query index.Query 16 | } 17 | 18 | func NewTermMatchesIterator(query index.Query, pe index.PostingsEnum) (*TermMatchesIterator, error) { 19 | freq, err := pe.Freq() 20 | if err != nil { 21 | return nil, err 22 | } 23 | 24 | return &TermMatchesIterator{ 25 | pe: pe, 26 | query: query, 27 | upto: freq, 28 | }, nil 29 | } 30 | 31 | func (t *TermMatchesIterator) Next() (bool, error) { 32 | upto := t.upto 33 | t.upto-- 34 | if upto > 0 { 35 | pos, err := t.pe.NextPosition() 36 | if err != nil { 37 | return false, err 38 | } 39 | t.pos = pos 40 | return true, nil 41 | } 42 | return false, nil 43 | } 44 | 45 | func (t *TermMatchesIterator) StartPosition() int { 46 | return t.pos 47 | } 48 | 49 | func (t *TermMatchesIterator) EndPosition() int { 50 | return t.pos 51 | } 52 | 53 | func (t *TermMatchesIterator) StartOffset() (int, error) { 54 | return t.pe.StartOffset() 55 | } 56 | 57 | func (t *TermMatchesIterator) EndOffset() (int, error) { 58 | return t.pe.EndOffset() 59 | } 60 | 61 | func (t *TermMatchesIterator) GetSubMatches() (index.MatchesIterator, error) { 62 | return nil, nil 63 | } 64 | 65 | func (t *TermMatchesIterator) GetQuery() index.Query { 66 | return t.query 67 | } 68 | -------------------------------------------------------------------------------- /core/index/multisorter.go: -------------------------------------------------------------------------------- 1 | package index 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | "github.com/geange/lucene-go/core/util" 6 | ) 7 | 8 | // SortCodecReader 9 | // Does a merge sort of the leaves of the incoming reader, returning MergeState.DocMap to map each leaf's documents into the merged 
segment. The documents for each incoming leaf reader must already be sorted by the same sort! Returns null if the merge sort is not needed (segments are already in index sort order). 10 | func SortCodecReader(sort index.Sort, readers []index.CodecReader) ([]MergeStateDocMap, error) { 11 | //fields := sort.GetSort() 12 | // 13 | //comparables := make([][]ComparableProvider, len(fields)) 14 | //reverseMuls := make([]int, len(fields)) 15 | //for _, field := range fields { 16 | // sorter := field.GetIndexSorter() 17 | // if sorter == nil { 18 | // return nil, fmt.Errorf("cannot use sort field:%s for index sorting", field) 19 | // } 20 | // comparables[i] = sorter.get 21 | //} 22 | 23 | // TODO 24 | panic("") 25 | } 26 | 27 | type LeafAndDocID struct { 28 | readerIndex int 29 | liveDocs util.Bits 30 | maxDoc int 31 | valuesAsComparableLongs []int64 32 | docId int 33 | } 34 | 35 | func NewLeafAndDocID(readerIndex int, liveDocs util.Bits, maxDoc int, numComparables int) *LeafAndDocID { 36 | return &LeafAndDocID{ 37 | readerIndex: readerIndex, 38 | liveDocs: liveDocs, 39 | maxDoc: maxDoc, 40 | valuesAsComparableLongs: make([]int64, numComparables), 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /core/util/numeric/numeric_utils_test.go: -------------------------------------------------------------------------------- 1 | package numeric 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestIllegalSubtract(t *testing.T) { 11 | type args struct { 12 | bytesPerDim int 13 | dim int 14 | a []byte 15 | b []byte 16 | result []byte 17 | } 18 | tests := []struct { 19 | name string 20 | args args 21 | wantErr bool 22 | }{ 23 | { 24 | name: "IllegalSubtract", 25 | args: args{ 26 | bytesPerDim: 4, 27 | dim: 0, 28 | a: []byte{0, 0, 0, 0xf0}, 29 | b: []byte{0, 0, 0, 0xf1}, 30 | result: []byte{0, 0, 0, 0}, 31 | }, 32 | wantErr: true, 33 | }, 34 | } 35 | for _, tt := range tests { 
36 | t.Run(tt.name, func(t *testing.T) { 37 | if err := Subtract(tt.args.bytesPerDim, tt.args.dim, tt.args.a, tt.args.b, tt.args.result); (err != nil) != tt.wantErr { 38 | t.Errorf("Subtract() error = %v, wantErr %v", err, tt.wantErr) 39 | } 40 | }) 41 | } 42 | } 43 | 44 | func TestSubtract(t *testing.T) { 45 | //rand.Seed(time.Now().UnixNano()) 46 | //numBytes := rand.Intn(100) + 1 47 | // 48 | //for i := 0; i < 1000; i++ { 49 | // big.Int 50 | //} 51 | } 52 | 53 | func TestSortableDoubleBits(t *testing.T) { 54 | nums := []float64{ 55 | -10, 56 | -5, 57 | -1, 58 | 0, 59 | 1, 60 | 10, 61 | } 62 | 63 | preNum := uint64(0) 64 | for _, num := range nums { 65 | sortNum := SortableFloat64Bits(math.Float64bits(num)) ^ (1 << 63) 66 | assert.True(t, sortNum > preNum) 67 | sortNum = preNum 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /core/store/ramfile.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "iter" 5 | "slices" 6 | "sync/atomic" 7 | ) 8 | 9 | type RAMFile struct { 10 | buffers [][]byte 11 | size *atomic.Int64 12 | directory *RAMDirectory 13 | } 14 | 15 | func NewRAMFile(dir *RAMDirectory) *RAMFile { 16 | return &RAMFile{ 17 | buffers: make([][]byte, 0), 18 | size: new(atomic.Int64), 19 | directory: dir, 20 | } 21 | } 22 | 23 | func (f *RAMFile) GetLength() int64 { 24 | return f.size.Load() 25 | } 26 | 27 | func (f *RAMFile) Clone() *RAMFile { 28 | dst := &RAMFile{ 29 | buffers: make([][]byte, 0), 30 | size: &atomic.Int64{}, 31 | directory: f.directory, 32 | } 33 | 34 | for _, buf := range f.buffers { 35 | dst.buffers = append(dst.buffers, slices.Clone(buf)) 36 | } 37 | dst.size.Store(f.size.Load()) 38 | return dst 39 | } 40 | 41 | func (f *RAMFile) Write(p []byte) { 42 | if len(p) == 0 { 43 | return 44 | } 45 | buf := slices.Clone(p) 46 | f.buffers = append(f.buffers, buf) 47 | f.size.Add(int64(len(p))) 48 | } 49 | 50 | func (f 
*RAMFile) GetBuffer(n int) ([]byte, bool) { 51 | if n >= len(f.buffers) || n < 0 { 52 | return nil, false 53 | } 54 | return f.buffers[n], true 55 | } 56 | 57 | func (f *RAMFile) NumBuffers() int { 58 | return len(f.buffers) 59 | } 60 | 61 | func (f *RAMFile) Iterator() iter.Seq[byte] { 62 | return func(yield func(byte) bool) { 63 | for _, buffer := range f.buffers { 64 | for _, b := range buffer { 65 | if !yield(b) { 66 | return 67 | } 68 | } 69 | } 70 | } 71 | } 72 | 73 | func (f *RAMFile) SetLength(size int64) { 74 | f.size.Store(size) 75 | } 76 | -------------------------------------------------------------------------------- /core/codecs/blocktree/compressionalgorithm.go: -------------------------------------------------------------------------------- 1 | package blocktree 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | 7 | "github.com/geange/lucene-go/core/store" 8 | "github.com/geange/lucene-go/core/util/compress" 9 | ) 10 | 11 | var ( 12 | NO_COMPRESSION = &noCompression{} 13 | LOWERCASE_ASCII = &lowercaseAscii{} 14 | LZ4 = &lz4Algorithm{} 15 | ) 16 | 17 | type CompressionAlgorithm interface { 18 | Code() int 19 | Read(ctx context.Context, in store.DataInput, bs []byte) error 20 | } 21 | 22 | type noCompression struct { 23 | } 24 | 25 | func (*noCompression) Code() int { 26 | return 0 27 | } 28 | 29 | func (n *noCompression) Read(ctx context.Context, in store.DataInput, out []byte) error { 30 | _, err := in.Read(out) 31 | return err 32 | } 33 | 34 | type lowercaseAscii struct { 35 | } 36 | 37 | func (*lowercaseAscii) Code() int { 38 | return 1 39 | } 40 | 41 | func (*lowercaseAscii) Read(ctx context.Context, in store.DataInput, out []byte) error { 42 | return compress.LowercaseAsciiCompression.Decompress(ctx, in, out) 43 | } 44 | 45 | type lz4Algorithm struct { 46 | } 47 | 48 | func (*lz4Algorithm) Code() int { 49 | return 2 50 | } 51 | 52 | func (*lz4Algorithm) Read(ctx context.Context, in store.DataInput, out []byte) error { 53 | return 
compress.LZ4Compression.Decompress(in, out) 54 | } 55 | 56 | func ByCode(code int) (CompressionAlgorithm, error) { 57 | switch code { 58 | case 0: 59 | return &noCompression{}, nil 60 | case 1: 61 | return &lowercaseAscii{}, nil 62 | case 2: 63 | return &lz4Algorithm{}, nil 64 | default: 65 | return nil, errors.New("unsupported compression") 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /core/search/matches.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/interface/index" 5 | ) 6 | 7 | type baseMatches struct { 8 | strs []string 9 | } 10 | 11 | var _ index.Matches = &matchWithNoTerms{} 12 | 13 | type matchWithNoTerms struct { 14 | } 15 | 16 | func (m *matchWithNoTerms) Strings() []string { 17 | return nil 18 | } 19 | 20 | func (m *matchWithNoTerms) GetMatches(field string) (index.MatchesIterator, error) { 21 | return nil, nil 22 | } 23 | 24 | func (m *matchWithNoTerms) GetSubMatches() []index.Matches { 25 | return nil 26 | } 27 | 28 | var _ index.Matches = &matchForField{} 29 | 30 | type matchForField struct { 31 | field string 32 | cached bool 33 | } 34 | 35 | func (m *matchForField) Strings() []string { 36 | //TODO implement me 37 | panic("implement me") 38 | } 39 | 40 | func (m *matchForField) GetMatches(field string) (index.MatchesIterator, error) { 41 | //TODO implement me 42 | panic("implement me") 43 | } 44 | 45 | func (m *matchForField) GetSubMatches() []index.Matches { 46 | //TODO implement me 47 | panic("implement me") 48 | } 49 | 50 | var _ index.Matches = &MatchesAnon{} 51 | 52 | type MatchesAnon struct { 53 | FnStrings func() []string 54 | FnGetMatches func(field string) (index.MatchesIterator, error) 55 | FnGetSubMatches func() []index.Matches 56 | } 57 | 58 | func (m *MatchesAnon) Strings() []string { 59 | return m.FnStrings() 60 | } 61 | 62 | func (m *MatchesAnon) GetMatches(field string) 
(index.MatchesIterator, error) { 63 | return m.FnGetMatches(field) 64 | } 65 | 66 | func (m *MatchesAnon) GetSubMatches() []index.Matches { 67 | return m.FnGetSubMatches() 68 | } 69 | -------------------------------------------------------------------------------- /core/query/longrangeslowrangequery.go: -------------------------------------------------------------------------------- 1 | package query 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/document" 5 | "github.com/geange/lucene-go/core/interface/index" 6 | ) 7 | 8 | type LongRangeSlowRangeQuery struct { 9 | *BinaryRangeFieldRangeQuery 10 | 11 | field string 12 | mins []int64 13 | maxs []int64 14 | } 15 | 16 | func NewLongRangeSlowRangeQuery(field string, minNums, maxNums []int64, queryType QueryType) (*LongRangeSlowRangeQuery, error) { 17 | packedValues, err := encodeLongRanges(minNums, maxNums) 18 | if err != nil { 19 | return nil, err 20 | } 21 | 22 | rangeQuery := NewBinaryRangeFieldRangeQuery(field, packedValues, document.INTEGER_BYTES, len(minNums), queryType) 23 | 24 | return &LongRangeSlowRangeQuery{ 25 | BinaryRangeFieldRangeQuery: rangeQuery, 26 | mins: minNums, 27 | field: field, 28 | maxs: maxNums, 29 | }, nil 30 | } 31 | 32 | func (q *LongRangeSlowRangeQuery) CreateWeight(searcher index.IndexSearcher, scoreMode index.ScoreMode, boost float64) (index.Weight, error) { 33 | return q.createWeight(q, scoreMode, boost), nil 34 | } 35 | 36 | func (q *LongRangeSlowRangeQuery) Rewrite(reader index.IndexReader) (index.Query, error) { 37 | return q, nil 38 | } 39 | 40 | func (q *LongRangeSlowRangeQuery) Visit(visitor index.QueryVisitor) error { 41 | return rangeQueryVisit(q.field, q, visitor) 42 | } 43 | 44 | func encodeLongRanges(mins, maxs []int64) ([]byte, error) { 45 | dst := make([]byte, 2*document.LONG_BYTES*len(mins)) 46 | if err := verifyAndEncodeInt64(mins, maxs, dst); err != nil { 47 | return nil, err 48 | } 49 | return dst, nil 50 | } 51 | 
-------------------------------------------------------------------------------- /core/util/packed/bulkoperation/bulkoperationpacked24.go: -------------------------------------------------------------------------------- 1 | package bulkoperation 2 | 3 | type Packed24 struct { 4 | *BulkOperationPacked 5 | } 6 | 7 | func NewPacked24() *Packed24 { 8 | return &Packed24{NewPacked(24)} 9 | } 10 | 11 | func (b *Packed24) DecodeUint64(blocks []uint64, values []uint64, iterations int) { 12 | blocksOffset, valuesOffset := 0, 0 13 | for i := 0; i < iterations; i++ { 14 | block0 := blocks[blocksOffset] 15 | blocksOffset++ 16 | values[valuesOffset] = block0 >> 40 17 | valuesOffset++ 18 | values[valuesOffset] = (block0 >> 16) & 0xFFFFFF 19 | valuesOffset++ 20 | block1 := blocks[blocksOffset] 21 | blocksOffset++ 22 | values[valuesOffset] = ((block0 & 65535) << 8) | (block1 >> 56) 23 | valuesOffset++ 24 | values[valuesOffset] = (block1 >> 32) & 0xFFFFFF 25 | valuesOffset++ 26 | values[valuesOffset] = (block1 >> 8) & 0xFFFFFF 27 | valuesOffset++ 28 | block2 := blocks[blocksOffset] 29 | blocksOffset++ 30 | values[valuesOffset] = ((block1 & 255) << 16) | (block2 >> 48) 31 | valuesOffset++ 32 | values[valuesOffset] = (block2 >> 24) & 0xFFFFFF 33 | valuesOffset++ 34 | values[valuesOffset] = block2 & 0xFFFFFF 35 | valuesOffset++ 36 | } 37 | } 38 | 39 | func (b *Packed24) DecodeBytes(blocks []byte, values []uint64, iterations int) { 40 | blocksOffset, valuesOffset := 0, 0 41 | for i := 0; i < iterations; i++ { 42 | byte0 := uint64(blocks[blocksOffset]) 43 | blocksOffset++ 44 | byte1 := uint64(blocks[blocksOffset]) 45 | blocksOffset++ 46 | byte2 := uint64(blocks[blocksOffset]) 47 | blocksOffset++ 48 | values[valuesOffset] = (byte0 << 16) | (byte1 << 8) | byte2 49 | valuesOffset++ 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /core/analysis/standard/analyzer.go: 
-------------------------------------------------------------------------------- 1 | package standard 2 | 3 | import ( 4 | "github.com/geange/lucene-go/core/analysis" 5 | "io" 6 | ) 7 | 8 | var _ analysis.Analyzer = &Analyzer{} 9 | 10 | type Analyzer struct { 11 | *analysis.BaseAnalyzer 12 | 13 | stopWord *analysis.BaseStopWordAnalyzer 14 | maxTokenLength int 15 | } 16 | 17 | func NewAnalyzer(set *analysis.CharArraySet) *Analyzer { 18 | analyzer := &Analyzer{ 19 | stopWord: analysis.NewStopWordAnalyzer(set), 20 | maxTokenLength: 255, 21 | } 22 | analyzer.BaseAnalyzer = analysis.NewBaseAnalyzer(analyzer) 23 | return analyzer 24 | } 25 | 26 | // SetMaxTokenLength Set the max allowed token length. Tokens larger than this will be chopped up at this 27 | // token length and emitted as multiple tokens. If you need to skip such large tokens, you could increase 28 | // this max length, and then use LengthFilter to remove long tokens. The default is DEFAULT_MAX_TOKEN_LENGTH. 29 | func (r *Analyzer) SetMaxTokenLength(length int) { 30 | r.maxTokenLength = length 31 | } 32 | 33 | // GetMaxTokenLength Returns the current maximum token length 34 | // See Also: SetMaxTokenLength 35 | func (r *Analyzer) GetMaxTokenLength() int { 36 | return r.maxTokenLength 37 | } 38 | 39 | func (r *Analyzer) CreateComponents(_ string) *analysis.TokenStreamComponents { 40 | src := NewTokenizer() 41 | src.setMaxTokenLength(r.maxTokenLength) 42 | tok1 := analysis.NewLowerCaseFilter(src) 43 | tok2 := analysis.NewStopFilter(tok1, r.stopWord.GetStopWordSet()) 44 | return analysis.NewTokenStreamComponents(func(reader io.Reader) { 45 | src.setMaxTokenLength(r.maxTokenLength) 46 | _ = src.SetReader(reader) 47 | }, tok2) 48 | } 49 | -------------------------------------------------------------------------------- /core/query/rangefieldquery.go: -------------------------------------------------------------------------------- 1 | package query 2 | 3 | import ( 4 | "context" 5 | 6 | 
"github.com/geange/lucene-go/core/types" 7 | ) 8 | 9 | // RangeFieldQuery 10 | // Query class for searching RangeField types by a defined PointValues. Relation. 11 | type RangeFieldQuery struct { 12 | field string 13 | queryType QueryType 14 | numDims int 15 | ranges []byte 16 | bytesPerDim int 17 | } 18 | 19 | // QueryType 20 | // Used by RangeFieldQuery to check how each internal or leaf node relates to the query. 21 | type QueryType interface { 22 | Compare(ctx context.Context, queryPackedValue, minPackedValue, maxPackedValue []byte, numDims, bytesPerDim, dim int) (types.Relation, error) 23 | Matches(ctx context.Context, queryPackedValue, packedValue []byte, numDims, bytesPerDim, dim int) bool 24 | } 25 | 26 | func matches(ctx context.Context, queryType QueryType, queryPackedValue, packedValue []byte, numDims, bytesPerDim int) bool { 27 | for dim := 0; dim < numDims; dim++ { 28 | if queryType.Matches(ctx, queryPackedValue, packedValue, numDims, bytesPerDim, dim) == false { 29 | return false 30 | } 31 | } 32 | return true 33 | } 34 | 35 | var _ QueryType = &INTERSECTS_QueryType{} 36 | 37 | type INTERSECTS_QueryType struct { 38 | } 39 | 40 | func (*INTERSECTS_QueryType) Compare(ctx context.Context, queryPackedValue, minPackedValue, maxPackedValue []byte, numDims, bytesPerDim, dim int) (types.Relation, error) { 41 | //TODO implement me 42 | panic("implement me") 43 | } 44 | 45 | func (*INTERSECTS_QueryType) Matches(ctx context.Context, queryPackedValue, packedValue []byte, numDims, bytesPerDim, dim int) bool { 46 | //TODO implement me 47 | panic("implement me") 48 | } 49 | -------------------------------------------------------------------------------- /core/util/bkd/docidswriter_test.go: -------------------------------------------------------------------------------- 1 | package bkd 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | 7 | "github.com/geange/lucene-go/core/store" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestWriteDocIdsSorted(t 
*testing.T) { 12 | output := store.NewBufferDataOutput() 13 | docIds := make([]int, 100) 14 | for i := range docIds { 15 | docIds[i] = i 16 | } 17 | err := WriteDocIds(nil, docIds, output) 18 | assert.Nil(t, err) 19 | 20 | input := store.NewByteArrayDataInput(output.Bytes()) 21 | 22 | newDocIds := make([]int, 100) 23 | err = ReadInts(nil, input, 100, newDocIds) 24 | assert.Nil(t, err) 25 | 26 | assert.Equal(t, docIds, newDocIds) 27 | } 28 | 29 | func TestWriteDocIdsInt24(t *testing.T) { 30 | output := store.NewBufferDataOutput() 31 | docIds := make([]int, 100) 32 | for i := range docIds { 33 | docIds[i] = rand.Intn(0xFFFFFF) 34 | } 35 | err := WriteDocIds(nil, docIds, output) 36 | assert.Nil(t, err) 37 | 38 | input := store.NewByteArrayDataInput(output.Bytes()) 39 | 40 | newDocIds := make([]int, 100) 41 | err = ReadInts(nil, input, 100, newDocIds) 42 | assert.Nil(t, err) 43 | 44 | assert.Equal(t, docIds, newDocIds) 45 | } 46 | 47 | func TestWriteDocIdsInt32(t *testing.T) { 48 | output := store.NewBufferDataOutput() 49 | docIds := make([]int, 100) 50 | for i := range docIds { 51 | docIds[i] = rand.Intn(0xFFFFFF) + 0xFFFFFF 52 | } 53 | err := WriteDocIds(nil, docIds, output) 54 | assert.Nil(t, err) 55 | 56 | input := store.NewByteArrayDataInput(output.Bytes()) 57 | 58 | newDocIds := make([]int, 100) 59 | err = ReadInts(nil, input, 100, newDocIds) 60 | assert.Nil(t, err) 61 | 62 | assert.Equal(t, docIds, newDocIds) 63 | } 64 | --------------------------------------------------------------------------------