├── Cargo.toml ├── LICENSE ├── README.md ├── examples └── example.rs ├── rust-toolchain ├── rustfmt.toml └── src ├── core ├── analysis │ ├── mod.rs │ ├── token_stream.rs │ └── whitespace_tokenizer.rs ├── codec │ ├── codec_util.rs │ ├── compound.rs │ ├── doc_values │ │ ├── doc_values_consumer.rs │ │ ├── doc_values_format.rs │ │ ├── doc_values_iterator.rs │ │ ├── doc_values_producer.rs │ │ ├── doc_values_writer.rs │ │ ├── lucene54 │ │ │ ├── doc_values_provider.rs │ │ │ ├── doc_values_term_iterator.rs │ │ │ ├── lucene54_doc_values_consumer.rs │ │ │ ├── lucene54_doc_values_format.rs │ │ │ ├── lucene54_doc_values_producer.rs │ │ │ └── mod.rs │ │ └── mod.rs │ ├── field_infos │ │ ├── field_infos_format.rs │ │ └── mod.rs │ ├── fields.rs │ ├── live_docs.rs │ ├── matching_reader.rs │ ├── mod.rs │ ├── multi_fields.rs │ ├── multi_terms.rs │ ├── norms │ │ ├── mod.rs │ │ ├── norm_values_writer.rs │ │ ├── norms.rs │ │ ├── norms_consumer.rs │ │ └── norms_producer.rs │ ├── points │ │ ├── mod.rs │ │ ├── point_values.rs │ │ ├── point_values_writer.rs │ │ ├── points_reader.rs │ │ └── points_writer.rs │ ├── posting_iterator.rs │ ├── postings │ │ ├── blocktree │ │ │ ├── blocktree_reader.rs │ │ │ ├── blocktree_writer.rs │ │ │ ├── mod.rs │ │ │ └── term_iter_frame.rs │ │ ├── for_util.rs │ │ ├── mod.rs │ │ ├── partial_block_decoder.rs │ │ ├── per_field_postings_format.rs │ │ ├── posting_format.rs │ │ ├── posting_reader.rs │ │ ├── posting_writer.rs │ │ ├── simd_block_decoder.rs │ │ ├── skip_reader.rs │ │ ├── skip_writer.rs │ │ ├── terms_hash.rs │ │ └── terms_hash_per_field.rs │ ├── segment_infos │ │ ├── mod.rs │ │ ├── segment_infos.rs │ │ └── segment_infos_format.rs │ ├── sorter.rs │ ├── stored_fields │ │ ├── mod.rs │ │ ├── stored_fields.rs │ │ ├── stored_fields_consumer.rs │ │ ├── stored_fields_reader.rs │ │ └── stored_fields_writer.rs │ ├── term_vectors │ │ ├── mod.rs │ │ ├── term_vector_consumer.rs │ │ ├── term_vectors_reader.rs │ │ └── term_vectors_writer.rs │ └── terms.rs ├── doc │ ├── 
doc_values.rs │ ├── document.rs │ ├── field.rs │ ├── index_options.rs │ ├── mod.rs │ └── term.rs ├── highlight │ ├── frag_list_builder.rs │ ├── fragments_builder.rs │ ├── fvh_highlighter.rs │ └── mod.rs ├── index │ ├── merge │ │ ├── doc_id_merger.rs │ │ ├── merge_policy.rs │ │ ├── merge_rate_limiter.rs │ │ ├── merge_scheduler.rs │ │ ├── merge_state.rs │ │ ├── mod.rs │ │ └── segment_merger.rs │ ├── mod.rs │ ├── reader │ │ ├── directory_reader.rs │ │ ├── index_lookup.rs │ │ ├── leaf_reader.rs │ │ ├── leaf_reader_wrapper.rs │ │ ├── mod.rs │ │ └── segment_reader.rs │ └── writer │ │ ├── bufferd_updates.rs │ │ ├── delete_policy.rs │ │ ├── dir_wrapper.rs │ │ ├── doc_consumer.rs │ │ ├── doc_values_update.rs │ │ ├── doc_writer.rs │ │ ├── doc_writer_delete_queue.rs │ │ ├── doc_writer_flush_queue.rs │ │ ├── doc_writer_per_thread.rs │ │ ├── flush_control.rs │ │ ├── flush_policy.rs │ │ ├── index_file_deleter.rs │ │ ├── index_writer.rs │ │ ├── index_writer_config.rs │ │ ├── mod.rs │ │ └── prefix_code_terms.rs ├── mod.rs ├── search │ ├── cache │ │ ├── cache_policy.rs │ │ ├── lru_cache.rs │ │ ├── mod.rs │ │ └── query_cache.rs │ ├── collector │ │ ├── chain.rs │ │ ├── early_terminating.rs │ │ ├── mod.rs │ │ ├── timeout.rs │ │ └── top_docs.rs │ ├── explanation.rs │ ├── mod.rs │ ├── query │ │ ├── boolean_query.rs │ │ ├── boost_query.rs │ │ ├── boosting_query.rs │ │ ├── disjunction_max_query.rs │ │ ├── exists_query.rs │ │ ├── filter_query.rs │ │ ├── match_all_query.rs │ │ ├── mod.rs │ │ ├── phrase_query.rs │ │ ├── point_range_query.rs │ │ ├── query_string.rs │ │ ├── spans │ │ │ ├── mod.rs │ │ │ ├── span.rs │ │ │ ├── span_boost.rs │ │ │ ├── span_near.rs │ │ │ ├── span_or.rs │ │ │ └── span_term.rs │ │ └── term_query.rs │ ├── scorer │ │ ├── boosting_scorer.rs │ │ ├── bulk_scorer.rs │ │ ├── conjunction_scorer.rs │ │ ├── disjunction_scorer.rs │ │ ├── min_scorer.rs │ │ ├── mod.rs │ │ ├── phrase_scorer.rs │ │ ├── req_not_scorer.rs │ │ ├── req_opt_scorer.rs │ │ ├── rescorer.rs │ │ └── 
term_scorer.rs │ ├── search_manager.rs │ ├── searcher.rs │ ├── similarity │ │ ├── bm25_similarity.rs │ │ └── mod.rs │ ├── sort_field │ │ ├── collapse_top_docs.rs │ │ ├── field_comparator.rs │ │ ├── mod.rs │ │ ├── search_group.rs │ │ └── sort_field.rs │ └── statistics.rs ├── store │ ├── directory │ │ ├── directory.rs │ │ ├── fs_directory.rs │ │ ├── mmap_directory.rs │ │ ├── mod.rs │ │ └── tracking_directory_wrapper.rs │ ├── io │ │ ├── buffered_checksum_index_input.rs │ │ ├── byte_array_data_input.rs │ │ ├── checksum_index_input.rs │ │ ├── data_input.rs │ │ ├── data_output.rs │ │ ├── fs_index_output.rs │ │ ├── growable_byte_array_output.rs │ │ ├── index_input.rs │ │ ├── index_output.rs │ │ ├── mmap_index_input.rs │ │ ├── mod.rs │ │ ├── ram_output.rs │ │ └── random_access_input.rs │ └── mod.rs └── util │ ├── bit_set.rs │ ├── bit_util.rs │ ├── bits.rs │ ├── bkd │ ├── bkd_reader.rs │ ├── bkd_writer.rs │ ├── doc_ids_writer.rs │ ├── heap_point.rs │ ├── mod.rs │ └── offline_point.rs │ ├── byte_block_pool.rs │ ├── byte_slice_reader.rs │ ├── bytes_ref.rs │ ├── bytes_ref_hash.rs │ ├── compression.rs │ ├── context.rs │ ├── counter.rs │ ├── disi.rs │ ├── doc_id_set.rs │ ├── doc_id_set_builder.rs │ ├── external │ ├── binary_heap.rs │ ├── deferred.rs │ ├── mod.rs │ ├── thread_pool.rs │ └── volatile.rs │ ├── fst │ ├── bytes_output.rs │ ├── bytes_store.rs │ ├── fst_builder.rs │ ├── fst_iteartor.rs │ ├── fst_reader.rs │ └── mod.rs │ ├── int_block_pool.rs │ ├── ints_ref.rs │ ├── math.rs │ ├── mod.rs │ ├── numeric.rs │ ├── packed │ ├── block_packed_writer.rs │ ├── direct_monotonic_reader.rs │ ├── direct_monotonic_writer.rs │ ├── direct_reader.rs │ ├── direct_writer.rs │ ├── elias_fano_decoder.rs │ ├── elias_fano_encoder.rs │ ├── mod.rs │ ├── monotonic_block_packed_reader.rs │ ├── monotonic_block_packed_writer.rs │ ├── packed_ints_null_reader.rs │ ├── packed_long_values.rs │ ├── packed_misc.rs │ ├── packed_simd.rs │ └── paged_mutable.rs │ ├── paged_bytes.rs │ ├── selector.rs │ ├── 
small_float.rs │ ├── sorter.rs │ ├── string_util.rs │ ├── variant_value.rs │ └── version.rs ├── error.rs └── lib.rs /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rucene" 3 | version = "0.1.1" 4 | authors = ["Zhihu Search Team"] 5 | repository = "https://github.com/zhihu/rucene" 6 | license-file = "LICENSE" 7 | description = """ 8 | Rucene is a Rust port of the popular Apache Lucene project. Rucene is 9 | not a complete application, but rather a code library and API that 10 | can easily be used to add full text search capabilities to applications. 11 | """ 12 | 13 | [dependencies] 14 | bytes = "0.4" 15 | chan = "0.1.21" 16 | chan-signal = "0.3.1" 17 | crc = "1.5.0" 18 | crossbeam = "0.7" 19 | either = "1.3" 20 | error-chain = "0.12.1" 21 | fasthash = "0.3" 22 | flate2 = "1.0.2" 23 | lazy_static = "1.0" 24 | log = "0.4" 25 | memmap = "0.6" 26 | num_cpus = "1.10.0" 27 | rand = "0.5" 28 | regex = "0.2" 29 | serde = "1.0" 30 | serde_derive = "1.0" 31 | serde_json = "1.0" 32 | smallvec = "0.6.9" 33 | thread_local = "0.3" 34 | unicode_reader = "0.1.1" 35 | num-traits = "0.2" 36 | byteorder = "1" 37 | crunchy = "0.2.2" 38 | 39 | [dev-dependencies] 40 | tempfile = "3.0.8" 41 | 42 | # The release profile, used for `cargo build --release` 43 | [profile.release] 44 | debug = true 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Rucene - Rust implementation of Lucene 2 | ===================================================================================== 3 | 4 | ## Introduction 5 | 6 | Rucene is a Rust port of the popular Apache Lucene project. Rucene is not a complete application, but rather a code library and API that can easily be used to add full text search capabilities to applications. 
7 | 8 | ## Status 9 | 10 | The index searcher part of Rucene has been put into production and has served all search traffic at Zhihu since July, 2018. Development of the index writer part was started in late 2018, and has been put into production to serve real-time searching since May, 2019. 11 | 12 | ## Documentation 13 | 14 | We don't yet have API documentation for Rucene, but the usage is similar to [Lucene 6.2.1](https://lucene.apache.org/core/6_2_1/). 15 | 16 | > **Note:** 17 | > 18 | > We are working on this, but could use more help since it is a massive project. 19 | 20 | ## License 21 | 22 | Rucene is under the Apache 2.0 license. See the [LICENSE](./LICENSE) file for details. 23 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | nightly-2020-03-12 2 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | # REFERENCE: https://github.com/rust-lang-nursery/rustfmt/blob/master/Configurations.md 2 | unstable_features = true 3 | comment_width = 100 4 | wrap_comments = true 5 | format_strings = true 6 | imports_indent = "Block" 7 | imports_layout = "Mixed" 8 | newline_style = "Unix" 9 | normalize_comments = true 10 | -------------------------------------------------------------------------------- /src/core/codec/doc_values/doc_values_producer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

use core::codec::doc_values::{
    BinaryDocValuesProvider, NumericDocValuesProvider, SortedDocValuesProvider,
    SortedNumericDocValuesProvider, SortedSetDocValuesProvider,
};
use core::codec::field_infos::FieldInfo;
use core::util::BitsMut;

use error::Result;
use std::sync::Arc;

/// Abstract API that produces numeric, binary, sorted, sortedset,
/// and sortednumeric docvalues.
///
/// NOTE: the returned instance must always be thread-safe; this is different from
/// the Lucene constraint, where producers need not be thread-safe.
// NOTE(review): every return type below reads as a bare `Result>` — the generic
// arguments appear to have been stripped when this file was exported (presumably
// `Result<Arc<dyn ...Provider>>` for the getters and `Result<Arc<dyn
// DocValuesProducer>>` for `get_merge_instance`). Confirm against the upstream
// sources before building; the code as shown here will not compile.
pub trait DocValuesProducer: Send + Sync {
    /// Returns `NumericDocValues` for this field.
    fn get_numeric(&self, field_info: &FieldInfo) -> Result>;

    /// Returns `BinaryDocValues` for this field.
    fn get_binary(&self, field_info: &FieldInfo) -> Result>;

    /// Returns `SortedDocValues` for this field.
    fn get_sorted(&self, field: &FieldInfo) -> Result>;

    /// Returns `SortedNumericDocValues` for this field.
    fn get_sorted_numeric(
        &self,
        field: &FieldInfo,
    ) -> Result>;

    /// Returns `SortedSetDocValues` for this field.
    fn get_sorted_set(&self, field: &FieldInfo) -> Result>;
    /// Returns a `bits` at the size of `reader.max_doc()`, with turned on bits for each doc_id
    /// that does have a value for this field.
    /// The returned instance need not be thread-safe: it will only be used by a single thread.
    fn get_docs_with_field(&self, field: &FieldInfo) -> Result>;
    /// Checks consistency of this producer.
    /// Note that this may be costly in terms of I/O, e.g.
    /// may involve computing a checksum value against large data files.
    fn check_integrity(&self) -> Result<()>;

    /// Returns an instance optimized for merging.
    fn get_merge_instance(&self) -> Result>;
}
-------------------------------------------------------------------------------- /src/core/codec/doc_values/lucene54/lucene54_doc_values_format.rs: --------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
13 | 14 | use core::codec::doc_values::lucene54::{Lucene54DocValuesConsumer, Lucene54DocValuesProducer}; 15 | use core::codec::doc_values::{DocValuesConsumerEnum, DocValuesFormat, DocValuesProducer}; 16 | use core::codec::segment_infos::{SegmentReadState, SegmentWriteState}; 17 | use core::codec::Codec; 18 | use core::store::directory::Directory; 19 | 20 | use error::Result; 21 | 22 | #[derive(Copy, Clone, Default)] 23 | pub struct Lucene54DocValuesFormat; 24 | 25 | impl Lucene54DocValuesFormat { 26 | const DATA_CODEC: &'static str = "Lucene54DocValuesData"; 27 | const DATA_EXTENSION: &'static str = "dvd"; 28 | const META_CODEC: &'static str = "Lucene54DocValuesMetadata"; 29 | const META_EXTENSION: &'static str = "dvm"; 30 | pub const VERSION_START: i32 = 0; 31 | pub const VERSION_CURRENT: i32 = 0; 32 | 33 | // indicates docvalues type 34 | pub const NUMERIC: u8 = 0; 35 | pub const BINARY: u8 = 1; 36 | pub const SORTED: u8 = 2; 37 | pub const SORTED_SET: u8 = 3; 38 | pub const SORTED_NUMERIC: u8 = 4; 39 | 40 | // address terms in blocks of 16 terms 41 | pub const INTERVAL_SHIFT: i32 = 4; 42 | pub const INTERVAL_COUNT: i32 = 1 << Self::INTERVAL_SHIFT; 43 | pub const INTERVAL_MASK: i32 = Self::INTERVAL_COUNT - 1; 44 | 45 | // build reverse index from every 1024th term 46 | pub const REVERSE_INTERVAL_SHIFT: i32 = 10; 47 | pub const REVERSE_INTERVAL_COUNT: i32 = 1 << Self::REVERSE_INTERVAL_SHIFT; 48 | pub const REVERSE_INTERVAL_MASK: i32 = Self::REVERSE_INTERVAL_COUNT - 1; 49 | 50 | // for conversion from reverse index to block 51 | pub const BLOCK_INTERVAL_SHIFT: i32 = Self::REVERSE_INTERVAL_SHIFT - Self::INTERVAL_SHIFT; 52 | pub const BLOCK_INTERVAL_COUNT: i32 = 1 << Self::BLOCK_INTERVAL_SHIFT; 53 | pub const BLOCK_INTERVAL_MASK: i32 = Self::BLOCK_INTERVAL_COUNT - 1; 54 | 55 | // Compressed using packed blocks of ints 56 | pub const DELTA_COMPRESSED: i32 = 0; 57 | // Compressed by computing the GCD 58 | pub const GCD_COMPRESSED: i32 = 1; 59 | // Compressed by giving 
IDs to unique values 60 | pub const TABLE_COMPRESSED: i32 = 2; 61 | // Compressed with monotonically increasing values 62 | pub const MONOTONIC_COMPRESSED: i32 = 3; 63 | // Compressed with pub constant value (uses only missing bitset) 64 | pub const CONST_COMPRESSED: i32 = 4; 65 | // Compressed with sparse arrays 66 | pub const SPARSE_COMPRESSED: i32 = 5; 67 | 68 | // Uncompressed binary, written directly (fixed length) 69 | pub const BINARY_FIXED_UNCOMPRESSED: i32 = 0; 70 | // Uncompressed binary, written directly (variable length) 71 | pub const BINARY_VARIABLE_UNCOMPRESSED: i32 = 1; 72 | // Compressed binary with shared prefixes 73 | pub const BINARY_PREFIX_COMPRESSED: i32 = 2; 74 | 75 | // Standard storage for sorted set values with 1 level of indirection: 76 | // docId -> address -> ord 77 | pub const SORTED_WITH_ADDRESSES: i32 = 0; 78 | // Single-valued sorted set values, encoded as sorted values, so no level 79 | // of indirection: docId -> ord 80 | pub const SORTED_SINGLE_VALUED: i32 = 1; 81 | // Compressed giving IDs to unique sets of values: 82 | // docId -> setId -> ords 83 | pub const SORTED_SET_TABLE: i32 = 2; 84 | 85 | // placeholder for missing offset that means there are no missing values 86 | pub const ALL_LIVE: i32 = -1; 87 | // placeholder for missing offset that means all values are missing 88 | pub const ALL_MISSING: i32 = -2; 89 | 90 | // addressing uses 16k blocks 91 | pub const MONOTONIC_BLOCK_SIZE: i32 = 16384; 92 | pub const DIRECT_MONOTONIC_BLOCK_SHIFT: i32 = 16; 93 | } 94 | 95 | impl DocValuesFormat for Lucene54DocValuesFormat { 96 | fn name(&self) -> &str { 97 | "Lucene54" 98 | } 99 | fn fields_producer<'a, D: Directory, DW: Directory, C: Codec>( 100 | &self, 101 | state: &SegmentReadState<'a, D, DW, C>, 102 | ) -> Result> { 103 | let boxed = Lucene54DocValuesProducer::new( 104 | state, 105 | Self::DATA_CODEC, 106 | Self::DATA_EXTENSION, 107 | Self::META_CODEC, 108 | Self::META_EXTENSION, 109 | )?; 110 | Ok(Box::new(boxed)) 111 | } 112 
| 113 | fn fields_consumer( 114 | &self, 115 | state: &SegmentWriteState, 116 | ) -> Result> { 117 | Ok(DocValuesConsumerEnum::Lucene54( 118 | Lucene54DocValuesConsumer::new( 119 | state, 120 | Self::DATA_CODEC, 121 | Self::DATA_EXTENSION, 122 | Self::META_CODEC, 123 | Self::META_EXTENSION, 124 | )?, 125 | )) 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/core/codec/fields.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::Terms; 15 | 16 | use error::Result; 17 | 18 | /// Flex API for access to fields and terms 19 | pub trait Fields { 20 | type Terms: Terms; 21 | fn fields(&self) -> Vec; 22 | fn terms(&self, field: &str) -> Result>; 23 | fn size(&self) -> usize; 24 | fn terms_freq(&self, _field: &str) -> usize { 25 | unimplemented!() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/core/codec/matching_reader.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::Codec; 15 | use core::index::merge::MergeState; 16 | use core::store::directory::Directory; 17 | 18 | /// Computes which segments have identical field name to number mappings, 19 | /// which allows stored fields and term vectors in this codec to be bulk-merged. 20 | pub struct MatchingReaders { 21 | /// `SegmentReader`s that have identical field name/number mapping, 22 | /// so their stored fields and term vectors may be bulk merged. 23 | pub matching_readers: Vec, 24 | /// How many #matching_readers are set 25 | pub count: usize, 26 | } 27 | 28 | impl MatchingReaders { 29 | pub fn new(merge_state: &MergeState) -> Self { 30 | // If the i'th reader is a SegmentReader and has 31 | // identical fieldName -> number mapping, then this 32 | // array will be non-null at position i: 33 | let num_readers = merge_state.max_docs.len(); 34 | let mut matched_count = 0; 35 | 36 | let mut matching_readers = vec![false; num_readers]; 37 | 38 | // If this reader is a SegmentReader, and all of its 39 | // field name -> number mappings match the "merged" 40 | // FieldInfos, then we can do a bulk copy of the 41 | // stored fields: 42 | 'next_reader: for i in 0..num_readers { 43 | for fi in merge_state.fields_infos[i].by_number.values() { 44 | let other = merge_state 45 | .merge_field_infos 46 | .as_ref() 47 | .unwrap() 48 | .field_info_by_number(fi.number); 49 | if other.map_or(true, |o| o.name != fi.name) { 50 | continue 'next_reader; 51 | } 52 | } 53 | matching_readers[i] = true; 54 | matched_count += 1; 55 | } 56 | MatchingReaders { 57 | matching_readers, 58 | count: matched_count, 59 | 
} 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/core/codec/norms/norm_values_writer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::doc_values::{NumericDVIter, NumericDocValuesWriter}; 15 | use core::codec::field_infos::FieldInfo; 16 | use core::codec::norms::NormsConsumer; 17 | use core::codec::segment_infos::SegmentWriteState; 18 | use core::codec::{Codec, SorterDocMap}; 19 | use core::util::packed::COMPACT; 20 | use core::util::packed::{ 21 | LongValuesIterator, PackedLongValuesBuilder, PackedLongValuesBuilderType, DEFAULT_PAGE_SIZE, 22 | }; 23 | use core::util::{BitSet, FixedBitSet}; 24 | use core::util::{Bits, DocId, Numeric, ReusableIterator}; 25 | 26 | use core::store::directory::Directory; 27 | use error::Result; 28 | 29 | const MISSING: i64 = 0; 30 | 31 | pub struct NormValuesWriter { 32 | pending: PackedLongValuesBuilder, 33 | docs_with_field: FixedBitSet, 34 | field_info: FieldInfo, 35 | last_doc: DocId, 36 | } 37 | 38 | impl NormValuesWriter { 39 | pub fn new(field_info: &FieldInfo) -> Self { 40 | NormValuesWriter { 41 | pending: PackedLongValuesBuilder::new( 42 | DEFAULT_PAGE_SIZE, 43 | COMPACT as f32, 44 | PackedLongValuesBuilderType::Delta, 45 | ), 46 | docs_with_field: FixedBitSet::new(64), 47 | field_info: field_info.clone(), 48 | last_doc: -1, 49 | 
} 50 | } 51 | 52 | pub fn add_value(&mut self, doc_id: DocId, value: i64) { 53 | debug_assert!(self.last_doc < doc_id); 54 | self.docs_with_field.ensure_capacity(doc_id as usize); 55 | self.docs_with_field.set(doc_id as usize); 56 | self.pending.add(value); 57 | self.last_doc = doc_id; 58 | } 59 | 60 | pub fn finish(&mut self, _num_doc: i32) {} 61 | 62 | pub fn flush( 63 | &mut self, 64 | state: &SegmentWriteState, 65 | sort_map: Option<&impl SorterDocMap>, 66 | consumer: &mut NC, 67 | ) -> Result<()> { 68 | let max_doc = state.segment_info.max_doc; 69 | let values = self.pending.build(); 70 | if let Some(sort_map) = sort_map { 71 | let sorted = NumericDocValuesWriter::sort_doc_values( 72 | max_doc, 73 | sort_map, 74 | &self.docs_with_field, 75 | values.iterator(), 76 | ); 77 | let mut iter = NumericDVIter::new(sorted); 78 | consumer.add_norms_field(&self.field_info, &mut iter) 79 | } else { 80 | let mut iter = 81 | NumericIter::new(values.iterator(), &self.docs_with_field, max_doc as usize); 82 | consumer.add_norms_field(&self.field_info, &mut iter) 83 | } 84 | } 85 | } 86 | 87 | struct NumericIter<'a> { 88 | values_iter: LongValuesIterator<'a>, 89 | docs_with_field: &'a FixedBitSet, 90 | upto: usize, 91 | max_doc: usize, 92 | } 93 | 94 | impl<'a> NumericIter<'a> { 95 | fn new( 96 | values_iter: LongValuesIterator<'a>, 97 | docs_with_field: &'a FixedBitSet, 98 | max_doc: usize, 99 | ) -> NumericIter<'a> { 100 | NumericIter { 101 | values_iter, 102 | docs_with_field, 103 | upto: 0, 104 | max_doc, 105 | } 106 | } 107 | } 108 | 109 | impl<'a> Iterator for NumericIter<'a> { 110 | type Item = Result; 111 | 112 | fn next(&mut self) -> Option> { 113 | if self.upto < self.max_doc { 114 | let v = if self.upto >= self.docs_with_field.len() 115 | || !self.docs_with_field.get(self.upto).unwrap() 116 | { 117 | MISSING 118 | } else { 119 | self.values_iter.next().unwrap() 120 | }; 121 | self.upto += 1; 122 | Some(Ok(Numeric::Long(v))) 123 | } else { 124 | None 125 | } 126 | } 
127 | } 128 | 129 | impl<'a> ReusableIterator for NumericIter<'a> { 130 | fn reset(&mut self) { 131 | self.values_iter.reset(); 132 | self.upto = 0; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/core/codec/norms/norms.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::norms::{ 15 | Lucene53NormsConsumer, Lucene53NormsProducer, NormsConsumerEnum, NormsFormat, 16 | }; 17 | use core::codec::segment_infos::{SegmentReadState, SegmentWriteState}; 18 | use core::codec::Codec; 19 | use core::store::directory::Directory; 20 | 21 | use error::Result; 22 | 23 | pub const DATA_CODEC: &str = "Lucene53NormsData"; 24 | pub const DATA_EXTENSION: &str = "nvd"; 25 | pub const METADATA_CODEC: &str = "Lucene53NormsMetadata"; 26 | pub const METADATA_EXTENSION: &str = "nvm"; 27 | pub const VERSION_START: i32 = 0; 28 | pub const VERSION_CURRENT: i32 = VERSION_START; 29 | 30 | #[derive(Copy, Clone, Default)] 31 | pub struct Lucene53NormsFormat; 32 | 33 | impl NormsFormat for Lucene53NormsFormat { 34 | type NormsProducer = Lucene53NormsProducer; 35 | fn norms_producer<'a, D: Directory, DW: Directory, C: Codec>( 36 | &self, 37 | state: &SegmentReadState<'a, D, DW, C>, 38 | ) -> Result { 39 | Lucene53NormsProducer::new( 40 | state, 41 | DATA_CODEC, 42 | DATA_EXTENSION, 43 | METADATA_CODEC, 44 
| METADATA_EXTENSION, 45 | ) 46 | } 47 | 48 | fn norms_consumer( 49 | &self, 50 | state: &SegmentWriteState, 51 | ) -> Result> { 52 | Ok(NormsConsumerEnum::Lucene53(Lucene53NormsConsumer::new( 53 | state, 54 | DATA_CODEC, 55 | DATA_EXTENSION, 56 | METADATA_CODEC, 57 | METADATA_EXTENSION, 58 | )?)) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/core/codec/posting_iterator.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::search::{DocIterator, Payload, NO_MORE_DOCS}; 15 | use core::util::DocId; 16 | use error::Result; 17 | 18 | pub struct PostingIteratorFlags; 19 | 20 | /// flags constants and helper function defined for `TermIterator::postings_with_flag()`. 21 | impl PostingIteratorFlags { 22 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} if you don't 23 | /// require per-document postings in the returned iterator. 24 | pub const NONE: u16 = 0; 25 | 26 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 27 | /// if you require term frequencies in the returned iterator. 28 | pub const FREQS: u16 = 1 << 3; 29 | 30 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 31 | /// if you require term positions in the returned iterator. 
32 | pub const POSITIONS: u16 = Self::FREQS | 1 << 4; 33 | 34 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 35 | /// if you require offsets in the returned iterator. 36 | pub const OFFSETS: u16 = Self::POSITIONS | 1 << 5; 37 | 38 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 39 | /// if you require payloads in the returned iterator. 40 | pub const PAYLOADS: u16 = Self::POSITIONS | 1 << 6; 41 | 42 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 43 | /// to get positions, payloads and offsets in the returned iterator. 44 | pub const ALL: u16 = Self::OFFSETS | Self::PAYLOADS; 45 | 46 | pub fn feature_requested(flags: u16, feature: u16) -> bool { 47 | (flags & feature) == feature 48 | } 49 | } 50 | 51 | /// Iterates through the postings. 52 | /// 53 | /// NOTE: you must first call `next()` before using any of the per-doc methods. 54 | pub trait PostingIterator: DocIterator { 55 | /// Returns term frequency in the current document, or 1 if the field was 56 | /// indexed with `IndexOptions::Docs`. Do not call this before 57 | /// `next_doc()` is first called, nor after `#next()` returns `NO_MORE_DOCS`. 58 | /// 59 | /// *NOTE:* if the [`PostingIterator`] was obtain with `PostingIteratorFlags::NONE`, 60 | /// the result of this method is undefined. 61 | fn freq(&self) -> Result; 62 | 63 | /// Returns the next position, or -1 if positions were not indexed. 64 | /// Calling this more than `freq()` times is undefined. 65 | fn next_position(&mut self) -> Result; 66 | 67 | /// Returns start offset for the current position, or -1 68 | /// if offsets were not indexed. */ 69 | fn start_offset(&self) -> Result; 70 | 71 | /// Returns end offset for the current position, or -1 if 72 | /// offsets were not indexed. */ 73 | fn end_offset(&self) -> Result; 74 | 75 | /// Returns the payload at this position, or null if no 76 | /// payload was indexed. 
You should not modify anything 77 | /// (neither members of the returned BytesRef nor bytes 78 | /// in the bytes). */ 79 | fn payload(&self) -> Result; 80 | } 81 | 82 | /// a `PostingIterator` that no matching docs are available. 83 | #[derive(Clone)] 84 | pub struct EmptyPostingIterator { 85 | doc_id: DocId, 86 | } 87 | 88 | impl Default for EmptyPostingIterator { 89 | fn default() -> Self { 90 | EmptyPostingIterator { doc_id: -1 } 91 | } 92 | } 93 | 94 | impl DocIterator for EmptyPostingIterator { 95 | fn doc_id(&self) -> DocId { 96 | self.doc_id 97 | } 98 | 99 | fn next(&mut self) -> Result { 100 | self.doc_id = NO_MORE_DOCS; 101 | Ok(NO_MORE_DOCS) 102 | } 103 | 104 | fn advance(&mut self, _target: DocId) -> Result { 105 | self.doc_id = NO_MORE_DOCS; 106 | Ok(NO_MORE_DOCS) 107 | } 108 | 109 | fn cost(&self) -> usize { 110 | 0usize 111 | } 112 | } 113 | 114 | impl PostingIterator for EmptyPostingIterator { 115 | fn freq(&self) -> Result { 116 | Ok(0) 117 | } 118 | 119 | fn next_position(&mut self) -> Result { 120 | Ok(-1) 121 | } 122 | 123 | fn start_offset(&self) -> Result { 124 | Ok(-1) 125 | } 126 | 127 | fn end_offset(&self) -> Result { 128 | Ok(-1) 129 | } 130 | 131 | fn payload(&self) -> Result { 132 | Ok(Payload::new()) 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/core/codec/postings/blocktree/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod blocktree_reader; 15 | 16 | pub use self::blocktree_reader::*; 17 | 18 | mod blocktree_writer; 19 | 20 | pub use self::blocktree_writer::*; 21 | 22 | mod term_iter_frame; 23 | 24 | pub use self::term_iter_frame::*; 25 | 26 | const MAX_LONGS_SIZE: usize = 3; 27 | 28 | use core::codec::TermState; 29 | 30 | /// Holds all state required for `PostingsReaderBase` to produce a 31 | /// `PostingIterator` without re-seeking the term dict. 32 | #[derive(Clone, Debug)] 33 | pub struct BlockTermState { 34 | /// Term ordinal, i.e. its position in the full list of 35 | /// sorted terms. 36 | pub ord: i64, 37 | /// how many docs have this term 38 | pub doc_freq: i32, 39 | 40 | /// total number of occurrences of this term 41 | pub total_term_freq: i64, 42 | 43 | /// the term's ord in the current block 44 | pub term_block_ord: i32, 45 | 46 | /// fp into the terms dict primary file (_X.tim) that holds this term 47 | // TODO: update BTR to nuke this 48 | pub block_file_pointer: i64, 49 | 50 | /// fields from IntBlockTermState 51 | pub doc_start_fp: i64, 52 | pub pos_start_fp: i64, 53 | pub pay_start_fp: i64, 54 | pub skip_offset: i64, 55 | pub last_pos_block_offset: i64, 56 | // docid when there is a single pulsed posting, otherwise -1 57 | // freq is always implicitly totalTermFreq in this case. 
58 | pub singleton_doc_id: i32, 59 | } 60 | 61 | impl BlockTermState { 62 | pub fn new() -> BlockTermState { 63 | BlockTermState { 64 | ord: 0, 65 | doc_freq: 0, 66 | total_term_freq: 0, 67 | term_block_ord: 0, 68 | block_file_pointer: 0, 69 | 70 | doc_start_fp: 0, 71 | pos_start_fp: 0, 72 | pay_start_fp: 0, 73 | skip_offset: -1, 74 | last_pos_block_offset: -1, 75 | singleton_doc_id: -1, 76 | } 77 | } 78 | 79 | pub fn copy_from(&mut self, other: &BlockTermState) { 80 | self.ord = other.ord; 81 | self.doc_freq = other.doc_freq; 82 | self.total_term_freq = other.total_term_freq; 83 | self.term_block_ord = other.term_block_ord; 84 | self.block_file_pointer = other.block_file_pointer; 85 | self.doc_start_fp = other.doc_start_fp; 86 | self.pos_start_fp = other.pos_start_fp; 87 | self.pay_start_fp = other.pay_start_fp; 88 | self.last_pos_block_offset = other.last_pos_block_offset; 89 | self.skip_offset = other.skip_offset; 90 | self.singleton_doc_id = other.singleton_doc_id; 91 | } 92 | 93 | pub fn ord(&self) -> i64 { 94 | self.ord 95 | } 96 | 97 | pub fn doc_freq(&self) -> i32 { 98 | self.doc_freq 99 | } 100 | 101 | pub fn total_term_freq(&self) -> i64 { 102 | self.total_term_freq 103 | } 104 | 105 | pub fn term_block_ord(&self) -> i32 { 106 | self.term_block_ord 107 | } 108 | 109 | pub fn block_file_pointer(&self) -> i64 { 110 | self.block_file_pointer 111 | } 112 | 113 | pub fn doc_start_fp(&self) -> i64 { 114 | self.doc_start_fp 115 | } 116 | pub fn pos_start_fp(&self) -> i64 { 117 | self.pos_start_fp 118 | } 119 | pub fn pay_start_fp(&self) -> i64 { 120 | self.pay_start_fp 121 | } 122 | pub fn skip_offset(&self) -> i64 { 123 | self.skip_offset 124 | } 125 | pub fn last_pos_block_offset(&self) -> i64 { 126 | self.last_pos_block_offset 127 | } 128 | pub fn singleton_doc_id(&self) -> i32 { 129 | self.singleton_doc_id 130 | } 131 | } 132 | 133 | impl TermState for BlockTermState {} 134 | -------------------------------------------------------------------------------- 
/src/core/codec/postings/posting_format.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::postings::blocktree::{BlockTreeTermsReader, BlockTreeTermsWriter}; 15 | use core::codec::postings::{ 16 | FieldsConsumerEnum, Lucene50PostingsReader, Lucene50PostingsWriter, PostingsFormat, 17 | }; 18 | use core::codec::segment_infos::{SegmentReadState, SegmentWriteState}; 19 | use core::codec::Codec; 20 | use core::store::directory::Directory; 21 | 22 | use error::Result; 23 | 24 | use std::fmt; 25 | 26 | #[derive(Hash, Eq, Ord, PartialEq, PartialOrd)] 27 | pub struct Lucene50PostingsFormat { 28 | name: &'static str, 29 | min_term_block_size: usize, 30 | max_term_block_size: usize, 31 | } 32 | 33 | /// Fixed packed block size, number of integers encoded in 34 | /// a single packed block. 
35 | // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding 36 | pub const BLOCK_SIZE: i32 = 128; 37 | 38 | const DEFAULT_MIN_BLOCK_SIZE: usize = 25; 39 | const DEFAULT_MAX_BLOCK_SIZE: usize = 48; 40 | 41 | impl fmt::Display for Lucene50PostingsFormat { 42 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 43 | write!(f, "{}(blocksize={})", self.name, BLOCK_SIZE) 44 | } 45 | } 46 | 47 | impl Default for Lucene50PostingsFormat { 48 | fn default() -> Lucene50PostingsFormat { 49 | Self::with_block_size(DEFAULT_MIN_BLOCK_SIZE, DEFAULT_MAX_BLOCK_SIZE) 50 | } 51 | } 52 | 53 | impl Lucene50PostingsFormat { 54 | pub fn with_block_size( 55 | min_term_block_size: usize, 56 | max_term_block_size: usize, 57 | ) -> Lucene50PostingsFormat { 58 | Lucene50PostingsFormat { 59 | name: "Lucene50", 60 | min_term_block_size, 61 | max_term_block_size, 62 | } 63 | } 64 | } 65 | 66 | impl PostingsFormat for Lucene50PostingsFormat { 67 | type FieldsProducer = BlockTreeTermsReader; 68 | fn fields_producer<'a, D: Directory, DW: Directory, C: Codec>( 69 | &self, 70 | state: &SegmentReadState<'a, D, DW, C>, 71 | ) -> Result { 72 | let reader = Lucene50PostingsReader::open(&state)?; 73 | BlockTreeTermsReader::new(reader, state) 74 | } 75 | 76 | fn fields_consumer( 77 | &self, 78 | state: &SegmentWriteState, 79 | ) -> Result> { 80 | let postings_writer = Lucene50PostingsWriter::new(state)?; 81 | Ok(FieldsConsumerEnum::Lucene50(BlockTreeTermsWriter::new( 82 | state, 83 | postings_writer, 84 | self.min_term_block_size, 85 | self.max_term_block_size, 86 | )?)) 87 | } 88 | 89 | fn name(&self) -> &str { 90 | "Lucene50" 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/core/codec/stored_fields/stored_fields.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::str::FromStr; 15 | use std::sync::Arc; 16 | 17 | use core::codec::field_infos::FieldInfos; 18 | use core::codec::segment_infos::SegmentInfo; 19 | use core::codec::stored_fields::{ 20 | CompressingStoredFieldsFormat, CompressingStoredFieldsReader, StoredFieldsFormat, 21 | StoredFieldsWriterEnum, 22 | }; 23 | use core::codec::Codec; 24 | use core::store::directory::Directory; 25 | use core::store::IOContext; 26 | use core::util::CompressionMode; 27 | use error::{Error as CoreError, ErrorKind::IllegalState, Result}; 28 | 29 | const MODE_KEY: &str = "Lucene50StoredFieldsFormat.mode"; 30 | 31 | #[derive(Debug, Copy, Clone)] 32 | pub enum StoredFieldCompressMode { 33 | BestSpeed, 34 | BestCompression, 35 | } 36 | 37 | impl StoredFieldCompressMode { 38 | fn name(&self) -> &'static str { 39 | match self { 40 | StoredFieldCompressMode::BestSpeed => "BEST_SPEED", 41 | StoredFieldCompressMode::BestCompression => "BEST_COMPRESSION", 42 | } 43 | } 44 | } 45 | 46 | impl FromStr for StoredFieldCompressMode { 47 | type Err = CoreError; 48 | fn from_str(v: &str) -> Result { 49 | let r = if v == "BEST_SPEED" { 50 | StoredFieldCompressMode::BestSpeed 51 | } else { 52 | StoredFieldCompressMode::BestCompression 53 | }; 54 | Ok(r) 55 | } 56 | } 57 | 58 | /// Lucene 5.0 stored fields format. 
59 | #[derive(Copy, Clone)] 60 | pub struct Lucene50StoredFieldsFormat { 61 | #[allow(dead_code)] 62 | mode: StoredFieldCompressMode, 63 | } 64 | 65 | impl Lucene50StoredFieldsFormat { 66 | pub fn new(mode: Option) -> Lucene50StoredFieldsFormat { 67 | if let Some(m) = mode { 68 | Lucene50StoredFieldsFormat { mode: m } 69 | } else { 70 | Lucene50StoredFieldsFormat { 71 | mode: StoredFieldCompressMode::BestSpeed, 72 | } 73 | } 74 | } 75 | 76 | pub fn format(self, mode: StoredFieldCompressMode) -> CompressingStoredFieldsFormat { 77 | match mode { 78 | StoredFieldCompressMode::BestSpeed => CompressingStoredFieldsFormat::new( 79 | "Lucene50StoredFieldsFast", 80 | "", 81 | CompressionMode::FAST, 82 | 1 << 14, 83 | 128, 84 | 1024, 85 | ), 86 | StoredFieldCompressMode::BestCompression => CompressingStoredFieldsFormat::new( 87 | "Lucene50StoredFieldsHigh", 88 | "", 89 | CompressionMode::HighCompression, 90 | 61440, 91 | 512, 92 | 1024, 93 | ), 94 | } 95 | } 96 | } 97 | 98 | impl StoredFieldsFormat for Lucene50StoredFieldsFormat { 99 | type Reader = CompressingStoredFieldsReader; 100 | fn fields_reader( 101 | &self, 102 | directory: &DW, 103 | si: &SegmentInfo, 104 | field_info: Arc, 105 | ioctx: &IOContext, 106 | ) -> Result { 107 | if let Some(value) = si.attributes.get(MODE_KEY) { 108 | let mode = StoredFieldCompressMode::from_str(value)?; 109 | 110 | self.format(mode) 111 | .fields_reader(directory, si, field_info, ioctx) 112 | } else { 113 | bail!(IllegalState(format!( 114 | "missing value for {} for segment: {}", 115 | MODE_KEY, si.name 116 | ))) 117 | } 118 | } 119 | 120 | fn fields_writer( 121 | &self, 122 | directory: Arc, 123 | si: &mut SegmentInfo, 124 | ioctx: &IOContext, 125 | ) -> Result> 126 | where 127 | D: Directory, 128 | DW: Directory, 129 | DW::IndexOutput: 'static, 130 | C: Codec, 131 | { 132 | let previous = si 133 | .attributes 134 | .insert(MODE_KEY.to_string(), self.mode.name().to_string()); 135 | if let Some(prev_name) = previous { 136 | if 
prev_name.as_str() != self.mode.name() { 137 | bail!(IllegalState(format!( 138 | "found existing value for {} for segment: {}", 139 | MODE_KEY, si.name 140 | ))); 141 | } 142 | } 143 | self.format(self.mode).fields_writer(directory, si, ioctx) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/core/doc/index_options.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use error::{ErrorKind::IllegalArgument, Result}; 15 | use std::cmp::Ordering; 16 | 17 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize)] 18 | pub enum IndexOptions { 19 | Null, 20 | Docs, 21 | DocsAndFreqs, 22 | DocsAndFreqsAndPositions, 23 | DocsAndFreqsAndPositionsAndOffsets, 24 | } 25 | 26 | impl Default for IndexOptions { 27 | fn default() -> IndexOptions { 28 | IndexOptions::Null 29 | } 30 | } 31 | 32 | impl IndexOptions { 33 | pub fn from(options: &str) -> Result { 34 | let res = match options { 35 | "offsets" => IndexOptions::DocsAndFreqsAndPositionsAndOffsets, 36 | "positions" => IndexOptions::DocsAndFreqsAndPositions, 37 | "freqs" => IndexOptions::DocsAndFreqs, 38 | "docs" => IndexOptions::Docs, 39 | _ => { 40 | bail!(IllegalArgument(format!( 41 | "failed to parse index option [{}]", 42 | options 43 | ))); 44 | } 45 | }; 46 | Ok(res) 47 | } 48 | 49 | pub fn as_str(self) -> &'static str { 50 | match self { 51 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => "offsets", 52 | IndexOptions::DocsAndFreqs => "freqs", 53 | IndexOptions::DocsAndFreqsAndPositions => "positions", 54 | IndexOptions::Docs => "docs", 55 | _ => unreachable!(), 56 | } 57 | } 58 | 59 | pub fn has_docs(self) -> bool { 60 | match self { 61 | IndexOptions::Null => false, 62 | _ => true, 63 | } 64 | } 65 | 66 | pub fn has_freqs(self) -> bool { 67 | match self { 68 | IndexOptions::DocsAndFreqs => true, 69 | IndexOptions::DocsAndFreqsAndPositions => true, 70 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => true, 71 | _ => false, 72 | } 73 | } 74 | 75 | pub fn has_positions(self) -> bool { 76 | match self { 77 | IndexOptions::DocsAndFreqsAndPositions => true, 78 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => true, 79 | _ => false, 80 | } 81 | } 82 | 83 | pub fn has_offsets(self) -> bool { 84 | match self { 85 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => true, 86 | _ => false, 87 | } 88 | } 89 | 90 | pub fn value(self) -> i32 { 91 | match self { 92 
| IndexOptions::Null => 0, 93 | IndexOptions::Docs => 1, 94 | IndexOptions::DocsAndFreqs => 2, 95 | IndexOptions::DocsAndFreqsAndPositions => 3, 96 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => 4, 97 | } 98 | } 99 | } 100 | 101 | impl Ord for IndexOptions { 102 | fn cmp(&self, other: &Self) -> Ordering { 103 | self.value().cmp(&other.value()) 104 | } 105 | } 106 | 107 | impl PartialOrd for IndexOptions { 108 | fn partial_cmp(&self, other: &Self) -> Option { 109 | Some(self.cmp(other)) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/core/doc/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod term; 15 | 16 | pub use self::term::*; 17 | 18 | mod field; 19 | 20 | pub use self::field::*; 21 | 22 | mod document; 23 | 24 | pub use self::document::*; 25 | 26 | mod index_options; 27 | 28 | pub use self::index_options::*; 29 | 30 | mod doc_values; 31 | 32 | pub use self::doc_values::*; 33 | -------------------------------------------------------------------------------- /src/core/doc/term.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use error::Result; 15 | use std::cmp::Ordering; 16 | 17 | /// A Term represents a word from text. This is the unit of search. It is 18 | /// composed of two elements, the text of the word, as a string, and the name of 19 | /// the field that the text occurred in. 20 | /// 21 | /// Note that terms may represent more than words from text fields, but also 22 | /// things like dates, email addresses, urls, etc. 23 | #[derive(Clone, Debug, PartialEq, Hash, Eq)] 24 | pub struct Term { 25 | pub field: String, 26 | pub bytes: Vec, 27 | } 28 | 29 | impl Term { 30 | /// Constructs a Term with the given field and bytes. 31 | ///

Note that a null field or null bytes value results in undefined 32 | /// behavior for most Lucene APIs that accept a Term parameter. 33 | /// 34 | ///

The provided BytesRef is copied when it is non null. 35 | pub fn new(field: String, bytes: Vec) -> Term { 36 | Term { field, bytes } 37 | } 38 | 39 | /// Returns the field of this term. The field indicates 40 | /// the part of a document which this term came from. 41 | pub fn field(&self) -> &str { 42 | &self.field 43 | } 44 | 45 | /// Returns the text of this term. In the case of words, this is simply the 46 | /// text of the word. In the case of dates and other types, this is an 47 | /// encoding of the object as a string. 48 | pub fn text(&self) -> Result { 49 | Ok(String::from_utf8(self.bytes.clone())?) 50 | } 51 | 52 | pub fn is_empty(&self) -> bool { 53 | self.field.is_empty() && self.bytes.is_empty() 54 | } 55 | 56 | pub fn copy_bytes(&mut self, bytes: &[u8]) { 57 | if self.bytes.len() != bytes.len() { 58 | self.bytes.resize(bytes.len(), 0); 59 | } 60 | self.bytes.copy_from_slice(bytes); 61 | } 62 | } 63 | 64 | impl PartialOrd for Term { 65 | fn partial_cmp(&self, other: &Self) -> Option { 66 | Some(self.cmp(other)) 67 | } 68 | } 69 | 70 | impl Ord for Term { 71 | fn cmp(&self, other: &Self) -> Ordering { 72 | let res = self.field.cmp(&other.field); 73 | if res == Ordering::Equal { 74 | self.bytes.cmp(&other.bytes) 75 | } else { 76 | res 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/core/index/merge/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod doc_id_merger; 15 | 16 | pub use self::doc_id_merger::*; 17 | 18 | mod merge_policy; 19 | 20 | pub use self::merge_policy::*; 21 | 22 | mod merge_rate_limiter; 23 | 24 | pub use self::merge_rate_limiter::*; 25 | 26 | mod merge_scheduler; 27 | 28 | pub use self::merge_scheduler::*; 29 | 30 | mod merge_state; 31 | 32 | pub use self::merge_state::*; 33 | 34 | mod segment_merger; 35 | 36 | pub use self::segment_merger::*; 37 | -------------------------------------------------------------------------------- /src/core/index/reader/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | mod directory_reader; 15 | 16 | pub use self::directory_reader::*; 17 | 18 | mod leaf_reader; 19 | 20 | pub use self::leaf_reader::*; 21 | 22 | mod leaf_reader_wrapper; 23 | 24 | pub use self::leaf_reader_wrapper::*; 25 | 26 | mod segment_reader; 27 | 28 | pub use self::segment_reader::*; 29 | 30 | mod index_lookup; 31 | 32 | pub use self::index_lookup::*; 33 | 34 | use core::codec::Codec; 35 | use core::codec::CodecTVFields; 36 | use core::doc::Document; 37 | use core::util::DocId; 38 | 39 | use error::Result; 40 | 41 | /// `IndexReader` providing an interface for accessing a point-in-time view of an index. 42 | /// 43 | /// Any changes made to the index via `IndexWriter` will not be visible until a new 44 | /// `IndexReader` is opened. It's best to use {@link 45 | /// StandardDirectoryReader#open(IndexWriter)} to obtain an `IndexReader`, if your 46 | /// `IndexWriter` is in-process. When you need to re-open to see changes to the 47 | /// index, it's best to use {@link DirectoryReader#openIfChanged(DirectoryReader)} 48 | /// since the new reader will share resources with the previous 49 | /// one when possible. Search of an index is done entirely 50 | /// through this abstract interface, so that any subclass which 51 | /// implements it is searchable. 52 | /// 53 | /// IndexReader instances for indexes on disk are usually constructed 54 | /// with a call to one of the static StandardDirectoryReader::open() methods. 55 | /// 56 | /// For efficiency, in this API documents are often referred to via 57 | /// *document numbers*, non-negative integers which each name a unique 58 | /// document in the index. These document numbers are ephemeral -- they may change 59 | /// as documents are added to and deleted from an index. Clients should thus not 60 | /// rely on a given document having the same number between sessions. 
61 | /// 62 | /// NOTE: `IndexReader` instances are completely thread 63 | /// safe, meaning multiple threads can call any of its methods, 64 | /// concurrently. If your application requires external 65 | /// synchronization, you should *not* synchronize on the 66 | /// `IndexReader` instance; use your own (non-Lucene) objects instead. 67 | pub trait IndexReader { 68 | type Codec: Codec; 69 | fn leaves(&self) -> Vec>; 70 | fn term_vector(&self, doc_id: DocId) -> Result>>; 71 | fn document(&self, doc_id: DocId, fields: &[String]) -> Result; 72 | fn max_doc(&self) -> i32; 73 | fn num_docs(&self) -> i32; 74 | fn num_deleted_docs(&self) -> i32 { 75 | self.max_doc() - self.num_docs() 76 | } 77 | fn has_deletions(&self) -> bool { 78 | self.num_deleted_docs() > 0 79 | } 80 | fn leaf_reader_for_doc(&self, doc: DocId) -> LeafReaderContext<'_, Self::Codec> { 81 | let leaves = self.leaves(); 82 | let size = leaves.len(); 83 | let mut lo = 0usize; 84 | let mut hi = size - 1; 85 | while hi >= lo { 86 | let mut mid = (lo + hi) >> 1; 87 | let mid_value = leaves[mid].doc_base; 88 | if doc < mid_value { 89 | hi = mid - 1; 90 | } else if doc > mid_value { 91 | lo = mid + 1; 92 | } else { 93 | while mid + 1 < size && leaves[mid + 1].doc_base == mid_value { 94 | mid += 1; 95 | } 96 | return leaves[mid].clone(); 97 | } 98 | } 99 | leaves[hi].clone() 100 | } 101 | 102 | // used for refresh 103 | fn refresh(&self) -> Result>>> { 104 | Ok(None) 105 | } 106 | } 107 | 108 | #[derive(Copy, Clone)] 109 | pub struct ReaderSlice { 110 | pub start: i32, 111 | pub length: i32, 112 | pub reader_index: usize, 113 | } 114 | 115 | impl ReaderSlice { 116 | pub fn new(start: i32, length: i32, reader_index: usize) -> ReaderSlice { 117 | ReaderSlice { 118 | start, 119 | length, 120 | reader_index, 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/core/index/writer/delete_policy.rs: 
-------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::index::writer::index_file_deleter::CommitPoint; 15 | use error::Result; 16 | 17 | /// Expert: policy for deletion of stale `IndexCommit index commits`. 18 | /// 19 | /// Implement this interface, and pass it to one 20 | /// of the `IndexWriter` or `IndexReader` 21 | /// constructors, to customize when older 22 | /// `IndexCommit point-in-time commits` 23 | /// are deleted from the index directory. The default deletion policy 24 | /// is `KeepOnlyLastCommitDeletionPolicy`, which always 25 | /// removes old commits as soon as a new commit is done (this 26 | /// matches the behavior before 2.2). 27 | /// 28 | /// One expected use case for this (and the reason why it 29 | /// was first created) is to work around problems with an 30 | /// index directory accessed via filesystems like NFS because 31 | /// NFS does not provide the "delete on last close" semantics 32 | /// that Lucene's "point in time" search normally relies on. 33 | /// By implementing a custom deletion policy, such as "a 34 | /// commit is only removed once it has been stale for more 35 | /// than X minutes", you can give your readers time to 36 | /// refresh to the new commit before `IndexWriter` 37 | /// removes the old commits. Note that doing so will 38 | /// increase the storage requirements of the index. 
See LUCENE-710 41 | /// for details. 42 | /// 43 | /// Implementers of sub-classes should make sure that `#clone()` 44 | /// returns an independent instance able to work with any other `IndexWriter` 45 | /// or `Directory` instance. 46 | pub trait IndexDeletionPolicy { 47 | /// This is called once when a writer is first 48 | /// instantiated to give the policy a chance to remove old 49 | /// commit points. 50 | /// 51 | /// The writer locates all index commits present in the 52 | /// index directory and calls this method. The policy may 53 | /// choose to delete some of the commit points, doing so by 54 | /// calling method `IndexCommit#delete delete()` 55 | /// of `IndexCommit`. 56 | /// 57 | /// Note: the last CommitPoint is the most recent one, 58 | /// i.e. the "front index state". Be careful not to delete it, 59 | /// unless you know for sure what you are doing, and unless 60 | /// you can afford to lose the index content while doing that. 61 | /// 62 | /// @param commits List of current 63 | /// `IndexCommit point-in-time commits`, 64 | /// sorted by age (the 0th one is the oldest commit). 65 | /// Note that for a new index this method is invoked with 66 | /// an empty list. 67 | fn on_init(&self, commits: Vec<&mut CommitPoint>) -> Result<()>; 68 | 69 | /// This is called each time the writer completed a commit. 70 | /// This gives the policy a chance to remove old commit points 71 | /// with each commit. 72 | /// 73 | /// The policy may now choose to delete old commit points 74 | /// by calling method `IndexCommit#delete delete()` 75 | /// of `IndexCommit`. 76 | /// 77 | /// This method is only called when `IndexWriter#commit` 78 | /// or `IndexWriter#close` is called, or possibly not at all 79 | /// if the `IndexWriter#rollback` is called. 80 | /// 81 | /// Note: the last CommitPoint is the most recent one, 82 | /// i.e. the "front index state". 
Be careful not to delete it, 83 | /// unless you know for sure what you are doing, and unless 84 | /// you can afford to lose the index content while doing that. 85 | /// 86 | /// @param commits List of `IndexCommit`, 87 | /// sorted by age (the 0th one is the oldest commit). 88 | fn on_commit(&self, commits: Vec<&mut CommitPoint>) -> Result<()>; 89 | } 90 | 91 | #[derive(Default)] 92 | pub struct KeepOnlyLastCommitDeletionPolicy; 93 | 94 | impl KeepOnlyLastCommitDeletionPolicy { 95 | pub fn on_init(&self, commits: Vec<&mut CommitPoint>) -> Result<()> { 96 | self.on_commit(commits) 97 | } 98 | 99 | pub fn on_commit(&self, mut commits: Vec<&mut CommitPoint>) -> Result<()> { 100 | commits.pop(); 101 | for commit in commits { 102 | commit.delete()?; 103 | } 104 | Ok(()) 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/core/index/writer/index_writer_config.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::codec::{Codec, CodecEnum, Lucene62Codec}; 15 | use core::index::merge::MergeScheduler; 16 | use core::index::merge::SerialMergeScheduler; 17 | use core::index::merge::{MergePolicy, TieredMergePolicy}; 18 | use core::index::writer::KeepOnlyLastCommitDeletionPolicy; 19 | use core::search::sort_field::Sort; 20 | 21 | use std::sync::Arc; 22 | 23 | /// Denotes a flush trigger is disabled. 24 | pub const DISABLE_AUTO_FLUSH: i32 = -1; 25 | 26 | /// Disabled by default (because IndexWriter flushes by RAM usage by default). 27 | pub const DEFAULT_MAX_BUFFERED_DELETE_TERMS: i32 = DISABLE_AUTO_FLUSH; 28 | 29 | /// Disabled by default (because IndexWriter flushes by RAM usage by default). 30 | pub const DEFAULT_MAX_BUFFERED_DOCS: i32 = DISABLE_AUTO_FLUSH; 31 | 32 | /// Default setting for `seg_reader_pooling` 33 | pub const DEFAULT_READER_POOLING: bool = false; 34 | 35 | /// Default value for compound file system for newly written segments 36 | /// (set to true). For batch indexing with very large 37 | /// ram buffers use false 38 | pub const DEFAULT_USE_COMPOUND_FILE_SYSTEM: bool = true; 39 | 40 | #[derive(Copy, Clone, Eq, PartialEq, Debug)] 41 | pub enum OpenMode { 42 | Create, 43 | Append, 44 | CreateOrAppend, 45 | } 46 | 47 | /// Holds all the configuration that is used to create an {@link IndexWriter}. 48 | /// Once {@link IndexWriter} has been created with this object, changes to this 49 | /// object will not affect the {@link IndexWriter} instance. For that, use 50 | /// {@link LiveIndexWriterConfig} that is returned from {@link IndexWriter#getConfig()}. 51 | /// 52 | /// All setter methods return {@link IndexWriterConfig} to allow chaining 53 | /// settings conveniently, for example: 54 | /// 55 | ///

 56 | /// IndexWriterConfig conf = new IndexWriterConfig(analyzer);
 57 | /// conf.setter1().setter2();
 58 | /// 
59 | /// 60 | /// @see IndexWriter#getConfig() 61 | pub struct IndexWriterConfig { 62 | pub use_compound_file: bool, 63 | pub max_buffered_delete_terms: Option, 64 | pub max_buffered_docs: Option, 65 | pub merge_policy: MP, 66 | pub merge_scheduler: MS, 67 | pub index_sort: Option, 68 | /// True if readers should be pooled. 69 | pub reader_pooling: bool, 70 | pub open_mode: OpenMode, 71 | pub codec: Arc, 72 | pub commit_on_close: bool, 73 | } 74 | 75 | impl Default for IndexWriterConfig { 76 | fn default() -> Self { 77 | Self::new( 78 | Arc::new(CodecEnum::Lucene62(Lucene62Codec::default())), 79 | SerialMergeScheduler {}, 80 | TieredMergePolicy::default(), 81 | ) 82 | } 83 | } 84 | 85 | impl IndexWriterConfig { 86 | pub fn new(codec: Arc, merge_scheduler: MS, merge_policy: MP) -> Self { 87 | IndexWriterConfig { 88 | use_compound_file: false, 89 | max_buffered_delete_terms: None, 90 | max_buffered_docs: None, 91 | merge_policy, 92 | merge_scheduler, 93 | index_sort: None, 94 | reader_pooling: true, 95 | open_mode: OpenMode::CreateOrAppend, 96 | codec, 97 | commit_on_close: true, 98 | } 99 | } 100 | 101 | pub fn max_buffered_delete_terms(&self) -> u32 { 102 | self.max_buffered_delete_terms.unwrap_or(0) 103 | } 104 | 105 | pub fn max_buffered_docs(&self) -> u32 { 106 | self.max_buffered_docs.unwrap_or(0) 107 | } 108 | 109 | pub fn flush_on_delete_terms(&self) -> bool { 110 | self.max_buffered_delete_terms.is_some() 111 | } 112 | 113 | pub fn flush_on_doc_count(&self) -> bool { 114 | self.max_buffered_docs.is_some() 115 | } 116 | 117 | pub fn merge_policy(&self) -> &MP { 118 | &self.merge_policy 119 | } 120 | 121 | pub fn index_sort(&self) -> Option<&Sort> { 122 | self.index_sort.as_ref() 123 | } 124 | 125 | pub fn index_deletion_policy(&self) -> KeepOnlyLastCommitDeletionPolicy { 126 | KeepOnlyLastCommitDeletionPolicy::default() 127 | } 128 | 129 | pub fn merge_scheduler(&self) -> MS { 130 | self.merge_scheduler.clone() 131 | } 132 | 133 | pub fn codec(&self) -> &C { 
134 | self.codec.as_ref() 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/core/index/writer/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod index_writer; 15 | 16 | pub use self::index_writer::*; 17 | 18 | mod bufferd_updates; 19 | 20 | pub use self::bufferd_updates::*; 21 | 22 | mod delete_policy; 23 | 24 | pub use self::delete_policy::*; 25 | 26 | mod dir_wrapper; 27 | 28 | pub use self::dir_wrapper::*; 29 | 30 | mod doc_consumer; 31 | 32 | pub use self::doc_consumer::*; 33 | 34 | mod doc_writer; 35 | 36 | pub use self::doc_writer::*; 37 | 38 | mod doc_writer_delete_queue; 39 | 40 | pub use self::doc_writer_delete_queue::*; 41 | 42 | mod doc_writer_flush_queue; 43 | 44 | pub use self::doc_writer_flush_queue::*; 45 | 46 | mod flush_control; 47 | 48 | pub use self::flush_control::*; 49 | 50 | mod flush_policy; 51 | 52 | pub use self::flush_policy::*; 53 | 54 | mod index_file_deleter; 55 | 56 | pub use self::index_file_deleter::*; 57 | 58 | mod index_writer_config; 59 | 60 | pub use self::index_writer_config::*; 61 | 62 | mod doc_writer_per_thread; 63 | 64 | pub use self::doc_writer_per_thread::*; 65 | 66 | mod prefix_code_terms; 67 | 68 | pub use self::prefix_code_terms::*; 69 | 70 | pub mod doc_values_update; 71 | 72 | pub use self::doc_values_update::*; 73 | 
-------------------------------------------------------------------------------- /src/core/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | pub mod analysis; 15 | pub mod codec; 16 | pub mod doc; 17 | pub mod highlight; 18 | pub mod index; 19 | pub mod search; 20 | pub mod store; 21 | pub mod util; 22 | -------------------------------------------------------------------------------- /src/core/search/cache/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | mod cache_policy; 15 | 16 | pub use self::cache_policy::*; 17 | 18 | mod lru_cache; 19 | 20 | pub use self::lru_cache::*; 21 | 22 | mod query_cache; 23 | 24 | pub use self::query_cache::*; 25 | -------------------------------------------------------------------------------- /src/core/search/collector/chain.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::Codec; 15 | use core::index::reader::LeafReaderContext; 16 | use core::search::collector::{Collector, ParallelLeafCollector, SearchCollector}; 17 | use core::search::scorer::Scorer; 18 | use core::util::DocId; 19 | use error::Result; 20 | 21 | /// ChainCollector makes it possible to collect on more than one collector in sequence. 
22 | pub struct ChainedCollector { 23 | first: A, 24 | second: B, 25 | } 26 | 27 | impl ChainedCollector { 28 | /// Constructor 29 | pub fn new(first: A, second: B) -> ChainedCollector { 30 | ChainedCollector { first, second } 31 | } 32 | } 33 | 34 | impl SearchCollector for ChainedCollector 35 | where 36 | A: SearchCollector, 37 | B: SearchCollector, 38 | { 39 | type LC = ChainedCollector; 40 | 41 | fn set_next_reader(&mut self, reader: &LeafReaderContext<'_, C>) -> Result<()> { 42 | self.first.set_next_reader(reader)?; 43 | self.second.set_next_reader(reader) 44 | } 45 | 46 | fn support_parallel(&self) -> bool { 47 | self.first.support_parallel() && self.second.support_parallel() 48 | } 49 | 50 | fn init_parallel(&mut self) { 51 | self.first.init_parallel(); 52 | self.second.init_parallel(); 53 | } 54 | 55 | fn leaf_collector( 56 | &self, 57 | reader: &LeafReaderContext<'_, C>, 58 | ) -> Result> { 59 | Ok(ChainedCollector { 60 | first: self.first.leaf_collector(reader)?, 61 | second: self.second.leaf_collector(reader)?, 62 | }) 63 | } 64 | 65 | fn finish_parallel(&mut self) -> Result<()> { 66 | // reverse order for finish 67 | self.second.finish_parallel()?; 68 | self.first.finish_parallel() 69 | } 70 | } 71 | 72 | impl Collector for ChainedCollector 73 | where 74 | A: Collector, 75 | B: Collector, 76 | { 77 | fn needs_scores(&self) -> bool { 78 | self.first.needs_scores() || self.second.needs_scores() 79 | } 80 | 81 | fn collect(&mut self, doc: DocId, scorer: &mut S) -> Result<()> { 82 | self.first.collect(doc, scorer)?; 83 | self.second.collect(doc, scorer) 84 | } 85 | } 86 | 87 | impl ParallelLeafCollector for ChainedCollector 88 | where 89 | A: ParallelLeafCollector, 90 | B: ParallelLeafCollector, 91 | { 92 | fn finish_leaf(&mut self) -> Result<()> { 93 | // reverse order for finish 94 | self.second.finish_leaf()?; 95 | self.first.finish_leaf() 96 | } 97 | } 98 | -------------------------------------------------------------------------------- 
/src/core/search/collector/early_terminating.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::Codec; 15 | use core::index::reader::LeafReaderContext; 16 | use core::search::collector; 17 | use core::search::collector::{Collector, ParallelLeafCollector, SearchCollector}; 18 | use core::search::scorer::Scorer; 19 | use core::util::external::Volatile; 20 | use core::util::DocId; 21 | use error::{ErrorKind, Result}; 22 | use std::sync::Arc; 23 | 24 | pub struct EarlyTerminatingSortingCollector { 25 | early_terminated: Arc>, 26 | num_docs_to_collect_per_reader: usize, 27 | num_docs_collected_per_reader: usize, 28 | } 29 | 30 | impl EarlyTerminatingSortingCollector { 31 | pub fn new(num_docs_to_collect_per_reader: usize) -> EarlyTerminatingSortingCollector { 32 | assert!( 33 | num_docs_to_collect_per_reader > 0, 34 | format!( 35 | "num_docs_to_collect_per_reader must always be > 0, got {}", 36 | num_docs_to_collect_per_reader 37 | ) 38 | ); 39 | 40 | EarlyTerminatingSortingCollector { 41 | early_terminated: Arc::new(Volatile::new(false)), 42 | num_docs_to_collect_per_reader, 43 | num_docs_collected_per_reader: 0, 44 | } 45 | } 46 | 47 | pub fn early_terminated(&self) -> bool { 48 | self.early_terminated.read() 49 | } 50 | } 51 | 52 | impl SearchCollector for EarlyTerminatingSortingCollector { 53 | type LC = 
EarlyTerminatingLeafCollector; 54 | fn set_next_reader(&mut self, _reader: &LeafReaderContext<'_, C>) -> Result<()> { 55 | self.num_docs_collected_per_reader = 0; 56 | Ok(()) 57 | } 58 | 59 | fn support_parallel(&self) -> bool { 60 | true 61 | } 62 | 63 | fn leaf_collector(&self, _reader: &LeafReaderContext<'_, C>) -> Result { 64 | assert!(self.support_parallel()); 65 | Ok(EarlyTerminatingLeafCollector::new( 66 | self.num_docs_to_collect_per_reader, 67 | Arc::clone(&self.early_terminated), 68 | )) 69 | } 70 | 71 | fn finish_parallel(&mut self) -> Result<()> { 72 | Ok(()) 73 | } 74 | } 75 | 76 | impl Collector for EarlyTerminatingSortingCollector { 77 | fn needs_scores(&self) -> bool { 78 | false 79 | } 80 | 81 | fn collect(&mut self, _doc: DocId, _scorer: &mut S) -> Result<()> { 82 | self.num_docs_collected_per_reader += 1; 83 | 84 | if self.num_docs_collected_per_reader > self.num_docs_to_collect_per_reader { 85 | self.early_terminated.write(true); 86 | bail!(ErrorKind::Collector( 87 | collector::ErrorKind::LeafCollectionTerminated, 88 | )) 89 | } 90 | Ok(()) 91 | } 92 | } 93 | 94 | /// A `Collector` that early terminates collection of documents on a 95 | /// per-segment basis, if the segment was sorted according to the given 96 | /// `Sort`. 97 | /// 98 | /// *NOTE:* the `Collector` detects segments sorted according to a 99 | /// an `IndexWriterConfig#setIndexSort`. Also, it collects up to a specified 100 | /// `num_docs_to_collect_per_reader` from each segment, and therefore is mostly suitable 101 | /// for use in conjunction with collectors such as `TopDocsCollector`, and 102 | /// not e.g. `TotalHitCountCollector`. 103 | /// 104 | /// *NOTE*: If you wrap a `TopDocsCollector` that sorts in the same 105 | /// order as the index order, the returned top docs will be correct. 106 | /// However the total of hit count will be vastly underestimated since not all matching documents 107 | /// will have been collected. 
108 | pub struct EarlyTerminatingLeafCollector { 109 | early_terminated: Arc>, 110 | num_docs_to_collect: usize, 111 | num_docs_collected: usize, 112 | } 113 | 114 | impl EarlyTerminatingLeafCollector { 115 | pub fn new( 116 | num_docs_to_collect: usize, 117 | early_terminated: Arc>, 118 | ) -> EarlyTerminatingLeafCollector { 119 | EarlyTerminatingLeafCollector { 120 | early_terminated, 121 | num_docs_to_collect, 122 | num_docs_collected: 0, 123 | } 124 | } 125 | 126 | pub fn early_terminated(&self) -> bool { 127 | self.early_terminated.read() 128 | } 129 | } 130 | 131 | impl ParallelLeafCollector for EarlyTerminatingLeafCollector { 132 | fn finish_leaf(&mut self) -> Result<()> { 133 | Ok(()) 134 | } 135 | } 136 | 137 | impl Collector for EarlyTerminatingLeafCollector { 138 | fn needs_scores(&self) -> bool { 139 | false 140 | } 141 | 142 | fn collect(&mut self, _doc: i32, _scorer: &mut S) -> Result<()> { 143 | self.num_docs_collected += 1; 144 | 145 | if self.num_docs_collected > self.num_docs_to_collect { 146 | self.early_terminated.write(true); 147 | bail!(ErrorKind::Collector( 148 | collector::ErrorKind::LeafCollectionTerminated, 149 | )) 150 | } 151 | Ok(()) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/core/search/collector/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | mod top_docs; 15 | 16 | pub use self::top_docs::*; 17 | 18 | mod early_terminating; 19 | 20 | pub use self::early_terminating::*; 21 | 22 | mod timeout; 23 | 24 | pub use self::timeout::*; 25 | 26 | mod chain; 27 | 28 | pub use self::chain::*; 29 | 30 | use error::Result; 31 | 32 | use core::codec::Codec; 33 | use core::index::reader::LeafReaderContext; 34 | use core::search::scorer::Scorer; 35 | use core::util::DocId; 36 | 37 | error_chain! { 38 | types { 39 | Error, ErrorKind, ResultExt; 40 | } 41 | errors { 42 | LeafCollectionTerminated { 43 | description("Leaf collection terminated") 44 | } 45 | CollectionTerminated { 46 | description("Collection terminated") 47 | } 48 | CollectionTimeout { 49 | description("Collection timeout") 50 | } 51 | 52 | CollectionFailed { 53 | description("Collection failed") 54 | } 55 | } 56 | } 57 | 58 | /// Expert: Collectors are primarily meant to be used to 59 | /// gather raw results from a search, and implement sorting 60 | /// or custom result filtering, collation, etc. 61 | /// 62 | /// `Collector` decouples the score from the collected doc: 63 | /// the score computation is skipped entirely if it's not 64 | /// needed. If your collector may request the 65 | /// score for a single hit multiple times, you should use 66 | /// `ScoreCachingWrappingScorer`. 67 | /// 68 | /// *NOTE:* The doc that is passed to the collect 69 | /// method is relative to the current reader. If your 70 | /// collector needs to resolve this to the docID space of the 71 | /// Multi*Reader, you must re-base it by recording the 72 | /// docBase from the most recent setNextReader call. 73 | /// 74 | /// Not all collectors will need to rebase the docID. For 75 | /// example, a collector that simply counts the total number 76 | /// of hits would skip it. 77 | pub trait SearchCollector: Collector { 78 | type LC: ParallelLeafCollector; 79 | /// This method is called before collecting on a new leaf. 
80 | fn set_next_reader(&mut self, reader: &LeafReaderContext<'_, C>) -> Result<()>; 81 | 82 | /// iff this collector support parallel collect 83 | fn support_parallel(&self) -> bool; 84 | fn init_parallel(&mut self) {} 85 | 86 | /// segment collector for parallel search 87 | fn leaf_collector(&self, reader: &LeafReaderContext<'_, C>) -> Result; 88 | 89 | fn finish_parallel(&mut self) -> Result<()>; 90 | } 91 | 92 | impl<'a, T: SearchCollector + 'a> SearchCollector for &'a mut T { 93 | type LC = T::LC; 94 | 95 | fn set_next_reader(&mut self, reader: &LeafReaderContext<'_, C>) -> Result<()> { 96 | (**self).set_next_reader(reader) 97 | } 98 | 99 | fn support_parallel(&self) -> bool { 100 | (**self).support_parallel() 101 | } 102 | 103 | fn init_parallel(&mut self) { 104 | (**self).init_parallel() 105 | } 106 | 107 | fn leaf_collector(&self, reader: &LeafReaderContext<'_, C>) -> Result { 108 | (**self).leaf_collector(reader) 109 | } 110 | 111 | fn finish_parallel(&mut self) -> Result<()> { 112 | (**self).finish_parallel() 113 | } 114 | } 115 | 116 | pub trait Collector { 117 | /// Indicates if document scores are needed by this collector. 118 | /// return `true` if scores are needed. 119 | fn needs_scores(&self) -> bool; 120 | 121 | /// Called once for every document matching a query, with the unbased document 122 | /// number. 123 | /// Note: The collection of the current segment can be terminated by throwing 124 | /// a `ErrorKind::LeafCollectionTerminated`. In this case, the last docs of the 125 | /// current `LeafReader` will be skipped and `IndexSearcher` 126 | /// will swallow the exception and continue collection with the next leaf. 127 | /// 128 | /// Note: This is called in an inner search loop. For good search performance, 129 | /// implementations of this method should not call `IndexSearcher::doc(DocId)` on every hit. 130 | /// Doing so can slow searches by an order of magnitude or more. 
131 | fn collect(&mut self, doc: DocId, scorer: &mut S) -> Result<()>; 132 | } 133 | 134 | impl<'a, T: Collector + 'a> Collector for &'a mut T { 135 | fn needs_scores(&self) -> bool { 136 | (**self).needs_scores() 137 | } 138 | 139 | fn collect(&mut self, doc: i32, scorer: &mut S) -> Result<()> { 140 | (**self).collect(doc, scorer) 141 | } 142 | } 143 | 144 | /// `Collector` that collect parallel for a single segment. 145 | /// 146 | /// once finished, the `finish_leaf` method must be 147 | /// called to notify to main thread. 148 | pub trait ParallelLeafCollector: Collector + Send + 'static { 149 | fn finish_leaf(&mut self) -> Result<()>; 150 | } 151 | -------------------------------------------------------------------------------- /src/core/search/collector/timeout.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::codec::Codec; 15 | use core::index::reader::LeafReaderContext; 16 | use core::search::collector; 17 | use core::search::collector::{Collector, ParallelLeafCollector, SearchCollector}; 18 | use core::search::scorer::Scorer; 19 | use core::util::external::Volatile; 20 | use core::util::DocId; 21 | use error::{ErrorKind, Result}; 22 | use std::sync::Arc; 23 | use std::time::{Duration, SystemTime}; 24 | 25 | /// the `TimeoutCollector` collector is used to timeout search requests that 26 | /// take longer than the maximum allowed search time limit. 27 | /// 28 | /// After this time is exceeded, the search thread is stopped by return a 29 | /// `CollectionTerminated` error. 30 | /// 31 | /// this collector is useful if your search must be complete at some specific time. 32 | /// you can use the `ChainedCollector` to compose this collector with eg. 33 | /// `TopDocsCollector`. 34 | pub struct TimeoutCollector { 35 | timeout_duration: Duration, 36 | start_time: SystemTime, 37 | timeout: Arc>, 38 | } 39 | 40 | impl TimeoutCollector { 41 | pub fn new(timeout_duration: Duration, start_time: SystemTime) -> TimeoutCollector { 42 | TimeoutCollector { 43 | timeout_duration, 44 | start_time, 45 | timeout: Arc::new(Volatile::new(false)), 46 | } 47 | } 48 | 49 | pub fn timeout(&self) -> bool { 50 | self.timeout.read() 51 | } 52 | } 53 | 54 | impl SearchCollector for TimeoutCollector { 55 | type LC = TimeoutLeafCollector; 56 | 57 | fn set_next_reader(&mut self, _reader: &LeafReaderContext<'_, C>) -> Result<()> { 58 | Ok(()) 59 | } 60 | 61 | fn support_parallel(&self) -> bool { 62 | true 63 | } 64 | 65 | fn leaf_collector( 66 | &self, 67 | _reader: &LeafReaderContext<'_, C>, 68 | ) -> Result { 69 | Ok(TimeoutLeafCollector::new( 70 | self.timeout_duration, 71 | self.start_time, 72 | Arc::clone(&self.timeout), 73 | )) 74 | } 75 | 76 | fn finish_parallel(&mut self) -> Result<()> { 77 | Ok(()) 78 | } 79 | } 80 | 81 | impl Collector for TimeoutCollector { 82 | fn 
needs_scores(&self) -> bool { 83 | false 84 | } 85 | 86 | fn collect(&mut self, _doc: DocId, _scorer: &mut S) -> Result<()> { 87 | let now = SystemTime::now(); 88 | if self.start_time < now && now.duration_since(self.start_time)? >= self.timeout_duration { 89 | self.timeout.write(true); 90 | bail!(ErrorKind::Collector( 91 | collector::ErrorKind::CollectionTimeout, 92 | )) 93 | } 94 | Ok(()) 95 | } 96 | } 97 | 98 | pub struct TimeoutLeafCollector { 99 | timeout_duration: Duration, 100 | start_time: SystemTime, 101 | timeout: Arc>, 102 | } 103 | 104 | impl TimeoutLeafCollector { 105 | pub fn new( 106 | timeout_duration: Duration, 107 | start_time: SystemTime, 108 | timeout: Arc>, 109 | ) -> TimeoutLeafCollector { 110 | TimeoutLeafCollector { 111 | timeout_duration, 112 | start_time, 113 | timeout, 114 | } 115 | } 116 | } 117 | 118 | impl Collector for TimeoutLeafCollector { 119 | fn needs_scores(&self) -> bool { 120 | false 121 | } 122 | 123 | fn collect(&mut self, _doc: i32, _scorer: &mut S) -> Result<()> { 124 | let now = SystemTime::now(); 125 | if self.start_time < now && now.duration_since(self.start_time)? >= self.timeout_duration { 126 | self.timeout.write(true); 127 | bail!(ErrorKind::Collector( 128 | collector::ErrorKind::CollectionTerminated, 129 | )) 130 | } 131 | Ok(()) 132 | } 133 | } 134 | 135 | impl ParallelLeafCollector for TimeoutLeafCollector { 136 | fn finish_leaf(&mut self) -> Result<()> { 137 | Ok(()) 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/core/search/explanation.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | #[derive(Serialize, Deserialize)] 15 | pub struct Explanation { 16 | is_match: bool, 17 | value: f32, 18 | description: String, 19 | details: Vec, 20 | } 21 | 22 | impl Explanation { 23 | pub fn new( 24 | is_match: bool, 25 | value: f32, 26 | description: String, 27 | details: Vec, 28 | ) -> Explanation { 29 | let value = if !is_match { 0.0f32 } else { value }; 30 | 31 | Explanation { 32 | is_match, 33 | value, 34 | description, 35 | details, 36 | } 37 | } 38 | 39 | pub fn is_match(&self) -> bool { 40 | self.is_match 41 | } 42 | 43 | pub fn value(&self) -> f32 { 44 | self.value 45 | } 46 | 47 | pub fn description(&self) -> String { 48 | self.description.clone() 49 | } 50 | 51 | pub fn summary(&self) -> String { 52 | format!("{} = {}", self.value, self.description) 53 | } 54 | 55 | pub fn details(&self) -> &[Explanation] { 56 | self.details.as_ref() 57 | } 58 | 59 | pub fn to_string(&self, depth: i32) -> String { 60 | let mut buffer = String::from(""); 61 | 62 | for _i in 0..depth { 63 | buffer.push_str(" "); 64 | } 65 | 66 | buffer.push_str(&self.summary()); 67 | buffer.push_str("\n"); 68 | 69 | for detail in &self.details { 70 | buffer.push_str(&detail.to_string(depth + 1)) 71 | } 72 | 73 | buffer 74 | } 75 | } 76 | 77 | impl Clone for Explanation { 78 | fn clone(&self) -> Self { 79 | let mut details: Vec = vec![]; 80 | for detail in &self.details { 81 | details.push(detail.clone()); 82 | } 83 | Explanation { 84 | is_match: self.is_match, 85 | value: self.value(), 86 | description: self.description(), 87 | details, 88 | } 89 | } 90 | } 91 | 
-------------------------------------------------------------------------------- /src/core/search/query/boost_query.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::any::Any; 15 | use std::f32; 16 | use std::fmt; 17 | 18 | use core::codec::Codec; 19 | use core::index::reader::LeafReaderContext; 20 | use core::search::explanation::Explanation; 21 | use core::search::query::{Query, TermQuery, Weight}; 22 | use core::search::scorer::Scorer; 23 | use core::search::searcher::SearchPlanBuilder; 24 | use core::util::DocId; 25 | 26 | use error::Result; 27 | 28 | const BOOST_QUERY: &str = "boost"; 29 | 30 | /// A `Query` wrapper that allows to give a boost to the wrapped query. 31 | /// 32 | /// Boost values that are less than one will give less importance to this 33 | /// query compared to other ones while values that are greater than one will 34 | /// give more importance to the scores returned by this query. 
35 | pub struct BoostQuery { 36 | query: Box>, 37 | boost: f32, 38 | } 39 | 40 | impl BoostQuery { 41 | pub fn build(query: Box>, boost: f32) -> Box> { 42 | if (boost - 1.0f32).abs() <= f32::EPSILON { 43 | query 44 | } else { 45 | Box::new(BoostQuery { query, boost }) 46 | } 47 | } 48 | } 49 | 50 | impl Query for BoostQuery { 51 | fn create_weight( 52 | &self, 53 | searcher: &dyn SearchPlanBuilder, 54 | needs_scores: bool, 55 | ) -> Result>> { 56 | let mut weight = self.query.create_weight(searcher, needs_scores)?; 57 | Weight::::normalize(weight.as_mut(), 1.0f32, self.boost); 58 | // weight.normalize(1.0f32, self.boost); 59 | Ok(Box::new(BoostWeight::new(weight, self.boost))) 60 | } 61 | 62 | fn extract_terms(&self) -> Vec { 63 | self.query.extract_terms() 64 | } 65 | 66 | fn as_any(&self) -> &dyn Any { 67 | self 68 | } 69 | } 70 | 71 | impl fmt::Display for BoostQuery { 72 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 73 | write!( 74 | f, 75 | "BoostQuery(query: {}, boost: {})", 76 | &self.query, self.boost 77 | ) 78 | } 79 | } 80 | 81 | struct BoostWeight { 82 | weight: Box>, 83 | boost: f32, 84 | } 85 | 86 | impl BoostWeight { 87 | pub fn new(weight: Box>, boost: f32) -> BoostWeight { 88 | assert!((boost - 1.0f32).abs() > f32::EPSILON); 89 | 90 | BoostWeight { weight, boost } 91 | } 92 | } 93 | 94 | impl Weight for BoostWeight { 95 | fn create_scorer( 96 | &self, 97 | leaf_reader: &LeafReaderContext<'_, C>, 98 | ) -> Result>> { 99 | self.weight.create_scorer(leaf_reader) 100 | } 101 | 102 | fn query_type(&self) -> &'static str { 103 | BOOST_QUERY 104 | } 105 | 106 | fn actual_query_type(&self) -> &'static str { 107 | self.weight.query_type() 108 | } 109 | 110 | fn normalize(&mut self, norm: f32, boost: f32) { 111 | self.weight.normalize(norm, boost * self.boost) 112 | } 113 | 114 | fn value_for_normalization(&self) -> f32 { 115 | self.weight.value_for_normalization() 116 | } 117 | 118 | fn needs_scores(&self) -> bool { 119 | 
self.weight.needs_scores() 120 | } 121 | 122 | fn explain(&self, reader: &LeafReaderContext<'_, C>, doc: DocId) -> Result { 123 | self.weight.explain(reader, doc) 124 | } 125 | } 126 | 127 | impl fmt::Display for BoostWeight { 128 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 129 | write!( 130 | f, 131 | "BoostWeight(weight: {}, boost: {})", 132 | &self.weight, self.boost 133 | ) 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/core/search/query/boosting_query.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use std::any::Any; 15 | use std::fmt; 16 | 17 | use core::codec::Codec; 18 | use core::index::reader::LeafReaderContext; 19 | use core::search::explanation::Explanation; 20 | use core::search::query::{Query, TermQuery, Weight}; 21 | use core::search::scorer::BoostingScorer; 22 | use core::search::scorer::Scorer; 23 | use core::search::searcher::SearchPlanBuilder; 24 | use core::util::DocId; 25 | use error::Result; 26 | 27 | const BOOSTING_QUERY: &str = "boosting"; 28 | 29 | pub struct BoostingQuery { 30 | positive: Box>, 31 | negative: Box>, 32 | negative_boost: f32, 33 | } 34 | 35 | impl BoostingQuery { 36 | pub fn build( 37 | positive: Box>, 38 | negative: Box>, 39 | negative_boost: f32, 40 | ) -> Box> { 41 | Box::new(BoostingQuery { 42 | positive, 43 | negative, 44 | negative_boost, 45 | }) 46 | } 47 | } 48 | 49 | impl Query for BoostingQuery { 50 | fn create_weight( 51 | &self, 52 | searcher: &dyn SearchPlanBuilder, 53 | needs_scores: bool, 54 | ) -> Result>> { 55 | Ok(Box::new(BoostingWeight::new( 56 | self.positive.create_weight(searcher, needs_scores)?, 57 | self.negative.create_weight(searcher, false)?, 58 | self.negative_boost, 59 | ))) 60 | } 61 | 62 | fn extract_terms(&self) -> Vec { 63 | self.positive.extract_terms() 64 | } 65 | 66 | fn as_any(&self) -> &dyn Any { 67 | self 68 | } 69 | } 70 | 71 | impl fmt::Display for BoostingQuery { 72 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 73 | write!( 74 | f, 75 | "BoostingQuery(positive: {}, negative: {}, negative_boost: {})", 76 | &self.positive, &self.negative, self.negative_boost 77 | ) 78 | } 79 | } 80 | 81 | struct BoostingWeight { 82 | positive_weight: Box>, 83 | negative_weight: Box>, 84 | negative_boost: f32, 85 | } 86 | 87 | impl BoostingWeight { 88 | pub fn new( 89 | positive_weight: Box>, 90 | negative_weight: Box>, 91 | negative_boost: f32, 92 | ) -> BoostingWeight { 93 | BoostingWeight { 94 | positive_weight, 95 | negative_weight, 96 | negative_boost, 97 | } 98 | } 99 | } 
100 | 101 | impl Weight for BoostingWeight { 102 | fn create_scorer( 103 | &self, 104 | leaf_reader: &LeafReaderContext<'_, C>, 105 | ) -> Result>> { 106 | if let (Some(positive_scorer), Some(negative_scorer)) = ( 107 | self.positive_weight.create_scorer(leaf_reader)?, 108 | self.negative_weight.create_scorer(leaf_reader)?, 109 | ) { 110 | Ok(Some(Box::new(BoostingScorer::new( 111 | positive_scorer, 112 | negative_scorer, 113 | self.negative_boost, 114 | )))) 115 | } else { 116 | Ok(None) 117 | } 118 | } 119 | 120 | fn query_type(&self) -> &'static str { 121 | BOOSTING_QUERY 122 | } 123 | 124 | fn actual_query_type(&self) -> &'static str { 125 | BOOSTING_QUERY 126 | } 127 | 128 | fn normalize(&mut self, norm: f32, boost: f32) { 129 | self.positive_weight.normalize(norm, boost) 130 | } 131 | 132 | fn value_for_normalization(&self) -> f32 { 133 | self.positive_weight.value_for_normalization() 134 | } 135 | 136 | fn needs_scores(&self) -> bool { 137 | self.positive_weight.needs_scores() 138 | } 139 | 140 | fn explain(&self, reader: &LeafReaderContext<'_, C>, doc: DocId) -> Result { 141 | self.positive_weight.explain(reader, doc) 142 | } 143 | } 144 | 145 | impl fmt::Display for BoostingWeight { 146 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 147 | write!( 148 | f, 149 | "BoostingWeight(positive: {}, negative: {}, negative_boost: {})", 150 | &self.positive_weight, &self.negative_weight, self.negative_boost 151 | ) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/core/search/query/exists_query.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::any::Any; 15 | use std::fmt; 16 | 17 | use core::codec::doc_values::DocValuesIterator; 18 | use core::codec::Codec; 19 | use core::index::reader::LeafReaderContext; 20 | use core::search::explanation::Explanation; 21 | use core::search::query::{Query, TermQuery, Weight}; 22 | use core::search::scorer::ConstantScoreScorer; 23 | use core::search::scorer::Scorer; 24 | use core::search::searcher::SearchPlanBuilder; 25 | use core::util::DocId; 26 | use error::Result; 27 | 28 | const EXISTS_QUERY: &str = "exists"; 29 | 30 | pub struct ExistsQuery { 31 | field: String, 32 | } 33 | 34 | impl ExistsQuery { 35 | pub fn build(field: String) -> ExistsQuery { 36 | ExistsQuery { field } 37 | } 38 | } 39 | 40 | impl Query for ExistsQuery { 41 | fn create_weight( 42 | &self, 43 | _searcher: &dyn SearchPlanBuilder, 44 | _needs_scores: bool, 45 | ) -> Result>> { 46 | Ok(Box::new(ExistsWeight::new(self.field.clone()))) 47 | } 48 | 49 | fn extract_terms(&self) -> Vec { 50 | vec![] 51 | } 52 | 53 | fn as_any(&self) -> &dyn Any { 54 | self 55 | } 56 | } 57 | 58 | impl fmt::Display for ExistsQuery { 59 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 60 | write!(f, "ExistsQuery(field={})", &self.field) 61 | } 62 | } 63 | 64 | struct ExistsWeight { 65 | field: String, 66 | weight: f32, 67 | norm: f32, 68 | } 69 | 70 | impl ExistsWeight { 71 | pub fn new(field: String) -> ExistsWeight { 72 | ExistsWeight { 73 | field, 74 | weight: 0f32, 75 | norm: 0f32, 76 | } 77 | } 78 | } 79 | 80 | impl Weight for ExistsWeight { 81 | fn create_scorer( 82 | &self, 83 | leaf_reader: &LeafReaderContext<'_, C>, 84 
| ) -> Result>> { 85 | if let Some(field_info) = leaf_reader.reader.field_info(self.field.as_str()) { 86 | let cost: i32 = leaf_reader.reader.max_doc(); 87 | let doc_iterator = DocValuesIterator::new(field_info.name.as_str(), cost, leaf_reader); 88 | 89 | return Ok(Some(Box::new(ConstantScoreScorer::new( 90 | self.weight, 91 | doc_iterator, 92 | cost as usize, 93 | )))); 94 | } 95 | 96 | Ok(None) 97 | } 98 | 99 | fn query_type(&self) -> &'static str { 100 | EXISTS_QUERY 101 | } 102 | 103 | fn actual_query_type(&self) -> &'static str { 104 | EXISTS_QUERY 105 | } 106 | 107 | fn normalize(&mut self, norm: f32, boost: f32) { 108 | self.norm = norm; 109 | self.weight = norm * boost; 110 | } 111 | 112 | fn value_for_normalization(&self) -> f32 { 113 | self.weight * self.weight 114 | } 115 | 116 | fn needs_scores(&self) -> bool { 117 | false 118 | } 119 | 120 | fn explain(&self, _reader: &LeafReaderContext<'_, C>, _doc: DocId) -> Result { 121 | Ok(Explanation::new( 122 | true, 123 | self.weight, 124 | format!("{}, product of:", self), 125 | vec![Explanation::new( 126 | true, 127 | self.weight, 128 | "exists".to_string(), 129 | vec![], 130 | )], 131 | )) 132 | } 133 | } 134 | 135 | impl fmt::Display for ExistsWeight { 136 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 137 | write!( 138 | f, 139 | "ExistsWeight(field={}, weight={}, norm={})", 140 | &self.field, self.weight, self.norm 141 | ) 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/core/search/query/spans/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | #[macro_use] 15 | mod span; 16 | 17 | pub use self::span::*; 18 | 19 | mod span_boost; 20 | 21 | pub use self::span_boost::*; 22 | 23 | mod span_near; 24 | 25 | pub use self::span_near::*; 26 | 27 | mod span_or; 28 | 29 | pub use self::span_or::*; 30 | 31 | mod span_term; 32 | 33 | pub use self::span_term::*; 34 | -------------------------------------------------------------------------------- /src/core/search/scorer/boosting_scorer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::search::scorer::Scorer; 15 | use core::search::DocIterator; 16 | use core::util::DocId; 17 | use error::Result; 18 | 19 | pub struct BoostingScorer { 20 | positive: Box, 21 | negative: Box, 22 | negative_boost: f32, 23 | } 24 | 25 | impl BoostingScorer { 26 | pub fn new( 27 | positive: Box, 28 | negative: Box, 29 | negative_boost: f32, 30 | ) -> BoostingScorer { 31 | debug_assert!(negative_boost > 0.0 && negative_boost < 1.0); 32 | BoostingScorer { 33 | positive, 34 | negative, 35 | negative_boost, 36 | } 37 | } 38 | } 39 | 40 | impl Scorer for BoostingScorer { 41 | fn score(&mut self) -> Result { 42 | let current_doc = self.positive.doc_id(); 43 | let mut score = self.positive.score()?; 44 | 45 | if current_doc == self.negative.advance(current_doc)? { 46 | score *= self.negative_boost; 47 | } 48 | 49 | Ok(score) 50 | } 51 | } 52 | 53 | impl DocIterator for BoostingScorer { 54 | fn doc_id(&self) -> DocId { 55 | self.positive.doc_id() 56 | } 57 | 58 | fn next(&mut self) -> Result { 59 | self.positive.next() 60 | } 61 | 62 | fn advance(&mut self, target: DocId) -> Result { 63 | self.positive.advance(target) 64 | } 65 | 66 | fn cost(&self) -> usize { 67 | self.positive.cost() 68 | } 69 | 70 | fn matches(&mut self) -> Result { 71 | self.positive.matches() 72 | } 73 | 74 | fn approximate_next(&mut self) -> Result { 75 | self.positive.approximate_next() 76 | } 77 | 78 | fn approximate_advance(&mut self, target: DocId) -> Result { 79 | self.positive.approximate_advance(target) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/core/search/scorer/min_scorer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use error::Result; 15 | 16 | use core::search::scorer::Scorer; 17 | use core::search::DocIterator; 18 | use core::util::DocId; 19 | 20 | // currently directory merge `ScoreCachingWrappingScorer` into this class 21 | pub struct MinScoreScorer { 22 | origin: S, 23 | min_score: f32, 24 | // cache these two fields to avoid calculate score twice 25 | cur_doc: DocId, 26 | cur_score: f32, 27 | } 28 | 29 | impl MinScoreScorer { 30 | pub fn new(origin: S, min_score: f32) -> Self { 31 | MinScoreScorer { 32 | origin, 33 | min_score, 34 | cur_doc: -1, 35 | cur_score: 0f32, 36 | } 37 | } 38 | } 39 | 40 | impl Scorer for MinScoreScorer { 41 | fn score(&mut self) -> Result { 42 | let doc = self.origin.doc_id(); 43 | if doc != self.cur_doc { 44 | self.cur_score = self.origin.score()?; 45 | self.cur_doc = doc; 46 | } 47 | Ok(self.cur_score) 48 | } 49 | } 50 | 51 | impl DocIterator for MinScoreScorer { 52 | fn doc_id(&self) -> DocId { 53 | self.origin.doc_id() 54 | } 55 | 56 | fn next(&mut self) -> Result { 57 | self.approximate_next() 58 | } 59 | 60 | fn advance(&mut self, target: DocId) -> Result { 61 | self.approximate_advance(target) 62 | } 63 | 64 | fn cost(&self) -> usize { 65 | self.origin.cost() 66 | } 67 | 68 | fn matches(&mut self) -> Result { 69 | Ok(self.origin.matches()? && self.score()? 
> self.min_score) 70 | } 71 | 72 | fn approximate_next(&mut self) -> Result { 73 | self.origin.approximate_next() 74 | } 75 | 76 | fn approximate_advance(&mut self, target: DocId) -> Result { 77 | self.origin.approximate_advance(target) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/core/search/scorer/req_opt_scorer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::search::scorer::Scorer; 15 | use core::search::DocIterator; 16 | use core::util::DocId; 17 | use error::Result; 18 | 19 | const OPT_SCORE_THRESHOLD: usize = 100; 20 | 21 | /// A Scorer for queries with a required part and an optional part. 22 | /// Delays `advance()` on the optional part until a `score()` is needed. 
23 | pub struct ReqOptScorer { 24 | req_scorer: Box, 25 | opt_scorer: Box, 26 | scores_sum: f32, 27 | scores_num: usize, 28 | } 29 | 30 | impl ReqOptScorer { 31 | pub fn new(req_scorer: Box, opt_scorer: Box) -> ReqOptScorer { 32 | ReqOptScorer { 33 | req_scorer, 34 | opt_scorer, 35 | scores_sum: 0f32, 36 | scores_num: 0usize, 37 | } 38 | } 39 | } 40 | 41 | impl Scorer for ReqOptScorer { 42 | fn score(&mut self) -> Result { 43 | let current_doc = self.req_scorer.doc_id(); 44 | let mut score = self.req_scorer.score()?; 45 | 46 | if self.scores_num > OPT_SCORE_THRESHOLD { 47 | if 2.0 * score < self.scores_sum / self.scores_num as f32 { 48 | return Ok(score); 49 | } 50 | } 51 | 52 | self.scores_sum += score; 53 | self.scores_num += 1; 54 | 55 | let mut opt_doc = self.opt_scorer.doc_id(); 56 | if opt_doc < current_doc { 57 | opt_doc = self.opt_scorer.advance(current_doc)?; 58 | } 59 | 60 | if opt_doc == current_doc { 61 | score += self.opt_scorer.score()?; 62 | } 63 | 64 | Ok(score) 65 | } 66 | } 67 | 68 | impl DocIterator for ReqOptScorer { 69 | fn doc_id(&self) -> DocId { 70 | self.req_scorer.doc_id() 71 | } 72 | 73 | fn next(&mut self) -> Result { 74 | self.req_scorer.next() 75 | } 76 | 77 | fn advance(&mut self, target: DocId) -> Result { 78 | self.req_scorer.advance(target) 79 | } 80 | 81 | fn cost(&self) -> usize { 82 | self.req_scorer.cost() 83 | } 84 | 85 | fn matches(&mut self) -> Result { 86 | self.req_scorer.matches() 87 | } 88 | 89 | fn approximate_next(&mut self) -> Result { 90 | self.req_scorer.approximate_next() 91 | } 92 | 93 | fn approximate_advance(&mut self, target: DocId) -> Result { 94 | self.req_scorer.approximate_advance(target) 95 | } 96 | } 97 | 98 | #[cfg(test)] 99 | mod tests { 100 | use super::*; 101 | use core::search::scorer::*; 102 | use core::search::tests::*; 103 | use core::search::*; 104 | 105 | #[test] 106 | fn test_score() { 107 | let s1 = create_mock_scorer(vec![1, 2, 3, 4, 5]); 108 | let s2 = create_mock_scorer(vec![2, 3, 5]); 109 
| let s3 = create_mock_scorer(vec![2, 5]); 110 | let s4 = create_mock_scorer(vec![3, 4, 5]); 111 | 112 | let conjunction_scorer: Box = Box::new(ConjunctionScorer::new(vec![s1, s2])); 113 | let disjunction_scorer: Box = 114 | Box::new(DisjunctionSumScorer::new(vec![s3, s4], true, 0)); 115 | let mut scorer = ReqOptScorer::new(conjunction_scorer, disjunction_scorer); 116 | 117 | assert_eq!(scorer.doc_id(), -1); 118 | 119 | assert_eq!(scorer.next().unwrap(), 2); 120 | assert!((scorer.score().unwrap() - 6.0) < ::std::f32::EPSILON); 121 | 122 | assert_eq!(scorer.next().unwrap(), 3); 123 | assert!((scorer.score().unwrap() - 9.0) < ::std::f32::EPSILON); 124 | 125 | assert_eq!(scorer.next().unwrap(), 5); 126 | assert!((scorer.score().unwrap() - 20.0) < ::std::f32::EPSILON); 127 | 128 | assert_eq!(scorer.next().unwrap(), NO_MORE_DOCS); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/core/search/scorer/term_scorer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::codec::PostingIterator; 15 | use core::search::scorer::Scorer; 16 | use core::search::similarity::SimScorer; 17 | use core::search::DocIterator; 18 | use core::util::DocId; 19 | use error::Result; 20 | 21 | pub struct TermScorer { 22 | sim_scorer: Box, 23 | postings_iterator: T, 24 | } 25 | 26 | impl TermScorer { 27 | pub fn new(sim_scorer: Box, postings_iterator: T) -> Self { 28 | TermScorer { 29 | sim_scorer, 30 | postings_iterator, 31 | } 32 | } 33 | 34 | fn freq(&self) -> i32 { 35 | if let Ok(f) = self.postings_iterator.freq() { 36 | f 37 | } else { 38 | 1 39 | } 40 | } 41 | } 42 | 43 | impl Scorer for TermScorer { 44 | fn score(&mut self) -> Result { 45 | let doc_id = self.doc_id(); 46 | let freq = self.freq(); 47 | Ok(self.sim_scorer.score(doc_id, freq as f32)?) 48 | } 49 | } 50 | 51 | impl DocIterator for TermScorer { 52 | fn doc_id(&self) -> DocId { 53 | self.postings_iterator.doc_id() 54 | } 55 | 56 | fn next(&mut self) -> Result { 57 | self.postings_iterator.next() 58 | } 59 | 60 | fn advance(&mut self, target: DocId) -> Result { 61 | self.postings_iterator.advance(target) 62 | } 63 | 64 | fn cost(&self) -> usize { 65 | self.postings_iterator.cost() 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/core/search/sort_field/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod field_comparator; 15 | 16 | pub use self::field_comparator::*; 17 | 18 | mod sort_field; 19 | 20 | pub use self::sort_field::*; 21 | 22 | mod collapse_top_docs; 23 | 24 | pub use self::collapse_top_docs::*; 25 | 26 | mod search_group; 27 | 28 | pub use self::search_group::*; 29 | 30 | /// Encapsulates sort criteria for returned hits. 31 | /// 32 | /// The fields used to determine sort order must be carefully chosen. 33 | /// Documents must contain a single term in such a field, 34 | /// and the value of the term should indicate the document's relative position in 35 | /// a given sort order. The field must be indexed, but should not be tokenized, 36 | /// and does not need to be stored (unless you happen to want it back with the 37 | /// rest of your document data). 38 | /// 39 | /// ### Valid Types of Values 40 | /// 41 | /// There are four possible kinds of term values which may be put into 42 | /// sorting fields: Integers, Longs, Floats, or Strings. Unless 43 | /// `SortField` objects are specified, the type of value 44 | /// in the field is determined by parsing the first term in the field. 45 | /// 46 | /// Integer term values should contain only digits and an optional 47 | /// preceding negative sign. Values must be base 10 and in the range 48 | /// `i32::min_value()` and `i32::max_value()` inclusive. 49 | /// Documents which should appear first in the sort 50 | /// should have low value integers, later documents high values 51 | /// (i.e. the documents should be numbered `1..n` where 52 | /// `1` is the first and `n` the last). 
53 | /// 54 | /// Long term values should contain only digits and an optional 55 | /// preceding negative sign. Values must be base 10 and in the range 56 | /// `i64::min_value()` and `i64::max_value()` inclusive. 57 | /// Documents which should appear first in the sort 58 | /// should have low value integers, later documents high values. 59 | /// 60 | /// Float term values should conform to values accepted by 61 | /// {@link Float Float.valueOf(String)} (except that `NaN` 62 | /// and `Infinity` are not supported). 63 | /// Documents which should appear first in the sort 64 | /// should have low values, later documents high values. 65 | /// 66 | /// String term values can contain any valid String, but should 67 | /// not be tokenized. The values are sorted according to their 68 | /// {@link Comparable natural order}. Note that using this type 69 | /// of term value has higher memory requirements than the other 70 | /// two types. 71 | /// 72 | /// ### Object Reuse 73 | /// 74 | /// One of these objects can be 75 | /// used multiple times and the sort order changed between usages. 76 | /// 77 | /// This class is thread safe. 78 | /// 79 | /// ### Memory Usage 80 | /// 81 | /// Sorting uses of caches of term values maintained by the 82 | /// internal HitQueue(s). The cache is static and contains an integer 83 | /// or float array of length `IndexReader.max_doc()` for each field 84 | /// name for which a sort is performed. In other words, the size of the 85 | /// cache in bytes is: 86 | /// 87 | /// `4 * IndexReader.max_doc() * (# of different fields actually used to sort)` 88 | /// 89 | /// For String fields, the cache is larger: in addition to the 90 | /// above array, the value of every term in the field is kept in memory. 91 | /// If there are many unique terms in the field, this could 92 | /// be quite large. 
93 | /// 94 | /// Note that the size of the cache is not affected by how many 95 | /// fields are in the index and *might* be used to sort - only by 96 | /// the ones actually used to sort a result set. 97 | #[derive(Clone, Eq, PartialEq, Debug)] 98 | pub struct Sort { 99 | fields: Vec, 100 | } 101 | 102 | impl Sort { 103 | pub fn new(fields: Vec) -> Sort { 104 | Sort { fields } 105 | } 106 | 107 | pub fn get_sort(&self) -> &[SortField] { 108 | &self.fields 109 | } 110 | 111 | pub fn needs_scores(&self) -> bool { 112 | self.fields.iter().any(|f| f.needs_scores()) 113 | } 114 | } 115 | 116 | #[cfg(test)] 117 | mod tests { 118 | use super::*; 119 | 120 | #[test] 121 | fn test_sort() { 122 | let sort_fields: Vec = vec![ 123 | SortField::Simple(SimpleSortField::new( 124 | String::from("field_one"), 125 | SortFieldType::Score, 126 | true, 127 | )), 128 | SortField::Simple(SimpleSortField::new( 129 | String::from("field_two"), 130 | SortFieldType::Doc, 131 | false, 132 | )), 133 | ]; 134 | let sort = Sort::new(sort_fields); 135 | 136 | assert!(sort.needs_scores()); 137 | 138 | let fields = sort.get_sort(); 139 | assert_eq!(fields.len(), 2); 140 | 141 | let score_field = &fields[0]; 142 | assert_eq!(score_field.field(), &String::from("field_one")); 143 | 144 | let doc_field = &fields[1]; 145 | assert_eq!(doc_field.field(), &String::from("field_two")); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/core/search/statistics.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::util::DocId; 15 | 16 | /// Contains statistics for a collection (field) 17 | #[derive(Clone)] 18 | pub struct CollectionStatistics { 19 | pub field: String, 20 | pub doc_base: DocId, 21 | pub max_doc: i64, 22 | pub doc_count: i64, 23 | pub sum_total_term_freq: i64, 24 | pub sum_doc_freq: i64, 25 | } 26 | 27 | impl CollectionStatistics { 28 | pub fn new( 29 | field: String, 30 | doc_base: DocId, 31 | max_doc: i64, 32 | doc_count: i64, 33 | sum_total_term_freq: i64, 34 | sum_doc_freq: i64, 35 | ) -> CollectionStatistics { 36 | debug_assert!(max_doc >= 0); 37 | debug_assert!(doc_count >= -1 && doc_count <= max_doc); // #docs with field must be <= #docs 38 | debug_assert!(sum_doc_freq == -1 || sum_doc_freq >= doc_count); // #postings must be >= #docs with field 39 | debug_assert!(sum_total_term_freq == -1 || sum_total_term_freq >= sum_doc_freq); // #positions must be >= #postings 40 | CollectionStatistics { 41 | field, 42 | doc_base, 43 | max_doc, 44 | doc_count, 45 | sum_total_term_freq, 46 | sum_doc_freq, 47 | } 48 | } 49 | } 50 | 51 | /// Contains statistics for a specific term 52 | pub struct TermStatistics { 53 | pub term: Vec, 54 | pub doc_freq: i64, 55 | pub total_term_freq: i64, 56 | } 57 | 58 | impl TermStatistics { 59 | pub fn new(term: Vec, doc_freq: i64, total_term_freq: i64) -> TermStatistics { 60 | debug_assert!(doc_freq >= 0); 61 | debug_assert!(total_term_freq == -1 || total_term_freq >= doc_freq); 62 | 63 | TermStatistics { 64 | term, 65 | doc_freq, 66 | total_term_freq, 67 | } 68 | } 69 | } 70 | 71 | #[cfg(test)] 72 | mod tests { 73 | use super::*; 74 | use 
std::string::String; 75 | 76 | #[test] 77 | fn test_collection_statistics() { 78 | let collection_statistics = 79 | CollectionStatistics::new(String::from("hello"), 0, 25, 10, 14, 13); 80 | assert_eq!(collection_statistics.field, "hello"); 81 | assert_eq!(collection_statistics.max_doc, 25); 82 | assert_eq!(collection_statistics.doc_count, 10); 83 | assert_eq!(collection_statistics.sum_total_term_freq, 14); 84 | assert_eq!(collection_statistics.sum_doc_freq, 13); 85 | } 86 | 87 | #[test] 88 | fn test_term_statistics() { 89 | let mut v: Vec = Vec::new(); 90 | v.push(1); 91 | let term_statistics = TermStatistics::new(v, 1, 1); 92 | assert_eq!(term_statistics.term[0], 1); 93 | assert_eq!(term_statistics.doc_freq, 1); 94 | assert_eq!(term_statistics.total_term_freq, 1); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/core/store/directory/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | mod directory; 15 | 16 | pub use self::directory::*; 17 | 18 | mod fs_directory; 19 | 20 | pub use self::fs_directory::*; 21 | 22 | mod mmap_directory; 23 | 24 | pub use self::mmap_directory::*; 25 | 26 | mod tracking_directory_wrapper; 27 | 28 | pub use self::tracking_directory_wrapper::*; 29 | -------------------------------------------------------------------------------- /src/core/store/directory/tracking_directory_wrapper.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::directory::{Directory, FilterDirectory}; 15 | use core::store::io::{IndexInput, IndexOutput}; 16 | use core::store::IOContext; 17 | 18 | use error::Result; 19 | 20 | use std::collections::HashSet; 21 | use std::fmt; 22 | use std::ops::Deref; 23 | use std::sync::Mutex; 24 | 25 | /// A delegating Directory that records which files were written to and deleted. 
26 | pub struct TrackingDirectoryWrapper> { 27 | create_file_names: Mutex>, 28 | pub directory: T, 29 | } 30 | 31 | impl> TrackingDirectoryWrapper { 32 | pub fn new(directory: T) -> TrackingDirectoryWrapper { 33 | TrackingDirectoryWrapper { 34 | create_file_names: Mutex::new(HashSet::new()), 35 | directory, 36 | } 37 | } 38 | 39 | pub fn get_create_files(&self) -> HashSet { 40 | self.create_file_names.lock().unwrap().clone() 41 | } 42 | } 43 | 44 | impl FilterDirectory for TrackingDirectoryWrapper 45 | where 46 | D: Directory, 47 | T: Deref, 48 | { 49 | type Dir = D; 50 | 51 | #[inline] 52 | fn dir(&self) -> &Self::Dir { 53 | &*self.directory 54 | } 55 | } 56 | 57 | impl Directory for TrackingDirectoryWrapper 58 | where 59 | D: Directory, 60 | T: Deref, 61 | { 62 | type IndexOutput = D::IndexOutput; 63 | type TempOutput = D::TempOutput; 64 | 65 | fn create_output(&self, name: &str, ctx: &IOContext) -> Result { 66 | let output = self.directory.create_output(name, ctx)?; 67 | self.create_file_names.lock()?.insert(name.to_string()); 68 | Ok(output) 69 | } 70 | 71 | fn open_input(&self, name: &str, ctx: &IOContext) -> Result> { 72 | self.directory.open_input(name, ctx) 73 | } 74 | 75 | fn create_temp_output( 76 | &self, 77 | prefix: &str, 78 | suffix: &str, 79 | ctx: &IOContext, 80 | ) -> Result { 81 | let temp_output = self.directory.create_temp_output(prefix, suffix, ctx)?; 82 | self.create_file_names 83 | .lock()? 
84 | .insert(temp_output.name().to_string()); 85 | Ok(temp_output) 86 | } 87 | 88 | fn delete_file(&self, name: &str) -> Result<()> { 89 | self.directory.delete_file(name)?; 90 | self.create_file_names.lock()?.remove(name); 91 | Ok(()) 92 | } 93 | 94 | fn rename(&self, source: &str, dest: &str) -> Result<()> { 95 | self.directory.rename(source, dest)?; 96 | let mut guard = self.create_file_names.lock()?; 97 | guard.insert(dest.to_string()); 98 | guard.remove(source); 99 | Ok(()) 100 | } 101 | 102 | fn create_files(&self) -> HashSet { 103 | self.create_file_names.lock().unwrap().clone() 104 | } 105 | } 106 | 107 | impl fmt::Display for TrackingDirectoryWrapper 108 | where 109 | D: Directory, 110 | T: Deref, 111 | { 112 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 113 | write!(f, "TrackingDirectoryWrapper({})", &*self.directory) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/core/store/io/buffered_checksum_index_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | extern crate crc; 15 | 16 | use core::store::io::{ChecksumIndexInput, DataInput, IndexInput, RandomAccessInput}; 17 | 18 | use error::ErrorKind::IllegalArgument; 19 | use error::Result; 20 | 21 | use crc::{crc32, Hasher32}; 22 | use std::io::Read; 23 | 24 | /// Simple implementation of `ChecksumIndexInput` that wraps 25 | /// another input and delegates calls. 26 | pub struct BufferedChecksumIndexInput { 27 | index_input: Box, 28 | digest: crc32::Digest, 29 | name: String, 30 | } 31 | 32 | impl BufferedChecksumIndexInput { 33 | pub fn new(index_input: Box) -> BufferedChecksumIndexInput { 34 | let digest = crc32::Digest::new_with_initial(crc32::IEEE, 0u32); 35 | let name = String::from(index_input.name()); 36 | BufferedChecksumIndexInput { 37 | index_input, 38 | digest, 39 | name, 40 | } 41 | } 42 | } 43 | 44 | impl ChecksumIndexInput for BufferedChecksumIndexInput { 45 | fn checksum(&self) -> i64 { 46 | i64::from(self.digest.sum32()) 47 | } 48 | } 49 | 50 | impl DataInput for BufferedChecksumIndexInput {} 51 | 52 | impl Read for BufferedChecksumIndexInput { 53 | fn read(&mut self, buf: &mut [u8]) -> ::std::io::Result { 54 | let length = self.index_input.read(buf)?; 55 | self.digest.write(&buf[0..length]); 56 | Ok(length) 57 | } 58 | } 59 | 60 | impl IndexInput for BufferedChecksumIndexInput { 61 | fn clone(&self) -> Result> { 62 | Ok(Box::new(Self { 63 | index_input: self.index_input.clone()?, 64 | digest: crc32::Digest::new_with_initial(crc32::IEEE, self.digest.sum32()), 65 | name: self.name.clone(), 66 | })) 67 | } 68 | fn file_pointer(&self) -> i64 { 69 | self.index_input.file_pointer() 70 | } 71 | 72 | fn seek(&mut self, pos: i64) -> Result<()> { 73 | let curr_pos = self.file_pointer(); 74 | let to_skip = pos - curr_pos; 75 | if to_skip < 0 { 76 | bail!(IllegalArgument(format!( 77 | "Can't seek backwards: {} => {}", 78 | curr_pos, pos 79 | ))); 80 | } 81 | self.skip_bytes(to_skip as usize) 82 | } 83 | 84 | fn len(&self) -> u64 { 85 | 
self.index_input.len() 86 | } 87 | 88 | fn name(&self) -> &str { 89 | &self.name 90 | } 91 | 92 | fn random_access_slice( 93 | &self, 94 | _offset: i64, 95 | _length: i64, 96 | ) -> Result> { 97 | unimplemented!() 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/core/store/io/byte_array_data_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::{DataInput, DataOutput}; 15 | 16 | use error::Result; 17 | use std::cmp::min; 18 | use std::io::{self, Read, Write}; 19 | use std::sync::Arc; 20 | 21 | pub struct ByteArrayRef(Arc>); 22 | 23 | impl ByteArrayRef { 24 | pub fn new(v: Arc>) -> ByteArrayRef { 25 | ByteArrayRef(v) 26 | } 27 | } 28 | 29 | impl AsRef<[u8]> for ByteArrayRef { 30 | fn as_ref(&self) -> &[u8] { 31 | &self.0 32 | } 33 | } 34 | 35 | /// DataInput backed by a byte array. 36 | /// 37 | /// *WARNING:* This class omits all low-level checks. 
38 | pub struct ByteArrayDataInput> { 39 | bytes: T, 40 | pos: usize, 41 | } 42 | 43 | impl> ByteArrayDataInput { 44 | pub fn new(bytes: T) -> ByteArrayDataInput { 45 | ByteArrayDataInput { bytes, pos: 0usize } 46 | } 47 | 48 | pub fn rewind(&mut self) { 49 | self.pos = 0; 50 | } 51 | 52 | pub fn position(&self) -> usize { 53 | self.pos 54 | } 55 | 56 | pub fn set_position(&mut self, pos: usize) { 57 | self.pos = pos; 58 | } 59 | 60 | pub fn length(&self) -> usize { 61 | self.bytes.as_ref().len() 62 | } 63 | 64 | pub fn eof(&self) -> bool { 65 | self.pos == self.length() 66 | } 67 | 68 | pub fn reset(&mut self, bytes: T) { 69 | self.bytes = bytes; 70 | self.pos = 0; 71 | } 72 | 73 | pub fn get_slice(&self, pos: usize, len: usize) -> Result<&[u8]> { 74 | let limit = self.bytes.as_ref().len(); 75 | if pos < self.pos || pos > limit || pos + len > limit { 76 | bail!( 77 | "Invalid Argument: slice ({}, {}) is beyond valid range of ({}, {})", 78 | pos, 79 | pos + len, 80 | self.pos, 81 | limit 82 | ) 83 | } 84 | Ok(&self.bytes.as_ref()[pos..pos + len]) 85 | } 86 | } 87 | 88 | impl> DataInput for ByteArrayDataInput { 89 | fn read_byte(&mut self) -> Result { 90 | let b = self.bytes.as_ref()[self.pos]; 91 | self.pos += 1; 92 | Ok(b) 93 | } 94 | 95 | fn read_bytes(&mut self, b: &mut [u8], offset: usize, len: usize) -> Result<()> { 96 | b[offset..offset + len].copy_from_slice(&self.bytes.as_ref()[self.pos..self.pos + len]); 97 | self.pos += len; 98 | Ok(()) 99 | } 100 | 101 | fn skip_bytes(&mut self, count: usize) -> Result<()> { 102 | self.pos += count; 103 | Ok(()) 104 | } 105 | } 106 | 107 | impl> Read for ByteArrayDataInput { 108 | fn read(&mut self, buf: &mut [u8]) -> ::std::io::Result { 109 | let size = ::std::cmp::min(buf.len(), self.length() - self.pos); 110 | buf[0..size].copy_from_slice(&self.bytes.as_ref()[self.pos..self.pos + size]); 111 | self.pos += size; 112 | Ok(size) 113 | } 114 | } 115 | 116 | /// DataOutput backed by a byte array. 
117 | pub struct ByteArrayDataOutput { 118 | bytes: T, 119 | pub pos: usize, 120 | limit: usize, 121 | } 122 | 123 | impl ByteArrayDataOutput 124 | where 125 | T: AsMut<[u8]>, 126 | { 127 | pub fn new(bytes: T, offset: usize, len: usize) -> ByteArrayDataOutput { 128 | ByteArrayDataOutput { 129 | bytes, 130 | pos: offset, 131 | limit: offset + len, 132 | } 133 | } 134 | 135 | #[inline] 136 | fn bytes_slice(&mut self) -> &mut [u8] { 137 | self.bytes.as_mut() 138 | } 139 | } 140 | 141 | impl Write for ByteArrayDataOutput 142 | where 143 | T: AsMut<[u8]>, 144 | { 145 | fn write(&mut self, buf: &[u8]) -> io::Result { 146 | let length = min(self.limit - self.pos, buf.len()); 147 | let pos = self.pos; 148 | self.bytes_slice()[pos..pos + length].copy_from_slice(&buf[..length]); 149 | self.pos += length; 150 | Ok(length) 151 | } 152 | 153 | fn flush(&mut self) -> io::Result<()> { 154 | Ok(()) 155 | } 156 | } 157 | 158 | impl DataOutput for ByteArrayDataOutput where T: AsMut<[u8]> {} 159 | -------------------------------------------------------------------------------- /src/core/store/io/checksum_index_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::store::io::IndexInput; 15 | 16 | pub trait ChecksumIndexInput: IndexInput { 17 | fn checksum(&self) -> i64; 18 | } 19 | -------------------------------------------------------------------------------- /src/core/store/io/data_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::DataInput; 15 | 16 | use core::util::ZigZagEncoding; 17 | use error::ErrorKind::IllegalArgument; 18 | use error::Result; 19 | 20 | use std::collections::{HashMap, HashSet}; 21 | use std::io::Write; 22 | use std::mem; 23 | 24 | /// Trait for performing write operations of Lucene's low-level data types. 
25 | pub trait DataOutput: Write { 26 | fn write_byte(&mut self, b: u8) -> Result<()> { 27 | let buf = [b; 1]; 28 | self.write_all(&buf)?; 29 | Ok(()) 30 | } 31 | 32 | #[inline] 33 | fn write_bytes(&mut self, b: &[u8], offset: usize, length: usize) -> Result<()> { 34 | debug_assert!(offset + length <= b.len()); 35 | self.write_all(&b[offset..offset + length])?; 36 | Ok(()) 37 | } 38 | 39 | fn write_short(&mut self, i: i16) -> Result<()> { 40 | let bytes = unsafe { mem::transmute::<_, [u8; 2]>(i.to_be()) }; 41 | self.write_all(&bytes)?; 42 | Ok(()) 43 | } 44 | 45 | fn write_int(&mut self, i: i32) -> Result<()> { 46 | let bytes = unsafe { mem::transmute::<_, [u8; 4]>(i.to_be()) }; 47 | self.write_all(&bytes)?; 48 | Ok(()) 49 | } 50 | 51 | fn write_vint(&mut self, i: i32) -> Result<()> { 52 | let mut i = i as u32; 53 | while (i & !0x7f_u32) != 0 { 54 | self.write_byte(((i & 0x7f) | 0x80) as u8)?; 55 | i >>= 7; 56 | } 57 | self.write_byte(i as u8) 58 | } 59 | 60 | fn write_zint(&mut self, i: i32) -> Result<()> { 61 | self.write_vint(i.encode()) 62 | } 63 | 64 | fn write_long(&mut self, i: i64) -> Result<()> { 65 | let bytes = unsafe { mem::transmute::<_, [u8; 8]>(i.to_be()) }; 66 | self.write_all(&bytes)?; 67 | Ok(()) 68 | } 69 | 70 | fn _write_signed_vlong(&mut self, i: i64) -> Result<()> { 71 | let mut i = i as u64; 72 | while (i & !0x7f_u64) != 0 { 73 | self.write_byte(((i & 0x7f_u64) | 0x80_u64) as u8)?; 74 | i >>= 7; 75 | } 76 | self.write_byte(i as u8) 77 | } 78 | 79 | fn write_vlong(&mut self, i: i64) -> Result<()> { 80 | if i < 0 { 81 | bail!(IllegalArgument("Can't write negative vLong".to_owned())); 82 | } 83 | self._write_signed_vlong(i) 84 | } 85 | 86 | fn write_zlong(&mut self, i: i64) -> Result<()> { 87 | self._write_signed_vlong(i.encode()) 88 | } 89 | 90 | fn write_string(&mut self, s: &str) -> Result<()> { 91 | let s = s.as_bytes(); 92 | self.write_vint(s.len() as i32)?; 93 | self.write_all(s)?; 94 | Ok(()) 95 | } 96 | 97 | fn write_map_of_strings(&mut 
self, map: &HashMap) -> Result<()> { 98 | self.write_vint(map.len() as i32)?; 99 | 100 | let mut keys: Vec<&String> = map.keys().collect(); 101 | keys.sort(); 102 | for k in keys { 103 | self.write_string(k)?; 104 | self.write_string(map.get(k).unwrap())?; 105 | } 106 | Ok(()) 107 | } 108 | 109 | fn write_set_of_strings(&mut self, set: &HashSet) -> Result<()> { 110 | self.write_vint(set.len() as i32)?; 111 | 112 | let mut keys: Vec<&String> = set.iter().collect(); 113 | keys.sort(); 114 | for k in keys { 115 | self.write_string(k)?; 116 | } 117 | Ok(()) 118 | } 119 | 120 | fn copy_bytes(&mut self, from: &mut I, len: usize) -> Result<()> { 121 | const COPY_BUFFER_SIZE: usize = 16384; 122 | let mut left = len as i64; 123 | let mut copy_buffer = [0u8; COPY_BUFFER_SIZE]; 124 | while left > 0 { 125 | let to_copy = if left as usize > COPY_BUFFER_SIZE { 126 | COPY_BUFFER_SIZE 127 | } else { 128 | left as usize 129 | }; 130 | from.read_bytes(&mut copy_buffer, 0, to_copy)?; 131 | self.write_all(©_buffer[..to_copy])?; 132 | left -= to_copy as i64; 133 | } 134 | Ok(()) 135 | } 136 | } 137 | 138 | // a implement that can use Vec as a data output 139 | impl DataOutput for Vec {} 140 | -------------------------------------------------------------------------------- /src/core/store/io/fs_index_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::store::io::{DataOutput, IndexOutput}; 15 | 16 | use error::Result; 17 | 18 | use std::fs::{File, OpenOptions}; 19 | use std::io::BufWriter; 20 | use std::io::Write; 21 | use std::path::Path; 22 | 23 | use flate2::CrcWriter; 24 | 25 | const CHUNK_SIZE: usize = 8192; 26 | 27 | /// `IndexOutput` implement for `FsDirectory` 28 | pub struct FSIndexOutput { 29 | name: String, 30 | writer: CrcWriter>, 31 | bytes_written: usize, 32 | } 33 | 34 | impl FSIndexOutput { 35 | pub fn new>(name: String, path: P) -> Result { 36 | let file = OpenOptions::new().write(true).create(true).open(path)?; 37 | Ok(FSIndexOutput { 38 | name, 39 | writer: CrcWriter::new(BufWriter::with_capacity(CHUNK_SIZE, file)), 40 | bytes_written: 0, 41 | }) 42 | } 43 | } 44 | 45 | impl Drop for FSIndexOutput { 46 | fn drop(&mut self) { 47 | if let Err(ref desc) = self.writer.flush() { 48 | error!("Oops, failed to flush {}, errmsg: {}", self.name, desc); 49 | } 50 | self.bytes_written = 0; 51 | } 52 | } 53 | 54 | impl DataOutput for FSIndexOutput {} 55 | 56 | impl Write for FSIndexOutput { 57 | fn write(&mut self, buf: &[u8]) -> ::std::io::Result { 58 | let count = self.writer.write(buf)?; 59 | self.bytes_written += count; 60 | Ok(count) 61 | } 62 | 63 | fn flush(&mut self) -> ::std::io::Result<()> { 64 | self.writer.flush() 65 | } 66 | } 67 | 68 | impl IndexOutput for FSIndexOutput { 69 | fn name(&self) -> &str { 70 | &self.name 71 | } 72 | 73 | fn file_pointer(&self) -> i64 { 74 | self.bytes_written as i64 75 | } 76 | 77 | fn checksum(&self) -> Result { 78 | // self.writer.flush()?; 79 | Ok((self.writer.crc().sum() as i64) & 0xffff_ffffi64) 80 | } 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use super::*; 86 | use std::path::{Path, PathBuf}; 87 | 88 | #[test] 89 | fn test_write_byte() { 90 | let name = "hello.txt"; 91 | let path: PathBuf = Path::new(name).into(); 92 | let mut fsout = FSIndexOutput::new(name.to_string(), &path).unwrap(); 93 | fsout.write_byte(b'a').unwrap(); 94 
| assert_eq!(fsout.file_pointer(), 1); 95 | ::std::fs::remove_file("hello.txt").unwrap(); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/core/store/io/growable_byte_array_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::DataOutput; 15 | 16 | use std::io::Write; 17 | 18 | const MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING: usize = 65536; 19 | 20 | /// a `IndexOutput` that can be used to build a bytes array. 
21 | pub struct GrowableByteArrayDataOutput { 22 | pub bytes: Vec, 23 | length: usize, 24 | /* scratch for utf8 encoding of small strings 25 | * _scratch_bytes: Vec, */ 26 | } 27 | 28 | impl GrowableByteArrayDataOutput { 29 | pub fn new(cp: usize) -> GrowableByteArrayDataOutput { 30 | GrowableByteArrayDataOutput { 31 | bytes: vec![0u8; cp + MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING], 32 | length: 0, 33 | //_scratch_bytes: vec![0; cp + MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING], 34 | } 35 | } 36 | 37 | pub fn position(&self) -> usize { 38 | self.length 39 | } 40 | 41 | pub fn reset(&mut self) { 42 | self.length = 0; 43 | } 44 | } 45 | 46 | impl Write for GrowableByteArrayDataOutput { 47 | fn write(&mut self, buf: &[u8]) -> ::std::io::Result { 48 | let buf_len = buf.len(); 49 | let new_len = self.length + buf_len; 50 | if self.bytes.len() < new_len { 51 | self.bytes.resize(new_len, 0u8); 52 | } 53 | self.bytes[self.length..new_len].copy_from_slice(buf); 54 | self.length += buf_len; 55 | Ok(buf_len) 56 | } 57 | 58 | fn flush(&mut self) -> ::std::io::Result<()> { 59 | Ok(()) 60 | } 61 | } 62 | 63 | impl DataOutput for GrowableByteArrayDataOutput {} 64 | -------------------------------------------------------------------------------- /src/core/store/io/index_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::store::io::{DataInput, RandomAccessInput}; 15 | 16 | use error::Result; 17 | 18 | pub trait IndexInput: DataInput + Send + Sync { 19 | fn clone(&self) -> Result>; 20 | 21 | fn file_pointer(&self) -> i64; 22 | fn seek(&mut self, pos: i64) -> Result<()>; 23 | fn len(&self) -> u64; 24 | fn is_empty(&self) -> bool { 25 | self.len() == 0 26 | } 27 | fn name(&self) -> &str; 28 | 29 | fn random_access_slice(&self, _offset: i64, _length: i64) 30 | -> Result>; 31 | 32 | fn slice(&self, _description: &str, _offset: i64, _length: i64) -> Result> { 33 | unimplemented!(); 34 | } 35 | 36 | unsafe fn get_and_advance(&mut self, _length: usize) -> *const u8 { 37 | unimplemented!() 38 | } 39 | 40 | fn is_buffered(&self) -> bool { 41 | false 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/core/store/io/index_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::DataOutput; 15 | use core::store::RateLimiter; 16 | 17 | use error::Result; 18 | use std::io; 19 | use std::sync::Arc; 20 | 21 | /// Trait for output to a file in a Directory. 22 | /// 23 | /// A random-access output stream. Used for all Lucene index output operations. 
24 | pub trait IndexOutput: DataOutput { 25 | fn name(&self) -> &str; 26 | fn file_pointer(&self) -> i64; 27 | fn checksum(&self) -> Result; 28 | } 29 | 30 | pub struct IndexOutputRef { 31 | // TODO: we need GAT for the lifetime declaration 32 | // so, currently directly use raw pointer instead 33 | output: *mut T, 34 | } 35 | 36 | impl IndexOutputRef { 37 | pub fn new(output: &mut T) -> Self { 38 | Self { output } 39 | } 40 | } 41 | 42 | impl IndexOutput for IndexOutputRef { 43 | fn name(&self) -> &str { 44 | unsafe { (*self.output).name() } 45 | } 46 | 47 | fn file_pointer(&self) -> i64 { 48 | unsafe { (*self.output).file_pointer() } 49 | } 50 | 51 | fn checksum(&self) -> Result { 52 | unsafe { (*self.output).checksum() } 53 | } 54 | } 55 | 56 | impl DataOutput for IndexOutputRef {} 57 | 58 | impl io::Write for IndexOutputRef { 59 | fn write(&mut self, buf: &[u8]) -> io::Result { 60 | unsafe { (*self.output).write(buf) } 61 | } 62 | 63 | fn flush(&mut self) -> io::Result<()> { 64 | unsafe { (*self.output).flush() } 65 | } 66 | } 67 | 68 | pub struct InvalidIndexOutput {} 69 | 70 | impl io::Write for InvalidIndexOutput { 71 | fn write(&mut self, _buf: &[u8]) -> io::Result { 72 | unreachable!() 73 | } 74 | 75 | fn flush(&mut self) -> io::Result<()> { 76 | unreachable!() 77 | } 78 | } 79 | 80 | impl DataOutput for InvalidIndexOutput {} 81 | 82 | impl IndexOutput for InvalidIndexOutput { 83 | fn name(&self) -> &str { 84 | "invalid" 85 | } 86 | 87 | fn file_pointer(&self) -> i64 { 88 | -1 89 | } 90 | 91 | fn checksum(&self) -> Result { 92 | unreachable!() 93 | } 94 | } 95 | 96 | /// a rate limiting `IndexOutput` 97 | pub struct RateLimitIndexOutput { 98 | delegate: O, 99 | rate_limiter: Arc, 100 | /// How many bytes we've written since we last called rateLimiter.pause. 
101 | bytes_since_last_pause: usize, 102 | /// Cached here not not always have to call RateLimiter#getMinPauseCheckBytes() 103 | /// which does volatile read 104 | current_min_pause_check_bytes: usize, 105 | } 106 | 107 | impl RateLimitIndexOutput { 108 | pub fn new(rate_limiter: Arc, delegate: O) -> Self { 109 | let current_min_pause_check_bytes = rate_limiter.min_pause_check_bytes() as usize; 110 | RateLimitIndexOutput { 111 | delegate, 112 | rate_limiter, 113 | bytes_since_last_pause: 0, 114 | current_min_pause_check_bytes, 115 | } 116 | } 117 | 118 | fn check_rate(&mut self) -> Result<()> { 119 | if self.bytes_since_last_pause > self.current_min_pause_check_bytes { 120 | self.rate_limiter 121 | .pause(self.bytes_since_last_pause as u64)?; 122 | self.bytes_since_last_pause = 0; 123 | self.current_min_pause_check_bytes = self.rate_limiter.min_pause_check_bytes() as usize; 124 | } 125 | Ok(()) 126 | } 127 | } 128 | 129 | impl IndexOutput for RateLimitIndexOutput { 130 | fn name(&self) -> &str { 131 | self.delegate.name() 132 | } 133 | 134 | fn file_pointer(&self) -> i64 { 135 | self.delegate.file_pointer() 136 | } 137 | 138 | fn checksum(&self) -> Result { 139 | self.delegate.checksum() 140 | } 141 | } 142 | 143 | impl DataOutput for RateLimitIndexOutput {} 144 | 145 | impl io::Write for RateLimitIndexOutput { 146 | fn write(&mut self, buf: &[u8]) -> io::Result { 147 | self.bytes_since_last_pause += buf.len(); 148 | if let Err(_e) = self.check_rate() { 149 | return Err(io::Error::from(io::ErrorKind::WouldBlock)); 150 | } 151 | self.delegate.write(buf) 152 | } 153 | 154 | fn flush(&mut self) -> io::Result<()> { 155 | self.delegate.flush() 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/core/store/io/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod data_input; 15 | 16 | pub use self::data_input::*; 17 | 18 | mod index_input; 19 | 20 | pub use self::index_input::*; 21 | 22 | mod random_access_input; 23 | 24 | pub use self::random_access_input::*; 25 | 26 | mod checksum_index_input; 27 | 28 | pub use self::checksum_index_input::*; 29 | 30 | mod buffered_checksum_index_input; 31 | 32 | pub use self::buffered_checksum_index_input::*; 33 | 34 | mod mmap_index_input; 35 | 36 | pub use self::mmap_index_input::*; 37 | 38 | mod data_output; 39 | 40 | pub use self::data_output::*; 41 | 42 | mod index_output; 43 | 44 | pub use self::index_output::*; 45 | 46 | mod fs_index_output; 47 | 48 | pub use self::fs_index_output::*; 49 | 50 | mod byte_array_data_input; 51 | 52 | pub use self::byte_array_data_input::*; 53 | 54 | mod growable_byte_array_output; 55 | 56 | pub use self::growable_byte_array_output::*; 57 | 58 | mod ram_output; 59 | 60 | pub use self::ram_output::*; 61 | -------------------------------------------------------------------------------- /src/core/store/io/ram_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::{ByteArrayDataOutput, DataOutput, IndexOutput}; 15 | 16 | use error::{ErrorKind, Result}; 17 | 18 | use std::io::{self, Write}; 19 | 20 | use flate2::Crc; 21 | 22 | use core::util::fst::BytesStore; 23 | 24 | const CHUNK_SIZE: usize = 8192; 25 | 26 | /// A memory-resident `IndexOutput` implementation. 27 | /// Use `BytesStore` to represent in memory output store 28 | pub struct RAMOutputStream { 29 | name: String, 30 | pub store: BytesStore, 31 | crc: Option, 32 | } 33 | 34 | impl RAMOutputStream { 35 | pub fn new(checksum: bool) -> Self { 36 | Self::with_chunk_size(CHUNK_SIZE, checksum) 37 | } 38 | 39 | pub fn from_store(store: BytesStore) -> Self { 40 | RAMOutputStream { 41 | name: "noname".into(), 42 | store, 43 | crc: None, 44 | } 45 | } 46 | 47 | pub fn with_chunk_size(chunk_size: usize, checksum: bool) -> Self { 48 | let store = BytesStore::with_block_bits(chunk_size.trailing_zeros() as usize); 49 | let crc = if checksum { Some(Crc::new()) } else { None }; 50 | 51 | RAMOutputStream { 52 | name: "noname".into(), 53 | store, 54 | crc, 55 | } 56 | } 57 | 58 | pub fn write_to(&self, out: &mut impl DataOutput) -> Result<()> { 59 | // self.flush(); 60 | self.store.write_to(out) 61 | } 62 | 63 | pub fn write_to_buf(&self, out: &mut [u8]) -> Result<()> { 64 | let length = out.len(); 65 | let mut output = ByteArrayDataOutput::new(out, 0, length); 66 | self.write_to(&mut output) 67 | } 68 | 69 | pub fn reset(&mut self) { 70 | self.store.truncate(0); 71 | if let Some(ref mut crc) = self.crc { 72 | crc.reset(); 73 | } 74 | } 75 | } 76 | 77 | impl Write for RAMOutputStream { 
78 | fn write(&mut self, buf: &[u8]) -> io::Result { 79 | let size = self.store.write(buf)?; 80 | if size > 0 { 81 | if let Some(ref mut crc) = self.crc { 82 | crc.update(&buf[0..size]); 83 | } 84 | } 85 | Ok(size) 86 | } 87 | 88 | fn flush(&mut self) -> io::Result<()> { 89 | self.store.flush() 90 | } 91 | } 92 | 93 | impl DataOutput for RAMOutputStream {} 94 | 95 | impl IndexOutput for RAMOutputStream { 96 | fn name(&self) -> &str { 97 | &self.name 98 | } 99 | 100 | fn file_pointer(&self) -> i64 { 101 | self.store.get_position() as i64 102 | } 103 | 104 | fn checksum(&self) -> Result { 105 | if let Some(ref crc) = self.crc { 106 | Ok((crc.sum() as i64) & 0xffff_ffffi64) 107 | } else { 108 | bail!(ErrorKind::IllegalState( 109 | "internal RAMOutputStream created with checksum disabled".into() 110 | )) 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/core/store/io/random_access_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use error::Result; 15 | 16 | /// Random Access Index API. 17 | /// 18 | /// Unlike `IndexInput`, this has no concept of file position, all reads 19 | /// are absolute. However, like IndexInput, it is only intended for use by a single thread. 
20 | pub trait RandomAccessInput: Send + Sync { 21 | fn read_byte(&self, pos: u64) -> Result; 22 | fn read_short(&self, pos: u64) -> Result; 23 | fn read_int(&self, pos: u64) -> Result; 24 | fn read_long(&self, pos: u64) -> Result; 25 | } 26 | -------------------------------------------------------------------------------- /src/core/store/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | pub mod directory; 15 | pub mod io; 16 | 17 | use error::Result; 18 | 19 | use std::sync::Arc; 20 | use std::time::Duration; 21 | 22 | /// IOContext holds additional details on the merge/search context and 23 | /// specifies the context in which the Directory is being used for. 24 | #[derive(PartialEq, Eq, Clone, Copy)] 25 | pub enum IOContext { 26 | Read(bool), 27 | Default, 28 | Flush(FlushInfo), 29 | Merge(MergeInfo), 30 | } 31 | 32 | impl IOContext { 33 | pub const READ: IOContext = IOContext::Read(false); 34 | pub const READ_ONCE: IOContext = IOContext::Read(true); 35 | pub fn is_merge(&self) -> bool { 36 | match self { 37 | IOContext::Merge(_) => true, 38 | _ => false, 39 | } 40 | } 41 | } 42 | 43 | /// A FlushInfo provides information required for a FLUSH context. 44 | /// 45 | /// It is used as part of an `IOContext` in case of FLUSH context. 
46 | #[derive(PartialEq, Eq, Hash, Clone, Copy)] 47 | pub struct FlushInfo { 48 | num_docs: u32, 49 | } 50 | 51 | impl FlushInfo { 52 | pub fn new(num_docs: u32) -> Self { 53 | FlushInfo { num_docs } 54 | } 55 | } 56 | 57 | /// A MergeInfo provides information required for a MERGE context. 58 | /// 59 | /// It is used as part of an `IOContext` in case of MERGE context. 60 | #[derive(PartialEq, Eq, Hash, Clone, Copy)] 61 | pub struct MergeInfo { 62 | total_max_doc: u32, 63 | estimated_merge_bytes: u64, 64 | is_external: bool, 65 | merge_max_num_segments: Option, 66 | } 67 | 68 | impl MergeInfo { 69 | pub fn new( 70 | total_max_doc: u32, 71 | estimated_merge_bytes: u64, 72 | is_external: bool, 73 | merge_max_num_segments: Option, 74 | ) -> Self { 75 | MergeInfo { 76 | total_max_doc, 77 | estimated_merge_bytes, 78 | is_external, 79 | merge_max_num_segments, 80 | } 81 | } 82 | } 83 | 84 | /// Trait base class to rate limit IO. 85 | /// 86 | /// Typically implementations are shared across multiple IndexInputs 87 | /// or IndexOutputs (for example those involved all merging). Those IndexInputs and 88 | /// IndexOutputs would call {@link #pause} whenever the have read 89 | /// or written more than {@link #getMinPauseCheckBytes} bytes. 90 | 91 | pub trait RateLimiter: Sync + Send { 92 | /// Sets an updated MB per second rate limit. 93 | fn set_mb_per_sec(&self, mb_per_sec: f64); 94 | 95 | /// The current MB per second rate limit. 
96 | fn mb_per_sec(&self) -> f64; 97 | 98 | /// Pauses, if necessary, to keep the instantaneous IO rate 99 | /// at or below the target 100 | /// 101 | /// Note: the implementation is thread-safe 102 | fn pause(&self, bytes: u64) -> Result; 103 | 104 | /// how many bytes caller should add up isself before invoking `#pause` 105 | fn min_pause_check_bytes(&self) -> u64; 106 | } 107 | 108 | impl RateLimiter for Arc { 109 | fn set_mb_per_sec(&self, mb_per_sec: f64) { 110 | (**self).set_mb_per_sec(mb_per_sec); 111 | } 112 | 113 | fn mb_per_sec(&self) -> f64 { 114 | (**self).mb_per_sec() 115 | } 116 | 117 | fn pause(&self, bytes: u64) -> Result { 118 | (**self).pause(bytes) 119 | } 120 | 121 | fn min_pause_check_bytes(&self) -> u64 { 122 | (**self).min_pause_check_bytes() 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/core/util/byte_slice_reader.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::DataInput; 15 | use core::util::ByteBlockPool; 16 | 17 | use std::io; 18 | use std::ptr; 19 | 20 | /// IndexInput that knows how to read the byte slices written 21 | /// by Posting and PostingVector. We read the bytes in 22 | /// each slice until we hit the end of that slice at which 23 | /// point we read the forwarding address of the next slice 24 | /// and then jump to it. 
25 | pub struct ByteSliceReader { 26 | pool: *const ByteBlockPool, 27 | buffer_upto: usize, 28 | // current buffer index of pool.buffers 29 | upto: usize, 30 | limit: usize, 31 | level: usize, 32 | buffer_offset: usize, 33 | end_index: usize, 34 | } 35 | 36 | impl Default for ByteSliceReader { 37 | fn default() -> Self { 38 | ByteSliceReader { 39 | pool: ptr::null(), 40 | buffer_upto: 0, 41 | upto: 0, 42 | limit: 0, 43 | level: 0, 44 | buffer_offset: 0, 45 | end_index: 0, 46 | } 47 | } 48 | } 49 | 50 | impl Clone for ByteSliceReader { 51 | fn clone(&self) -> Self { 52 | ByteSliceReader { 53 | pool: self.pool, 54 | buffer_upto: self.buffer_upto, 55 | upto: self.upto, 56 | limit: self.limit, 57 | level: self.level, 58 | buffer_offset: self.buffer_offset, 59 | end_index: self.end_index, 60 | } 61 | } 62 | } 63 | 64 | impl ByteSliceReader { 65 | pub fn init(&mut self, pool: &ByteBlockPool, start_index: usize, end_index: usize) { 66 | debug_assert!(end_index >= start_index); 67 | 68 | self.pool = pool; 69 | self.end_index = end_index; 70 | self.level = 0; 71 | self.buffer_upto = start_index / ByteBlockPool::BYTE_BLOCK_SIZE; 72 | self.buffer_offset = self.buffer_upto * ByteBlockPool::BYTE_BLOCK_SIZE; 73 | self.upto = start_index & ByteBlockPool::BYTE_BLOCK_MASK; 74 | 75 | let first_size = ByteBlockPool::LEVEL_SIZE_ARRAY[0]; 76 | self.limit = if start_index + first_size >= end_index { 77 | // There is noly this one slice to read 78 | end_index & ByteBlockPool::BYTE_BLOCK_MASK 79 | } else { 80 | self.upto + first_size - 4 81 | }; 82 | } 83 | 84 | pub fn eof(&self) -> bool { 85 | debug_assert!(self.upto + self.buffer_offset <= self.end_index); 86 | self.upto + self.buffer_offset == self.end_index 87 | } 88 | 89 | unsafe fn next_slice(&mut self) { 90 | let pool = &*self.pool; 91 | // skip to next slice 92 | let next_index = { 93 | let buffer = &pool.buffers[self.buffer_upto]; 94 | ((buffer[self.limit] as usize) << 24) 95 | + ((buffer[self.limit + 1] as usize) << 16) 96 | + 
((buffer[self.limit + 2] as usize) << 8) 97 | + (buffer[self.limit + 3] as usize) 98 | }; 99 | self.level = ByteBlockPool::NEXT_LEVEL_ARRAY[self.level]; 100 | let new_size = ByteBlockPool::LEVEL_SIZE_ARRAY[self.level]; 101 | 102 | self.buffer_upto = next_index / ByteBlockPool::BYTE_BLOCK_SIZE; 103 | self.buffer_offset = self.buffer_upto * ByteBlockPool::BYTE_BLOCK_SIZE; 104 | self.upto = next_index & ByteBlockPool::BYTE_BLOCK_MASK; 105 | if next_index + new_size >= self.end_index { 106 | // We are advancing to the final slice 107 | debug_assert!(self.end_index >= next_index); 108 | self.limit = self.end_index - self.buffer_offset; 109 | } else { 110 | // This is not the final slice (subtract 4 for the 111 | // forwarding address at the end of this new slice) 112 | self.limit = self.upto + new_size - 4; 113 | } 114 | } 115 | } 116 | 117 | impl io::Read for ByteSliceReader { 118 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 119 | let mut len = buf.len(); 120 | let mut offset = 0; 121 | while len > 0 { 122 | let num_left = self.limit - self.upto; 123 | unsafe { 124 | if num_left < len { 125 | buf[offset..offset + num_left].copy_from_slice( 126 | &(*self.pool).buffers[self.buffer_upto][self.upto..self.upto + num_left], 127 | ); 128 | offset += num_left; 129 | len -= num_left; 130 | self.next_slice(); 131 | } else { 132 | // This slice is the last one 133 | buf[offset..offset + len].copy_from_slice( 134 | &(*self.pool).buffers[self.buffer_upto][self.upto..self.upto + len], 135 | ); 136 | self.upto += len; 137 | break; 138 | } 139 | } 140 | } 141 | Ok(buf.len()) 142 | } 143 | } 144 | 145 | impl DataInput for ByteSliceReader {} 146 | -------------------------------------------------------------------------------- /src/core/util/bytes_ref.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::cmp::Ordering; 15 | use std::fmt; 16 | 17 | #[derive(Copy, Clone)] 18 | pub struct BytesRef { 19 | slice: *const [u8], 20 | } 21 | 22 | const DUMMY_BYTE: [u8; 0] = []; 23 | 24 | // return a dummy `BytesPtr` for some place need dummy init 25 | // in order to avoid `Option` 26 | impl Default for BytesRef { 27 | fn default() -> Self { 28 | BytesRef::new(&DUMMY_BYTE) 29 | } 30 | } 31 | 32 | impl BytesRef { 33 | pub fn new(bytes: &[u8]) -> BytesRef { 34 | BytesRef { 35 | slice: bytes as *const [u8], 36 | } 37 | } 38 | 39 | pub fn bytes(&self) -> &[u8] { 40 | unsafe { &*self.slice } 41 | } 42 | 43 | pub fn set_bytes(&mut self, bytes: &[u8]) { 44 | self.slice = bytes as *const [u8]; 45 | } 46 | 47 | pub fn is_empty(&self) -> bool { 48 | self.len() == 0 49 | } 50 | 51 | pub fn len(&self) -> usize { 52 | unsafe { (&*self.slice).len() } 53 | } 54 | 55 | pub fn byte_at(&self, idx: usize) -> u8 { 56 | unsafe { (&*self.slice)[idx] } 57 | } 58 | } 59 | 60 | impl AsRef<[u8]> for BytesRef { 61 | fn as_ref(&self) -> &[u8] { 62 | self.bytes() 63 | } 64 | } 65 | 66 | impl fmt::Debug for BytesRef { 67 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 68 | f.debug_struct("BytesPtr") 69 | .field("bytes", &self.bytes()) 70 | .finish() 71 | } 72 | } 73 | 74 | impl Eq for BytesRef {} 75 | 76 | impl PartialEq for BytesRef { 77 | fn eq(&self, other: &Self) -> bool { 78 | self.bytes().eq(other.bytes()) 79 | } 80 | } 81 | 82 | impl Ord for BytesRef { 83 
| fn cmp(&self, other: &Self) -> Ordering {
 84 |         self.bytes().cmp(other.bytes())
 85 |     }
 86 | }
 87 | 
 88 | impl PartialOrd for BytesRef {
 89 |     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
 90 |         Some(self.cmp(other))
 91 |     }
 92 | }
 93 | 
 94 | /// A builder for `BytesRef` instances
 95 | #[derive(Default)]
 96 | pub struct BytesRefBuilder {
 97 |     pub buffer: Vec<u8>,
 98 |     pub offset: usize,
 99 |     pub length: usize,
100 | }
101 | 
102 | impl BytesRefBuilder {
103 |     pub fn new() -> Self {
104 |         Default::default()
105 |     }
106 | 
107 |     pub fn bytes_mut(&mut self) -> &mut [u8] {
108 |         &mut self.buffer
109 |     }
110 | 
111 |     pub fn grow(&mut self, size: usize) {
112 |         self.buffer.resize(size, 0u8);
113 |     }
114 |     // Appends one byte at `offset + length`, growing the buffer as needed.
115 |     pub fn append(&mut self, b: u8) {
116 |         let pos = self.offset + self.length;
117 |         if pos >= self.buffer.len() {
118 |             self.buffer.resize(pos + 1, 0u8);
119 |         }
120 |         self.buffer[pos] = b;
121 |         self.length += 1;
122 |     }
123 |     // Appends a byte slice at `offset + length`, growing the buffer as needed.
124 |     pub fn appends(&mut self, bytes: &[u8]) {
125 |         let start = self.offset + self.length;
126 |         let end = start + bytes.len();
127 |         if end >= self.buffer.len() {
128 |             self.buffer.resize(end, 0u8);
129 |         }
130 |         self.buffer[start..end].copy_from_slice(bytes);
131 |         self.length += bytes.len();
132 |     }
133 |     // BUGFIX: the valid region is `length` bytes starting at `offset`, so the slice must end at `offset + length`; the old `offset..length` mixed an index with a length and returned a truncated view whenever `offset > 0` (identical behavior when offset == 0).
134 |     pub fn get(&self) -> BytesRef {
135 |         BytesRef::new(&self.buffer[self.offset..self.offset + self.length])
136 |     }
137 | 
138 |     pub fn copy_from(&mut self, bytes: &[u8]) {
139 |         if self.buffer.len() < bytes.len() {
140 |             self.buffer.resize(bytes.len(), 0u8);
141 |         }
142 |         self.buffer[0..bytes.len()].copy_from_slice(bytes);
143 |         self.offset = 0;
144 |         self.length = bytes.len();
145 |     }
146 | }
147 | 
--------------------------------------------------------------------------------
/src/core/util/counter.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::sync::atomic::{AtomicI64, Ordering}; 15 | 16 | /// Simple counter trait 17 | pub trait Count { 18 | fn add_get(&mut self, delta: i64) -> i64; 19 | 20 | fn get(&self) -> i64; 21 | } 22 | 23 | struct SerialCounter { 24 | count: i64, 25 | } 26 | 27 | impl Count for SerialCounter { 28 | fn add_get(&mut self, delta: i64) -> i64 { 29 | self.count += delta; 30 | self.count 31 | } 32 | 33 | fn get(&self) -> i64 { 34 | self.count 35 | } 36 | } 37 | 38 | struct AtomicCounter { 39 | count: AtomicI64, 40 | } 41 | 42 | impl Count for AtomicCounter { 43 | fn add_get(&mut self, delta: i64) -> i64 { 44 | self.count.fetch_add(delta, Ordering::Release); 45 | self.get() 46 | } 47 | 48 | fn get(&self) -> i64 { 49 | self.count.load(Ordering::Acquire) 50 | } 51 | } 52 | 53 | enum CounterEnum { 54 | Serial(Box), 55 | Atomic(Box), 56 | Borrowed(*mut dyn Count), 57 | // TODO unsafe use for borrow a exist counter 58 | } 59 | 60 | impl Count for CounterEnum { 61 | fn add_get(&mut self, delta: i64) -> i64 { 62 | match *self { 63 | CounterEnum::Serial(ref mut s) => s.add_get(delta), 64 | CounterEnum::Atomic(ref mut s) => s.add_get(delta), 65 | CounterEnum::Borrowed(b) => unsafe { (*b).add_get(delta) }, 66 | } 67 | } 68 | 69 | fn get(&self) -> i64 { 70 | match *self { 71 | CounterEnum::Serial(ref s) => s.get(), 72 | CounterEnum::Atomic(ref s) => s.get(), 73 | CounterEnum::Borrowed(b) => unsafe { (*b).get() }, 74 | } 75 | } 76 | } 77 | 78 | pub struct Counter { 
79 | count: CounterEnum, 80 | } 81 | 82 | impl Default for Counter { 83 | fn default() -> Self { 84 | Self::new(false) 85 | } 86 | } 87 | 88 | impl Counter { 89 | pub fn new(thread_safe: bool) -> Self { 90 | let count = if thread_safe { 91 | CounterEnum::Atomic(Box::new(AtomicCounter { 92 | count: AtomicI64::new(0), 93 | })) 94 | } else { 95 | CounterEnum::Serial(Box::new(SerialCounter { count: 0 })) 96 | }; 97 | Counter { count } 98 | } 99 | 100 | pub fn borrow(counter: &dyn Count) -> Self { 101 | Counter { 102 | count: CounterEnum::Borrowed(counter as *const dyn Count as *mut dyn Count), 103 | } 104 | } 105 | 106 | fn borrow_raw(counter: *mut dyn Count) -> Self { 107 | Counter { 108 | count: CounterEnum::Borrowed(counter), 109 | } 110 | } 111 | 112 | // TODO this copy while share the inner count of self, 113 | // so it is not safe if self's lifetime is shorter than the copy one 114 | pub unsafe fn shallow_copy(&self) -> Counter { 115 | match self.count { 116 | CounterEnum::Borrowed(b) => Counter::borrow_raw(b), 117 | CounterEnum::Atomic(ref a) => Counter::borrow(a.as_ref() as &dyn Count), 118 | CounterEnum::Serial(ref s) => Counter::borrow(s.as_ref() as &dyn Count), 119 | } 120 | } 121 | 122 | pub fn ptr(&self) -> *const dyn Count { 123 | match self.count { 124 | CounterEnum::Serial(ref s) => s.as_ref(), 125 | CounterEnum::Atomic(ref s) => s.as_ref(), 126 | CounterEnum::Borrowed(b) => b, 127 | } 128 | } 129 | } 130 | 131 | impl Count for Counter { 132 | fn add_get(&mut self, delta: i64) -> i64 { 133 | self.count.add_get(delta) 134 | } 135 | 136 | fn get(&self) -> i64 { 137 | self.count.get() 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/core/util/external/deferred.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | // TODO: copy from package `crossbeam-epoch` from it's not a public module 15 | // we use this to manage callback functions 16 | 17 | use std::mem::MaybeUninit; 18 | use std::{fmt, mem, ptr}; 19 | 20 | /// Number of words a piece of `Data` can hold. 21 | /// 22 | /// Three words should be enough for the majority of cases. For example, you can fit inside it the 23 | /// function pointer together with a fat pointer representing an object that needs to be destroyed. 24 | const DATA_WORDS: usize = 3; 25 | 26 | /// Some space to keep a `FnOnce()` object on the stack. 27 | type Data = [usize; DATA_WORDS]; 28 | 29 | /// A `FnOnce()` that is stored inline if small, or otherwise boxed on the heap. 30 | /// 31 | /// This is a handy way of keeping an unsized `FnOnce()` within a sized structure. 32 | pub struct Deferred { 33 | call: unsafe fn(*mut u8), 34 | data: MaybeUninit, 35 | } 36 | 37 | impl fmt::Debug for Deferred { 38 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 39 | write!(f, "Deferred {{ ... }}") 40 | } 41 | } 42 | 43 | impl Drop for Deferred { 44 | fn drop(&mut self) { 45 | unsafe { 46 | ptr::drop_in_place(self.data.as_mut_ptr()); 47 | } 48 | } 49 | } 50 | 51 | impl Deferred { 52 | /// Constructs a new `Deferred` from a `FnOnce()`. 
53 | #[allow(clippy::cast_ptr_alignment)] 54 | pub fn new(f: F) -> Self { 55 | let size = mem::size_of::(); 56 | let align = mem::align_of::(); 57 | 58 | unsafe { 59 | if size <= mem::size_of::() && align <= mem::align_of::() { 60 | let mut data = MaybeUninit::::uninit(); 61 | ptr::write(data.as_mut_ptr() as *mut F, f); 62 | 63 | unsafe fn call(raw: *mut u8) { 64 | let f: F = ptr::read(raw as *mut F); 65 | f(); 66 | } 67 | 68 | Deferred { 69 | call: call::, 70 | data, 71 | } 72 | } else { 73 | let b: Box = Box::new(f); 74 | let mut data = MaybeUninit::::uninit(); 75 | ptr::write(data.as_mut_ptr() as *mut Box, b); 76 | 77 | unsafe fn call(raw: *mut u8) { 78 | let b: Box = ptr::read(raw as *mut Box); 79 | (*b)(); 80 | } 81 | 82 | Deferred { 83 | call: call::, 84 | data, 85 | } 86 | } 87 | } 88 | } 89 | 90 | /// Calls the function. 91 | #[inline] 92 | pub fn call(mut self) { 93 | let call = self.call; 94 | unsafe { call(self.data.as_mut_ptr() as *mut u8) }; 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/core/util/external/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | // this package is use to place modules copy from external packages for some reason 15 | 16 | mod deferred; 17 | 18 | pub use self::deferred::*; 19 | 20 | mod volatile; 21 | 22 | pub use self::volatile::*; 23 | 24 | mod binary_heap; 25 | 26 | pub use self::binary_heap::*; 27 | 28 | mod thread_pool; 29 | 30 | pub use self::thread_pool::*; 31 | -------------------------------------------------------------------------------- /src/core/util/external/volatile.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | // The MIT License (MIT) 15 | // 16 | // Copyright (c) Philipp Oppermann 17 | // 18 | // Permission is hereby granted, free of charge, to any 19 | // person obtaining a copy of this software and associated 20 | // documentation files (the "Software"), to deal in the 21 | // Software without restriction, including without 22 | // limitation the rights to use, copy, modify, merge, 23 | // publish, distribute, sublicense, and/or sell copies of 24 | // the Software, and to permit persons to whom the Software 25 | // is furnished to do so 26 | 27 | // copy from https://github.com/embed-rs/volatile/blob/master/src/lib.rs 28 | 29 | //! Provides wrapper types `Volatile`, `ReadOnly`, `WriteOnly`, `ReadWrite`, which wrap any 30 | //! copy-able type and allows for volatile memory access to wrapped value. Volatile memory accesses 31 | //! 
are never optimized away by the compiler, and are useful in many low-level systems programming 32 | //! and concurrent contexts. 33 | //! 34 | //! The wrapper types *do not* enforce any atomicity guarantees; to also get atomicity, consider 35 | //! looking at the `Atomic` wrapper type found in `libcore` or `libstd`. 36 | //! 37 | //! These wrappers do not depend on the standard library and never panic. 38 | //! 39 | //! # Dealing with Volatile Pointers 40 | //! 41 | //! Frequently, one may have to deal with volatile pointers, eg, writes to specific memory 42 | //! locations. The canonical way to solve this is to cast the pointer to a volatile wrapper 43 | //! directly, eg: 44 | //! 45 | //! ```rust 46 | //! use rucene::core::util::external::Volatile; 47 | //! 48 | //! let mut_ptr = 0xFEE00000 as *mut u32; 49 | //! 50 | //! let volatile_ptr = mut_ptr as *mut Volatile; 51 | //! ``` 52 | //! 53 | //! and then perform operations on the pointer as usual in a volatile way. This method works as all 54 | //! of the volatile wrapper types are the same size as their contained values. 55 | 56 | use std::ptr; 57 | 58 | /// A wrapper type around a volatile variable, which allows for volatile reads and writes 59 | /// to the contained value. The stored type needs to be `Copy`, as volatile reads and writes 60 | /// take and return copies of the value. 61 | /// 62 | /// The size of this struct is the same as the size of the contained type. 63 | #[derive(Debug)] 64 | #[repr(transparent)] 65 | pub struct Volatile(T); 66 | 67 | impl Volatile { 68 | /// Construct a new volatile instance wrapping the given value. 69 | /// 70 | /// This method never panics. 71 | #[cfg(feature = "const_fn")] 72 | pub const fn new(value: T) -> Volatile { 73 | Volatile(value) 74 | } 75 | 76 | /// Construct a new volatile instance wrapping the given value. 77 | /// 78 | /// This method never panics. 
79 | #[cfg(not(feature = "const_fn"))] 80 | pub fn new(value: T) -> Volatile { 81 | Volatile(value) 82 | } 83 | 84 | /// Performs a volatile read of the contained value, returning a copy 85 | /// of the read value. Volatile reads are guaranteed not to be optimized 86 | /// away by the compiler, but by themselves do not have atomic ordering 87 | /// guarantees. To also get atomicity, consider looking at the `Atomic` wrapper type. 88 | /// 89 | /// This method never panics. 90 | pub fn read(&self) -> T { 91 | // UNSAFE: Safe, as we know that our internal value exists. 92 | unsafe { ptr::read_volatile(&self.0) } 93 | } 94 | 95 | /// Performs a volatile write, setting the contained value to the given value `value`. Volatile 96 | /// writes are guaranteed to not be optimized away by the compiler, but by themselves do not 97 | /// have atomic ordering guarantees. To also get atomicity, consider looking at the `Atomic` 98 | /// wrapper type. 99 | /// 100 | /// This method never panics. 101 | /// 102 | /// TODO, we force convert immutable reference to mutable pointer, because 103 | /// we needn't guarantee the race condition if multi-write at the same time, 104 | /// else we need to use Atomic instead 105 | pub fn write(&self, value: T) { 106 | // UNSAFE: Safe, as we know that our internal value exists. 107 | unsafe { ptr::write_volatile(&self.0 as *const T as *mut T, value) }; 108 | } 109 | 110 | /// Performs a volatile read of the contained value, passes a mutable reference to it to the 111 | /// function `f`, and then performs a volatile write of the (potentially updated) value back to 112 | /// the contained value. 113 | /// 114 | /// Ths method never panics. 
115 | pub fn update(&self, f: F) 116 | where 117 | F: FnOnce(&mut T), 118 | { 119 | let mut value = self.read(); 120 | f(&mut value); 121 | self.write(value); 122 | } 123 | } 124 | 125 | impl Clone for Volatile { 126 | fn clone(&self) -> Self { 127 | Volatile(self.read()) 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/core/util/math.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use error::ErrorKind::IllegalState; 15 | use error::Result; 16 | 17 | use core::util::bit_util::UnsignedShift; 18 | 19 | use std::mem; 20 | 21 | pub fn log(mut x: i64, base: i32) -> i32 { 22 | debug_assert!(base > 1); 23 | 24 | let base = i64::from(base); 25 | let mut ret = 0; 26 | while x >= base { 27 | x /= base; 28 | ret += 1; 29 | } 30 | 31 | ret 32 | } 33 | 34 | pub fn long_to_int_exact(val: i64) -> Result { 35 | let ans = val as i32; 36 | if i64::from(ans) != val { 37 | bail!(IllegalState("integer overflow".to_owned())); 38 | } 39 | Ok(ans) 40 | } 41 | 42 | // see http://en.wikipedia.org/wiki/Binary_GCD_algorithm#Iterative_version_in_C.2B.2B_using_ctz_.28count_trailing_zeros.29 43 | pub fn gcd(a: i64, b: i64) -> i64 { 44 | debug_assert_ne!(a, i64::min_value()); 45 | debug_assert_ne!(b, i64::min_value()); 46 | let mut a = a.abs(); 47 | let mut b = b.abs(); 48 | 49 | if a == 0 { 50 | return b; 51 | } else if b == 0 { 52 | return a; 53 | } 54 | 55 | let common_trailing_zeros = (a | b).trailing_zeros(); 56 | a = a.unsigned_shift(a.trailing_zeros() as usize); 57 | 58 | loop { 59 | b = b.unsigned_shift(b.trailing_zeros() as usize); 60 | if a == b { 61 | break; 62 | } else if a > b || a == i64::min_value() { 63 | // MIN_VALUE is treated as 2^64 64 | mem::swap(&mut a, &mut b); 65 | } 66 | 67 | if a == 1 { 68 | break; 69 | } 70 | 71 | b -= a; 72 | } 73 | 74 | a << common_trailing_zeros 75 | } 76 | -------------------------------------------------------------------------------- /src/core/util/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | pub type DocId = i32; 15 | 16 | pub mod bkd; 17 | pub mod external; 18 | pub mod fst; 19 | pub mod packed; 20 | 21 | mod numeric; 22 | 23 | pub use self::numeric::*; 24 | 25 | mod variant_value; 26 | 27 | pub use self::variant_value::*; 28 | 29 | mod bits; 30 | 31 | pub use self::bits::*; 32 | 33 | mod version; 34 | 35 | pub use self::version::*; 36 | 37 | mod paged_bytes; 38 | 39 | pub use self::paged_bytes::*; 40 | 41 | mod doc_id_set_builder; 42 | 43 | pub use self::doc_id_set_builder::*; 44 | 45 | mod context; 46 | 47 | pub use self::context::*; 48 | 49 | mod counter; 50 | 51 | pub use self::counter::*; 52 | 53 | mod bytes_ref; 54 | 55 | pub use self::bytes_ref::*; 56 | 57 | mod bit_set; 58 | 59 | pub use self::bit_set::*; 60 | 61 | mod bit_util; 62 | 63 | pub use self::bit_util::*; 64 | 65 | mod byte_block_pool; 66 | 67 | pub use self::byte_block_pool::*; 68 | 69 | mod byte_slice_reader; 70 | 71 | pub use self::byte_slice_reader::*; 72 | 73 | mod bytes_ref_hash; 74 | 75 | pub use self::bytes_ref_hash::*; 76 | 77 | mod doc_id_set; 78 | 79 | pub use self::doc_id_set::*; 80 | 81 | mod int_block_pool; 82 | 83 | pub use self::int_block_pool::*; 84 | 85 | mod ints_ref; 86 | 87 | pub use self::ints_ref::*; 88 | 89 | mod math; 90 | 91 | pub use self::math::*; 92 | 93 | mod selector; 94 | 95 | pub use self::selector::*; 96 | 97 | mod small_float; 98 | 99 | pub use self::small_float::*; 100 | 101 | mod sorter; 102 | 103 | pub use self::sorter::*; 104 | 105 | mod string_util; 106 | 107 | pub use self::string_util::*; 108 | 109 | mod compression; 110 | 111 | pub use self::compression::*; 112 | 
113 | mod disi; 114 | 115 | pub use self::disi::*; 116 | 117 | use std::ops::Deref; 118 | 119 | use core::codec::doc_values::NumericDocValues; 120 | 121 | use error::Result; 122 | 123 | // a iterator that can be used over and over by call reset 124 | pub trait ReusableIterator: Iterator { 125 | fn reset(&mut self); 126 | } 127 | 128 | pub fn fill_slice(array: &mut [T], value: T) { 129 | for i in array { 130 | *i = value; 131 | } 132 | } 133 | 134 | pub fn over_size(size: usize) -> usize { 135 | let mut size = size; 136 | let mut extra = size >> 3; 137 | if extra < 3 { 138 | // for very small arrays, where constant overhead of 139 | // realloc is presumably relatively high, we grow 140 | // faster 141 | extra = 3; 142 | } 143 | size += extra; 144 | size 145 | } 146 | 147 | pub const BM25_SIMILARITY_IDF: &str = "idf"; 148 | 149 | pub struct DerefWrapper(pub T); 150 | 151 | impl Deref for DerefWrapper { 152 | type Target = T; 153 | 154 | #[inline] 155 | fn deref(&self) -> &Self::Target { 156 | &self.0 157 | } 158 | } 159 | 160 | /// Abstraction over an array of longs. 161 | /// 162 | /// This class extends `NumericDocValues` so that we don't need to add another 163 | /// level of abstraction every time we want eg. to use the `PackedInts` 164 | /// utility classes to represent a `NumericDocValues` instance. 
165 | pub trait LongValues: NumericDocValues { 166 | fn get64(&self, index: i64) -> Result; 167 | 168 | fn get64_mut(&mut self, index: i64) -> Result { 169 | self.get64(index) 170 | } 171 | } 172 | 173 | pub trait CloneableLongValues: LongValues { 174 | fn cloned(&self) -> Box; 175 | 176 | fn cloned_lv(&self) -> Box; 177 | } 178 | 179 | impl CloneableLongValues for T { 180 | fn cloned(&self) -> Box { 181 | Box::new(self.clone()) 182 | } 183 | 184 | fn cloned_lv(&self) -> Box { 185 | Box::new(self.clone()) 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/core/util/packed/direct_monotonic_reader.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::codec::doc_values::NumericDocValues; 15 | use core::store::io::{IndexInput, RandomAccessInput}; 16 | use core::util::{packed::DirectReader, DocId, LongValues}; 17 | use error::Result; 18 | 19 | use core::util::packed::direct_reader::DirectPackedReader; 20 | use std::sync::Arc; 21 | 22 | pub struct DirectMonotonicMeta { 23 | #[allow(dead_code)] 24 | num_values: i64, 25 | block_shift: i32, 26 | num_blocks: usize, 27 | mins: Arc>, 28 | avgs: Arc>, 29 | bpvs: Arc>, 30 | offsets: Arc>, 31 | } 32 | 33 | pub struct DirectMonotonicReader; 34 | 35 | impl DirectMonotonicReader { 36 | pub fn load_meta( 37 | meta_in: &mut dyn IndexInput, 38 | num_values: i64, 39 | block_shift: i32, 40 | ) -> Result { 41 | let mut num_blocks = num_values >> block_shift; 42 | if (num_blocks << block_shift) < num_values { 43 | num_blocks += 1; 44 | } 45 | let num_blocks = num_blocks as usize; 46 | 47 | let mut mins = vec![0i64; num_blocks]; 48 | let mut avgs = vec![0f32; num_blocks]; 49 | let mut bpvs = vec![0u8; num_blocks]; 50 | let mut offsets = vec![0i64; num_blocks]; 51 | 52 | for i in 0..num_blocks { 53 | mins[i] = meta_in.read_long()?; 54 | avgs[i] = f32::from_bits(meta_in.read_int()? as u32); 55 | offsets[i] = meta_in.read_long()?; 56 | bpvs[i] = meta_in.read_byte()?; 57 | } 58 | Ok(DirectMonotonicMeta { 59 | num_values, 60 | block_shift, 61 | num_blocks, 62 | mins: Arc::new(mins), 63 | avgs: Arc::new(avgs), 64 | bpvs: Arc::new(bpvs), 65 | offsets: Arc::new(offsets), 66 | }) 67 | } 68 | 69 | pub fn get_instance( 70 | meta: &DirectMonotonicMeta, 71 | data: &Arc, 72 | ) -> Result { 73 | let mut readers = Vec::with_capacity(meta.num_blocks); 74 | for i in 0..meta.num_blocks { 75 | let reader = if meta.bpvs[i] == 0 { 76 | None 77 | } else { 78 | Some(DirectReader::get_instance( 79 | Arc::clone(data), 80 | i32::from(meta.bpvs[i]), 81 | meta.offsets[i], 82 | )?) 
83 | }; 84 | readers.push(reader); 85 | } 86 | 87 | Ok(MixinMonotonicLongValues { 88 | readers: Arc::from(Box::from(readers)), 89 | block_shift: meta.block_shift, 90 | mins: Arc::clone(&meta.mins), 91 | avgs: Arc::clone(&meta.avgs), 92 | }) 93 | } 94 | } 95 | 96 | #[derive(Clone)] 97 | pub struct MixinMonotonicLongValues { 98 | readers: Arc<[Option]>, 99 | block_shift: i32, 100 | mins: Arc>, 101 | avgs: Arc>, 102 | } 103 | 104 | impl LongValues for MixinMonotonicLongValues { 105 | fn get64(&self, index: i64) -> Result { 106 | // we know all readers don't require context 107 | let block = ((index as u64) >> self.block_shift) as usize; 108 | let block_index: i64 = index & ((1 << self.block_shift) - 1); 109 | let delta = if let Some(ref reader) = self.readers[block] { 110 | reader.get64(block_index)? 111 | } else { 112 | 0 113 | }; 114 | Ok(self.mins[block] + (self.avgs[block] * block_index as f32) as i64 + delta) 115 | } 116 | } 117 | 118 | impl NumericDocValues for MixinMonotonicLongValues { 119 | fn get(&self, doc_id: DocId) -> Result { 120 | self.get64(i64::from(doc_id)) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/core/util/packed/direct_monotonic_writer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
use core::store::io::IndexOutput;
use core::util::packed::DirectWriter;

use error::{
    ErrorKind::{IllegalArgument, IllegalState},
    Result,
};

/// Smallest allowed block shift (blocks of at least `1 << 3` values).
pub const MIN_BLOCK_SHIFT: i32 = 3;
/// Largest allowed block shift (blocks of at most `1 << 30` values).
pub const MAX_BLOCK_SHIFT: i32 = 30;

/// Writes a monotonically non-decreasing sequence of `i64` values block by
/// block: for each block it records the minimum, an average increment and the
/// bit-packed non-negative residuals, so a reader can reconstruct
/// `min + avg * i + delta[i]`. Per-block headers go to `meta`, the packed
/// payload to `data`.
pub struct DirectMonotonicWriter<'a, O: IndexOutput> {
    // Per-block metadata output (min, avg bits, payload offset, bits-per-value).
    meta: &'a mut O,
    // Packed-delta payload output.
    data: &'a mut O,
    // Number of values the caller promised to add; enforced in finish().
    num_values: usize,
    // data.file_pointer() at construction; payload offsets written to `meta`
    // are relative to this.
    base_data_pointer: i64,
    // One block's worth of pending values. NOTE(review): the element type
    // parameter was lost in extraction — `vec![0i64; ...]` in new() implies
    // `Vec<i64>`; confirm against upstream.
    buffer: Vec,
    // Number of valid entries currently held in `buffer`.
    buffer_size: usize,
    // Total number of values added so far.
    count: usize,
    // Set by finish(); guards against a second finish().
    finished: bool,
    // Last value added, for the monotonicity check in add().
    previous: i64,
}

impl<'a, O: IndexOutput> DirectMonotonicWriter<'a, O> {
    /// Creates a writer for `num_values` values using blocks of
    /// `1 << block_shift` entries.
    ///
    /// NOTE(review): the Ok type parameter of the return type was lost in
    /// extraction (presumably `Result<DirectMonotonicWriter<'a, O>>`).
    pub fn new(
        meta: &'a mut O,
        data: &'a mut O,
        num_values: i64,
        block_shift: i32,
    ) -> Result> {
        if block_shift < MIN_BLOCK_SHIFT || block_shift > MAX_BLOCK_SHIFT {
            bail!(IllegalArgument(format!(
                "block_shift must be in [3-30], got {}",
                block_shift
            )));
        }

        // Remember where this writer starts within `data` so block payload
        // offsets can be stored relative to it.
        let base_data_pointer = data.file_pointer();

        Ok(DirectMonotonicWriter {
            meta,
            data,
            num_values: num_values as usize,
            base_data_pointer,
            buffer: vec![0i64; (1 << block_shift) as usize],
            buffer_size: 0,
            count: 0,
            finished: false,
            // Sentinel below any real value so the first add() always passes
            // the monotonicity check.
            previous: i64::min_value(),
        })
    }

    /// Appends `v`, which must be >= the previously added value.
    pub fn add(&mut self, v: i64) -> Result<()> {
        if v < self.previous {
            bail!(IllegalArgument(format!(
                "Values do not come in order: {}, {}",
                self.previous, v
            )));
        }

        // Encode a completed block before buffering the next value.
        if self.buffer_size == self.buffer.len() {
            self.flush()?;
        }

        self.buffer[self.buffer_size] = v;
        self.buffer_size += 1;
        self.previous = v;
        self.count += 1;
        Ok(())
    }

    /// Flushes the trailing partial block and seals the writer. Fails if the
    /// number of values added differs from the `num_values` promised at
    /// construction, or if called twice.
    pub fn finish(&mut self) -> Result<()> {
        if self.count != self.num_values {
            bail!(IllegalState(format!(
                "Wrong number of values added, expected: {}, got: {}",
                self.num_values, self.count
            )));
        }

        if self.finished {
            bail!(IllegalState("#finish has been called already".into()));
        }

        if self.buffer_size > 0 {
            self.flush()?;
        }

        self.finished = true;

        Ok(())
    }

    /// Convenience constructor mirroring Lucene's `getInstance`; delegates to
    /// new(). NOTE(review): return type parameter lost in extraction, as in
    /// new().
    pub fn get_instance(
        meta: &'a mut O,
        data: &'a mut O,
        num_values: i64,
        block_shift: i32,
    ) -> Result> {
        DirectMonotonicWriter::new(meta, data, num_values, block_shift)
    }

    /// Encodes the buffered block: subtracts the expected value
    /// `avg_inc * i` from each entry, then the block minimum, and bit-packs
    /// the remaining non-negative deltas.
    fn flush(&mut self) -> Result<()> {
        debug_assert!(self.buffer_size != 0);

        // Average increment across the block; max(1) guards a single-value
        // block against division by zero (yielding avg_inc == 0).
        let avg_inc = ((self.buffer[self.buffer_size - 1] - self.buffer[0]) as f64
            / (self.buffer_size - 1).max(1) as f64) as f32;
        for i in 0..self.buffer_size {
            let expected = (avg_inc * i as f32) as i64;
            self.buffer[i] -= expected;
        }

        let mut min: i64 = self.buffer[0];
        for i in 1..self.buffer_size {
            min = min.min(self.buffer[i]);
        }

        let mut max_delta = 0;
        for i in 0..self.buffer_size {
            self.buffer[i] -= min;
            // use | will change nothing when it comes to computing required bits
            // but has the benefit of working fine with negative values too
            // (in case of overflow)
            max_delta |= self.buffer[i];
        }

        // Block header: min, raw bits of the f32 average, and the payload
        // offset relative to base_data_pointer.
        self.meta.write_long(min)?;
        self.meta.write_int(avg_inc.to_bits() as i32)?;
        self.meta
            .write_long(self.data.file_pointer() - self.base_data_pointer)?;

        if max_delta == 0 {
            // Every delta is zero: record 0 bits-per-value, skip the payload.
            self.meta.write_byte(0u8)?;
        } else {
            // NOTE(review): the turbofish type argument was lost in extraction
            // (presumably `DirectWriter::<O>::unsigned_bits_required`).
            let bits_required = DirectWriter::::unsigned_bits_required(max_delta);
            let mut writer =
                DirectWriter::get_instance(self.data, self.buffer_size as i64, bits_required)?;
            for i in 0..self.buffer_size {
                writer.add(self.buffer[i])?;
            }
            writer.finish()?;
            self.meta.write_byte(bits_required as u8)?;
        }
        self.buffer_size = 0;

        Ok(())
    }
}
--------------------------------------------------------------------------------
/src/core/util/packed/mod.rs:
-------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod direct_monotonic_reader; 15 | 16 | pub use self::direct_monotonic_reader::*; 17 | 18 | mod direct_monotonic_writer; 19 | 20 | pub use self::direct_monotonic_writer::*; 21 | 22 | mod direct_reader; 23 | 24 | pub use self::direct_reader::*; 25 | 26 | mod direct_writer; 27 | 28 | pub use self::direct_writer::*; 29 | 30 | mod monotonic_block_packed_reader; 31 | 32 | pub use self::monotonic_block_packed_reader::*; 33 | 34 | mod monotonic_block_packed_writer; 35 | 36 | pub use self::monotonic_block_packed_writer::*; 37 | 38 | mod packed_misc; 39 | 40 | pub use self::packed_misc::*; 41 | 42 | mod packed_ints_null_reader; 43 | 44 | pub use self::packed_ints_null_reader::*; 45 | 46 | mod paged_mutable; 47 | 48 | pub use self::paged_mutable::*; 49 | 50 | mod packed_long_values; 51 | 52 | pub use self::packed_long_values::*; 53 | 54 | mod block_packed_writer; 55 | 56 | pub use self::block_packed_writer::*; 57 | 58 | mod elias_fano_encoder; 59 | 60 | pub use self::elias_fano_encoder::*; 61 | 62 | mod elias_fano_decoder; 63 | 64 | pub use self::elias_fano_decoder::*; 65 | 66 | mod packed_simd; 67 | 68 | pub use self::packed_simd::*; 69 | -------------------------------------------------------------------------------- /src/core/util/packed/monotonic_block_packed_reader.rs: 
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

use core::codec::doc_values::NumericDocValues;
use core::store::io::IndexInput;
use core::util::packed::PackedIntsNullReader;
use core::util::packed::{self, Reader, ReaderEnum};
use core::util::{DocId, LongValues};
use error::ErrorKind::{CorruptIndex, IllegalArgument};
use error::Result;

use std::sync::Arc;

/// Provides random access to a stream written with MonotonicBlockPackedWriter
#[derive(Clone)]
pub struct MonotonicBlockPackedReader {
    // Shared immutable state, so Clone is a refcount bump.
    // NOTE(review): the Arc type parameter was lost in extraction (presumably
    // `Arc<MonotonicBlockPackedReaderInner>`) — confirm against upstream.
    inner: Arc,
}

struct MonotonicBlockPackedReaderInner {
    // log2(block_size); `index >> block_shift` selects the block.
    block_shift: usize,
    // block_size - 1; `index & block_mask` is the offset inside the block.
    block_mask: usize,
    // Total number of stored values.
    value_count: usize,
    // Per-block header data. NOTE(review): the Vec element type parameters
    // were lost in extraction — the initialisers in new() imply
    // `Vec<i64>`, `Vec<f32>` and a Vec of reader enums respectively; confirm.
    min_values: Vec,
    averages: Vec,
    sub_readers: Vec,
    #[allow(dead_code)]
    sum_bpv: i64,
}

impl MonotonicBlockPackedReader {
    /// Expected (approximate) value at `index`: origin + average * index.
    pub fn expected(origin: i64, average: f32, index: i32) -> i64 {
        origin + (average * index as f32) as i64
    }

    /// Reads the per-block headers (zlong min, f32 average bits, vint
    /// bits-per-value) and packed payloads from `input`.
    /// NOTE(review): the Ok type parameter of the return type was lost in
    /// extraction (presumably `Result<Self>`).
    pub fn new(
        input: &mut dyn IndexInput,
        packed_ints_version: i32,
        block_size: usize,
        value_count: usize,
        direct: bool,
    ) -> Result {
        let block_shift =
            packed::check_block_size(block_size, packed::MIN_BLOCK_SIZE, packed::MAX_BLOCK_SIZE);
        let block_mask = block_size - 1;
        let num_blocks = packed::num_blocks(value_count, block_size);
        let mut min_values = vec![0_i64; num_blocks];
        let mut averages = vec![0.0_f32; num_blocks];
        let mut sub_readers = Vec::new();
        let mut sum_bpv: i64 = 0;

        for i in 0..num_blocks {
            min_values[i] = input.read_zlong()?;
            averages[i] = f32::from_bits(input.read_int()? as u32);
            let bits_per_value = input.read_vint()?;
            sum_bpv += i64::from(bits_per_value);
            if bits_per_value > 64 {
                bail!(CorruptIndex("bits_per_value > 64".to_owned()));
            }
            if bits_per_value == 0 {
                // 0 bits per value: no payload follows; every delta is zero.
                sub_readers.push(ReaderEnum::PackedIntsNull(PackedIntsNullReader::new(
                    block_size,
                )));
            } else {
                // The final block may hold fewer than block_size values.
                let left = value_count - i * block_size;
                let size = ::std::cmp::min(left, block_size);
                if direct {
                    unimplemented!();
                } else {
                    let one_reader = packed::get_reader_no_header(
                        input,
                        packed::Format::Packed,
                        packed_ints_version,
                        size,
                        bits_per_value,
                    )?;
                    sub_readers.push(one_reader);
                }
            }
        }

        let inner = MonotonicBlockPackedReaderInner {
            block_shift,
            block_mask,
            value_count,
            min_values,
            averages,
            sub_readers,
            sum_bpv,
        };

        Ok(Self {
            inner: Arc::new(inner),
        })
    }

    /// Returns the number of values
    pub fn size(&self) -> usize {
        self.inner.value_count
    }
}

impl LongValues for MonotonicBlockPackedReader {
    // NOTE(review): the return type parameter was lost in extraction
    // (presumably `Result<i64>`).
    fn get64(&self, index: i64) -> Result {
        if !(index >= 0 && index < self.inner.value_count as i64) {
            bail!(IllegalArgument(format!("index {} out of range", index)))
        }
        let block = (index >> self.inner.block_shift) as usize;
        let idx = (index & (self.inner.block_mask as i64)) as i32;
        // Reconstruct: expected(min, avg, idx) + stored delta for that slot.
        let val = Self::expected(
            self.inner.min_values[block],
            self.inner.averages[block],
            idx,
        ) + self.inner.sub_readers[block].get(idx as usize);
        Ok(val)
    }
}

impl
NumericDocValues for MonotonicBlockPackedReader {
    // Doc-id access widens to i64 and delegates to get64().
    fn get(&self, doc_id: DocId) -> Result {
        self.get64(i64::from(doc_id))
    }
}
--------------------------------------------------------------------------------
/src/core/util/packed/monotonic_block_packed_writer.rs:
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

use core::store::io::DataOutput;
use core::util::bit_util::BitsRequired;
use core::util::packed::MonotonicBlockPackedReader;
use core::util::packed::{AbstractBlockPackedWriter, BaseBlockPackedWriter};
use error::Result;

/// Block-wise writer for monotonically non-decreasing `i64` sequences.
/// Each block is stored as (zlong min, f32 average bits, vint bits-per-value,
/// packed non-negative deltas) so MonotonicBlockPackedReader can reconstruct
/// `expected(min, avg, i) + delta[i]`.
pub struct MonotonicBlockPackedWriter {
    // Shared buffering/packing machinery; this type only adds the monotonic
    // per-block transform in flush().
    base_writer: BaseBlockPackedWriter,
}

impl MonotonicBlockPackedWriter {
    pub fn new(block_size: usize) -> MonotonicBlockPackedWriter {
        MonotonicBlockPackedWriter {
            base_writer: BaseBlockPackedWriter::new(block_size),
        }
    }
}

impl AbstractBlockPackedWriter for MonotonicBlockPackedWriter {
    /// Appends `l` (must be non-negative), flushing the current block to
    /// `out` first when it is full.
    fn add(&mut self, l: i64, out: &mut impl DataOutput) -> Result<()> {
        debug_assert!(l >= 0);
        self.base_writer.check_not_finished()?;
        if self.base_writer.off == self.base_writer.values.len() {
            self.flush(out)?;
        }
        self.base_writer.values[self.base_writer.off] = l;
        self.base_writer.off += 1;
        self.base_writer.ord += 1;
        Ok(())
    }

    /// Flushes the trailing partial block and marks the writer finished.
    fn finish(&mut self, out: &mut impl DataOutput) -> Result<()> {
        self.base_writer.check_not_finished()?;
        if self.base_writer.off > 0 {
            self.flush(out)?;
        }
        self.base_writer.finished = true;
        Ok(())
    }

    /// Encodes the buffered block to `out`.
    fn flush(&mut self, out: &mut impl DataOutput) -> Result<()> {
        debug_assert!(self.base_writer.off > 0);
        // Average increment across the block (0 for a single-value block).
        let avg = if self.base_writer.off == 1 {
            0f32
        } else {
            (self.base_writer.values[self.base_writer.off - 1] - self.base_writer.values[0]) as f32
                / (self.base_writer.off - 1) as f32
        };
        let mut min = self.base_writer.values[0];
        // adjust min so that all deltas will be positive
        for i in 1..self.base_writer.off {
            let actual = self.base_writer.values[i];
            let expected = MonotonicBlockPackedReader::expected(min, avg, i as i32);
            if expected > actual {
                min -= expected - actual;
            }
        }

        // Rewrite values in place as non-negative deltas from the expected
        // line, tracking the largest delta to size the packed encoding.
        let mut max_delta = 0i64;
        for i in 0..self.base_writer.off {
            self.base_writer.values[i] -= MonotonicBlockPackedReader::expected(min, avg, i as i32);
            max_delta = max_delta.max(self.base_writer.values[i]);
        }

        out.write_zlong(min)?;
        out.write_int(avg.to_bits() as i32)?;
        if max_delta == 0 {
            // All deltas zero: record 0 bits per value and write no payload.
            out.write_vint(0)?;
        } else {
            let bits_required = max_delta.bits_required() as i32;
            out.write_vint(bits_required)?;
            self.base_writer.write_values(bits_required, out)?;
        }

        self.base_writer.off = 0;
        Ok(())
    }

    fn reset(&mut self) {
        self.base_writer.reset();
    }
}
--------------------------------------------------------------------------------
/src/core/util/packed/packed_ints_null_reader.rs:
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::util::packed::Reader; 15 | 16 | pub struct PackedIntsNullReader { 17 | value_count: usize, 18 | } 19 | 20 | impl PackedIntsNullReader { 21 | pub fn new(value_count: usize) -> PackedIntsNullReader { 22 | PackedIntsNullReader { value_count } 23 | } 24 | } 25 | 26 | impl Reader for PackedIntsNullReader { 27 | fn get(&self, _doc_id: usize) -> i64 { 28 | 0 29 | } 30 | 31 | // FIXME: usize-> docId 32 | fn bulk_get(&self, index: usize, output: &mut [i64], len: usize) -> usize { 33 | assert!(index < self.value_count); 34 | let len = ::std::cmp::min(len, self.value_count - index); 35 | unsafe { 36 | let slice = output.as_mut_ptr(); 37 | ::std::ptr::write_bytes(slice, 0, len); 38 | } 39 | len 40 | } 41 | fn size(&self) -> usize { 42 | self.value_count 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/core/util/small_float.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

/// Lossy compression of an `f32` into one byte, mirroring Lucene's
/// SmallFloat "315" encoding (the shifts below carve out 3 mantissa bits and
/// apply a zero-point of 15 on the exponent).
pub struct SmallFloat;
impl SmallFloat {
    /// Compresses `f` to one byte: non-positive inputs map to 0, tiny
    /// positive values to 1, and out-of-range values saturate at 255.
    pub fn float_to_byte315(f: f32) -> u8 {
        let bits = f.to_bits() as i32;
        // Arithmetic shift keeps sign + exponent + top 3 mantissa bits,
        // matching Java's `bits >> (24 - 3)` on floatToRawIntBits.
        let small_float = (bits >> (24 - 3)) as i32;
        if small_float <= ((63 - 15) << 3) as i32 {
            // Underflow range: 0 for anything <= 0, else the smallest code 1.
            return if bits <= 0 { 0u8 } else { 1u8 };
        }
        if small_float >= ((63 - 15) << 3) as i32 + 0x100 {
            // Overflow range: saturate at the largest encodable byte.
            return 255u8;
        }
        (small_float - ((63 - 15) << 3) as i32) as u8
    }

    /// Inverse of float_to_byte315 (lossy: only encodable points round-trip).
    pub fn byte315_to_float(b: u8) -> f32 {
        if b == 0 {
            0f32
        } else {
            // Re-expand the compressed exponent/mantissa into f32 bit layout.
            let mut bits = u32::from(b) << (24 - 3);
            bits += (63 - 15) << 24;
            f32::from_bits(bits)
        }
    }
}

#[cfg(test)]
pub mod tests {
    extern crate rand;

    use super::*;

    // Field-by-field reference decoder used to cross-check byte315_to_float.
    fn origin_byte_to_float(b: u8) -> f32 {
        if b == 0 {
            return 0f32;
        }
        let mantissa = b & 7;
        let exponent = (b >> 3) & 31;
        let bits = ((u32::from(exponent) + (63 - 15)) << 24) | ((u32::from(mantissa)) << 21) as u32;
        f32::from_bits(bits as u32)
    }

    // Field-by-field reference encoder used to cross-check float_to_byte315.
    fn origin_float_to_byte(f: f32) -> u8 {
        if f < 0.0f32 {
            return 0u8;
        }

        let bits = f.to_bits() as i32;
        let mut mantissa = (bits & 0xff_ffff) >> 21 as i32;
        let mut exponent = (((bits >> 24) & 0x7f) - 63) + 15;

        if exponent > 31 {
            // Overflow: clamp to the largest representable exponent/mantissa.
            exponent = 31;
            mantissa = 7;
        }

        if exponent < 0 || (exponent == 0 && mantissa == 0) {
            // Underflow: smallest non-zero code.
            exponent = 0;
            mantissa = 1;
        }
        ((exponent << 3) | mantissa) as u8
    }

    #[test]
    fn test_float_to_byte315() {
        let min_value = 1.4e-45f32;
        let positive_infinity = 1.0f32 / 0.0f32;
        let negative_infinity = -1.0f32 / 0.0f32;
        let max_value = 3.402_823_5e+38f32;

        assert_eq!(1, origin_float_to_byte(5.812_381_7E-10f32));
        assert_eq!(1, SmallFloat::float_to_byte315(5.812_381_7E-10f32));

        assert_eq!(0, SmallFloat::float_to_byte315(0f32));
        assert_eq!(1, SmallFloat::float_to_byte315(min_value));
        assert_eq!(255, SmallFloat::float_to_byte315(max_value));
        assert_eq!(255, SmallFloat::float_to_byte315(positive_infinity));

        assert_eq!(0, SmallFloat::float_to_byte315(-min_value));
        assert_eq!(0, SmallFloat::float_to_byte315(-max_value));
        assert_eq!(0, SmallFloat::float_to_byte315(negative_infinity));

        // Fuzz against the reference encoder over random bit patterns.
        let num = 100_000;
        for _ in 0..num {
            // NOTE(review): the turbofish type argument was lost in extraction
            // (presumably `rand::random::<u32>()`, matching the annotation).
            let m: u32 = rand::random::();
            let f = f32::from_bits(m);
            if f.is_nan() {
                continue;
            }
            let b1 = origin_float_to_byte(f);
            let b2 = SmallFloat::float_to_byte315(f);
            assert_eq!(b1, b2);
        }
    }

    #[test]
    fn test_byte315_to_float() {
        for i in 0..256 {
            let f1 = origin_byte_to_float(i as u8);
            let f2 = SmallFloat::byte315_to_float(i as u8);
            assert!((f1 - f2) < ::std::f32::EPSILON);
        }
    }
}
--------------------------------------------------------------------------------
/src/core/util/string_util.rs:
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use rand::{thread_rng, Rng}; 15 | 16 | /// length in bytes of an ID 17 | pub const ID_LENGTH: usize = 16; 18 | 19 | /// Generates a non-cryptographic globally unique id. 20 | pub fn random_id() -> [u8; ID_LENGTH] { 21 | let mut id = [0u8; ID_LENGTH]; 22 | thread_rng().fill(&mut id); 23 | id 24 | } 25 | 26 | pub fn id2str(id: &[u8]) -> String { 27 | let strs: Vec = id.iter().map(|b| format!("{:02X}", b)).collect(); 28 | strs.join("") 29 | } 30 | 31 | pub fn bytes_subtract(bytes_per_dim: usize, dim: usize, a: &[u8], b: &[u8], result: &mut [u8]) { 32 | let start = dim * bytes_per_dim; 33 | let end = start + bytes_per_dim; 34 | let mut borrow = 0; 35 | let mut i = end - 1; 36 | while i >= start { 37 | let mut diff: i32 = (a[i] as u32 as i32) - (b[i] as u32 as i32) - borrow; 38 | if diff < 0 { 39 | diff += 256; 40 | borrow = 1; 41 | } else { 42 | borrow = 0; 43 | } 44 | 45 | result[i - start] = diff as u8; 46 | i -= 1; 47 | } 48 | 49 | if borrow != 0 { 50 | panic!("a i32 { 57 | let len = left.len().min(right.len()); 58 | for i in 0..len { 59 | if left[i] != right[i] { 60 | return i as i32; 61 | } 62 | } 63 | 64 | len as i32 65 | } 66 | 67 | /// Returns the length of {@code currentTerm} needed for use as a sort key. 68 | /// so that {@link BytesRef#compareTo(BytesRef)} still returns the same result. 69 | /// This method assumes currentTerm comes after priorTerm. 
70 | pub fn sort_key_length(prior_term: &[u8], current_term: &[u8]) -> usize { 71 | let current_term_offset = 0usize; 72 | let prior_term_offset = 0usize; 73 | let limit = prior_term.len().min(current_term.len()); 74 | 75 | for i in 0..limit { 76 | if prior_term[prior_term_offset + i] != current_term[current_term_offset + i] { 77 | return i + 1; 78 | } 79 | } 80 | 81 | current_term.len().min(1 + prior_term.len()) 82 | } 83 | 84 | #[cfg(test)] 85 | mod tests { 86 | use super::*; 87 | 88 | #[test] 89 | fn test_id2str() { 90 | let v = vec![65u8, 97u8, 4u8, 127u8]; 91 | let strv = id2str(&v[..]); 92 | assert_eq!("4161047F", strv); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | extern crate error_chain; 15 | extern crate serde_json; 16 | 17 | use core::index; 18 | use core::search; 19 | use core::search::collector; 20 | 21 | use std::borrow::Cow; 22 | use std::sync::PoisonError; 23 | 24 | error_chain! 
{
    types {
        Error, ErrorKind, ResultExt, Result;
    }
    errors {
        Poisoned {
            description("a thread holding the locked panicked and poisoned the lock")
        }

        IllegalState(desc: String) {
            description(desc)
            display("Illegal state: {}", desc)
        }

        IllegalArgument(desc: String) {
            description(desc)
            display("Illegal argument: {}", desc)
        }

        UnexpectedEOF(errmsg: String) {
            description(errmsg)
            display("Unexpected EOF: {}", errmsg)
        }

        CorruptIndex(errmsg: String) {
            description(errmsg)
            display("Corrupt Index: {}", errmsg)
        }

        // NOTE(review): the trailing comma after description(errmsg) is kept
        // byte-for-byte from the original — confirm it is accepted by the
        // error_chain! grammar before normalising it away.
        UnsupportedOperation(errmsg: Cow<'static, str>) {
            description(errmsg),
            display("Unsupported Operation: {}", errmsg)
        }

        AlreadyClosed(errmsg: String) {
            description(errmsg)
            display("Already Closed: {}", errmsg)
        }

        IOError(errmsg: String) {
            description(errmsg)
            display("IO Error: {}", errmsg)
        }

        RuntimeError(errmsg: String) {
            description(errmsg)
            display("Runtime Error: {}", errmsg)
        }
    }

    // std / third-party error types that convert into this Error via From.
    foreign_links {
        FmtError(::std::fmt::Error);
        IoError(::std::io::Error);
        FromUtf8Err(::std::string::FromUtf8Error);
        Utf8Error(::std::str::Utf8Error);
        NumError(::std::num::ParseIntError);
        ParseFloatError(::std::num::ParseFloatError);
        SerdeJsonError(self::serde_json::Error);
        NulError(::std::ffi::NulError);
        TimeError(::std::time::SystemTimeError);
    }

    // Other error_chain-based error types in this crate, chained in.
    links {
        Collector(collector::Error, collector::ErrorKind);
        Search(search::Error, search::ErrorKind);
        Index(index::Error, index::ErrorKind);
    }
}

/// Lets `?` convert a poisoned-lock error from any `Mutex`/`RwLock` guard
/// into this crate's `Error` (as `ErrorKind::Poisoned`).
///
/// NOTE(review): the generic parameters on this impl were destroyed in
/// extraction (`impl From> for Error` / `fn from(_: PoisonError)` is not
/// valid Rust); reconstructed as the only form that type-checks against
/// `std::sync::PoisonError<T>` — confirm against upstream.
impl<T> From<PoisonError<T>> for Error {
    fn from(_: PoisonError<T>) -> Error {
        ErrorKind::Poisoned.into()
    }
}
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

// Deep macro expansion (e.g. error_chain!) needs more than the default limit.
#![recursion_limit = "1024"]
// Optional old-style clippy plugin support behind the "clippy" feature.
#![cfg_attr(feature = "clippy", feature(plugin))]
#![cfg_attr(feature = "clippy", plugin(clippy))]
#![cfg_attr(not(feature = "clippy"), allow(unknown_lints))]
// Unstable features this crate relies on; these pin the crate to the nightly
// toolchain recorded in the repository's rust-toolchain file.
#![feature(exact_size_is_empty)]
#![feature(drain_filter)]
#![feature(hashmap_internals)]
#![feature(integer_atomics)]
#![feature(vec_remove_item)]
#![feature(specialization)]
#![allow(clippy::cast_lossless)]
#![feature(fn_traits)]
#![feature(maybe_uninit_ref)]
#![feature(maybe_uninit_extra)]
#![feature(in_band_lifetimes)]
#![feature(vec_into_raw_parts)]
#![feature(core_intrinsics)]
#![feature(stmt_expr_attributes)]

// Crates imported with #[macro_use] export macros used throughout the crate
// (2015-edition style macro imports).
#[macro_use]
extern crate error_chain;
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
extern crate rand;
extern crate regex;
extern crate serde;
#[macro_use]
extern crate serde_derive;
extern crate serde_json;

extern crate alloc;
extern crate byteorder;
extern crate bytes;
extern crate crc;
extern crate crossbeam;
extern crate fasthash;
extern crate flate2;
extern crate memmap;
extern crate num_cpus;
extern crate num_traits;
extern crate smallvec;
extern crate thread_local;
extern crate unicode_reader;
#[macro_use]
extern crate crunchy;

// Public API surface: the library proper and its error types.
pub mod core;
pub mod error;
--------------------------------------------------------------------------------