├── Cargo.toml ├── LICENSE ├── README.md ├── examples └── example.rs ├── rust-toolchain ├── rustfmt.toml └── src ├── core ├── analysis │ ├── mod.rs │ ├── token_stream.rs │ └── whitespace_tokenizer.rs ├── codec │ ├── codec_util.rs │ ├── compound.rs │ ├── doc_values │ │ ├── doc_values_consumer.rs │ │ ├── doc_values_format.rs │ │ ├── doc_values_iterator.rs │ │ ├── doc_values_producer.rs │ │ ├── doc_values_writer.rs │ │ ├── lucene54 │ │ │ ├── doc_values_provider.rs │ │ │ ├── doc_values_term_iterator.rs │ │ │ ├── lucene54_doc_values_consumer.rs │ │ │ ├── lucene54_doc_values_format.rs │ │ │ ├── lucene54_doc_values_producer.rs │ │ │ └── mod.rs │ │ └── mod.rs │ ├── field_infos │ │ ├── field_infos_format.rs │ │ └── mod.rs │ ├── fields.rs │ ├── live_docs.rs │ ├── matching_reader.rs │ ├── mod.rs │ ├── multi_fields.rs │ ├── multi_terms.rs │ ├── norms │ │ ├── mod.rs │ │ ├── norm_values_writer.rs │ │ ├── norms.rs │ │ ├── norms_consumer.rs │ │ └── norms_producer.rs │ ├── points │ │ ├── mod.rs │ │ ├── point_values.rs │ │ ├── point_values_writer.rs │ │ ├── points_reader.rs │ │ └── points_writer.rs │ ├── posting_iterator.rs │ ├── postings │ │ ├── blocktree │ │ │ ├── blocktree_reader.rs │ │ │ ├── blocktree_writer.rs │ │ │ ├── mod.rs │ │ │ └── term_iter_frame.rs │ │ ├── for_util.rs │ │ ├── mod.rs │ │ ├── partial_block_decoder.rs │ │ ├── per_field_postings_format.rs │ │ ├── posting_format.rs │ │ ├── posting_reader.rs │ │ ├── posting_writer.rs │ │ ├── simd_block_decoder.rs │ │ ├── skip_reader.rs │ │ ├── skip_writer.rs │ │ ├── terms_hash.rs │ │ └── terms_hash_per_field.rs │ ├── segment_infos │ │ ├── mod.rs │ │ ├── segment_infos.rs │ │ └── segment_infos_format.rs │ ├── sorter.rs │ ├── stored_fields │ │ ├── mod.rs │ │ ├── stored_fields.rs │ │ ├── stored_fields_consumer.rs │ │ ├── stored_fields_reader.rs │ │ └── stored_fields_writer.rs │ ├── term_vectors │ │ ├── mod.rs │ │ ├── term_vector_consumer.rs │ │ ├── term_vectors_reader.rs │ │ └── term_vectors_writer.rs │ └── terms.rs ├── doc │ ├── 
doc_values.rs │ ├── document.rs │ ├── field.rs │ ├── index_options.rs │ ├── mod.rs │ └── term.rs ├── highlight │ ├── frag_list_builder.rs │ ├── fragments_builder.rs │ ├── fvh_highlighter.rs │ └── mod.rs ├── index │ ├── merge │ │ ├── doc_id_merger.rs │ │ ├── merge_policy.rs │ │ ├── merge_rate_limiter.rs │ │ ├── merge_scheduler.rs │ │ ├── merge_state.rs │ │ ├── mod.rs │ │ └── segment_merger.rs │ ├── mod.rs │ ├── reader │ │ ├── directory_reader.rs │ │ ├── index_lookup.rs │ │ ├── leaf_reader.rs │ │ ├── leaf_reader_wrapper.rs │ │ ├── mod.rs │ │ └── segment_reader.rs │ └── writer │ │ ├── bufferd_updates.rs │ │ ├── delete_policy.rs │ │ ├── dir_wrapper.rs │ │ ├── doc_consumer.rs │ │ ├── doc_values_update.rs │ │ ├── doc_writer.rs │ │ ├── doc_writer_delete_queue.rs │ │ ├── doc_writer_flush_queue.rs │ │ ├── doc_writer_per_thread.rs │ │ ├── flush_control.rs │ │ ├── flush_policy.rs │ │ ├── index_file_deleter.rs │ │ ├── index_writer.rs │ │ ├── index_writer_config.rs │ │ ├── mod.rs │ │ └── prefix_code_terms.rs ├── mod.rs ├── search │ ├── cache │ │ ├── cache_policy.rs │ │ ├── lru_cache.rs │ │ ├── mod.rs │ │ └── query_cache.rs │ ├── collector │ │ ├── chain.rs │ │ ├── early_terminating.rs │ │ ├── mod.rs │ │ ├── timeout.rs │ │ └── top_docs.rs │ ├── explanation.rs │ ├── mod.rs │ ├── query │ │ ├── boolean_query.rs │ │ ├── boost_query.rs │ │ ├── boosting_query.rs │ │ ├── disjunction_max_query.rs │ │ ├── exists_query.rs │ │ ├── filter_query.rs │ │ ├── match_all_query.rs │ │ ├── mod.rs │ │ ├── phrase_query.rs │ │ ├── point_range_query.rs │ │ ├── query_string.rs │ │ ├── spans │ │ │ ├── mod.rs │ │ │ ├── span.rs │ │ │ ├── span_boost.rs │ │ │ ├── span_near.rs │ │ │ ├── span_or.rs │ │ │ └── span_term.rs │ │ └── term_query.rs │ ├── scorer │ │ ├── boosting_scorer.rs │ │ ├── bulk_scorer.rs │ │ ├── conjunction_scorer.rs │ │ ├── disjunction_scorer.rs │ │ ├── min_scorer.rs │ │ ├── mod.rs │ │ ├── phrase_scorer.rs │ │ ├── req_not_scorer.rs │ │ ├── req_opt_scorer.rs │ │ ├── rescorer.rs │ │ └── 
term_scorer.rs │ ├── search_manager.rs │ ├── searcher.rs │ ├── similarity │ │ ├── bm25_similarity.rs │ │ └── mod.rs │ ├── sort_field │ │ ├── collapse_top_docs.rs │ │ ├── field_comparator.rs │ │ ├── mod.rs │ │ ├── search_group.rs │ │ └── sort_field.rs │ └── statistics.rs ├── store │ ├── directory │ │ ├── directory.rs │ │ ├── fs_directory.rs │ │ ├── mmap_directory.rs │ │ ├── mod.rs │ │ └── tracking_directory_wrapper.rs │ ├── io │ │ ├── buffered_checksum_index_input.rs │ │ ├── byte_array_data_input.rs │ │ ├── checksum_index_input.rs │ │ ├── data_input.rs │ │ ├── data_output.rs │ │ ├── fs_index_output.rs │ │ ├── growable_byte_array_output.rs │ │ ├── index_input.rs │ │ ├── index_output.rs │ │ ├── mmap_index_input.rs │ │ ├── mod.rs │ │ ├── ram_output.rs │ │ └── random_access_input.rs │ └── mod.rs └── util │ ├── bit_set.rs │ ├── bit_util.rs │ ├── bits.rs │ ├── bkd │ ├── bkd_reader.rs │ ├── bkd_writer.rs │ ├── doc_ids_writer.rs │ ├── heap_point.rs │ ├── mod.rs │ └── offline_point.rs │ ├── byte_block_pool.rs │ ├── byte_slice_reader.rs │ ├── bytes_ref.rs │ ├── bytes_ref_hash.rs │ ├── compression.rs │ ├── context.rs │ ├── counter.rs │ ├── disi.rs │ ├── doc_id_set.rs │ ├── doc_id_set_builder.rs │ ├── external │ ├── binary_heap.rs │ ├── deferred.rs │ ├── mod.rs │ ├── thread_pool.rs │ └── volatile.rs │ ├── fst │ ├── bytes_output.rs │ ├── bytes_store.rs │ ├── fst_builder.rs │ ├── fst_iteartor.rs │ ├── fst_reader.rs │ └── mod.rs │ ├── int_block_pool.rs │ ├── ints_ref.rs │ ├── math.rs │ ├── mod.rs │ ├── numeric.rs │ ├── packed │ ├── block_packed_writer.rs │ ├── direct_monotonic_reader.rs │ ├── direct_monotonic_writer.rs │ ├── direct_reader.rs │ ├── direct_writer.rs │ ├── elias_fano_decoder.rs │ ├── elias_fano_encoder.rs │ ├── mod.rs │ ├── monotonic_block_packed_reader.rs │ ├── monotonic_block_packed_writer.rs │ ├── packed_ints_null_reader.rs │ ├── packed_long_values.rs │ ├── packed_misc.rs │ ├── packed_simd.rs │ └── paged_mutable.rs │ ├── paged_bytes.rs │ ├── selector.rs │ ├── 
small_float.rs │ ├── sorter.rs │ ├── string_util.rs │ ├── variant_value.rs │ └── version.rs ├── error.rs └── lib.rs /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rucene" 3 | version = "0.1.1" 4 | authors = ["Zhihu Search Team"] 5 | repository = "https://github.com/zhihu/rucene" 6 | license-file = "LICENSE" 7 | description = """ 8 | Rucene is a Rust port of the popular Apache Lucene project. Rucene is 9 | not a complete application, but rather a code library and API that 10 | can easily be used to add full text search capabilities to applications. 11 | """ 12 | 13 | [dependencies] 14 | bytes = "0.4" 15 | chan = "0.1.21" 16 | chan-signal = "0.3.1" 17 | crc = "1.5.0" 18 | crossbeam = "0.7" 19 | either = "1.3" 20 | error-chain = "0.12.1" 21 | fasthash = "0.3" 22 | flate2 = "1.0.2" 23 | lazy_static = "1.0" 24 | log = "0.4" 25 | memmap = "0.6" 26 | num_cpus = "1.10.0" 27 | rand = "0.5" 28 | regex = "0.2" 29 | serde = "1.0" 30 | serde_derive = "1.0" 31 | serde_json = "1.0" 32 | smallvec = "0.6.9" 33 | thread_local = "0.3" 34 | unicode_reader = "0.1.1" 35 | num-traits = "0.2" 36 | byteorder = "1" 37 | crunchy = "0.2.2" 38 | 39 | [dev-dependencies] 40 | tempfile = "3.0.8" 41 | 42 | # The release profile, used for `cargo build --release` 43 | [profile.release] 44 | debug = true 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Rucene - Rust implementation of Lucene 2 | ===================================================================================== 3 | 4 | ## Introduction 5 | 6 | Rucene is a Rust port of the popular Apache Lucene project. Rucene is not a complete application, but rather a code library and API that can easily be used to add full text search capabilities to applications. 
7 | 8 | ## Status 9 | 10 | The index searcher part of Rucene has been put into production and has served all search traffic at Zhihu since July, 2018. Development of the index writer part was started in late 2018, and has been put into production to serve real-time searching since May, 2019. 11 | 12 | ## Documentation 13 | 14 | We don't yet have API documentation for Rucene, but the usage is similar to [Lucene 6.2.1](https://lucene.apache.org/core/6_2_1/). 15 | 16 | > **Note:** 17 | > 18 | > We are working on this, but could use more help since it is a massive project. 19 | 20 | ## License 21 | 22 | Rucene is under the Apache 2.0 license. See the [LICENSE](./LICENSE) file for details. 23 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | nightly-2020-03-12 2 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | # REFERENCE: https://github.com/rust-lang-nursery/rustfmt/blob/master/Configurations.md 2 | unstable_features = true 3 | comment_width = 100 4 | wrap_comments = true 5 | format_strings = true 6 | imports_indent = "Block" 7 | imports_layout = "Mixed" 8 | newline_style = "Unix" 9 | normalize_comments = true 10 | -------------------------------------------------------------------------------- /src/core/codec/doc_values/doc_values_producer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

use core::codec::doc_values::{
    BinaryDocValuesProvider, NumericDocValuesProvider, SortedDocValuesProvider,
    SortedNumericDocValuesProvider, SortedSetDocValuesProvider,
};
use core::codec::field_infos::FieldInfo;
use core::util::BitsMut;

use error::Result;
use std::sync::Arc;

/// Abstract API that produces numeric, binary, sorted, sortedset,
/// and sortednumeric docvalues.
///
/// NOTE: the returned instance must always be thread-safe; this is different from
/// the Lucene constraint, where producers need not be thread-safe.
// NOTE(review): every return type below reads as a bare `Result>` — the generic
// arguments appear to have been stripped when this file was exported (presumably
// `Result<Arc<dyn ...Provider>>` for the getters and `Result<Arc<dyn
// DocValuesProducer>>` for `get_merge_instance`). Confirm against the upstream
// sources before building; the code as shown here will not compile.
pub trait DocValuesProducer: Send + Sync {
    /// Returns `NumericDocValues` for this field.
    fn get_numeric(&self, field_info: &FieldInfo) -> Result>;

    /// Returns `BinaryDocValues` for this field.
    fn get_binary(&self, field_info: &FieldInfo) -> Result>;

    /// Returns `SortedDocValues` for this field.
    fn get_sorted(&self, field: &FieldInfo) -> Result>;

    /// Returns `SortedNumericDocValues` for this field.
    fn get_sorted_numeric(
        &self,
        field: &FieldInfo,
    ) -> Result>;

    /// Returns `SortedSetDocValues` for this field.
    fn get_sorted_set(&self, field: &FieldInfo) -> Result>;
    /// Returns a `bits` at the size of `reader.max_doc()`, with turned on bits for each doc_id
    /// that does have a value for this field.
    /// The returned instance need not be thread-safe: it will only be used by a single thread.
    fn get_docs_with_field(&self, field: &FieldInfo) -> Result>;
    /// Checks consistency of this producer.
    /// Note that this may be costly in terms of I/O, e.g.
    /// may involve computing a checksum value against large data files.
    fn check_integrity(&self) -> Result<()>;

    /// Returns an instance optimized for merging.
    fn get_merge_instance(&self) -> Result>;
}
-------------------------------------------------------------------------------- /src/core/codec/doc_values/lucene54/lucene54_doc_values_format.rs: --------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
13 | 14 | use core::codec::doc_values::lucene54::{Lucene54DocValuesConsumer, Lucene54DocValuesProducer}; 15 | use core::codec::doc_values::{DocValuesConsumerEnum, DocValuesFormat, DocValuesProducer}; 16 | use core::codec::segment_infos::{SegmentReadState, SegmentWriteState}; 17 | use core::codec::Codec; 18 | use core::store::directory::Directory; 19 | 20 | use error::Result; 21 | 22 | #[derive(Copy, Clone, Default)] 23 | pub struct Lucene54DocValuesFormat; 24 | 25 | impl Lucene54DocValuesFormat { 26 | const DATA_CODEC: &'static str = "Lucene54DocValuesData"; 27 | const DATA_EXTENSION: &'static str = "dvd"; 28 | const META_CODEC: &'static str = "Lucene54DocValuesMetadata"; 29 | const META_EXTENSION: &'static str = "dvm"; 30 | pub const VERSION_START: i32 = 0; 31 | pub const VERSION_CURRENT: i32 = 0; 32 | 33 | // indicates docvalues type 34 | pub const NUMERIC: u8 = 0; 35 | pub const BINARY: u8 = 1; 36 | pub const SORTED: u8 = 2; 37 | pub const SORTED_SET: u8 = 3; 38 | pub const SORTED_NUMERIC: u8 = 4; 39 | 40 | // address terms in blocks of 16 terms 41 | pub const INTERVAL_SHIFT: i32 = 4; 42 | pub const INTERVAL_COUNT: i32 = 1 << Self::INTERVAL_SHIFT; 43 | pub const INTERVAL_MASK: i32 = Self::INTERVAL_COUNT - 1; 44 | 45 | // build reverse index from every 1024th term 46 | pub const REVERSE_INTERVAL_SHIFT: i32 = 10; 47 | pub const REVERSE_INTERVAL_COUNT: i32 = 1 << Self::REVERSE_INTERVAL_SHIFT; 48 | pub const REVERSE_INTERVAL_MASK: i32 = Self::REVERSE_INTERVAL_COUNT - 1; 49 | 50 | // for conversion from reverse index to block 51 | pub const BLOCK_INTERVAL_SHIFT: i32 = Self::REVERSE_INTERVAL_SHIFT - Self::INTERVAL_SHIFT; 52 | pub const BLOCK_INTERVAL_COUNT: i32 = 1 << Self::BLOCK_INTERVAL_SHIFT; 53 | pub const BLOCK_INTERVAL_MASK: i32 = Self::BLOCK_INTERVAL_COUNT - 1; 54 | 55 | // Compressed using packed blocks of ints 56 | pub const DELTA_COMPRESSED: i32 = 0; 57 | // Compressed by computing the GCD 58 | pub const GCD_COMPRESSED: i32 = 1; 59 | // Compressed by giving 
IDs to unique values 60 | pub const TABLE_COMPRESSED: i32 = 2; 61 | // Compressed with monotonically increasing values 62 | pub const MONOTONIC_COMPRESSED: i32 = 3; 63 | // Compressed with pub constant value (uses only missing bitset) 64 | pub const CONST_COMPRESSED: i32 = 4; 65 | // Compressed with sparse arrays 66 | pub const SPARSE_COMPRESSED: i32 = 5; 67 | 68 | // Uncompressed binary, written directly (fixed length) 69 | pub const BINARY_FIXED_UNCOMPRESSED: i32 = 0; 70 | // Uncompressed binary, written directly (variable length) 71 | pub const BINARY_VARIABLE_UNCOMPRESSED: i32 = 1; 72 | // Compressed binary with shared prefixes 73 | pub const BINARY_PREFIX_COMPRESSED: i32 = 2; 74 | 75 | // Standard storage for sorted set values with 1 level of indirection: 76 | // docId -> address -> ord 77 | pub const SORTED_WITH_ADDRESSES: i32 = 0; 78 | // Single-valued sorted set values, encoded as sorted values, so no level 79 | // of indirection: docId -> ord 80 | pub const SORTED_SINGLE_VALUED: i32 = 1; 81 | // Compressed giving IDs to unique sets of values: 82 | // docId -> setId -> ords 83 | pub const SORTED_SET_TABLE: i32 = 2; 84 | 85 | // placeholder for missing offset that means there are no missing values 86 | pub const ALL_LIVE: i32 = -1; 87 | // placeholder for missing offset that means all values are missing 88 | pub const ALL_MISSING: i32 = -2; 89 | 90 | // addressing uses 16k blocks 91 | pub const MONOTONIC_BLOCK_SIZE: i32 = 16384; 92 | pub const DIRECT_MONOTONIC_BLOCK_SHIFT: i32 = 16; 93 | } 94 | 95 | impl DocValuesFormat for Lucene54DocValuesFormat { 96 | fn name(&self) -> &str { 97 | "Lucene54" 98 | } 99 | fn fields_producer<'a, D: Directory, DW: Directory, C: Codec>( 100 | &self, 101 | state: &SegmentReadState<'a, D, DW, C>, 102 | ) -> Result> { 103 | let boxed = Lucene54DocValuesProducer::new( 104 | state, 105 | Self::DATA_CODEC, 106 | Self::DATA_EXTENSION, 107 | Self::META_CODEC, 108 | Self::META_EXTENSION, 109 | )?; 110 | Ok(Box::new(boxed)) 111 | } 112 
| 113 | fn fields_consumer( 114 | &self, 115 | state: &SegmentWriteState, 116 | ) -> Result> { 117 | Ok(DocValuesConsumerEnum::Lucene54( 118 | Lucene54DocValuesConsumer::new( 119 | state, 120 | Self::DATA_CODEC, 121 | Self::DATA_EXTENSION, 122 | Self::META_CODEC, 123 | Self::META_EXTENSION, 124 | )?, 125 | )) 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/core/codec/fields.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::Terms; 15 | 16 | use error::Result; 17 | 18 | /// Flex API for access to fields and terms 19 | pub trait Fields { 20 | type Terms: Terms; 21 | fn fields(&self) -> Vec; 22 | fn terms(&self, field: &str) -> Result>; 23 | fn size(&self) -> usize; 24 | fn terms_freq(&self, _field: &str) -> usize { 25 | unimplemented!() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/core/codec/matching_reader.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::Codec; 15 | use core::index::merge::MergeState; 16 | use core::store::directory::Directory; 17 | 18 | /// Computes which segments have identical field name to number mappings, 19 | /// which allows stored fields and term vectors in this codec to be bulk-merged. 20 | pub struct MatchingReaders { 21 | /// `SegmentReader`s that have identical field name/number mapping, 22 | /// so their stored fields and term vectors may be bulk merged. 23 | pub matching_readers: Vec, 24 | /// How many #matching_readers are set 25 | pub count: usize, 26 | } 27 | 28 | impl MatchingReaders { 29 | pub fn new(merge_state: &MergeState) -> Self { 30 | // If the i'th reader is a SegmentReader and has 31 | // identical fieldName -> number mapping, then this 32 | // array will be non-null at position i: 33 | let num_readers = merge_state.max_docs.len(); 34 | let mut matched_count = 0; 35 | 36 | let mut matching_readers = vec![false; num_readers]; 37 | 38 | // If this reader is a SegmentReader, and all of its 39 | // field name -> number mappings match the "merged" 40 | // FieldInfos, then we can do a bulk copy of the 41 | // stored fields: 42 | 'next_reader: for i in 0..num_readers { 43 | for fi in merge_state.fields_infos[i].by_number.values() { 44 | let other = merge_state 45 | .merge_field_infos 46 | .as_ref() 47 | .unwrap() 48 | .field_info_by_number(fi.number); 49 | if other.map_or(true, |o| o.name != fi.name) { 50 | continue 'next_reader; 51 | } 52 | } 53 | matching_readers[i] = true; 54 | matched_count += 1; 55 | } 56 | MatchingReaders { 57 | matching_readers, 58 | count: matched_count, 59 | 
} 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/core/codec/norms/norm_values_writer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::doc_values::{NumericDVIter, NumericDocValuesWriter}; 15 | use core::codec::field_infos::FieldInfo; 16 | use core::codec::norms::NormsConsumer; 17 | use core::codec::segment_infos::SegmentWriteState; 18 | use core::codec::{Codec, SorterDocMap}; 19 | use core::util::packed::COMPACT; 20 | use core::util::packed::{ 21 | LongValuesIterator, PackedLongValuesBuilder, PackedLongValuesBuilderType, DEFAULT_PAGE_SIZE, 22 | }; 23 | use core::util::{BitSet, FixedBitSet}; 24 | use core::util::{Bits, DocId, Numeric, ReusableIterator}; 25 | 26 | use core::store::directory::Directory; 27 | use error::Result; 28 | 29 | const MISSING: i64 = 0; 30 | 31 | pub struct NormValuesWriter { 32 | pending: PackedLongValuesBuilder, 33 | docs_with_field: FixedBitSet, 34 | field_info: FieldInfo, 35 | last_doc: DocId, 36 | } 37 | 38 | impl NormValuesWriter { 39 | pub fn new(field_info: &FieldInfo) -> Self { 40 | NormValuesWriter { 41 | pending: PackedLongValuesBuilder::new( 42 | DEFAULT_PAGE_SIZE, 43 | COMPACT as f32, 44 | PackedLongValuesBuilderType::Delta, 45 | ), 46 | docs_with_field: FixedBitSet::new(64), 47 | field_info: field_info.clone(), 48 | last_doc: -1, 49 | 
} 50 | } 51 | 52 | pub fn add_value(&mut self, doc_id: DocId, value: i64) { 53 | debug_assert!(self.last_doc < doc_id); 54 | self.docs_with_field.ensure_capacity(doc_id as usize); 55 | self.docs_with_field.set(doc_id as usize); 56 | self.pending.add(value); 57 | self.last_doc = doc_id; 58 | } 59 | 60 | pub fn finish(&mut self, _num_doc: i32) {} 61 | 62 | pub fn flush( 63 | &mut self, 64 | state: &SegmentWriteState, 65 | sort_map: Option<&impl SorterDocMap>, 66 | consumer: &mut NC, 67 | ) -> Result<()> { 68 | let max_doc = state.segment_info.max_doc; 69 | let values = self.pending.build(); 70 | if let Some(sort_map) = sort_map { 71 | let sorted = NumericDocValuesWriter::sort_doc_values( 72 | max_doc, 73 | sort_map, 74 | &self.docs_with_field, 75 | values.iterator(), 76 | ); 77 | let mut iter = NumericDVIter::new(sorted); 78 | consumer.add_norms_field(&self.field_info, &mut iter) 79 | } else { 80 | let mut iter = 81 | NumericIter::new(values.iterator(), &self.docs_with_field, max_doc as usize); 82 | consumer.add_norms_field(&self.field_info, &mut iter) 83 | } 84 | } 85 | } 86 | 87 | struct NumericIter<'a> { 88 | values_iter: LongValuesIterator<'a>, 89 | docs_with_field: &'a FixedBitSet, 90 | upto: usize, 91 | max_doc: usize, 92 | } 93 | 94 | impl<'a> NumericIter<'a> { 95 | fn new( 96 | values_iter: LongValuesIterator<'a>, 97 | docs_with_field: &'a FixedBitSet, 98 | max_doc: usize, 99 | ) -> NumericIter<'a> { 100 | NumericIter { 101 | values_iter, 102 | docs_with_field, 103 | upto: 0, 104 | max_doc, 105 | } 106 | } 107 | } 108 | 109 | impl<'a> Iterator for NumericIter<'a> { 110 | type Item = Result; 111 | 112 | fn next(&mut self) -> Option> { 113 | if self.upto < self.max_doc { 114 | let v = if self.upto >= self.docs_with_field.len() 115 | || !self.docs_with_field.get(self.upto).unwrap() 116 | { 117 | MISSING 118 | } else { 119 | self.values_iter.next().unwrap() 120 | }; 121 | self.upto += 1; 122 | Some(Ok(Numeric::Long(v))) 123 | } else { 124 | None 125 | } 126 | } 
127 | } 128 | 129 | impl<'a> ReusableIterator for NumericIter<'a> { 130 | fn reset(&mut self) { 131 | self.values_iter.reset(); 132 | self.upto = 0; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/core/codec/norms/norms.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::norms::{ 15 | Lucene53NormsConsumer, Lucene53NormsProducer, NormsConsumerEnum, NormsFormat, 16 | }; 17 | use core::codec::segment_infos::{SegmentReadState, SegmentWriteState}; 18 | use core::codec::Codec; 19 | use core::store::directory::Directory; 20 | 21 | use error::Result; 22 | 23 | pub const DATA_CODEC: &str = "Lucene53NormsData"; 24 | pub const DATA_EXTENSION: &str = "nvd"; 25 | pub const METADATA_CODEC: &str = "Lucene53NormsMetadata"; 26 | pub const METADATA_EXTENSION: &str = "nvm"; 27 | pub const VERSION_START: i32 = 0; 28 | pub const VERSION_CURRENT: i32 = VERSION_START; 29 | 30 | #[derive(Copy, Clone, Default)] 31 | pub struct Lucene53NormsFormat; 32 | 33 | impl NormsFormat for Lucene53NormsFormat { 34 | type NormsProducer = Lucene53NormsProducer; 35 | fn norms_producer<'a, D: Directory, DW: Directory, C: Codec>( 36 | &self, 37 | state: &SegmentReadState<'a, D, DW, C>, 38 | ) -> Result { 39 | Lucene53NormsProducer::new( 40 | state, 41 | DATA_CODEC, 42 | DATA_EXTENSION, 43 | METADATA_CODEC, 44 
| METADATA_EXTENSION, 45 | ) 46 | } 47 | 48 | fn norms_consumer( 49 | &self, 50 | state: &SegmentWriteState, 51 | ) -> Result> { 52 | Ok(NormsConsumerEnum::Lucene53(Lucene53NormsConsumer::new( 53 | state, 54 | DATA_CODEC, 55 | DATA_EXTENSION, 56 | METADATA_CODEC, 57 | METADATA_EXTENSION, 58 | )?)) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/core/codec/posting_iterator.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::search::{DocIterator, Payload, NO_MORE_DOCS}; 15 | use core::util::DocId; 16 | use error::Result; 17 | 18 | pub struct PostingIteratorFlags; 19 | 20 | /// flags constants and helper function defined for `TermIterator::postings_with_flag()`. 21 | impl PostingIteratorFlags { 22 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} if you don't 23 | /// require per-document postings in the returned iterator. 24 | pub const NONE: u16 = 0; 25 | 26 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 27 | /// if you require term frequencies in the returned iterator. 28 | pub const FREQS: u16 = 1 << 3; 29 | 30 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 31 | /// if you require term positions in the returned iterator. 
32 | pub const POSITIONS: u16 = Self::FREQS | 1 << 4; 33 | 34 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 35 | /// if you require offsets in the returned iterator. 36 | pub const OFFSETS: u16 = Self::POSITIONS | 1 << 5; 37 | 38 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 39 | /// if you require payloads in the returned iterator. 40 | pub const PAYLOADS: u16 = Self::POSITIONS | 1 << 6; 41 | 42 | /// Flag to pass to {@link TermIterator#postings_with_flags(u16)} 43 | /// to get positions, payloads and offsets in the returned iterator. 44 | pub const ALL: u16 = Self::OFFSETS | Self::PAYLOADS; 45 | 46 | pub fn feature_requested(flags: u16, feature: u16) -> bool { 47 | (flags & feature) == feature 48 | } 49 | } 50 | 51 | /// Iterates through the postings. 52 | /// 53 | /// NOTE: you must first call `next()` before using any of the per-doc methods. 54 | pub trait PostingIterator: DocIterator { 55 | /// Returns term frequency in the current document, or 1 if the field was 56 | /// indexed with `IndexOptions::Docs`. Do not call this before 57 | /// `next_doc()` is first called, nor after `#next()` returns `NO_MORE_DOCS`. 58 | /// 59 | /// *NOTE:* if the [`PostingIterator`] was obtain with `PostingIteratorFlags::NONE`, 60 | /// the result of this method is undefined. 61 | fn freq(&self) -> Result; 62 | 63 | /// Returns the next position, or -1 if positions were not indexed. 64 | /// Calling this more than `freq()` times is undefined. 65 | fn next_position(&mut self) -> Result; 66 | 67 | /// Returns start offset for the current position, or -1 68 | /// if offsets were not indexed. */ 69 | fn start_offset(&self) -> Result; 70 | 71 | /// Returns end offset for the current position, or -1 if 72 | /// offsets were not indexed. */ 73 | fn end_offset(&self) -> Result; 74 | 75 | /// Returns the payload at this position, or null if no 76 | /// payload was indexed. 
You should not modify anything 77 | /// (neither members of the returned BytesRef nor bytes 78 | /// in the bytes). */ 79 | fn payload(&self) -> Result; 80 | } 81 | 82 | /// a `PostingIterator` that no matching docs are available. 83 | #[derive(Clone)] 84 | pub struct EmptyPostingIterator { 85 | doc_id: DocId, 86 | } 87 | 88 | impl Default for EmptyPostingIterator { 89 | fn default() -> Self { 90 | EmptyPostingIterator { doc_id: -1 } 91 | } 92 | } 93 | 94 | impl DocIterator for EmptyPostingIterator { 95 | fn doc_id(&self) -> DocId { 96 | self.doc_id 97 | } 98 | 99 | fn next(&mut self) -> Result { 100 | self.doc_id = NO_MORE_DOCS; 101 | Ok(NO_MORE_DOCS) 102 | } 103 | 104 | fn advance(&mut self, _target: DocId) -> Result { 105 | self.doc_id = NO_MORE_DOCS; 106 | Ok(NO_MORE_DOCS) 107 | } 108 | 109 | fn cost(&self) -> usize { 110 | 0usize 111 | } 112 | } 113 | 114 | impl PostingIterator for EmptyPostingIterator { 115 | fn freq(&self) -> Result { 116 | Ok(0) 117 | } 118 | 119 | fn next_position(&mut self) -> Result { 120 | Ok(-1) 121 | } 122 | 123 | fn start_offset(&self) -> Result { 124 | Ok(-1) 125 | } 126 | 127 | fn end_offset(&self) -> Result { 128 | Ok(-1) 129 | } 130 | 131 | fn payload(&self) -> Result { 132 | Ok(Payload::new()) 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/core/codec/postings/blocktree/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod blocktree_reader; 15 | 16 | pub use self::blocktree_reader::*; 17 | 18 | mod blocktree_writer; 19 | 20 | pub use self::blocktree_writer::*; 21 | 22 | mod term_iter_frame; 23 | 24 | pub use self::term_iter_frame::*; 25 | 26 | const MAX_LONGS_SIZE: usize = 3; 27 | 28 | use core::codec::TermState; 29 | 30 | /// Holds all state required for `PostingsReaderBase` to produce a 31 | /// `PostingIterator` without re-seeking the term dict. 32 | #[derive(Clone, Debug)] 33 | pub struct BlockTermState { 34 | /// Term ordinal, i.e. its position in the full list of 35 | /// sorted terms. 36 | pub ord: i64, 37 | /// how many docs have this term 38 | pub doc_freq: i32, 39 | 40 | /// total number of occurrences of this term 41 | pub total_term_freq: i64, 42 | 43 | /// the term's ord in the current block 44 | pub term_block_ord: i32, 45 | 46 | /// fp into the terms dict primary file (_X.tim) that holds this term 47 | // TODO: update BTR to nuke this 48 | pub block_file_pointer: i64, 49 | 50 | /// fields from IntBlockTermState 51 | pub doc_start_fp: i64, 52 | pub pos_start_fp: i64, 53 | pub pay_start_fp: i64, 54 | pub skip_offset: i64, 55 | pub last_pos_block_offset: i64, 56 | // docid when there is a single pulsed posting, otherwise -1 57 | // freq is always implicitly totalTermFreq in this case. 
58 | pub singleton_doc_id: i32, 59 | } 60 | 61 | impl BlockTermState { 62 | pub fn new() -> BlockTermState { 63 | BlockTermState { 64 | ord: 0, 65 | doc_freq: 0, 66 | total_term_freq: 0, 67 | term_block_ord: 0, 68 | block_file_pointer: 0, 69 | 70 | doc_start_fp: 0, 71 | pos_start_fp: 0, 72 | pay_start_fp: 0, 73 | skip_offset: -1, 74 | last_pos_block_offset: -1, 75 | singleton_doc_id: -1, 76 | } 77 | } 78 | 79 | pub fn copy_from(&mut self, other: &BlockTermState) { 80 | self.ord = other.ord; 81 | self.doc_freq = other.doc_freq; 82 | self.total_term_freq = other.total_term_freq; 83 | self.term_block_ord = other.term_block_ord; 84 | self.block_file_pointer = other.block_file_pointer; 85 | self.doc_start_fp = other.doc_start_fp; 86 | self.pos_start_fp = other.pos_start_fp; 87 | self.pay_start_fp = other.pay_start_fp; 88 | self.last_pos_block_offset = other.last_pos_block_offset; 89 | self.skip_offset = other.skip_offset; 90 | self.singleton_doc_id = other.singleton_doc_id; 91 | } 92 | 93 | pub fn ord(&self) -> i64 { 94 | self.ord 95 | } 96 | 97 | pub fn doc_freq(&self) -> i32 { 98 | self.doc_freq 99 | } 100 | 101 | pub fn total_term_freq(&self) -> i64 { 102 | self.total_term_freq 103 | } 104 | 105 | pub fn term_block_ord(&self) -> i32 { 106 | self.term_block_ord 107 | } 108 | 109 | pub fn block_file_pointer(&self) -> i64 { 110 | self.block_file_pointer 111 | } 112 | 113 | pub fn doc_start_fp(&self) -> i64 { 114 | self.doc_start_fp 115 | } 116 | pub fn pos_start_fp(&self) -> i64 { 117 | self.pos_start_fp 118 | } 119 | pub fn pay_start_fp(&self) -> i64 { 120 | self.pay_start_fp 121 | } 122 | pub fn skip_offset(&self) -> i64 { 123 | self.skip_offset 124 | } 125 | pub fn last_pos_block_offset(&self) -> i64 { 126 | self.last_pos_block_offset 127 | } 128 | pub fn singleton_doc_id(&self) -> i32 { 129 | self.singleton_doc_id 130 | } 131 | } 132 | 133 | impl TermState for BlockTermState {} 134 | -------------------------------------------------------------------------------- 
/src/core/codec/postings/posting_format.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::postings::blocktree::{BlockTreeTermsReader, BlockTreeTermsWriter}; 15 | use core::codec::postings::{ 16 | FieldsConsumerEnum, Lucene50PostingsReader, Lucene50PostingsWriter, PostingsFormat, 17 | }; 18 | use core::codec::segment_infos::{SegmentReadState, SegmentWriteState}; 19 | use core::codec::Codec; 20 | use core::store::directory::Directory; 21 | 22 | use error::Result; 23 | 24 | use std::fmt; 25 | 26 | #[derive(Hash, Eq, Ord, PartialEq, PartialOrd)] 27 | pub struct Lucene50PostingsFormat { 28 | name: &'static str, 29 | min_term_block_size: usize, 30 | max_term_block_size: usize, 31 | } 32 | 33 | /// Fixed packed block size, number of integers encoded in 34 | /// a single packed block. 
35 | // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding 36 | pub const BLOCK_SIZE: i32 = 128; 37 | 38 | const DEFAULT_MIN_BLOCK_SIZE: usize = 25; 39 | const DEFAULT_MAX_BLOCK_SIZE: usize = 48; 40 | 41 | impl fmt::Display for Lucene50PostingsFormat { 42 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 43 | write!(f, "{}(blocksize={})", self.name, BLOCK_SIZE) 44 | } 45 | } 46 | 47 | impl Default for Lucene50PostingsFormat { 48 | fn default() -> Lucene50PostingsFormat { 49 | Self::with_block_size(DEFAULT_MIN_BLOCK_SIZE, DEFAULT_MAX_BLOCK_SIZE) 50 | } 51 | } 52 | 53 | impl Lucene50PostingsFormat { 54 | pub fn with_block_size( 55 | min_term_block_size: usize, 56 | max_term_block_size: usize, 57 | ) -> Lucene50PostingsFormat { 58 | Lucene50PostingsFormat { 59 | name: "Lucene50", 60 | min_term_block_size, 61 | max_term_block_size, 62 | } 63 | } 64 | } 65 | 66 | impl PostingsFormat for Lucene50PostingsFormat { 67 | type FieldsProducer = BlockTreeTermsReader; 68 | fn fields_producer<'a, D: Directory, DW: Directory, C: Codec>( 69 | &self, 70 | state: &SegmentReadState<'a, D, DW, C>, 71 | ) -> Result { 72 | let reader = Lucene50PostingsReader::open(&state)?; 73 | BlockTreeTermsReader::new(reader, state) 74 | } 75 | 76 | fn fields_consumer( 77 | &self, 78 | state: &SegmentWriteState, 79 | ) -> Result> { 80 | let postings_writer = Lucene50PostingsWriter::new(state)?; 81 | Ok(FieldsConsumerEnum::Lucene50(BlockTreeTermsWriter::new( 82 | state, 83 | postings_writer, 84 | self.min_term_block_size, 85 | self.max_term_block_size, 86 | )?)) 87 | } 88 | 89 | fn name(&self) -> &str { 90 | "Lucene50" 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/core/codec/stored_fields/stored_fields.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::str::FromStr; 15 | use std::sync::Arc; 16 | 17 | use core::codec::field_infos::FieldInfos; 18 | use core::codec::segment_infos::SegmentInfo; 19 | use core::codec::stored_fields::{ 20 | CompressingStoredFieldsFormat, CompressingStoredFieldsReader, StoredFieldsFormat, 21 | StoredFieldsWriterEnum, 22 | }; 23 | use core::codec::Codec; 24 | use core::store::directory::Directory; 25 | use core::store::IOContext; 26 | use core::util::CompressionMode; 27 | use error::{Error as CoreError, ErrorKind::IllegalState, Result}; 28 | 29 | const MODE_KEY: &str = "Lucene50StoredFieldsFormat.mode"; 30 | 31 | #[derive(Debug, Copy, Clone)] 32 | pub enum StoredFieldCompressMode { 33 | BestSpeed, 34 | BestCompression, 35 | } 36 | 37 | impl StoredFieldCompressMode { 38 | fn name(&self) -> &'static str { 39 | match self { 40 | StoredFieldCompressMode::BestSpeed => "BEST_SPEED", 41 | StoredFieldCompressMode::BestCompression => "BEST_COMPRESSION", 42 | } 43 | } 44 | } 45 | 46 | impl FromStr for StoredFieldCompressMode { 47 | type Err = CoreError; 48 | fn from_str(v: &str) -> Result { 49 | let r = if v == "BEST_SPEED" { 50 | StoredFieldCompressMode::BestSpeed 51 | } else { 52 | StoredFieldCompressMode::BestCompression 53 | }; 54 | Ok(r) 55 | } 56 | } 57 | 58 | /// Lucene 5.0 stored fields format. 
59 | #[derive(Copy, Clone)] 60 | pub struct Lucene50StoredFieldsFormat { 61 | #[allow(dead_code)] 62 | mode: StoredFieldCompressMode, 63 | } 64 | 65 | impl Lucene50StoredFieldsFormat { 66 | pub fn new(mode: Option) -> Lucene50StoredFieldsFormat { 67 | if let Some(m) = mode { 68 | Lucene50StoredFieldsFormat { mode: m } 69 | } else { 70 | Lucene50StoredFieldsFormat { 71 | mode: StoredFieldCompressMode::BestSpeed, 72 | } 73 | } 74 | } 75 | 76 | pub fn format(self, mode: StoredFieldCompressMode) -> CompressingStoredFieldsFormat { 77 | match mode { 78 | StoredFieldCompressMode::BestSpeed => CompressingStoredFieldsFormat::new( 79 | "Lucene50StoredFieldsFast", 80 | "", 81 | CompressionMode::FAST, 82 | 1 << 14, 83 | 128, 84 | 1024, 85 | ), 86 | StoredFieldCompressMode::BestCompression => CompressingStoredFieldsFormat::new( 87 | "Lucene50StoredFieldsHigh", 88 | "", 89 | CompressionMode::HighCompression, 90 | 61440, 91 | 512, 92 | 1024, 93 | ), 94 | } 95 | } 96 | } 97 | 98 | impl StoredFieldsFormat for Lucene50StoredFieldsFormat { 99 | type Reader = CompressingStoredFieldsReader; 100 | fn fields_reader( 101 | &self, 102 | directory: &DW, 103 | si: &SegmentInfo, 104 | field_info: Arc, 105 | ioctx: &IOContext, 106 | ) -> Result { 107 | if let Some(value) = si.attributes.get(MODE_KEY) { 108 | let mode = StoredFieldCompressMode::from_str(value)?; 109 | 110 | self.format(mode) 111 | .fields_reader(directory, si, field_info, ioctx) 112 | } else { 113 | bail!(IllegalState(format!( 114 | "missing value for {} for segment: {}", 115 | MODE_KEY, si.name 116 | ))) 117 | } 118 | } 119 | 120 | fn fields_writer( 121 | &self, 122 | directory: Arc, 123 | si: &mut SegmentInfo, 124 | ioctx: &IOContext, 125 | ) -> Result> 126 | where 127 | D: Directory, 128 | DW: Directory, 129 | DW::IndexOutput: 'static, 130 | C: Codec, 131 | { 132 | let previous = si 133 | .attributes 134 | .insert(MODE_KEY.to_string(), self.mode.name().to_string()); 135 | if let Some(prev_name) = previous { 136 | if 
prev_name.as_str() != self.mode.name() { 137 | bail!(IllegalState(format!( 138 | "found existing value for {} for segment: {}", 139 | MODE_KEY, si.name 140 | ))); 141 | } 142 | } 143 | self.format(self.mode).fields_writer(directory, si, ioctx) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/core/doc/index_options.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use error::{ErrorKind::IllegalArgument, Result}; 15 | use std::cmp::Ordering; 16 | 17 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize)] 18 | pub enum IndexOptions { 19 | Null, 20 | Docs, 21 | DocsAndFreqs, 22 | DocsAndFreqsAndPositions, 23 | DocsAndFreqsAndPositionsAndOffsets, 24 | } 25 | 26 | impl Default for IndexOptions { 27 | fn default() -> IndexOptions { 28 | IndexOptions::Null 29 | } 30 | } 31 | 32 | impl IndexOptions { 33 | pub fn from(options: &str) -> Result { 34 | let res = match options { 35 | "offsets" => IndexOptions::DocsAndFreqsAndPositionsAndOffsets, 36 | "positions" => IndexOptions::DocsAndFreqsAndPositions, 37 | "freqs" => IndexOptions::DocsAndFreqs, 38 | "docs" => IndexOptions::Docs, 39 | _ => { 40 | bail!(IllegalArgument(format!( 41 | "failed to parse index option [{}]", 42 | options 43 | ))); 44 | } 45 | }; 46 | Ok(res) 47 | } 48 | 49 | pub fn as_str(self) -> &'static str { 50 | match self { 51 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => "offsets", 52 | IndexOptions::DocsAndFreqs => "freqs", 53 | IndexOptions::DocsAndFreqsAndPositions => "positions", 54 | IndexOptions::Docs => "docs", 55 | _ => unreachable!(), 56 | } 57 | } 58 | 59 | pub fn has_docs(self) -> bool { 60 | match self { 61 | IndexOptions::Null => false, 62 | _ => true, 63 | } 64 | } 65 | 66 | pub fn has_freqs(self) -> bool { 67 | match self { 68 | IndexOptions::DocsAndFreqs => true, 69 | IndexOptions::DocsAndFreqsAndPositions => true, 70 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => true, 71 | _ => false, 72 | } 73 | } 74 | 75 | pub fn has_positions(self) -> bool { 76 | match self { 77 | IndexOptions::DocsAndFreqsAndPositions => true, 78 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => true, 79 | _ => false, 80 | } 81 | } 82 | 83 | pub fn has_offsets(self) -> bool { 84 | match self { 85 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => true, 86 | _ => false, 87 | } 88 | } 89 | 90 | pub fn value(self) -> i32 { 91 | match self { 92 
| IndexOptions::Null => 0, 93 | IndexOptions::Docs => 1, 94 | IndexOptions::DocsAndFreqs => 2, 95 | IndexOptions::DocsAndFreqsAndPositions => 3, 96 | IndexOptions::DocsAndFreqsAndPositionsAndOffsets => 4, 97 | } 98 | } 99 | } 100 | 101 | impl Ord for IndexOptions { 102 | fn cmp(&self, other: &Self) -> Ordering { 103 | self.value().cmp(&other.value()) 104 | } 105 | } 106 | 107 | impl PartialOrd for IndexOptions { 108 | fn partial_cmp(&self, other: &Self) -> Option { 109 | Some(self.cmp(other)) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/core/doc/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod term; 15 | 16 | pub use self::term::*; 17 | 18 | mod field; 19 | 20 | pub use self::field::*; 21 | 22 | mod document; 23 | 24 | pub use self::document::*; 25 | 26 | mod index_options; 27 | 28 | pub use self::index_options::*; 29 | 30 | mod doc_values; 31 | 32 | pub use self::doc_values::*; 33 | -------------------------------------------------------------------------------- /src/core/doc/term.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use error::Result; 15 | use std::cmp::Ordering; 16 | 17 | /// A Term represents a word from text. This is the unit of search. It is 18 | /// composed of two elements, the text of the word, as a string, and the name of 19 | /// the field that the text occurred in. 20 | /// 21 | /// Note that terms may represent more than words from text fields, but also 22 | /// things like dates, email addresses, urls, etc. 23 | #[derive(Clone, Debug, PartialEq, Hash, Eq)] 24 | pub struct Term { 25 | pub field: String, 26 | pub bytes: Vec, 27 | } 28 | 29 | impl Term { 30 | /// Constructs a Term with the given field and bytes. 31 | ///

Note that a null field or null bytes value results in undefined 32 | /// behavior for most Lucene APIs that accept a Term parameter. 33 | /// 34 | ///

The provided BytesRef is copied when it is non null. 35 | pub fn new(field: String, bytes: Vec) -> Term { 36 | Term { field, bytes } 37 | } 38 | 39 | /// Returns the field of this term. The field indicates 40 | /// the part of a document which this term came from. 41 | pub fn field(&self) -> &str { 42 | &self.field 43 | } 44 | 45 | /// Returns the text of this term. In the case of words, this is simply the 46 | /// text of the word. In the case of dates and other types, this is an 47 | /// encoding of the object as a string. 48 | pub fn text(&self) -> Result { 49 | Ok(String::from_utf8(self.bytes.clone())?) 50 | } 51 | 52 | pub fn is_empty(&self) -> bool { 53 | self.field.is_empty() && self.bytes.is_empty() 54 | } 55 | 56 | pub fn copy_bytes(&mut self, bytes: &[u8]) { 57 | if self.bytes.len() != bytes.len() { 58 | self.bytes.resize(bytes.len(), 0); 59 | } 60 | self.bytes.copy_from_slice(bytes); 61 | } 62 | } 63 | 64 | impl PartialOrd for Term { 65 | fn partial_cmp(&self, other: &Self) -> Option { 66 | Some(self.cmp(other)) 67 | } 68 | } 69 | 70 | impl Ord for Term { 71 | fn cmp(&self, other: &Self) -> Ordering { 72 | let res = self.field.cmp(&other.field); 73 | if res == Ordering::Equal { 74 | self.bytes.cmp(&other.bytes) 75 | } else { 76 | res 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/core/index/merge/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod doc_id_merger; 15 | 16 | pub use self::doc_id_merger::*; 17 | 18 | mod merge_policy; 19 | 20 | pub use self::merge_policy::*; 21 | 22 | mod merge_rate_limiter; 23 | 24 | pub use self::merge_rate_limiter::*; 25 | 26 | mod merge_scheduler; 27 | 28 | pub use self::merge_scheduler::*; 29 | 30 | mod merge_state; 31 | 32 | pub use self::merge_state::*; 33 | 34 | mod segment_merger; 35 | 36 | pub use self::segment_merger::*; 37 | -------------------------------------------------------------------------------- /src/core/index/reader/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | mod directory_reader; 15 | 16 | pub use self::directory_reader::*; 17 | 18 | mod leaf_reader; 19 | 20 | pub use self::leaf_reader::*; 21 | 22 | mod leaf_reader_wrapper; 23 | 24 | pub use self::leaf_reader_wrapper::*; 25 | 26 | mod segment_reader; 27 | 28 | pub use self::segment_reader::*; 29 | 30 | mod index_lookup; 31 | 32 | pub use self::index_lookup::*; 33 | 34 | use core::codec::Codec; 35 | use core::codec::CodecTVFields; 36 | use core::doc::Document; 37 | use core::util::DocId; 38 | 39 | use error::Result; 40 | 41 | /// `IndexReader` providing an interface for accessing a point-in-time view of an index. 42 | /// 43 | /// Any changes made to the index via `IndexWriter` will not be visible until a new 44 | /// `IndexReader` is opened. It's best to use {@link 45 | /// StandardDirectoryReader#open(IndexWriter)} to obtain an `IndexReader`, if your 46 | /// `IndexWriter` is in-process. When you need to re-open to see changes to the 47 | /// index, it's best to use {@link DirectoryReader#openIfChanged(DirectoryReader)} 48 | /// since the new reader will share resources with the previous 49 | /// one when possible. Search of an index is done entirely 50 | /// through this abstract interface, so that any subclass which 51 | /// implements it is searchable. 52 | /// 53 | /// IndexReader instances for indexes on disk are usually constructed 54 | /// with a call to one of the static StandardDirectoryReader::open() methods. 55 | /// 56 | /// For efficiency, in this API documents are often referred to via 57 | /// *document numbers*, non-negative integers which each name a unique 58 | /// document in the index. These document numbers are ephemeral -- they may change 59 | /// as documents are added to and deleted from an index. Clients should thus not 60 | /// rely on a given document having the same number between sessions. 
61 | /// 62 | /// NOTE: `IndexReader` instances are completely thread 63 | /// safe, meaning multiple threads can call any of its methods, 64 | /// concurrently. If your application requires external 65 | /// synchronization, you should *not* synchronize on the 66 | /// `IndexReader` instance; use your own (non-Lucene) objects instead. 67 | pub trait IndexReader { 68 | type Codec: Codec; 69 | fn leaves(&self) -> Vec>; 70 | fn term_vector(&self, doc_id: DocId) -> Result>>; 71 | fn document(&self, doc_id: DocId, fields: &[String]) -> Result; 72 | fn max_doc(&self) -> i32; 73 | fn num_docs(&self) -> i32; 74 | fn num_deleted_docs(&self) -> i32 { 75 | self.max_doc() - self.num_docs() 76 | } 77 | fn has_deletions(&self) -> bool { 78 | self.num_deleted_docs() > 0 79 | } 80 | fn leaf_reader_for_doc(&self, doc: DocId) -> LeafReaderContext<'_, Self::Codec> { 81 | let leaves = self.leaves(); 82 | let size = leaves.len(); 83 | let mut lo = 0usize; 84 | let mut hi = size - 1; 85 | while hi >= lo { 86 | let mut mid = (lo + hi) >> 1; 87 | let mid_value = leaves[mid].doc_base; 88 | if doc < mid_value { 89 | hi = mid - 1; 90 | } else if doc > mid_value { 91 | lo = mid + 1; 92 | } else { 93 | while mid + 1 < size && leaves[mid + 1].doc_base == mid_value { 94 | mid += 1; 95 | } 96 | return leaves[mid].clone(); 97 | } 98 | } 99 | leaves[hi].clone() 100 | } 101 | 102 | // used for refresh 103 | fn refresh(&self) -> Result>>> { 104 | Ok(None) 105 | } 106 | } 107 | 108 | #[derive(Copy, Clone)] 109 | pub struct ReaderSlice { 110 | pub start: i32, 111 | pub length: i32, 112 | pub reader_index: usize, 113 | } 114 | 115 | impl ReaderSlice { 116 | pub fn new(start: i32, length: i32, reader_index: usize) -> ReaderSlice { 117 | ReaderSlice { 118 | start, 119 | length, 120 | reader_index, 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/core/index/writer/delete_policy.rs: 
-------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::index::writer::index_file_deleter::CommitPoint; 15 | use error::Result; 16 | 17 | /// Expert: policy for deletion of stale `IndexCommit index commits`. 18 | /// 19 | /// Implement this interface, and pass it to one 20 | /// of the `IndexWriter` or `IndexReader` 21 | /// constructors, to customize when older 22 | /// `IndexCommit point-in-time commits` 23 | /// are deleted from the index directory. The default deletion policy 24 | /// is `KeepOnlyLastCommitDeletionPolicy`, which always 25 | /// removes old commits as soon as a new commit is done (this 26 | /// matches the behavior before 2.2). 27 | /// 28 | /// One expected use case for this (and the reason why it 29 | /// was first created) is to work around problems with an 30 | /// index directory accessed via filesystems like NFS because 31 | /// NFS does not provide the "delete on last close" semantics 32 | /// that Lucene's "point in time" search normally relies on. 33 | /// By implementing a custom deletion policy, such as "a 34 | /// commit is only removed once it has been stale for more 35 | /// than X minutes", you can give your readers time to 36 | /// refresh to the new commit before `IndexWriter` 37 | /// removes the old commits. Note that doing so will 38 | /// increase the storage requirements of the index. 
See LUCENE-710 41 | /// for details. 42 | /// 43 | /// Implementers of sub-classes should make sure that `#clone()` 44 | /// returns an independent instance able to work with any other `IndexWriter` 45 | /// or `Directory` instance. 46 | pub trait IndexDeletionPolicy { 47 | /// This is called once when a writer is first 48 | /// instantiated to give the policy a chance to remove old 49 | /// commit points. 50 | /// 51 | /// The writer locates all index commits present in the 52 | /// index directory and calls this method. The policy may 53 | /// choose to delete some of the commit points, doing so by 54 | /// calling method `IndexCommit#delete delete()` 55 | /// of `IndexCommit`. 56 | /// 57 | /// Note: the last CommitPoint is the most recent one, 58 | /// i.e. the "front index state". Be careful not to delete it, 59 | /// unless you know for sure what you are doing, and unless 60 | /// you can afford to lose the index content while doing that. 61 | /// 62 | /// @param commits List of current 63 | /// `IndexCommit point-in-time commits`, 64 | /// sorted by age (the 0th one is the oldest commit). 65 | /// Note that for a new index this method is invoked with 66 | /// an empty list. 67 | fn on_init(&self, commits: Vec<&mut CommitPoint>) -> Result<()>; 68 | 69 | /// This is called each time the writer completed a commit. 70 | /// This gives the policy a chance to remove old commit points 71 | /// with each commit. 72 | /// 73 | /// The policy may now choose to delete old commit points 74 | /// by calling method `IndexCommit#delete delete()` 75 | /// of `IndexCommit`. 76 | /// 77 | /// This method is only called when `IndexWriter#commit` 78 | /// or `IndexWriter#close` is called, or possibly not at all 79 | /// if the `IndexWriter#rollback` is called. 80 | /// 81 | /// Note: the last CommitPoint is the most recent one, 82 | /// i.e. the "front index state". 
Be careful not to delete it, 83 | /// unless you know for sure what you are doing, and unless 84 | /// you can afford to lose the index content while doing that. 85 | /// 86 | /// @param commits List of `IndexCommit`, 87 | /// sorted by age (the 0th one is the oldest commit). 88 | fn on_commit(&self, commits: Vec<&mut CommitPoint>) -> Result<()>; 89 | } 90 | 91 | #[derive(Default)] 92 | pub struct KeepOnlyLastCommitDeletionPolicy; 93 | 94 | impl KeepOnlyLastCommitDeletionPolicy { 95 | pub fn on_init(&self, commits: Vec<&mut CommitPoint>) -> Result<()> { 96 | self.on_commit(commits) 97 | } 98 | 99 | pub fn on_commit(&self, mut commits: Vec<&mut CommitPoint>) -> Result<()> { 100 | commits.pop(); 101 | for commit in commits { 102 | commit.delete()?; 103 | } 104 | Ok(()) 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/core/index/writer/index_writer_config.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::codec::{Codec, CodecEnum, Lucene62Codec}; 15 | use core::index::merge::MergeScheduler; 16 | use core::index::merge::SerialMergeScheduler; 17 | use core::index::merge::{MergePolicy, TieredMergePolicy}; 18 | use core::index::writer::KeepOnlyLastCommitDeletionPolicy; 19 | use core::search::sort_field::Sort; 20 | 21 | use std::sync::Arc; 22 | 23 | /// Denotes a flush trigger is disabled. 24 | pub const DISABLE_AUTO_FLUSH: i32 = -1; 25 | 26 | /// Disabled by default (because IndexWriter flushes by RAM usage by default). 27 | pub const DEFAULT_MAX_BUFFERED_DELETE_TERMS: i32 = DISABLE_AUTO_FLUSH; 28 | 29 | /// Disabled by default (because IndexWriter flushes by RAM usage by default). 30 | pub const DEFAULT_MAX_BUFFERED_DOCS: i32 = DISABLE_AUTO_FLUSH; 31 | 32 | /// Default setting for `seg_reader_pooling` 33 | pub const DEFAULT_READER_POOLING: bool = false; 34 | 35 | /// Default value for compound file system for newly written segments 36 | /// (set to true). For batch indexing with very large 37 | /// ram buffers use false 38 | pub const DEFAULT_USE_COMPOUND_FILE_SYSTEM: bool = true; 39 | 40 | #[derive(Copy, Clone, Eq, PartialEq, Debug)] 41 | pub enum OpenMode { 42 | Create, 43 | Append, 44 | CreateOrAppend, 45 | } 46 | 47 | /// Holds all the configuration that is used to create an {@link IndexWriter}. 48 | /// Once {@link IndexWriter} has been created with this object, changes to this 49 | /// object will not affect the {@link IndexWriter} instance. For that, use 50 | /// {@link LiveIndexWriterConfig} that is returned from {@link IndexWriter#getConfig()}. 51 | /// 52 | /// All setter methods return {@link IndexWriterConfig} to allow chaining 53 | /// settings conveniently, for example: 54 | /// 55 | ///

 56 | /// IndexWriterConfig conf = new IndexWriterConfig(analyzer);
 57 | /// conf.setter1().setter2();
 58 | /// 
59 | /// 60 | /// @see IndexWriter#getConfig() 61 | pub struct IndexWriterConfig { 62 | pub use_compound_file: bool, 63 | pub max_buffered_delete_terms: Option, 64 | pub max_buffered_docs: Option, 65 | pub merge_policy: MP, 66 | pub merge_scheduler: MS, 67 | pub index_sort: Option, 68 | /// True if readers should be pooled. 69 | pub reader_pooling: bool, 70 | pub open_mode: OpenMode, 71 | pub codec: Arc, 72 | pub commit_on_close: bool, 73 | } 74 | 75 | impl Default for IndexWriterConfig { 76 | fn default() -> Self { 77 | Self::new( 78 | Arc::new(CodecEnum::Lucene62(Lucene62Codec::default())), 79 | SerialMergeScheduler {}, 80 | TieredMergePolicy::default(), 81 | ) 82 | } 83 | } 84 | 85 | impl IndexWriterConfig { 86 | pub fn new(codec: Arc, merge_scheduler: MS, merge_policy: MP) -> Self { 87 | IndexWriterConfig { 88 | use_compound_file: false, 89 | max_buffered_delete_terms: None, 90 | max_buffered_docs: None, 91 | merge_policy, 92 | merge_scheduler, 93 | index_sort: None, 94 | reader_pooling: true, 95 | open_mode: OpenMode::CreateOrAppend, 96 | codec, 97 | commit_on_close: true, 98 | } 99 | } 100 | 101 | pub fn max_buffered_delete_terms(&self) -> u32 { 102 | self.max_buffered_delete_terms.unwrap_or(0) 103 | } 104 | 105 | pub fn max_buffered_docs(&self) -> u32 { 106 | self.max_buffered_docs.unwrap_or(0) 107 | } 108 | 109 | pub fn flush_on_delete_terms(&self) -> bool { 110 | self.max_buffered_delete_terms.is_some() 111 | } 112 | 113 | pub fn flush_on_doc_count(&self) -> bool { 114 | self.max_buffered_docs.is_some() 115 | } 116 | 117 | pub fn merge_policy(&self) -> &MP { 118 | &self.merge_policy 119 | } 120 | 121 | pub fn index_sort(&self) -> Option<&Sort> { 122 | self.index_sort.as_ref() 123 | } 124 | 125 | pub fn index_deletion_policy(&self) -> KeepOnlyLastCommitDeletionPolicy { 126 | KeepOnlyLastCommitDeletionPolicy::default() 127 | } 128 | 129 | pub fn merge_scheduler(&self) -> MS { 130 | self.merge_scheduler.clone() 131 | } 132 | 133 | pub fn codec(&self) -> &C { 
134 | self.codec.as_ref() 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/core/index/writer/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod index_writer; 15 | 16 | pub use self::index_writer::*; 17 | 18 | mod bufferd_updates; 19 | 20 | pub use self::bufferd_updates::*; 21 | 22 | mod delete_policy; 23 | 24 | pub use self::delete_policy::*; 25 | 26 | mod dir_wrapper; 27 | 28 | pub use self::dir_wrapper::*; 29 | 30 | mod doc_consumer; 31 | 32 | pub use self::doc_consumer::*; 33 | 34 | mod doc_writer; 35 | 36 | pub use self::doc_writer::*; 37 | 38 | mod doc_writer_delete_queue; 39 | 40 | pub use self::doc_writer_delete_queue::*; 41 | 42 | mod doc_writer_flush_queue; 43 | 44 | pub use self::doc_writer_flush_queue::*; 45 | 46 | mod flush_control; 47 | 48 | pub use self::flush_control::*; 49 | 50 | mod flush_policy; 51 | 52 | pub use self::flush_policy::*; 53 | 54 | mod index_file_deleter; 55 | 56 | pub use self::index_file_deleter::*; 57 | 58 | mod index_writer_config; 59 | 60 | pub use self::index_writer_config::*; 61 | 62 | mod doc_writer_per_thread; 63 | 64 | pub use self::doc_writer_per_thread::*; 65 | 66 | mod prefix_code_terms; 67 | 68 | pub use self::prefix_code_terms::*; 69 | 70 | pub mod doc_values_update; 71 | 72 | pub use self::doc_values_update::*; 73 | 
-------------------------------------------------------------------------------- /src/core/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | pub mod analysis; 15 | pub mod codec; 16 | pub mod doc; 17 | pub mod highlight; 18 | pub mod index; 19 | pub mod search; 20 | pub mod store; 21 | pub mod util; 22 | -------------------------------------------------------------------------------- /src/core/search/cache/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | mod cache_policy; 15 | 16 | pub use self::cache_policy::*; 17 | 18 | mod lru_cache; 19 | 20 | pub use self::lru_cache::*; 21 | 22 | mod query_cache; 23 | 24 | pub use self::query_cache::*; 25 | -------------------------------------------------------------------------------- /src/core/search/collector/chain.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::Codec; 15 | use core::index::reader::LeafReaderContext; 16 | use core::search::collector::{Collector, ParallelLeafCollector, SearchCollector}; 17 | use core::search::scorer::Scorer; 18 | use core::util::DocId; 19 | use error::Result; 20 | 21 | /// ChainCollector makes it possible to collect on more than one collector in sequence. 
22 | pub struct ChainedCollector { 23 | first: A, 24 | second: B, 25 | } 26 | 27 | impl ChainedCollector { 28 | /// Constructor 29 | pub fn new(first: A, second: B) -> ChainedCollector { 30 | ChainedCollector { first, second } 31 | } 32 | } 33 | 34 | impl SearchCollector for ChainedCollector 35 | where 36 | A: SearchCollector, 37 | B: SearchCollector, 38 | { 39 | type LC = ChainedCollector; 40 | 41 | fn set_next_reader(&mut self, reader: &LeafReaderContext<'_, C>) -> Result<()> { 42 | self.first.set_next_reader(reader)?; 43 | self.second.set_next_reader(reader) 44 | } 45 | 46 | fn support_parallel(&self) -> bool { 47 | self.first.support_parallel() && self.second.support_parallel() 48 | } 49 | 50 | fn init_parallel(&mut self) { 51 | self.first.init_parallel(); 52 | self.second.init_parallel(); 53 | } 54 | 55 | fn leaf_collector( 56 | &self, 57 | reader: &LeafReaderContext<'_, C>, 58 | ) -> Result> { 59 | Ok(ChainedCollector { 60 | first: self.first.leaf_collector(reader)?, 61 | second: self.second.leaf_collector(reader)?, 62 | }) 63 | } 64 | 65 | fn finish_parallel(&mut self) -> Result<()> { 66 | // reverse order for finish 67 | self.second.finish_parallel()?; 68 | self.first.finish_parallel() 69 | } 70 | } 71 | 72 | impl Collector for ChainedCollector 73 | where 74 | A: Collector, 75 | B: Collector, 76 | { 77 | fn needs_scores(&self) -> bool { 78 | self.first.needs_scores() || self.second.needs_scores() 79 | } 80 | 81 | fn collect(&mut self, doc: DocId, scorer: &mut S) -> Result<()> { 82 | self.first.collect(doc, scorer)?; 83 | self.second.collect(doc, scorer) 84 | } 85 | } 86 | 87 | impl ParallelLeafCollector for ChainedCollector 88 | where 89 | A: ParallelLeafCollector, 90 | B: ParallelLeafCollector, 91 | { 92 | fn finish_leaf(&mut self) -> Result<()> { 93 | // reverse order for finish 94 | self.second.finish_leaf()?; 95 | self.first.finish_leaf() 96 | } 97 | } 98 | -------------------------------------------------------------------------------- 
/src/core/search/collector/early_terminating.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::codec::Codec; 15 | use core::index::reader::LeafReaderContext; 16 | use core::search::collector; 17 | use core::search::collector::{Collector, ParallelLeafCollector, SearchCollector}; 18 | use core::search::scorer::Scorer; 19 | use core::util::external::Volatile; 20 | use core::util::DocId; 21 | use error::{ErrorKind, Result}; 22 | use std::sync::Arc; 23 | 24 | pub struct EarlyTerminatingSortingCollector { 25 | early_terminated: Arc>, 26 | num_docs_to_collect_per_reader: usize, 27 | num_docs_collected_per_reader: usize, 28 | } 29 | 30 | impl EarlyTerminatingSortingCollector { 31 | pub fn new(num_docs_to_collect_per_reader: usize) -> EarlyTerminatingSortingCollector { 32 | assert!( 33 | num_docs_to_collect_per_reader > 0, 34 | format!( 35 | "num_docs_to_collect_per_reader must always be > 0, got {}", 36 | num_docs_to_collect_per_reader 37 | ) 38 | ); 39 | 40 | EarlyTerminatingSortingCollector { 41 | early_terminated: Arc::new(Volatile::new(false)), 42 | num_docs_to_collect_per_reader, 43 | num_docs_collected_per_reader: 0, 44 | } 45 | } 46 | 47 | pub fn early_terminated(&self) -> bool { 48 | self.early_terminated.read() 49 | } 50 | } 51 | 52 | impl SearchCollector for EarlyTerminatingSortingCollector { 53 | type LC = 
EarlyTerminatingLeafCollector; 54 | fn set_next_reader(&mut self, _reader: &LeafReaderContext<'_, C>) -> Result<()> { 55 | self.num_docs_collected_per_reader = 0; 56 | Ok(()) 57 | } 58 | 59 | fn support_parallel(&self) -> bool { 60 | true 61 | } 62 | 63 | fn leaf_collector(&self, _reader: &LeafReaderContext<'_, C>) -> Result { 64 | assert!(self.support_parallel()); 65 | Ok(EarlyTerminatingLeafCollector::new( 66 | self.num_docs_to_collect_per_reader, 67 | Arc::clone(&self.early_terminated), 68 | )) 69 | } 70 | 71 | fn finish_parallel(&mut self) -> Result<()> { 72 | Ok(()) 73 | } 74 | } 75 | 76 | impl Collector for EarlyTerminatingSortingCollector { 77 | fn needs_scores(&self) -> bool { 78 | false 79 | } 80 | 81 | fn collect(&mut self, _doc: DocId, _scorer: &mut S) -> Result<()> { 82 | self.num_docs_collected_per_reader += 1; 83 | 84 | if self.num_docs_collected_per_reader > self.num_docs_to_collect_per_reader { 85 | self.early_terminated.write(true); 86 | bail!(ErrorKind::Collector( 87 | collector::ErrorKind::LeafCollectionTerminated, 88 | )) 89 | } 90 | Ok(()) 91 | } 92 | } 93 | 94 | /// A `Collector` that early terminates collection of documents on a 95 | /// per-segment basis, if the segment was sorted according to the given 96 | /// `Sort`. 97 | /// 98 | /// *NOTE:* the `Collector` detects segments sorted according to a 99 | /// an `IndexWriterConfig#setIndexSort`. Also, it collects up to a specified 100 | /// `num_docs_to_collect_per_reader` from each segment, and therefore is mostly suitable 101 | /// for use in conjunction with collectors such as `TopDocsCollector`, and 102 | /// not e.g. `TotalHitCountCollector`. 103 | /// 104 | /// *NOTE*: If you wrap a `TopDocsCollector` that sorts in the same 105 | /// order as the index order, the returned top docs will be correct. 106 | /// However the total of hit count will be vastly underestimated since not all matching documents 107 | /// will have been collected. 
108 | pub struct EarlyTerminatingLeafCollector { 109 | early_terminated: Arc>, 110 | num_docs_to_collect: usize, 111 | num_docs_collected: usize, 112 | } 113 | 114 | impl EarlyTerminatingLeafCollector { 115 | pub fn new( 116 | num_docs_to_collect: usize, 117 | early_terminated: Arc>, 118 | ) -> EarlyTerminatingLeafCollector { 119 | EarlyTerminatingLeafCollector { 120 | early_terminated, 121 | num_docs_to_collect, 122 | num_docs_collected: 0, 123 | } 124 | } 125 | 126 | pub fn early_terminated(&self) -> bool { 127 | self.early_terminated.read() 128 | } 129 | } 130 | 131 | impl ParallelLeafCollector for EarlyTerminatingLeafCollector { 132 | fn finish_leaf(&mut self) -> Result<()> { 133 | Ok(()) 134 | } 135 | } 136 | 137 | impl Collector for EarlyTerminatingLeafCollector { 138 | fn needs_scores(&self) -> bool { 139 | false 140 | } 141 | 142 | fn collect(&mut self, _doc: i32, _scorer: &mut S) -> Result<()> { 143 | self.num_docs_collected += 1; 144 | 145 | if self.num_docs_collected > self.num_docs_to_collect { 146 | self.early_terminated.write(true); 147 | bail!(ErrorKind::Collector( 148 | collector::ErrorKind::LeafCollectionTerminated, 149 | )) 150 | } 151 | Ok(()) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/core/search/collector/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | mod top_docs; 15 | 16 | pub use self::top_docs::*; 17 | 18 | mod early_terminating; 19 | 20 | pub use self::early_terminating::*; 21 | 22 | mod timeout; 23 | 24 | pub use self::timeout::*; 25 | 26 | mod chain; 27 | 28 | pub use self::chain::*; 29 | 30 | use error::Result; 31 | 32 | use core::codec::Codec; 33 | use core::index::reader::LeafReaderContext; 34 | use core::search::scorer::Scorer; 35 | use core::util::DocId; 36 | 37 | error_chain! { 38 | types { 39 | Error, ErrorKind, ResultExt; 40 | } 41 | errors { 42 | LeafCollectionTerminated { 43 | description("Leaf collection terminated") 44 | } 45 | CollectionTerminated { 46 | description("Collection terminated") 47 | } 48 | CollectionTimeout { 49 | description("Collection timeout") 50 | } 51 | 52 | CollectionFailed { 53 | description("Collection failed") 54 | } 55 | } 56 | } 57 | 58 | /// Expert: Collectors are primarily meant to be used to 59 | /// gather raw results from a search, and implement sorting 60 | /// or custom result filtering, collation, etc. 61 | /// 62 | /// `Collector` decouples the score from the collected doc: 63 | /// the score computation is skipped entirely if it's not 64 | /// needed. If your collector may request the 65 | /// score for a single hit multiple times, you should use 66 | /// `ScoreCachingWrappingScorer`. 67 | /// 68 | /// *NOTE:* The doc that is passed to the collect 69 | /// method is relative to the current reader. If your 70 | /// collector needs to resolve this to the docID space of the 71 | /// Multi*Reader, you must re-base it by recording the 72 | /// docBase from the most recent setNextReader call. 73 | /// 74 | /// Not all collectors will need to rebase the docID. For 75 | /// example, a collector that simply counts the total number 76 | /// of hits would skip it. 77 | pub trait SearchCollector: Collector { 78 | type LC: ParallelLeafCollector; 79 | /// This method is called before collecting on a new leaf. 
80 | fn set_next_reader(&mut self, reader: &LeafReaderContext<'_, C>) -> Result<()>; 81 | 82 | /// iff this collector support parallel collect 83 | fn support_parallel(&self) -> bool; 84 | fn init_parallel(&mut self) {} 85 | 86 | /// segment collector for parallel search 87 | fn leaf_collector(&self, reader: &LeafReaderContext<'_, C>) -> Result; 88 | 89 | fn finish_parallel(&mut self) -> Result<()>; 90 | } 91 | 92 | impl<'a, T: SearchCollector + 'a> SearchCollector for &'a mut T { 93 | type LC = T::LC; 94 | 95 | fn set_next_reader(&mut self, reader: &LeafReaderContext<'_, C>) -> Result<()> { 96 | (**self).set_next_reader(reader) 97 | } 98 | 99 | fn support_parallel(&self) -> bool { 100 | (**self).support_parallel() 101 | } 102 | 103 | fn init_parallel(&mut self) { 104 | (**self).init_parallel() 105 | } 106 | 107 | fn leaf_collector(&self, reader: &LeafReaderContext<'_, C>) -> Result { 108 | (**self).leaf_collector(reader) 109 | } 110 | 111 | fn finish_parallel(&mut self) -> Result<()> { 112 | (**self).finish_parallel() 113 | } 114 | } 115 | 116 | pub trait Collector { 117 | /// Indicates if document scores are needed by this collector. 118 | /// return `true` if scores are needed. 119 | fn needs_scores(&self) -> bool; 120 | 121 | /// Called once for every document matching a query, with the unbased document 122 | /// number. 123 | /// Note: The collection of the current segment can be terminated by throwing 124 | /// a `ErrorKind::LeafCollectionTerminated`. In this case, the last docs of the 125 | /// current `LeafReader` will be skipped and `IndexSearcher` 126 | /// will swallow the exception and continue collection with the next leaf. 127 | /// 128 | /// Note: This is called in an inner search loop. For good search performance, 129 | /// implementations of this method should not call `IndexSearcher::doc(DocId)` on every hit. 130 | /// Doing so can slow searches by an order of magnitude or more. 
131 | fn collect(&mut self, doc: DocId, scorer: &mut S) -> Result<()>; 132 | } 133 | 134 | impl<'a, T: Collector + 'a> Collector for &'a mut T { 135 | fn needs_scores(&self) -> bool { 136 | (**self).needs_scores() 137 | } 138 | 139 | fn collect(&mut self, doc: i32, scorer: &mut S) -> Result<()> { 140 | (**self).collect(doc, scorer) 141 | } 142 | } 143 | 144 | /// `Collector` that collect parallel for a single segment. 145 | /// 146 | /// once finished, the `finish_leaf` method must be 147 | /// called to notify to main thread. 148 | pub trait ParallelLeafCollector: Collector + Send + 'static { 149 | fn finish_leaf(&mut self) -> Result<()>; 150 | } 151 | -------------------------------------------------------------------------------- /src/core/search/collector/timeout.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::codec::Codec; 15 | use core::index::reader::LeafReaderContext; 16 | use core::search::collector; 17 | use core::search::collector::{Collector, ParallelLeafCollector, SearchCollector}; 18 | use core::search::scorer::Scorer; 19 | use core::util::external::Volatile; 20 | use core::util::DocId; 21 | use error::{ErrorKind, Result}; 22 | use std::sync::Arc; 23 | use std::time::{Duration, SystemTime}; 24 | 25 | /// the `TimeoutCollector` collector is used to timeout search requests that 26 | /// take longer than the maximum allowed search time limit. 27 | /// 28 | /// After this time is exceeded, the search thread is stopped by return a 29 | /// `CollectionTerminated` error. 30 | /// 31 | /// this collector is useful if your search must be complete at some specific time. 32 | /// you can use the `ChainedCollector` to compose this collector with eg. 33 | /// `TopDocsCollector`. 34 | pub struct TimeoutCollector { 35 | timeout_duration: Duration, 36 | start_time: SystemTime, 37 | timeout: Arc>, 38 | } 39 | 40 | impl TimeoutCollector { 41 | pub fn new(timeout_duration: Duration, start_time: SystemTime) -> TimeoutCollector { 42 | TimeoutCollector { 43 | timeout_duration, 44 | start_time, 45 | timeout: Arc::new(Volatile::new(false)), 46 | } 47 | } 48 | 49 | pub fn timeout(&self) -> bool { 50 | self.timeout.read() 51 | } 52 | } 53 | 54 | impl SearchCollector for TimeoutCollector { 55 | type LC = TimeoutLeafCollector; 56 | 57 | fn set_next_reader(&mut self, _reader: &LeafReaderContext<'_, C>) -> Result<()> { 58 | Ok(()) 59 | } 60 | 61 | fn support_parallel(&self) -> bool { 62 | true 63 | } 64 | 65 | fn leaf_collector( 66 | &self, 67 | _reader: &LeafReaderContext<'_, C>, 68 | ) -> Result { 69 | Ok(TimeoutLeafCollector::new( 70 | self.timeout_duration, 71 | self.start_time, 72 | Arc::clone(&self.timeout), 73 | )) 74 | } 75 | 76 | fn finish_parallel(&mut self) -> Result<()> { 77 | Ok(()) 78 | } 79 | } 80 | 81 | impl Collector for TimeoutCollector { 82 | fn 
needs_scores(&self) -> bool { 83 | false 84 | } 85 | 86 | fn collect(&mut self, _doc: DocId, _scorer: &mut S) -> Result<()> { 87 | let now = SystemTime::now(); 88 | if self.start_time < now && now.duration_since(self.start_time)? >= self.timeout_duration { 89 | self.timeout.write(true); 90 | bail!(ErrorKind::Collector( 91 | collector::ErrorKind::CollectionTimeout, 92 | )) 93 | } 94 | Ok(()) 95 | } 96 | } 97 | 98 | pub struct TimeoutLeafCollector { 99 | timeout_duration: Duration, 100 | start_time: SystemTime, 101 | timeout: Arc>, 102 | } 103 | 104 | impl TimeoutLeafCollector { 105 | pub fn new( 106 | timeout_duration: Duration, 107 | start_time: SystemTime, 108 | timeout: Arc>, 109 | ) -> TimeoutLeafCollector { 110 | TimeoutLeafCollector { 111 | timeout_duration, 112 | start_time, 113 | timeout, 114 | } 115 | } 116 | } 117 | 118 | impl Collector for TimeoutLeafCollector { 119 | fn needs_scores(&self) -> bool { 120 | false 121 | } 122 | 123 | fn collect(&mut self, _doc: i32, _scorer: &mut S) -> Result<()> { 124 | let now = SystemTime::now(); 125 | if self.start_time < now && now.duration_since(self.start_time)? >= self.timeout_duration { 126 | self.timeout.write(true); 127 | bail!(ErrorKind::Collector( 128 | collector::ErrorKind::CollectionTerminated, 129 | )) 130 | } 131 | Ok(()) 132 | } 133 | } 134 | 135 | impl ParallelLeafCollector for TimeoutLeafCollector { 136 | fn finish_leaf(&mut self) -> Result<()> { 137 | Ok(()) 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/core/search/explanation.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | #[derive(Serialize, Deserialize)] 15 | pub struct Explanation { 16 | is_match: bool, 17 | value: f32, 18 | description: String, 19 | details: Vec, 20 | } 21 | 22 | impl Explanation { 23 | pub fn new( 24 | is_match: bool, 25 | value: f32, 26 | description: String, 27 | details: Vec, 28 | ) -> Explanation { 29 | let value = if !is_match { 0.0f32 } else { value }; 30 | 31 | Explanation { 32 | is_match, 33 | value, 34 | description, 35 | details, 36 | } 37 | } 38 | 39 | pub fn is_match(&self) -> bool { 40 | self.is_match 41 | } 42 | 43 | pub fn value(&self) -> f32 { 44 | self.value 45 | } 46 | 47 | pub fn description(&self) -> String { 48 | self.description.clone() 49 | } 50 | 51 | pub fn summary(&self) -> String { 52 | format!("{} = {}", self.value, self.description) 53 | } 54 | 55 | pub fn details(&self) -> &[Explanation] { 56 | self.details.as_ref() 57 | } 58 | 59 | pub fn to_string(&self, depth: i32) -> String { 60 | let mut buffer = String::from(""); 61 | 62 | for _i in 0..depth { 63 | buffer.push_str(" "); 64 | } 65 | 66 | buffer.push_str(&self.summary()); 67 | buffer.push_str("\n"); 68 | 69 | for detail in &self.details { 70 | buffer.push_str(&detail.to_string(depth + 1)) 71 | } 72 | 73 | buffer 74 | } 75 | } 76 | 77 | impl Clone for Explanation { 78 | fn clone(&self) -> Self { 79 | let mut details: Vec = vec![]; 80 | for detail in &self.details { 81 | details.push(detail.clone()); 82 | } 83 | Explanation { 84 | is_match: self.is_match, 85 | value: self.value(), 86 | description: self.description(), 87 | details, 88 | } 89 | } 90 | } 91 | 
-------------------------------------------------------------------------------- /src/core/search/query/boost_query.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::any::Any; 15 | use std::f32; 16 | use std::fmt; 17 | 18 | use core::codec::Codec; 19 | use core::index::reader::LeafReaderContext; 20 | use core::search::explanation::Explanation; 21 | use core::search::query::{Query, TermQuery, Weight}; 22 | use core::search::scorer::Scorer; 23 | use core::search::searcher::SearchPlanBuilder; 24 | use core::util::DocId; 25 | 26 | use error::Result; 27 | 28 | const BOOST_QUERY: &str = "boost"; 29 | 30 | /// A `Query` wrapper that allows to give a boost to the wrapped query. 31 | /// 32 | /// Boost values that are less than one will give less importance to this 33 | /// query compared to other ones while values that are greater than one will 34 | /// give more importance to the scores returned by this query. 
35 | pub struct BoostQuery { 36 | query: Box>, 37 | boost: f32, 38 | } 39 | 40 | impl BoostQuery { 41 | pub fn build(query: Box>, boost: f32) -> Box> { 42 | if (boost - 1.0f32).abs() <= f32::EPSILON { 43 | query 44 | } else { 45 | Box::new(BoostQuery { query, boost }) 46 | } 47 | } 48 | } 49 | 50 | impl Query for BoostQuery { 51 | fn create_weight( 52 | &self, 53 | searcher: &dyn SearchPlanBuilder, 54 | needs_scores: bool, 55 | ) -> Result>> { 56 | let mut weight = self.query.create_weight(searcher, needs_scores)?; 57 | Weight::::normalize(weight.as_mut(), 1.0f32, self.boost); 58 | // weight.normalize(1.0f32, self.boost); 59 | Ok(Box::new(BoostWeight::new(weight, self.boost))) 60 | } 61 | 62 | fn extract_terms(&self) -> Vec { 63 | self.query.extract_terms() 64 | } 65 | 66 | fn as_any(&self) -> &dyn Any { 67 | self 68 | } 69 | } 70 | 71 | impl fmt::Display for BoostQuery { 72 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 73 | write!( 74 | f, 75 | "BoostQuery(query: {}, boost: {})", 76 | &self.query, self.boost 77 | ) 78 | } 79 | } 80 | 81 | struct BoostWeight { 82 | weight: Box>, 83 | boost: f32, 84 | } 85 | 86 | impl BoostWeight { 87 | pub fn new(weight: Box>, boost: f32) -> BoostWeight { 88 | assert!((boost - 1.0f32).abs() > f32::EPSILON); 89 | 90 | BoostWeight { weight, boost } 91 | } 92 | } 93 | 94 | impl Weight for BoostWeight { 95 | fn create_scorer( 96 | &self, 97 | leaf_reader: &LeafReaderContext<'_, C>, 98 | ) -> Result>> { 99 | self.weight.create_scorer(leaf_reader) 100 | } 101 | 102 | fn query_type(&self) -> &'static str { 103 | BOOST_QUERY 104 | } 105 | 106 | fn actual_query_type(&self) -> &'static str { 107 | self.weight.query_type() 108 | } 109 | 110 | fn normalize(&mut self, norm: f32, boost: f32) { 111 | self.weight.normalize(norm, boost * self.boost) 112 | } 113 | 114 | fn value_for_normalization(&self) -> f32 { 115 | self.weight.value_for_normalization() 116 | } 117 | 118 | fn needs_scores(&self) -> bool { 119 | 
self.weight.needs_scores() 120 | } 121 | 122 | fn explain(&self, reader: &LeafReaderContext<'_, C>, doc: DocId) -> Result { 123 | self.weight.explain(reader, doc) 124 | } 125 | } 126 | 127 | impl fmt::Display for BoostWeight { 128 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 129 | write!( 130 | f, 131 | "BoostWeight(weight: {}, boost: {})", 132 | &self.weight, self.boost 133 | ) 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/core/search/query/boosting_query.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use std::any::Any; 15 | use std::fmt; 16 | 17 | use core::codec::Codec; 18 | use core::index::reader::LeafReaderContext; 19 | use core::search::explanation::Explanation; 20 | use core::search::query::{Query, TermQuery, Weight}; 21 | use core::search::scorer::BoostingScorer; 22 | use core::search::scorer::Scorer; 23 | use core::search::searcher::SearchPlanBuilder; 24 | use core::util::DocId; 25 | use error::Result; 26 | 27 | const BOOSTING_QUERY: &str = "boosting"; 28 | 29 | pub struct BoostingQuery { 30 | positive: Box>, 31 | negative: Box>, 32 | negative_boost: f32, 33 | } 34 | 35 | impl BoostingQuery { 36 | pub fn build( 37 | positive: Box>, 38 | negative: Box>, 39 | negative_boost: f32, 40 | ) -> Box> { 41 | Box::new(BoostingQuery { 42 | positive, 43 | negative, 44 | negative_boost, 45 | }) 46 | } 47 | } 48 | 49 | impl Query for BoostingQuery { 50 | fn create_weight( 51 | &self, 52 | searcher: &dyn SearchPlanBuilder, 53 | needs_scores: bool, 54 | ) -> Result>> { 55 | Ok(Box::new(BoostingWeight::new( 56 | self.positive.create_weight(searcher, needs_scores)?, 57 | self.negative.create_weight(searcher, false)?, 58 | self.negative_boost, 59 | ))) 60 | } 61 | 62 | fn extract_terms(&self) -> Vec { 63 | self.positive.extract_terms() 64 | } 65 | 66 | fn as_any(&self) -> &dyn Any { 67 | self 68 | } 69 | } 70 | 71 | impl fmt::Display for BoostingQuery { 72 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 73 | write!( 74 | f, 75 | "BoostingQuery(positive: {}, negative: {}, negative_boost: {})", 76 | &self.positive, &self.negative, self.negative_boost 77 | ) 78 | } 79 | } 80 | 81 | struct BoostingWeight { 82 | positive_weight: Box>, 83 | negative_weight: Box>, 84 | negative_boost: f32, 85 | } 86 | 87 | impl BoostingWeight { 88 | pub fn new( 89 | positive_weight: Box>, 90 | negative_weight: Box>, 91 | negative_boost: f32, 92 | ) -> BoostingWeight { 93 | BoostingWeight { 94 | positive_weight, 95 | negative_weight, 96 | negative_boost, 97 | } 98 | } 99 | } 
100 | 101 | impl Weight for BoostingWeight { 102 | fn create_scorer( 103 | &self, 104 | leaf_reader: &LeafReaderContext<'_, C>, 105 | ) -> Result>> { 106 | if let (Some(positive_scorer), Some(negative_scorer)) = ( 107 | self.positive_weight.create_scorer(leaf_reader)?, 108 | self.negative_weight.create_scorer(leaf_reader)?, 109 | ) { 110 | Ok(Some(Box::new(BoostingScorer::new( 111 | positive_scorer, 112 | negative_scorer, 113 | self.negative_boost, 114 | )))) 115 | } else { 116 | Ok(None) 117 | } 118 | } 119 | 120 | fn query_type(&self) -> &'static str { 121 | BOOSTING_QUERY 122 | } 123 | 124 | fn actual_query_type(&self) -> &'static str { 125 | BOOSTING_QUERY 126 | } 127 | 128 | fn normalize(&mut self, norm: f32, boost: f32) { 129 | self.positive_weight.normalize(norm, boost) 130 | } 131 | 132 | fn value_for_normalization(&self) -> f32 { 133 | self.positive_weight.value_for_normalization() 134 | } 135 | 136 | fn needs_scores(&self) -> bool { 137 | self.positive_weight.needs_scores() 138 | } 139 | 140 | fn explain(&self, reader: &LeafReaderContext<'_, C>, doc: DocId) -> Result { 141 | self.positive_weight.explain(reader, doc) 142 | } 143 | } 144 | 145 | impl fmt::Display for BoostingWeight { 146 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 147 | write!( 148 | f, 149 | "BoostingWeight(positive: {}, negative: {}, negative_boost: {})", 150 | &self.positive_weight, &self.negative_weight, self.negative_boost 151 | ) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/core/search/query/exists_query.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::any::Any; 15 | use std::fmt; 16 | 17 | use core::codec::doc_values::DocValuesIterator; 18 | use core::codec::Codec; 19 | use core::index::reader::LeafReaderContext; 20 | use core::search::explanation::Explanation; 21 | use core::search::query::{Query, TermQuery, Weight}; 22 | use core::search::scorer::ConstantScoreScorer; 23 | use core::search::scorer::Scorer; 24 | use core::search::searcher::SearchPlanBuilder; 25 | use core::util::DocId; 26 | use error::Result; 27 | 28 | const EXISTS_QUERY: &str = "exists"; 29 | 30 | pub struct ExistsQuery { 31 | field: String, 32 | } 33 | 34 | impl ExistsQuery { 35 | pub fn build(field: String) -> ExistsQuery { 36 | ExistsQuery { field } 37 | } 38 | } 39 | 40 | impl Query for ExistsQuery { 41 | fn create_weight( 42 | &self, 43 | _searcher: &dyn SearchPlanBuilder, 44 | _needs_scores: bool, 45 | ) -> Result>> { 46 | Ok(Box::new(ExistsWeight::new(self.field.clone()))) 47 | } 48 | 49 | fn extract_terms(&self) -> Vec { 50 | vec![] 51 | } 52 | 53 | fn as_any(&self) -> &dyn Any { 54 | self 55 | } 56 | } 57 | 58 | impl fmt::Display for ExistsQuery { 59 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 60 | write!(f, "ExistsQuery(field={})", &self.field) 61 | } 62 | } 63 | 64 | struct ExistsWeight { 65 | field: String, 66 | weight: f32, 67 | norm: f32, 68 | } 69 | 70 | impl ExistsWeight { 71 | pub fn new(field: String) -> ExistsWeight { 72 | ExistsWeight { 73 | field, 74 | weight: 0f32, 75 | norm: 0f32, 76 | } 77 | } 78 | } 79 | 80 | impl Weight for ExistsWeight { 81 | fn create_scorer( 82 | &self, 83 | leaf_reader: &LeafReaderContext<'_, C>, 84 
| ) -> Result>> { 85 | if let Some(field_info) = leaf_reader.reader.field_info(self.field.as_str()) { 86 | let cost: i32 = leaf_reader.reader.max_doc(); 87 | let doc_iterator = DocValuesIterator::new(field_info.name.as_str(), cost, leaf_reader); 88 | 89 | return Ok(Some(Box::new(ConstantScoreScorer::new( 90 | self.weight, 91 | doc_iterator, 92 | cost as usize, 93 | )))); 94 | } 95 | 96 | Ok(None) 97 | } 98 | 99 | fn query_type(&self) -> &'static str { 100 | EXISTS_QUERY 101 | } 102 | 103 | fn actual_query_type(&self) -> &'static str { 104 | EXISTS_QUERY 105 | } 106 | 107 | fn normalize(&mut self, norm: f32, boost: f32) { 108 | self.norm = norm; 109 | self.weight = norm * boost; 110 | } 111 | 112 | fn value_for_normalization(&self) -> f32 { 113 | self.weight * self.weight 114 | } 115 | 116 | fn needs_scores(&self) -> bool { 117 | false 118 | } 119 | 120 | fn explain(&self, _reader: &LeafReaderContext<'_, C>, _doc: DocId) -> Result { 121 | Ok(Explanation::new( 122 | true, 123 | self.weight, 124 | format!("{}, product of:", self), 125 | vec![Explanation::new( 126 | true, 127 | self.weight, 128 | "exists".to_string(), 129 | vec![], 130 | )], 131 | )) 132 | } 133 | } 134 | 135 | impl fmt::Display for ExistsWeight { 136 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 137 | write!( 138 | f, 139 | "ExistsWeight(field={}, weight={}, norm={})", 140 | &self.field, self.weight, self.norm 141 | ) 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/core/search/query/spans/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | #[macro_use] 15 | mod span; 16 | 17 | pub use self::span::*; 18 | 19 | mod span_boost; 20 | 21 | pub use self::span_boost::*; 22 | 23 | mod span_near; 24 | 25 | pub use self::span_near::*; 26 | 27 | mod span_or; 28 | 29 | pub use self::span_or::*; 30 | 31 | mod span_term; 32 | 33 | pub use self::span_term::*; 34 | -------------------------------------------------------------------------------- /src/core/search/scorer/boosting_scorer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::search::scorer::Scorer; 15 | use core::search::DocIterator; 16 | use core::util::DocId; 17 | use error::Result; 18 | 19 | pub struct BoostingScorer { 20 | positive: Box, 21 | negative: Box, 22 | negative_boost: f32, 23 | } 24 | 25 | impl BoostingScorer { 26 | pub fn new( 27 | positive: Box, 28 | negative: Box, 29 | negative_boost: f32, 30 | ) -> BoostingScorer { 31 | debug_assert!(negative_boost > 0.0 && negative_boost < 1.0); 32 | BoostingScorer { 33 | positive, 34 | negative, 35 | negative_boost, 36 | } 37 | } 38 | } 39 | 40 | impl Scorer for BoostingScorer { 41 | fn score(&mut self) -> Result { 42 | let current_doc = self.positive.doc_id(); 43 | let mut score = self.positive.score()?; 44 | 45 | if current_doc == self.negative.advance(current_doc)? { 46 | score *= self.negative_boost; 47 | } 48 | 49 | Ok(score) 50 | } 51 | } 52 | 53 | impl DocIterator for BoostingScorer { 54 | fn doc_id(&self) -> DocId { 55 | self.positive.doc_id() 56 | } 57 | 58 | fn next(&mut self) -> Result { 59 | self.positive.next() 60 | } 61 | 62 | fn advance(&mut self, target: DocId) -> Result { 63 | self.positive.advance(target) 64 | } 65 | 66 | fn cost(&self) -> usize { 67 | self.positive.cost() 68 | } 69 | 70 | fn matches(&mut self) -> Result { 71 | self.positive.matches() 72 | } 73 | 74 | fn approximate_next(&mut self) -> Result { 75 | self.positive.approximate_next() 76 | } 77 | 78 | fn approximate_advance(&mut self, target: DocId) -> Result { 79 | self.positive.approximate_advance(target) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/core/search/scorer/min_scorer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use error::Result; 15 | 16 | use core::search::scorer::Scorer; 17 | use core::search::DocIterator; 18 | use core::util::DocId; 19 | 20 | // currently directory merge `ScoreCachingWrappingScorer` into this class 21 | pub struct MinScoreScorer { 22 | origin: S, 23 | min_score: f32, 24 | // cache these two fields to avoid calculate score twice 25 | cur_doc: DocId, 26 | cur_score: f32, 27 | } 28 | 29 | impl MinScoreScorer { 30 | pub fn new(origin: S, min_score: f32) -> Self { 31 | MinScoreScorer { 32 | origin, 33 | min_score, 34 | cur_doc: -1, 35 | cur_score: 0f32, 36 | } 37 | } 38 | } 39 | 40 | impl Scorer for MinScoreScorer { 41 | fn score(&mut self) -> Result { 42 | let doc = self.origin.doc_id(); 43 | if doc != self.cur_doc { 44 | self.cur_score = self.origin.score()?; 45 | self.cur_doc = doc; 46 | } 47 | Ok(self.cur_score) 48 | } 49 | } 50 | 51 | impl DocIterator for MinScoreScorer { 52 | fn doc_id(&self) -> DocId { 53 | self.origin.doc_id() 54 | } 55 | 56 | fn next(&mut self) -> Result { 57 | self.approximate_next() 58 | } 59 | 60 | fn advance(&mut self, target: DocId) -> Result { 61 | self.approximate_advance(target) 62 | } 63 | 64 | fn cost(&self) -> usize { 65 | self.origin.cost() 66 | } 67 | 68 | fn matches(&mut self) -> Result { 69 | Ok(self.origin.matches()? && self.score()? 
> self.min_score) 70 | } 71 | 72 | fn approximate_next(&mut self) -> Result { 73 | self.origin.approximate_next() 74 | } 75 | 76 | fn approximate_advance(&mut self, target: DocId) -> Result { 77 | self.origin.approximate_advance(target) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/core/search/scorer/req_opt_scorer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::search::scorer::Scorer; 15 | use core::search::DocIterator; 16 | use core::util::DocId; 17 | use error::Result; 18 | 19 | const OPT_SCORE_THRESHOLD: usize = 100; 20 | 21 | /// A Scorer for queries with a required part and an optional part. 22 | /// Delays `advance()` on the optional part until a `score()` is needed. 
23 | pub struct ReqOptScorer { 24 | req_scorer: Box, 25 | opt_scorer: Box, 26 | scores_sum: f32, 27 | scores_num: usize, 28 | } 29 | 30 | impl ReqOptScorer { 31 | pub fn new(req_scorer: Box, opt_scorer: Box) -> ReqOptScorer { 32 | ReqOptScorer { 33 | req_scorer, 34 | opt_scorer, 35 | scores_sum: 0f32, 36 | scores_num: 0usize, 37 | } 38 | } 39 | } 40 | 41 | impl Scorer for ReqOptScorer { 42 | fn score(&mut self) -> Result { 43 | let current_doc = self.req_scorer.doc_id(); 44 | let mut score = self.req_scorer.score()?; 45 | 46 | if self.scores_num > OPT_SCORE_THRESHOLD { 47 | if 2.0 * score < self.scores_sum / self.scores_num as f32 { 48 | return Ok(score); 49 | } 50 | } 51 | 52 | self.scores_sum += score; 53 | self.scores_num += 1; 54 | 55 | let mut opt_doc = self.opt_scorer.doc_id(); 56 | if opt_doc < current_doc { 57 | opt_doc = self.opt_scorer.advance(current_doc)?; 58 | } 59 | 60 | if opt_doc == current_doc { 61 | score += self.opt_scorer.score()?; 62 | } 63 | 64 | Ok(score) 65 | } 66 | } 67 | 68 | impl DocIterator for ReqOptScorer { 69 | fn doc_id(&self) -> DocId { 70 | self.req_scorer.doc_id() 71 | } 72 | 73 | fn next(&mut self) -> Result { 74 | self.req_scorer.next() 75 | } 76 | 77 | fn advance(&mut self, target: DocId) -> Result { 78 | self.req_scorer.advance(target) 79 | } 80 | 81 | fn cost(&self) -> usize { 82 | self.req_scorer.cost() 83 | } 84 | 85 | fn matches(&mut self) -> Result { 86 | self.req_scorer.matches() 87 | } 88 | 89 | fn approximate_next(&mut self) -> Result { 90 | self.req_scorer.approximate_next() 91 | } 92 | 93 | fn approximate_advance(&mut self, target: DocId) -> Result { 94 | self.req_scorer.approximate_advance(target) 95 | } 96 | } 97 | 98 | #[cfg(test)] 99 | mod tests { 100 | use super::*; 101 | use core::search::scorer::*; 102 | use core::search::tests::*; 103 | use core::search::*; 104 | 105 | #[test] 106 | fn test_score() { 107 | let s1 = create_mock_scorer(vec![1, 2, 3, 4, 5]); 108 | let s2 = create_mock_scorer(vec![2, 3, 5]); 109 
| let s3 = create_mock_scorer(vec![2, 5]); 110 | let s4 = create_mock_scorer(vec![3, 4, 5]); 111 | 112 | let conjunction_scorer: Box = Box::new(ConjunctionScorer::new(vec![s1, s2])); 113 | let disjunction_scorer: Box = 114 | Box::new(DisjunctionSumScorer::new(vec![s3, s4], true, 0)); 115 | let mut scorer = ReqOptScorer::new(conjunction_scorer, disjunction_scorer); 116 | 117 | assert_eq!(scorer.doc_id(), -1); 118 | 119 | assert_eq!(scorer.next().unwrap(), 2); 120 | assert!((scorer.score().unwrap() - 6.0) < ::std::f32::EPSILON); 121 | 122 | assert_eq!(scorer.next().unwrap(), 3); 123 | assert!((scorer.score().unwrap() - 9.0) < ::std::f32::EPSILON); 124 | 125 | assert_eq!(scorer.next().unwrap(), 5); 126 | assert!((scorer.score().unwrap() - 20.0) < ::std::f32::EPSILON); 127 | 128 | assert_eq!(scorer.next().unwrap(), NO_MORE_DOCS); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/core/search/scorer/term_scorer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::codec::PostingIterator; 15 | use core::search::scorer::Scorer; 16 | use core::search::similarity::SimScorer; 17 | use core::search::DocIterator; 18 | use core::util::DocId; 19 | use error::Result; 20 | 21 | pub struct TermScorer { 22 | sim_scorer: Box, 23 | postings_iterator: T, 24 | } 25 | 26 | impl TermScorer { 27 | pub fn new(sim_scorer: Box, postings_iterator: T) -> Self { 28 | TermScorer { 29 | sim_scorer, 30 | postings_iterator, 31 | } 32 | } 33 | 34 | fn freq(&self) -> i32 { 35 | if let Ok(f) = self.postings_iterator.freq() { 36 | f 37 | } else { 38 | 1 39 | } 40 | } 41 | } 42 | 43 | impl Scorer for TermScorer { 44 | fn score(&mut self) -> Result { 45 | let doc_id = self.doc_id(); 46 | let freq = self.freq(); 47 | Ok(self.sim_scorer.score(doc_id, freq as f32)?) 48 | } 49 | } 50 | 51 | impl DocIterator for TermScorer { 52 | fn doc_id(&self) -> DocId { 53 | self.postings_iterator.doc_id() 54 | } 55 | 56 | fn next(&mut self) -> Result { 57 | self.postings_iterator.next() 58 | } 59 | 60 | fn advance(&mut self, target: DocId) -> Result { 61 | self.postings_iterator.advance(target) 62 | } 63 | 64 | fn cost(&self) -> usize { 65 | self.postings_iterator.cost() 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/core/search/sort_field/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod field_comparator; 15 | 16 | pub use self::field_comparator::*; 17 | 18 | mod sort_field; 19 | 20 | pub use self::sort_field::*; 21 | 22 | mod collapse_top_docs; 23 | 24 | pub use self::collapse_top_docs::*; 25 | 26 | mod search_group; 27 | 28 | pub use self::search_group::*; 29 | 30 | /// Encapsulates sort criteria for returned hits. 31 | /// 32 | /// The fields used to determine sort order must be carefully chosen. 33 | /// Documents must contain a single term in such a field, 34 | /// and the value of the term should indicate the document's relative position in 35 | /// a given sort order. The field must be indexed, but should not be tokenized, 36 | /// and does not need to be stored (unless you happen to want it back with the 37 | /// rest of your document data). 38 | /// 39 | /// ### Valid Types of Values 40 | /// 41 | /// There are four possible kinds of term values which may be put into 42 | /// sorting fields: Integers, Longs, Floats, or Strings. Unless 43 | /// `SortField` objects are specified, the type of value 44 | /// in the field is determined by parsing the first term in the field. 45 | /// 46 | /// Integer term values should contain only digits and an optional 47 | /// preceding negative sign. Values must be base 10 and in the range 48 | /// `i32::min_value()` and `i32::max_value()` inclusive. 49 | /// Documents which should appear first in the sort 50 | /// should have low value integers, later documents high values 51 | /// (i.e. the documents should be numbered `1..n` where 52 | /// `1` is the first and `n` the last). 
53 | /// 54 | /// Long term values should contain only digits and an optional 55 | /// preceding negative sign. Values must be base 10 and in the range 56 | /// `i64::min_value()` and `i64::max_value()` inclusive. 57 | /// Documents which should appear first in the sort 58 | /// should have low value integers, later documents high values. 59 | /// 60 | /// Float term values should conform to values accepted by 61 | /// {@link Float Float.valueOf(String)} (except that `NaN` 62 | /// and `Infinity` are not supported). 63 | /// Documents which should appear first in the sort 64 | /// should have low values, later documents high values. 65 | /// 66 | /// String term values can contain any valid String, but should 67 | /// not be tokenized. The values are sorted according to their 68 | /// {@link Comparable natural order}. Note that using this type 69 | /// of term value has higher memory requirements than the other 70 | /// two types. 71 | /// 72 | /// ### Object Reuse 73 | /// 74 | /// One of these objects can be 75 | /// used multiple times and the sort order changed between usages. 76 | /// 77 | /// This class is thread safe. 78 | /// 79 | /// ### Memory Usage 80 | /// 81 | /// Sorting uses of caches of term values maintained by the 82 | /// internal HitQueue(s). The cache is static and contains an integer 83 | /// or float array of length `IndexReader.max_doc()` for each field 84 | /// name for which a sort is performed. In other words, the size of the 85 | /// cache in bytes is: 86 | /// 87 | /// `4 * IndexReader.max_doc() * (# of different fields actually used to sort)` 88 | /// 89 | /// For String fields, the cache is larger: in addition to the 90 | /// above array, the value of every term in the field is kept in memory. 91 | /// If there are many unique terms in the field, this could 92 | /// be quite large. 
93 | /// 94 | /// Note that the size of the cache is not affected by how many 95 | /// fields are in the index and *might* be used to sort - only by 96 | /// the ones actually used to sort a result set. 97 | #[derive(Clone, Eq, PartialEq, Debug)] 98 | pub struct Sort { 99 | fields: Vec, 100 | } 101 | 102 | impl Sort { 103 | pub fn new(fields: Vec) -> Sort { 104 | Sort { fields } 105 | } 106 | 107 | pub fn get_sort(&self) -> &[SortField] { 108 | &self.fields 109 | } 110 | 111 | pub fn needs_scores(&self) -> bool { 112 | self.fields.iter().any(|f| f.needs_scores()) 113 | } 114 | } 115 | 116 | #[cfg(test)] 117 | mod tests { 118 | use super::*; 119 | 120 | #[test] 121 | fn test_sort() { 122 | let sort_fields: Vec = vec![ 123 | SortField::Simple(SimpleSortField::new( 124 | String::from("field_one"), 125 | SortFieldType::Score, 126 | true, 127 | )), 128 | SortField::Simple(SimpleSortField::new( 129 | String::from("field_two"), 130 | SortFieldType::Doc, 131 | false, 132 | )), 133 | ]; 134 | let sort = Sort::new(sort_fields); 135 | 136 | assert!(sort.needs_scores()); 137 | 138 | let fields = sort.get_sort(); 139 | assert_eq!(fields.len(), 2); 140 | 141 | let score_field = &fields[0]; 142 | assert_eq!(score_field.field(), &String::from("field_one")); 143 | 144 | let doc_field = &fields[1]; 145 | assert_eq!(doc_field.field(), &String::from("field_two")); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/core/search/statistics.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::util::DocId; 15 | 16 | /// Contains statistics for a collection (field) 17 | #[derive(Clone)] 18 | pub struct CollectionStatistics { 19 | pub field: String, 20 | pub doc_base: DocId, 21 | pub max_doc: i64, 22 | pub doc_count: i64, 23 | pub sum_total_term_freq: i64, 24 | pub sum_doc_freq: i64, 25 | } 26 | 27 | impl CollectionStatistics { 28 | pub fn new( 29 | field: String, 30 | doc_base: DocId, 31 | max_doc: i64, 32 | doc_count: i64, 33 | sum_total_term_freq: i64, 34 | sum_doc_freq: i64, 35 | ) -> CollectionStatistics { 36 | debug_assert!(max_doc >= 0); 37 | debug_assert!(doc_count >= -1 && doc_count <= max_doc); // #docs with field must be <= #docs 38 | debug_assert!(sum_doc_freq == -1 || sum_doc_freq >= doc_count); // #postings must be >= #docs with field 39 | debug_assert!(sum_total_term_freq == -1 || sum_total_term_freq >= sum_doc_freq); // #positions must be >= #postings 40 | CollectionStatistics { 41 | field, 42 | doc_base, 43 | max_doc, 44 | doc_count, 45 | sum_total_term_freq, 46 | sum_doc_freq, 47 | } 48 | } 49 | } 50 | 51 | /// Contains statistics for a specific term 52 | pub struct TermStatistics { 53 | pub term: Vec, 54 | pub doc_freq: i64, 55 | pub total_term_freq: i64, 56 | } 57 | 58 | impl TermStatistics { 59 | pub fn new(term: Vec, doc_freq: i64, total_term_freq: i64) -> TermStatistics { 60 | debug_assert!(doc_freq >= 0); 61 | debug_assert!(total_term_freq == -1 || total_term_freq >= doc_freq); 62 | 63 | TermStatistics { 64 | term, 65 | doc_freq, 66 | total_term_freq, 67 | } 68 | } 69 | } 70 | 71 | #[cfg(test)] 72 | mod tests { 73 | use super::*; 74 | use 
std::string::String; 75 | 76 | #[test] 77 | fn test_collection_statistics() { 78 | let collection_statistics = 79 | CollectionStatistics::new(String::from("hello"), 0, 25, 10, 14, 13); 80 | assert_eq!(collection_statistics.field, "hello"); 81 | assert_eq!(collection_statistics.max_doc, 25); 82 | assert_eq!(collection_statistics.doc_count, 10); 83 | assert_eq!(collection_statistics.sum_total_term_freq, 14); 84 | assert_eq!(collection_statistics.sum_doc_freq, 13); 85 | } 86 | 87 | #[test] 88 | fn test_term_statistics() { 89 | let mut v: Vec = Vec::new(); 90 | v.push(1); 91 | let term_statistics = TermStatistics::new(v, 1, 1); 92 | assert_eq!(term_statistics.term[0], 1); 93 | assert_eq!(term_statistics.doc_freq, 1); 94 | assert_eq!(term_statistics.total_term_freq, 1); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/core/store/directory/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | mod directory; 15 | 16 | pub use self::directory::*; 17 | 18 | mod fs_directory; 19 | 20 | pub use self::fs_directory::*; 21 | 22 | mod mmap_directory; 23 | 24 | pub use self::mmap_directory::*; 25 | 26 | mod tracking_directory_wrapper; 27 | 28 | pub use self::tracking_directory_wrapper::*; 29 | -------------------------------------------------------------------------------- /src/core/store/directory/tracking_directory_wrapper.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::directory::{Directory, FilterDirectory}; 15 | use core::store::io::{IndexInput, IndexOutput}; 16 | use core::store::IOContext; 17 | 18 | use error::Result; 19 | 20 | use std::collections::HashSet; 21 | use std::fmt; 22 | use std::ops::Deref; 23 | use std::sync::Mutex; 24 | 25 | /// A delegating Directory that records which files were written to and deleted. 
26 | pub struct TrackingDirectoryWrapper> { 27 | create_file_names: Mutex>, 28 | pub directory: T, 29 | } 30 | 31 | impl> TrackingDirectoryWrapper { 32 | pub fn new(directory: T) -> TrackingDirectoryWrapper { 33 | TrackingDirectoryWrapper { 34 | create_file_names: Mutex::new(HashSet::new()), 35 | directory, 36 | } 37 | } 38 | 39 | pub fn get_create_files(&self) -> HashSet { 40 | self.create_file_names.lock().unwrap().clone() 41 | } 42 | } 43 | 44 | impl FilterDirectory for TrackingDirectoryWrapper 45 | where 46 | D: Directory, 47 | T: Deref, 48 | { 49 | type Dir = D; 50 | 51 | #[inline] 52 | fn dir(&self) -> &Self::Dir { 53 | &*self.directory 54 | } 55 | } 56 | 57 | impl Directory for TrackingDirectoryWrapper 58 | where 59 | D: Directory, 60 | T: Deref, 61 | { 62 | type IndexOutput = D::IndexOutput; 63 | type TempOutput = D::TempOutput; 64 | 65 | fn create_output(&self, name: &str, ctx: &IOContext) -> Result { 66 | let output = self.directory.create_output(name, ctx)?; 67 | self.create_file_names.lock()?.insert(name.to_string()); 68 | Ok(output) 69 | } 70 | 71 | fn open_input(&self, name: &str, ctx: &IOContext) -> Result> { 72 | self.directory.open_input(name, ctx) 73 | } 74 | 75 | fn create_temp_output( 76 | &self, 77 | prefix: &str, 78 | suffix: &str, 79 | ctx: &IOContext, 80 | ) -> Result { 81 | let temp_output = self.directory.create_temp_output(prefix, suffix, ctx)?; 82 | self.create_file_names 83 | .lock()? 
84 | .insert(temp_output.name().to_string()); 85 | Ok(temp_output) 86 | } 87 | 88 | fn delete_file(&self, name: &str) -> Result<()> { 89 | self.directory.delete_file(name)?; 90 | self.create_file_names.lock()?.remove(name); 91 | Ok(()) 92 | } 93 | 94 | fn rename(&self, source: &str, dest: &str) -> Result<()> { 95 | self.directory.rename(source, dest)?; 96 | let mut guard = self.create_file_names.lock()?; 97 | guard.insert(dest.to_string()); 98 | guard.remove(source); 99 | Ok(()) 100 | } 101 | 102 | fn create_files(&self) -> HashSet { 103 | self.create_file_names.lock().unwrap().clone() 104 | } 105 | } 106 | 107 | impl fmt::Display for TrackingDirectoryWrapper 108 | where 109 | D: Directory, 110 | T: Deref, 111 | { 112 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 113 | write!(f, "TrackingDirectoryWrapper({})", &*self.directory) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/core/store/io/buffered_checksum_index_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | extern crate crc; 15 | 16 | use core::store::io::{ChecksumIndexInput, DataInput, IndexInput, RandomAccessInput}; 17 | 18 | use error::ErrorKind::IllegalArgument; 19 | use error::Result; 20 | 21 | use crc::{crc32, Hasher32}; 22 | use std::io::Read; 23 | 24 | /// Simple implementation of `ChecksumIndexInput` that wraps 25 | /// another input and delegates calls. 26 | pub struct BufferedChecksumIndexInput { 27 | index_input: Box, 28 | digest: crc32::Digest, 29 | name: String, 30 | } 31 | 32 | impl BufferedChecksumIndexInput { 33 | pub fn new(index_input: Box) -> BufferedChecksumIndexInput { 34 | let digest = crc32::Digest::new_with_initial(crc32::IEEE, 0u32); 35 | let name = String::from(index_input.name()); 36 | BufferedChecksumIndexInput { 37 | index_input, 38 | digest, 39 | name, 40 | } 41 | } 42 | } 43 | 44 | impl ChecksumIndexInput for BufferedChecksumIndexInput { 45 | fn checksum(&self) -> i64 { 46 | i64::from(self.digest.sum32()) 47 | } 48 | } 49 | 50 | impl DataInput for BufferedChecksumIndexInput {} 51 | 52 | impl Read for BufferedChecksumIndexInput { 53 | fn read(&mut self, buf: &mut [u8]) -> ::std::io::Result { 54 | let length = self.index_input.read(buf)?; 55 | self.digest.write(&buf[0..length]); 56 | Ok(length) 57 | } 58 | } 59 | 60 | impl IndexInput for BufferedChecksumIndexInput { 61 | fn clone(&self) -> Result> { 62 | Ok(Box::new(Self { 63 | index_input: self.index_input.clone()?, 64 | digest: crc32::Digest::new_with_initial(crc32::IEEE, self.digest.sum32()), 65 | name: self.name.clone(), 66 | })) 67 | } 68 | fn file_pointer(&self) -> i64 { 69 | self.index_input.file_pointer() 70 | } 71 | 72 | fn seek(&mut self, pos: i64) -> Result<()> { 73 | let curr_pos = self.file_pointer(); 74 | let to_skip = pos - curr_pos; 75 | if to_skip < 0 { 76 | bail!(IllegalArgument(format!( 77 | "Can't seek backwards: {} => {}", 78 | curr_pos, pos 79 | ))); 80 | } 81 | self.skip_bytes(to_skip as usize) 82 | } 83 | 84 | fn len(&self) -> u64 { 85 | 
self.index_input.len() 86 | } 87 | 88 | fn name(&self) -> &str { 89 | &self.name 90 | } 91 | 92 | fn random_access_slice( 93 | &self, 94 | _offset: i64, 95 | _length: i64, 96 | ) -> Result> { 97 | unimplemented!() 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/core/store/io/byte_array_data_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::{DataInput, DataOutput}; 15 | 16 | use error::Result; 17 | use std::cmp::min; 18 | use std::io::{self, Read, Write}; 19 | use std::sync::Arc; 20 | 21 | pub struct ByteArrayRef(Arc>); 22 | 23 | impl ByteArrayRef { 24 | pub fn new(v: Arc>) -> ByteArrayRef { 25 | ByteArrayRef(v) 26 | } 27 | } 28 | 29 | impl AsRef<[u8]> for ByteArrayRef { 30 | fn as_ref(&self) -> &[u8] { 31 | &self.0 32 | } 33 | } 34 | 35 | /// DataInput backed by a byte array. 36 | /// 37 | /// *WARNING:* This class omits all low-level checks. 
38 | pub struct ByteArrayDataInput> { 39 | bytes: T, 40 | pos: usize, 41 | } 42 | 43 | impl> ByteArrayDataInput { 44 | pub fn new(bytes: T) -> ByteArrayDataInput { 45 | ByteArrayDataInput { bytes, pos: 0usize } 46 | } 47 | 48 | pub fn rewind(&mut self) { 49 | self.pos = 0; 50 | } 51 | 52 | pub fn position(&self) -> usize { 53 | self.pos 54 | } 55 | 56 | pub fn set_position(&mut self, pos: usize) { 57 | self.pos = pos; 58 | } 59 | 60 | pub fn length(&self) -> usize { 61 | self.bytes.as_ref().len() 62 | } 63 | 64 | pub fn eof(&self) -> bool { 65 | self.pos == self.length() 66 | } 67 | 68 | pub fn reset(&mut self, bytes: T) { 69 | self.bytes = bytes; 70 | self.pos = 0; 71 | } 72 | 73 | pub fn get_slice(&self, pos: usize, len: usize) -> Result<&[u8]> { 74 | let limit = self.bytes.as_ref().len(); 75 | if pos < self.pos || pos > limit || pos + len > limit { 76 | bail!( 77 | "Invalid Argument: slice ({}, {}) is beyond valid range of ({}, {})", 78 | pos, 79 | pos + len, 80 | self.pos, 81 | limit 82 | ) 83 | } 84 | Ok(&self.bytes.as_ref()[pos..pos + len]) 85 | } 86 | } 87 | 88 | impl> DataInput for ByteArrayDataInput { 89 | fn read_byte(&mut self) -> Result { 90 | let b = self.bytes.as_ref()[self.pos]; 91 | self.pos += 1; 92 | Ok(b) 93 | } 94 | 95 | fn read_bytes(&mut self, b: &mut [u8], offset: usize, len: usize) -> Result<()> { 96 | b[offset..offset + len].copy_from_slice(&self.bytes.as_ref()[self.pos..self.pos + len]); 97 | self.pos += len; 98 | Ok(()) 99 | } 100 | 101 | fn skip_bytes(&mut self, count: usize) -> Result<()> { 102 | self.pos += count; 103 | Ok(()) 104 | } 105 | } 106 | 107 | impl> Read for ByteArrayDataInput { 108 | fn read(&mut self, buf: &mut [u8]) -> ::std::io::Result { 109 | let size = ::std::cmp::min(buf.len(), self.length() - self.pos); 110 | buf[0..size].copy_from_slice(&self.bytes.as_ref()[self.pos..self.pos + size]); 111 | self.pos += size; 112 | Ok(size) 113 | } 114 | } 115 | 116 | /// DataOutput backed by a byte array. 
117 | pub struct ByteArrayDataOutput { 118 | bytes: T, 119 | pub pos: usize, 120 | limit: usize, 121 | } 122 | 123 | impl ByteArrayDataOutput 124 | where 125 | T: AsMut<[u8]>, 126 | { 127 | pub fn new(bytes: T, offset: usize, len: usize) -> ByteArrayDataOutput { 128 | ByteArrayDataOutput { 129 | bytes, 130 | pos: offset, 131 | limit: offset + len, 132 | } 133 | } 134 | 135 | #[inline] 136 | fn bytes_slice(&mut self) -> &mut [u8] { 137 | self.bytes.as_mut() 138 | } 139 | } 140 | 141 | impl Write for ByteArrayDataOutput 142 | where 143 | T: AsMut<[u8]>, 144 | { 145 | fn write(&mut self, buf: &[u8]) -> io::Result { 146 | let length = min(self.limit - self.pos, buf.len()); 147 | let pos = self.pos; 148 | self.bytes_slice()[pos..pos + length].copy_from_slice(&buf[..length]); 149 | self.pos += length; 150 | Ok(length) 151 | } 152 | 153 | fn flush(&mut self) -> io::Result<()> { 154 | Ok(()) 155 | } 156 | } 157 | 158 | impl DataOutput for ByteArrayDataOutput where T: AsMut<[u8]> {} 159 | -------------------------------------------------------------------------------- /src/core/store/io/checksum_index_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::store::io::IndexInput; 15 | 16 | pub trait ChecksumIndexInput: IndexInput { 17 | fn checksum(&self) -> i64; 18 | } 19 | -------------------------------------------------------------------------------- /src/core/store/io/data_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::DataInput; 15 | 16 | use core::util::ZigZagEncoding; 17 | use error::ErrorKind::IllegalArgument; 18 | use error::Result; 19 | 20 | use std::collections::{HashMap, HashSet}; 21 | use std::io::Write; 22 | use std::mem; 23 | 24 | /// Trait for performing write operations of Lucene's low-level data types. 
25 | pub trait DataOutput: Write { 26 | fn write_byte(&mut self, b: u8) -> Result<()> { 27 | let buf = [b; 1]; 28 | self.write_all(&buf)?; 29 | Ok(()) 30 | } 31 | 32 | #[inline] 33 | fn write_bytes(&mut self, b: &[u8], offset: usize, length: usize) -> Result<()> { 34 | debug_assert!(offset + length <= b.len()); 35 | self.write_all(&b[offset..offset + length])?; 36 | Ok(()) 37 | } 38 | 39 | fn write_short(&mut self, i: i16) -> Result<()> { 40 | let bytes = unsafe { mem::transmute::<_, [u8; 2]>(i.to_be()) }; 41 | self.write_all(&bytes)?; 42 | Ok(()) 43 | } 44 | 45 | fn write_int(&mut self, i: i32) -> Result<()> { 46 | let bytes = unsafe { mem::transmute::<_, [u8; 4]>(i.to_be()) }; 47 | self.write_all(&bytes)?; 48 | Ok(()) 49 | } 50 | 51 | fn write_vint(&mut self, i: i32) -> Result<()> { 52 | let mut i = i as u32; 53 | while (i & !0x7f_u32) != 0 { 54 | self.write_byte(((i & 0x7f) | 0x80) as u8)?; 55 | i >>= 7; 56 | } 57 | self.write_byte(i as u8) 58 | } 59 | 60 | fn write_zint(&mut self, i: i32) -> Result<()> { 61 | self.write_vint(i.encode()) 62 | } 63 | 64 | fn write_long(&mut self, i: i64) -> Result<()> { 65 | let bytes = unsafe { mem::transmute::<_, [u8; 8]>(i.to_be()) }; 66 | self.write_all(&bytes)?; 67 | Ok(()) 68 | } 69 | 70 | fn _write_signed_vlong(&mut self, i: i64) -> Result<()> { 71 | let mut i = i as u64; 72 | while (i & !0x7f_u64) != 0 { 73 | self.write_byte(((i & 0x7f_u64) | 0x80_u64) as u8)?; 74 | i >>= 7; 75 | } 76 | self.write_byte(i as u8) 77 | } 78 | 79 | fn write_vlong(&mut self, i: i64) -> Result<()> { 80 | if i < 0 { 81 | bail!(IllegalArgument("Can't write negative vLong".to_owned())); 82 | } 83 | self._write_signed_vlong(i) 84 | } 85 | 86 | fn write_zlong(&mut self, i: i64) -> Result<()> { 87 | self._write_signed_vlong(i.encode()) 88 | } 89 | 90 | fn write_string(&mut self, s: &str) -> Result<()> { 91 | let s = s.as_bytes(); 92 | self.write_vint(s.len() as i32)?; 93 | self.write_all(s)?; 94 | Ok(()) 95 | } 96 | 97 | fn write_map_of_strings(&mut 
self, map: &HashMap) -> Result<()> { 98 | self.write_vint(map.len() as i32)?; 99 | 100 | let mut keys: Vec<&String> = map.keys().collect(); 101 | keys.sort(); 102 | for k in keys { 103 | self.write_string(k)?; 104 | self.write_string(map.get(k).unwrap())?; 105 | } 106 | Ok(()) 107 | } 108 | 109 | fn write_set_of_strings(&mut self, set: &HashSet) -> Result<()> { 110 | self.write_vint(set.len() as i32)?; 111 | 112 | let mut keys: Vec<&String> = set.iter().collect(); 113 | keys.sort(); 114 | for k in keys { 115 | self.write_string(k)?; 116 | } 117 | Ok(()) 118 | } 119 | 120 | fn copy_bytes(&mut self, from: &mut I, len: usize) -> Result<()> { 121 | const COPY_BUFFER_SIZE: usize = 16384; 122 | let mut left = len as i64; 123 | let mut copy_buffer = [0u8; COPY_BUFFER_SIZE]; 124 | while left > 0 { 125 | let to_copy = if left as usize > COPY_BUFFER_SIZE { 126 | COPY_BUFFER_SIZE 127 | } else { 128 | left as usize 129 | }; 130 | from.read_bytes(&mut copy_buffer, 0, to_copy)?; 131 | self.write_all(©_buffer[..to_copy])?; 132 | left -= to_copy as i64; 133 | } 134 | Ok(()) 135 | } 136 | } 137 | 138 | // a implement that can use Vec as a data output 139 | impl DataOutput for Vec {} 140 | -------------------------------------------------------------------------------- /src/core/store/io/fs_index_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::store::io::{DataOutput, IndexOutput}; 15 | 16 | use error::Result; 17 | 18 | use std::fs::{File, OpenOptions}; 19 | use std::io::BufWriter; 20 | use std::io::Write; 21 | use std::path::Path; 22 | 23 | use flate2::CrcWriter; 24 | 25 | const CHUNK_SIZE: usize = 8192; 26 | 27 | /// `IndexOutput` implement for `FsDirectory` 28 | pub struct FSIndexOutput { 29 | name: String, 30 | writer: CrcWriter>, 31 | bytes_written: usize, 32 | } 33 | 34 | impl FSIndexOutput { 35 | pub fn new>(name: String, path: P) -> Result { 36 | let file = OpenOptions::new().write(true).create(true).open(path)?; 37 | Ok(FSIndexOutput { 38 | name, 39 | writer: CrcWriter::new(BufWriter::with_capacity(CHUNK_SIZE, file)), 40 | bytes_written: 0, 41 | }) 42 | } 43 | } 44 | 45 | impl Drop for FSIndexOutput { 46 | fn drop(&mut self) { 47 | if let Err(ref desc) = self.writer.flush() { 48 | error!("Oops, failed to flush {}, errmsg: {}", self.name, desc); 49 | } 50 | self.bytes_written = 0; 51 | } 52 | } 53 | 54 | impl DataOutput for FSIndexOutput {} 55 | 56 | impl Write for FSIndexOutput { 57 | fn write(&mut self, buf: &[u8]) -> ::std::io::Result { 58 | let count = self.writer.write(buf)?; 59 | self.bytes_written += count; 60 | Ok(count) 61 | } 62 | 63 | fn flush(&mut self) -> ::std::io::Result<()> { 64 | self.writer.flush() 65 | } 66 | } 67 | 68 | impl IndexOutput for FSIndexOutput { 69 | fn name(&self) -> &str { 70 | &self.name 71 | } 72 | 73 | fn file_pointer(&self) -> i64 { 74 | self.bytes_written as i64 75 | } 76 | 77 | fn checksum(&self) -> Result { 78 | // self.writer.flush()?; 79 | Ok((self.writer.crc().sum() as i64) & 0xffff_ffffi64) 80 | } 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use super::*; 86 | use std::path::{Path, PathBuf}; 87 | 88 | #[test] 89 | fn test_write_byte() { 90 | let name = "hello.txt"; 91 | let path: PathBuf = Path::new(name).into(); 92 | let mut fsout = FSIndexOutput::new(name.to_string(), &path).unwrap(); 93 | fsout.write_byte(b'a').unwrap(); 94 
| assert_eq!(fsout.file_pointer(), 1); 95 | ::std::fs::remove_file("hello.txt").unwrap(); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/core/store/io/growable_byte_array_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::DataOutput; 15 | 16 | use std::io::Write; 17 | 18 | const MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING: usize = 65536; 19 | 20 | /// a `IndexOutput` that can be used to build a bytes array. 
21 | pub struct GrowableByteArrayDataOutput { 22 | pub bytes: Vec, 23 | length: usize, 24 | /* scratch for utf8 encoding of small strings 25 | * _scratch_bytes: Vec, */ 26 | } 27 | 28 | impl GrowableByteArrayDataOutput { 29 | pub fn new(cp: usize) -> GrowableByteArrayDataOutput { 30 | GrowableByteArrayDataOutput { 31 | bytes: vec![0u8; cp + MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING], 32 | length: 0, 33 | //_scratch_bytes: vec![0; cp + MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING], 34 | } 35 | } 36 | 37 | pub fn position(&self) -> usize { 38 | self.length 39 | } 40 | 41 | pub fn reset(&mut self) { 42 | self.length = 0; 43 | } 44 | } 45 | 46 | impl Write for GrowableByteArrayDataOutput { 47 | fn write(&mut self, buf: &[u8]) -> ::std::io::Result { 48 | let buf_len = buf.len(); 49 | let new_len = self.length + buf_len; 50 | if self.bytes.len() < new_len { 51 | self.bytes.resize(new_len, 0u8); 52 | } 53 | self.bytes[self.length..new_len].copy_from_slice(buf); 54 | self.length += buf_len; 55 | Ok(buf_len) 56 | } 57 | 58 | fn flush(&mut self) -> ::std::io::Result<()> { 59 | Ok(()) 60 | } 61 | } 62 | 63 | impl DataOutput for GrowableByteArrayDataOutput {} 64 | -------------------------------------------------------------------------------- /src/core/store/io/index_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::store::io::{DataInput, RandomAccessInput}; 15 | 16 | use error::Result; 17 | 18 | pub trait IndexInput: DataInput + Send + Sync { 19 | fn clone(&self) -> Result>; 20 | 21 | fn file_pointer(&self) -> i64; 22 | fn seek(&mut self, pos: i64) -> Result<()>; 23 | fn len(&self) -> u64; 24 | fn is_empty(&self) -> bool { 25 | self.len() == 0 26 | } 27 | fn name(&self) -> &str; 28 | 29 | fn random_access_slice(&self, _offset: i64, _length: i64) 30 | -> Result>; 31 | 32 | fn slice(&self, _description: &str, _offset: i64, _length: i64) -> Result> { 33 | unimplemented!(); 34 | } 35 | 36 | unsafe fn get_and_advance(&mut self, _length: usize) -> *const u8 { 37 | unimplemented!() 38 | } 39 | 40 | fn is_buffered(&self) -> bool { 41 | false 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/core/store/io/index_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::DataOutput; 15 | use core::store::RateLimiter; 16 | 17 | use error::Result; 18 | use std::io; 19 | use std::sync::Arc; 20 | 21 | /// Trait for output to a file in a Directory. 22 | /// 23 | /// A random-access output stream. Used for all Lucene index output operations. 
24 | pub trait IndexOutput: DataOutput { 25 | fn name(&self) -> &str; 26 | fn file_pointer(&self) -> i64; 27 | fn checksum(&self) -> Result; 28 | } 29 | 30 | pub struct IndexOutputRef { 31 | // TODO: we need GAT for the lifetime declaration 32 | // so, currently directly use raw pointer instead 33 | output: *mut T, 34 | } 35 | 36 | impl IndexOutputRef { 37 | pub fn new(output: &mut T) -> Self { 38 | Self { output } 39 | } 40 | } 41 | 42 | impl IndexOutput for IndexOutputRef { 43 | fn name(&self) -> &str { 44 | unsafe { (*self.output).name() } 45 | } 46 | 47 | fn file_pointer(&self) -> i64 { 48 | unsafe { (*self.output).file_pointer() } 49 | } 50 | 51 | fn checksum(&self) -> Result { 52 | unsafe { (*self.output).checksum() } 53 | } 54 | } 55 | 56 | impl DataOutput for IndexOutputRef {} 57 | 58 | impl io::Write for IndexOutputRef { 59 | fn write(&mut self, buf: &[u8]) -> io::Result { 60 | unsafe { (*self.output).write(buf) } 61 | } 62 | 63 | fn flush(&mut self) -> io::Result<()> { 64 | unsafe { (*self.output).flush() } 65 | } 66 | } 67 | 68 | pub struct InvalidIndexOutput {} 69 | 70 | impl io::Write for InvalidIndexOutput { 71 | fn write(&mut self, _buf: &[u8]) -> io::Result { 72 | unreachable!() 73 | } 74 | 75 | fn flush(&mut self) -> io::Result<()> { 76 | unreachable!() 77 | } 78 | } 79 | 80 | impl DataOutput for InvalidIndexOutput {} 81 | 82 | impl IndexOutput for InvalidIndexOutput { 83 | fn name(&self) -> &str { 84 | "invalid" 85 | } 86 | 87 | fn file_pointer(&self) -> i64 { 88 | -1 89 | } 90 | 91 | fn checksum(&self) -> Result { 92 | unreachable!() 93 | } 94 | } 95 | 96 | /// a rate limiting `IndexOutput` 97 | pub struct RateLimitIndexOutput { 98 | delegate: O, 99 | rate_limiter: Arc, 100 | /// How many bytes we've written since we last called rateLimiter.pause. 
101 | bytes_since_last_pause: usize, 102 | /// Cached here not not always have to call RateLimiter#getMinPauseCheckBytes() 103 | /// which does volatile read 104 | current_min_pause_check_bytes: usize, 105 | } 106 | 107 | impl RateLimitIndexOutput { 108 | pub fn new(rate_limiter: Arc, delegate: O) -> Self { 109 | let current_min_pause_check_bytes = rate_limiter.min_pause_check_bytes() as usize; 110 | RateLimitIndexOutput { 111 | delegate, 112 | rate_limiter, 113 | bytes_since_last_pause: 0, 114 | current_min_pause_check_bytes, 115 | } 116 | } 117 | 118 | fn check_rate(&mut self) -> Result<()> { 119 | if self.bytes_since_last_pause > self.current_min_pause_check_bytes { 120 | self.rate_limiter 121 | .pause(self.bytes_since_last_pause as u64)?; 122 | self.bytes_since_last_pause = 0; 123 | self.current_min_pause_check_bytes = self.rate_limiter.min_pause_check_bytes() as usize; 124 | } 125 | Ok(()) 126 | } 127 | } 128 | 129 | impl IndexOutput for RateLimitIndexOutput { 130 | fn name(&self) -> &str { 131 | self.delegate.name() 132 | } 133 | 134 | fn file_pointer(&self) -> i64 { 135 | self.delegate.file_pointer() 136 | } 137 | 138 | fn checksum(&self) -> Result { 139 | self.delegate.checksum() 140 | } 141 | } 142 | 143 | impl DataOutput for RateLimitIndexOutput {} 144 | 145 | impl io::Write for RateLimitIndexOutput { 146 | fn write(&mut self, buf: &[u8]) -> io::Result { 147 | self.bytes_since_last_pause += buf.len(); 148 | if let Err(_e) = self.check_rate() { 149 | return Err(io::Error::from(io::ErrorKind::WouldBlock)); 150 | } 151 | self.delegate.write(buf) 152 | } 153 | 154 | fn flush(&mut self) -> io::Result<()> { 155 | self.delegate.flush() 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/core/store/io/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod data_input; 15 | 16 | pub use self::data_input::*; 17 | 18 | mod index_input; 19 | 20 | pub use self::index_input::*; 21 | 22 | mod random_access_input; 23 | 24 | pub use self::random_access_input::*; 25 | 26 | mod checksum_index_input; 27 | 28 | pub use self::checksum_index_input::*; 29 | 30 | mod buffered_checksum_index_input; 31 | 32 | pub use self::buffered_checksum_index_input::*; 33 | 34 | mod mmap_index_input; 35 | 36 | pub use self::mmap_index_input::*; 37 | 38 | mod data_output; 39 | 40 | pub use self::data_output::*; 41 | 42 | mod index_output; 43 | 44 | pub use self::index_output::*; 45 | 46 | mod fs_index_output; 47 | 48 | pub use self::fs_index_output::*; 49 | 50 | mod byte_array_data_input; 51 | 52 | pub use self::byte_array_data_input::*; 53 | 54 | mod growable_byte_array_output; 55 | 56 | pub use self::growable_byte_array_output::*; 57 | 58 | mod ram_output; 59 | 60 | pub use self::ram_output::*; 61 | -------------------------------------------------------------------------------- /src/core/store/io/ram_output.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::{ByteArrayDataOutput, DataOutput, IndexOutput}; 15 | 16 | use error::{ErrorKind, Result}; 17 | 18 | use std::io::{self, Write}; 19 | 20 | use flate2::Crc; 21 | 22 | use core::util::fst::BytesStore; 23 | 24 | const CHUNK_SIZE: usize = 8192; 25 | 26 | /// A memory-resident `IndexOutput` implementation. 27 | /// Use `BytesStore` to represent in memory output store 28 | pub struct RAMOutputStream { 29 | name: String, 30 | pub store: BytesStore, 31 | crc: Option, 32 | } 33 | 34 | impl RAMOutputStream { 35 | pub fn new(checksum: bool) -> Self { 36 | Self::with_chunk_size(CHUNK_SIZE, checksum) 37 | } 38 | 39 | pub fn from_store(store: BytesStore) -> Self { 40 | RAMOutputStream { 41 | name: "noname".into(), 42 | store, 43 | crc: None, 44 | } 45 | } 46 | 47 | pub fn with_chunk_size(chunk_size: usize, checksum: bool) -> Self { 48 | let store = BytesStore::with_block_bits(chunk_size.trailing_zeros() as usize); 49 | let crc = if checksum { Some(Crc::new()) } else { None }; 50 | 51 | RAMOutputStream { 52 | name: "noname".into(), 53 | store, 54 | crc, 55 | } 56 | } 57 | 58 | pub fn write_to(&self, out: &mut impl DataOutput) -> Result<()> { 59 | // self.flush(); 60 | self.store.write_to(out) 61 | } 62 | 63 | pub fn write_to_buf(&self, out: &mut [u8]) -> Result<()> { 64 | let length = out.len(); 65 | let mut output = ByteArrayDataOutput::new(out, 0, length); 66 | self.write_to(&mut output) 67 | } 68 | 69 | pub fn reset(&mut self) { 70 | self.store.truncate(0); 71 | if let Some(ref mut crc) = self.crc { 72 | crc.reset(); 73 | } 74 | } 75 | } 76 | 77 | impl Write for RAMOutputStream { 
78 | fn write(&mut self, buf: &[u8]) -> io::Result { 79 | let size = self.store.write(buf)?; 80 | if size > 0 { 81 | if let Some(ref mut crc) = self.crc { 82 | crc.update(&buf[0..size]); 83 | } 84 | } 85 | Ok(size) 86 | } 87 | 88 | fn flush(&mut self) -> io::Result<()> { 89 | self.store.flush() 90 | } 91 | } 92 | 93 | impl DataOutput for RAMOutputStream {} 94 | 95 | impl IndexOutput for RAMOutputStream { 96 | fn name(&self) -> &str { 97 | &self.name 98 | } 99 | 100 | fn file_pointer(&self) -> i64 { 101 | self.store.get_position() as i64 102 | } 103 | 104 | fn checksum(&self) -> Result { 105 | if let Some(ref crc) = self.crc { 106 | Ok((crc.sum() as i64) & 0xffff_ffffi64) 107 | } else { 108 | bail!(ErrorKind::IllegalState( 109 | "internal RAMOutputStream created with checksum disabled".into() 110 | )) 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/core/store/io/random_access_input.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use error::Result; 15 | 16 | /// Random Access Index API. 17 | /// 18 | /// Unlike `IndexInput`, this has no concept of file position, all reads 19 | /// are absolute. However, like IndexInput, it is only intended for use by a single thread. 
20 | pub trait RandomAccessInput: Send + Sync { 21 | fn read_byte(&self, pos: u64) -> Result; 22 | fn read_short(&self, pos: u64) -> Result; 23 | fn read_int(&self, pos: u64) -> Result; 24 | fn read_long(&self, pos: u64) -> Result; 25 | } 26 | -------------------------------------------------------------------------------- /src/core/store/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | pub mod directory; 15 | pub mod io; 16 | 17 | use error::Result; 18 | 19 | use std::sync::Arc; 20 | use std::time::Duration; 21 | 22 | /// IOContext holds additional details on the merge/search context and 23 | /// specifies the context in which the Directory is being used for. 24 | #[derive(PartialEq, Eq, Clone, Copy)] 25 | pub enum IOContext { 26 | Read(bool), 27 | Default, 28 | Flush(FlushInfo), 29 | Merge(MergeInfo), 30 | } 31 | 32 | impl IOContext { 33 | pub const READ: IOContext = IOContext::Read(false); 34 | pub const READ_ONCE: IOContext = IOContext::Read(true); 35 | pub fn is_merge(&self) -> bool { 36 | match self { 37 | IOContext::Merge(_) => true, 38 | _ => false, 39 | } 40 | } 41 | } 42 | 43 | /// A FlushInfo provides information required for a FLUSH context. 44 | /// 45 | /// It is used as part of an `IOContext` in case of FLUSH context. 
46 | #[derive(PartialEq, Eq, Hash, Clone, Copy)] 47 | pub struct FlushInfo { 48 | num_docs: u32, 49 | } 50 | 51 | impl FlushInfo { 52 | pub fn new(num_docs: u32) -> Self { 53 | FlushInfo { num_docs } 54 | } 55 | } 56 | 57 | /// A MergeInfo provides information required for a MERGE context. 58 | /// 59 | /// It is used as part of an `IOContext` in case of MERGE context. 60 | #[derive(PartialEq, Eq, Hash, Clone, Copy)] 61 | pub struct MergeInfo { 62 | total_max_doc: u32, 63 | estimated_merge_bytes: u64, 64 | is_external: bool, 65 | merge_max_num_segments: Option, 66 | } 67 | 68 | impl MergeInfo { 69 | pub fn new( 70 | total_max_doc: u32, 71 | estimated_merge_bytes: u64, 72 | is_external: bool, 73 | merge_max_num_segments: Option, 74 | ) -> Self { 75 | MergeInfo { 76 | total_max_doc, 77 | estimated_merge_bytes, 78 | is_external, 79 | merge_max_num_segments, 80 | } 81 | } 82 | } 83 | 84 | /// Trait base class to rate limit IO. 85 | /// 86 | /// Typically implementations are shared across multiple IndexInputs 87 | /// or IndexOutputs (for example those involved all merging). Those IndexInputs and 88 | /// IndexOutputs would call {@link #pause} whenever the have read 89 | /// or written more than {@link #getMinPauseCheckBytes} bytes. 90 | 91 | pub trait RateLimiter: Sync + Send { 92 | /// Sets an updated MB per second rate limit. 93 | fn set_mb_per_sec(&self, mb_per_sec: f64); 94 | 95 | /// The current MB per second rate limit. 
96 | fn mb_per_sec(&self) -> f64; 97 | 98 | /// Pauses, if necessary, to keep the instantaneous IO rate 99 | /// at or below the target 100 | /// 101 | /// Note: the implementation is thread-safe 102 | fn pause(&self, bytes: u64) -> Result; 103 | 104 | /// how many bytes caller should add up isself before invoking `#pause` 105 | fn min_pause_check_bytes(&self) -> u64; 106 | } 107 | 108 | impl RateLimiter for Arc { 109 | fn set_mb_per_sec(&self, mb_per_sec: f64) { 110 | (**self).set_mb_per_sec(mb_per_sec); 111 | } 112 | 113 | fn mb_per_sec(&self) -> f64 { 114 | (**self).mb_per_sec() 115 | } 116 | 117 | fn pause(&self, bytes: u64) -> Result { 118 | (**self).pause(bytes) 119 | } 120 | 121 | fn min_pause_check_bytes(&self) -> u64 { 122 | (**self).min_pause_check_bytes() 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/core/util/byte_slice_reader.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::store::io::DataInput; 15 | use core::util::ByteBlockPool; 16 | 17 | use std::io; 18 | use std::ptr; 19 | 20 | /// IndexInput that knows how to read the byte slices written 21 | /// by Posting and PostingVector. We read the bytes in 22 | /// each slice until we hit the end of that slice at which 23 | /// point we read the forwarding address of the next slice 24 | /// and then jump to it. 
25 | pub struct ByteSliceReader { 26 | pool: *const ByteBlockPool, 27 | buffer_upto: usize, 28 | // current buffer index of pool.buffers 29 | upto: usize, 30 | limit: usize, 31 | level: usize, 32 | buffer_offset: usize, 33 | end_index: usize, 34 | } 35 | 36 | impl Default for ByteSliceReader { 37 | fn default() -> Self { 38 | ByteSliceReader { 39 | pool: ptr::null(), 40 | buffer_upto: 0, 41 | upto: 0, 42 | limit: 0, 43 | level: 0, 44 | buffer_offset: 0, 45 | end_index: 0, 46 | } 47 | } 48 | } 49 | 50 | impl Clone for ByteSliceReader { 51 | fn clone(&self) -> Self { 52 | ByteSliceReader { 53 | pool: self.pool, 54 | buffer_upto: self.buffer_upto, 55 | upto: self.upto, 56 | limit: self.limit, 57 | level: self.level, 58 | buffer_offset: self.buffer_offset, 59 | end_index: self.end_index, 60 | } 61 | } 62 | } 63 | 64 | impl ByteSliceReader { 65 | pub fn init(&mut self, pool: &ByteBlockPool, start_index: usize, end_index: usize) { 66 | debug_assert!(end_index >= start_index); 67 | 68 | self.pool = pool; 69 | self.end_index = end_index; 70 | self.level = 0; 71 | self.buffer_upto = start_index / ByteBlockPool::BYTE_BLOCK_SIZE; 72 | self.buffer_offset = self.buffer_upto * ByteBlockPool::BYTE_BLOCK_SIZE; 73 | self.upto = start_index & ByteBlockPool::BYTE_BLOCK_MASK; 74 | 75 | let first_size = ByteBlockPool::LEVEL_SIZE_ARRAY[0]; 76 | self.limit = if start_index + first_size >= end_index { 77 | // There is noly this one slice to read 78 | end_index & ByteBlockPool::BYTE_BLOCK_MASK 79 | } else { 80 | self.upto + first_size - 4 81 | }; 82 | } 83 | 84 | pub fn eof(&self) -> bool { 85 | debug_assert!(self.upto + self.buffer_offset <= self.end_index); 86 | self.upto + self.buffer_offset == self.end_index 87 | } 88 | 89 | unsafe fn next_slice(&mut self) { 90 | let pool = &*self.pool; 91 | // skip to next slice 92 | let next_index = { 93 | let buffer = &pool.buffers[self.buffer_upto]; 94 | ((buffer[self.limit] as usize) << 24) 95 | + ((buffer[self.limit + 1] as usize) << 16) 96 | + 
((buffer[self.limit + 2] as usize) << 8) 97 | + (buffer[self.limit + 3] as usize) 98 | }; 99 | self.level = ByteBlockPool::NEXT_LEVEL_ARRAY[self.level]; 100 | let new_size = ByteBlockPool::LEVEL_SIZE_ARRAY[self.level]; 101 | 102 | self.buffer_upto = next_index / ByteBlockPool::BYTE_BLOCK_SIZE; 103 | self.buffer_offset = self.buffer_upto * ByteBlockPool::BYTE_BLOCK_SIZE; 104 | self.upto = next_index & ByteBlockPool::BYTE_BLOCK_MASK; 105 | if next_index + new_size >= self.end_index { 106 | // We are advancing to the final slice 107 | debug_assert!(self.end_index >= next_index); 108 | self.limit = self.end_index - self.buffer_offset; 109 | } else { 110 | // This is not the final slice (subtract 4 for the 111 | // forwarding address at the end of this new slice) 112 | self.limit = self.upto + new_size - 4; 113 | } 114 | } 115 | } 116 | 117 | impl io::Read for ByteSliceReader { 118 | fn read(&mut self, buf: &mut [u8]) -> io::Result { 119 | let mut len = buf.len(); 120 | let mut offset = 0; 121 | while len > 0 { 122 | let num_left = self.limit - self.upto; 123 | unsafe { 124 | if num_left < len { 125 | buf[offset..offset + num_left].copy_from_slice( 126 | &(*self.pool).buffers[self.buffer_upto][self.upto..self.upto + num_left], 127 | ); 128 | offset += num_left; 129 | len -= num_left; 130 | self.next_slice(); 131 | } else { 132 | // This slice is the last one 133 | buf[offset..offset + len].copy_from_slice( 134 | &(*self.pool).buffers[self.buffer_upto][self.upto..self.upto + len], 135 | ); 136 | self.upto += len; 137 | break; 138 | } 139 | } 140 | } 141 | Ok(buf.len()) 142 | } 143 | } 144 | 145 | impl DataInput for ByteSliceReader {} 146 | -------------------------------------------------------------------------------- /src/core/util/bytes_ref.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::cmp::Ordering; 15 | use std::fmt; 16 | 17 | #[derive(Copy, Clone)] 18 | pub struct BytesRef { 19 | slice: *const [u8], 20 | } 21 | 22 | const DUMMY_BYTE: [u8; 0] = []; 23 | 24 | // return a dummy `BytesPtr` for some place need dummy init 25 | // in order to avoid `Option` 26 | impl Default for BytesRef { 27 | fn default() -> Self { 28 | BytesRef::new(&DUMMY_BYTE) 29 | } 30 | } 31 | 32 | impl BytesRef { 33 | pub fn new(bytes: &[u8]) -> BytesRef { 34 | BytesRef { 35 | slice: bytes as *const [u8], 36 | } 37 | } 38 | 39 | pub fn bytes(&self) -> &[u8] { 40 | unsafe { &*self.slice } 41 | } 42 | 43 | pub fn set_bytes(&mut self, bytes: &[u8]) { 44 | self.slice = bytes as *const [u8]; 45 | } 46 | 47 | pub fn is_empty(&self) -> bool { 48 | self.len() == 0 49 | } 50 | 51 | pub fn len(&self) -> usize { 52 | unsafe { (&*self.slice).len() } 53 | } 54 | 55 | pub fn byte_at(&self, idx: usize) -> u8 { 56 | unsafe { (&*self.slice)[idx] } 57 | } 58 | } 59 | 60 | impl AsRef<[u8]> for BytesRef { 61 | fn as_ref(&self) -> &[u8] { 62 | self.bytes() 63 | } 64 | } 65 | 66 | impl fmt::Debug for BytesRef { 67 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 68 | f.debug_struct("BytesPtr") 69 | .field("bytes", &self.bytes()) 70 | .finish() 71 | } 72 | } 73 | 74 | impl Eq for BytesRef {} 75 | 76 | impl PartialEq for BytesRef { 77 | fn eq(&self, other: &Self) -> bool { 78 | self.bytes().eq(other.bytes()) 79 | } 80 | } 81 | 82 | impl Ord for BytesRef { 83 
| fn cmp(&self, other: &Self) -> Ordering {
 84 |         self.bytes().cmp(other.bytes())
 85 |     }
 86 | }
 87 | 
 88 | impl PartialOrd for BytesRef {
 89 |     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
 90 |         Some(self.cmp(other))
 91 |     }
 92 | }
 93 | 
 94 | /// A builder for `BytesRef` instances
 95 | #[derive(Default)]
 96 | pub struct BytesRefBuilder {
 97 |     pub buffer: Vec<u8>,
 98 |     pub offset: usize,
 99 |     pub length: usize,
100 | }
101 | 
102 | impl BytesRefBuilder {
103 |     pub fn new() -> Self {
104 |         Default::default()
105 |     }
106 | 
107 |     pub fn bytes_mut(&mut self) -> &mut [u8] {
108 |         &mut self.buffer
109 |     }
110 | 
111 |     pub fn grow(&mut self, size: usize) {
112 |         self.buffer.resize(size, 0u8);
113 |     }
114 |     // Appends one byte at `offset + length`, growing the buffer as needed.
115 |     pub fn append(&mut self, b: u8) {
116 |         let pos = self.offset + self.length;
117 |         if pos >= self.buffer.len() {
118 |             self.buffer.resize(pos + 1, 0u8);
119 |         }
120 |         self.buffer[pos] = b;
121 |         self.length += 1;
122 |     }
123 |     // Appends a byte slice at `offset + length`, growing the buffer as needed.
124 |     pub fn appends(&mut self, bytes: &[u8]) {
125 |         let start = self.offset + self.length;
126 |         let end = start + bytes.len();
127 |         if end >= self.buffer.len() {
128 |             self.buffer.resize(end, 0u8);
129 |         }
130 |         self.buffer[start..end].copy_from_slice(bytes);
131 |         self.length += bytes.len();
132 |     }
133 |     // BUGFIX: the valid region is `length` bytes starting at `offset`, so the slice must end at `offset + length`; the old `offset..length` mixed an index with a length and returned a truncated view whenever `offset > 0` (identical behavior when offset == 0).
134 |     pub fn get(&self) -> BytesRef {
135 |         BytesRef::new(&self.buffer[self.offset..self.offset + self.length])
136 |     }
137 | 
138 |     pub fn copy_from(&mut self, bytes: &[u8]) {
139 |         if self.buffer.len() < bytes.len() {
140 |             self.buffer.resize(bytes.len(), 0u8);
141 |         }
142 |         self.buffer[0..bytes.len()].copy_from_slice(bytes);
143 |         self.offset = 0;
144 |         self.length = bytes.len();
145 |     }
146 | }
147 | 
--------------------------------------------------------------------------------
/src/core/util/counter.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use std::sync::atomic::{AtomicI64, Ordering}; 15 | 16 | /// Simple counter trait 17 | pub trait Count { 18 | fn add_get(&mut self, delta: i64) -> i64; 19 | 20 | fn get(&self) -> i64; 21 | } 22 | 23 | struct SerialCounter { 24 | count: i64, 25 | } 26 | 27 | impl Count for SerialCounter { 28 | fn add_get(&mut self, delta: i64) -> i64 { 29 | self.count += delta; 30 | self.count 31 | } 32 | 33 | fn get(&self) -> i64 { 34 | self.count 35 | } 36 | } 37 | 38 | struct AtomicCounter { 39 | count: AtomicI64, 40 | } 41 | 42 | impl Count for AtomicCounter { 43 | fn add_get(&mut self, delta: i64) -> i64 { 44 | self.count.fetch_add(delta, Ordering::Release); 45 | self.get() 46 | } 47 | 48 | fn get(&self) -> i64 { 49 | self.count.load(Ordering::Acquire) 50 | } 51 | } 52 | 53 | enum CounterEnum { 54 | Serial(Box), 55 | Atomic(Box), 56 | Borrowed(*mut dyn Count), 57 | // TODO unsafe use for borrow a exist counter 58 | } 59 | 60 | impl Count for CounterEnum { 61 | fn add_get(&mut self, delta: i64) -> i64 { 62 | match *self { 63 | CounterEnum::Serial(ref mut s) => s.add_get(delta), 64 | CounterEnum::Atomic(ref mut s) => s.add_get(delta), 65 | CounterEnum::Borrowed(b) => unsafe { (*b).add_get(delta) }, 66 | } 67 | } 68 | 69 | fn get(&self) -> i64 { 70 | match *self { 71 | CounterEnum::Serial(ref s) => s.get(), 72 | CounterEnum::Atomic(ref s) => s.get(), 73 | CounterEnum::Borrowed(b) => unsafe { (*b).get() }, 74 | } 75 | } 76 | } 77 | 78 | pub struct Counter { 
79 | count: CounterEnum, 80 | } 81 | 82 | impl Default for Counter { 83 | fn default() -> Self { 84 | Self::new(false) 85 | } 86 | } 87 | 88 | impl Counter { 89 | pub fn new(thread_safe: bool) -> Self { 90 | let count = if thread_safe { 91 | CounterEnum::Atomic(Box::new(AtomicCounter { 92 | count: AtomicI64::new(0), 93 | })) 94 | } else { 95 | CounterEnum::Serial(Box::new(SerialCounter { count: 0 })) 96 | }; 97 | Counter { count } 98 | } 99 | 100 | pub fn borrow(counter: &dyn Count) -> Self { 101 | Counter { 102 | count: CounterEnum::Borrowed(counter as *const dyn Count as *mut dyn Count), 103 | } 104 | } 105 | 106 | fn borrow_raw(counter: *mut dyn Count) -> Self { 107 | Counter { 108 | count: CounterEnum::Borrowed(counter), 109 | } 110 | } 111 | 112 | // TODO this copy while share the inner count of self, 113 | // so it is not safe if self's lifetime is shorter than the copy one 114 | pub unsafe fn shallow_copy(&self) -> Counter { 115 | match self.count { 116 | CounterEnum::Borrowed(b) => Counter::borrow_raw(b), 117 | CounterEnum::Atomic(ref a) => Counter::borrow(a.as_ref() as &dyn Count), 118 | CounterEnum::Serial(ref s) => Counter::borrow(s.as_ref() as &dyn Count), 119 | } 120 | } 121 | 122 | pub fn ptr(&self) -> *const dyn Count { 123 | match self.count { 124 | CounterEnum::Serial(ref s) => s.as_ref(), 125 | CounterEnum::Atomic(ref s) => s.as_ref(), 126 | CounterEnum::Borrowed(b) => b, 127 | } 128 | } 129 | } 130 | 131 | impl Count for Counter { 132 | fn add_get(&mut self, delta: i64) -> i64 { 133 | self.count.add_get(delta) 134 | } 135 | 136 | fn get(&self) -> i64 { 137 | self.count.get() 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/core/util/external/deferred.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | // TODO: copy from package `crossbeam-epoch` from it's not a public module 15 | // we use this to manage callback functions 16 | 17 | use std::mem::MaybeUninit; 18 | use std::{fmt, mem, ptr}; 19 | 20 | /// Number of words a piece of `Data` can hold. 21 | /// 22 | /// Three words should be enough for the majority of cases. For example, you can fit inside it the 23 | /// function pointer together with a fat pointer representing an object that needs to be destroyed. 24 | const DATA_WORDS: usize = 3; 25 | 26 | /// Some space to keep a `FnOnce()` object on the stack. 27 | type Data = [usize; DATA_WORDS]; 28 | 29 | /// A `FnOnce()` that is stored inline if small, or otherwise boxed on the heap. 30 | /// 31 | /// This is a handy way of keeping an unsized `FnOnce()` within a sized structure. 32 | pub struct Deferred { 33 | call: unsafe fn(*mut u8), 34 | data: MaybeUninit, 35 | } 36 | 37 | impl fmt::Debug for Deferred { 38 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 39 | write!(f, "Deferred {{ ... }}") 40 | } 41 | } 42 | 43 | impl Drop for Deferred { 44 | fn drop(&mut self) { 45 | unsafe { 46 | ptr::drop_in_place(self.data.as_mut_ptr()); 47 | } 48 | } 49 | } 50 | 51 | impl Deferred { 52 | /// Constructs a new `Deferred` from a `FnOnce()`. 
53 | #[allow(clippy::cast_ptr_alignment)] 54 | pub fn new(f: F) -> Self { 55 | let size = mem::size_of::(); 56 | let align = mem::align_of::(); 57 | 58 | unsafe { 59 | if size <= mem::size_of::() && align <= mem::align_of::() { 60 | let mut data = MaybeUninit::::uninit(); 61 | ptr::write(data.as_mut_ptr() as *mut F, f); 62 | 63 | unsafe fn call(raw: *mut u8) { 64 | let f: F = ptr::read(raw as *mut F); 65 | f(); 66 | } 67 | 68 | Deferred { 69 | call: call::, 70 | data, 71 | } 72 | } else { 73 | let b: Box = Box::new(f); 74 | let mut data = MaybeUninit::::uninit(); 75 | ptr::write(data.as_mut_ptr() as *mut Box, b); 76 | 77 | unsafe fn call(raw: *mut u8) { 78 | let b: Box = ptr::read(raw as *mut Box); 79 | (*b)(); 80 | } 81 | 82 | Deferred { 83 | call: call::, 84 | data, 85 | } 86 | } 87 | } 88 | } 89 | 90 | /// Calls the function. 91 | #[inline] 92 | pub fn call(mut self) { 93 | let call = self.call; 94 | unsafe { call(self.data.as_mut_ptr() as *mut u8) }; 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/core/util/external/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | // this package is use to place modules copy from external packages for some reason 15 | 16 | mod deferred; 17 | 18 | pub use self::deferred::*; 19 | 20 | mod volatile; 21 | 22 | pub use self::volatile::*; 23 | 24 | mod binary_heap; 25 | 26 | pub use self::binary_heap::*; 27 | 28 | mod thread_pool; 29 | 30 | pub use self::thread_pool::*; 31 | -------------------------------------------------------------------------------- /src/core/util/external/volatile.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | // The MIT License (MIT) 15 | // 16 | // Copyright (c) Philipp Oppermann 17 | // 18 | // Permission is hereby granted, free of charge, to any 19 | // person obtaining a copy of this software and associated 20 | // documentation files (the "Software"), to deal in the 21 | // Software without restriction, including without 22 | // limitation the rights to use, copy, modify, merge, 23 | // publish, distribute, sublicense, and/or sell copies of 24 | // the Software, and to permit persons to whom the Software 25 | // is furnished to do so 26 | 27 | // copy from https://github.com/embed-rs/volatile/blob/master/src/lib.rs 28 | 29 | //! Provides wrapper types `Volatile`, `ReadOnly`, `WriteOnly`, `ReadWrite`, which wrap any 30 | //! copy-able type and allows for volatile memory access to wrapped value. Volatile memory accesses 31 | //! 
are never optimized away by the compiler, and are useful in many low-level systems programming 32 | //! and concurrent contexts. 33 | //! 34 | //! The wrapper types *do not* enforce any atomicity guarantees; to also get atomicity, consider 35 | //! looking at the `Atomic` wrapper type found in `libcore` or `libstd`. 36 | //! 37 | //! These wrappers do not depend on the standard library and never panic. 38 | //! 39 | //! # Dealing with Volatile Pointers 40 | //! 41 | //! Frequently, one may have to deal with volatile pointers, eg, writes to specific memory 42 | //! locations. The canonical way to solve this is to cast the pointer to a volatile wrapper 43 | //! directly, eg: 44 | //! 45 | //! ```rust 46 | //! use rucene::core::util::external::Volatile; 47 | //! 48 | //! let mut_ptr = 0xFEE00000 as *mut u32; 49 | //! 50 | //! let volatile_ptr = mut_ptr as *mut Volatile; 51 | //! ``` 52 | //! 53 | //! and then perform operations on the pointer as usual in a volatile way. This method works as all 54 | //! of the volatile wrapper types are the same size as their contained values. 55 | 56 | use std::ptr; 57 | 58 | /// A wrapper type around a volatile variable, which allows for volatile reads and writes 59 | /// to the contained value. The stored type needs to be `Copy`, as volatile reads and writes 60 | /// take and return copies of the value. 61 | /// 62 | /// The size of this struct is the same as the size of the contained type. 63 | #[derive(Debug)] 64 | #[repr(transparent)] 65 | pub struct Volatile(T); 66 | 67 | impl Volatile { 68 | /// Construct a new volatile instance wrapping the given value. 69 | /// 70 | /// This method never panics. 71 | #[cfg(feature = "const_fn")] 72 | pub const fn new(value: T) -> Volatile { 73 | Volatile(value) 74 | } 75 | 76 | /// Construct a new volatile instance wrapping the given value. 77 | /// 78 | /// This method never panics. 
79 | #[cfg(not(feature = "const_fn"))] 80 | pub fn new(value: T) -> Volatile { 81 | Volatile(value) 82 | } 83 | 84 | /// Performs a volatile read of the contained value, returning a copy 85 | /// of the read value. Volatile reads are guaranteed not to be optimized 86 | /// away by the compiler, but by themselves do not have atomic ordering 87 | /// guarantees. To also get atomicity, consider looking at the `Atomic` wrapper type. 88 | /// 89 | /// This method never panics. 90 | pub fn read(&self) -> T { 91 | // UNSAFE: Safe, as we know that our internal value exists. 92 | unsafe { ptr::read_volatile(&self.0) } 93 | } 94 | 95 | /// Performs a volatile write, setting the contained value to the given value `value`. Volatile 96 | /// writes are guaranteed to not be optimized away by the compiler, but by themselves do not 97 | /// have atomic ordering guarantees. To also get atomicity, consider looking at the `Atomic` 98 | /// wrapper type. 99 | /// 100 | /// This method never panics. 101 | /// 102 | /// TODO, we force convert immutable reference to mutable pointer, because 103 | /// we needn't guarantee the race condition if multi-write at the same time, 104 | /// else we need to use Atomic instead 105 | pub fn write(&self, value: T) { 106 | // UNSAFE: Safe, as we know that our internal value exists. 107 | unsafe { ptr::write_volatile(&self.0 as *const T as *mut T, value) }; 108 | } 109 | 110 | /// Performs a volatile read of the contained value, passes a mutable reference to it to the 111 | /// function `f`, and then performs a volatile write of the (potentially updated) value back to 112 | /// the contained value. 113 | /// 114 | /// Ths method never panics. 
115 | pub fn update(&self, f: F) 116 | where 117 | F: FnOnce(&mut T), 118 | { 119 | let mut value = self.read(); 120 | f(&mut value); 121 | self.write(value); 122 | } 123 | } 124 | 125 | impl Clone for Volatile { 126 | fn clone(&self) -> Self { 127 | Volatile(self.read()) 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/core/util/math.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use error::ErrorKind::IllegalState; 15 | use error::Result; 16 | 17 | use core::util::bit_util::UnsignedShift; 18 | 19 | use std::mem; 20 | 21 | pub fn log(mut x: i64, base: i32) -> i32 { 22 | debug_assert!(base > 1); 23 | 24 | let base = i64::from(base); 25 | let mut ret = 0; 26 | while x >= base { 27 | x /= base; 28 | ret += 1; 29 | } 30 | 31 | ret 32 | } 33 | 34 | pub fn long_to_int_exact(val: i64) -> Result { 35 | let ans = val as i32; 36 | if i64::from(ans) != val { 37 | bail!(IllegalState("integer overflow".to_owned())); 38 | } 39 | Ok(ans) 40 | } 41 | 42 | // see http://en.wikipedia.org/wiki/Binary_GCD_algorithm#Iterative_version_in_C.2B.2B_using_ctz_.28count_trailing_zeros.29 43 | pub fn gcd(a: i64, b: i64) -> i64 { 44 | debug_assert_ne!(a, i64::min_value()); 45 | debug_assert_ne!(b, i64::min_value()); 46 | let mut a = a.abs(); 47 | let mut b = b.abs(); 48 | 49 | if a == 0 { 50 | return b; 51 | } else if b == 0 { 52 | return a; 53 | } 54 | 55 | let common_trailing_zeros = (a | b).trailing_zeros(); 56 | a = a.unsigned_shift(a.trailing_zeros() as usize); 57 | 58 | loop { 59 | b = b.unsigned_shift(b.trailing_zeros() as usize); 60 | if a == b { 61 | break; 62 | } else if a > b || a == i64::min_value() { 63 | // MIN_VALUE is treated as 2^64 64 | mem::swap(&mut a, &mut b); 65 | } 66 | 67 | if a == 1 { 68 | break; 69 | } 70 | 71 | b -= a; 72 | } 73 | 74 | a << common_trailing_zeros 75 | } 76 | -------------------------------------------------------------------------------- /src/core/util/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | pub type DocId = i32; 15 | 16 | pub mod bkd; 17 | pub mod external; 18 | pub mod fst; 19 | pub mod packed; 20 | 21 | mod numeric; 22 | 23 | pub use self::numeric::*; 24 | 25 | mod variant_value; 26 | 27 | pub use self::variant_value::*; 28 | 29 | mod bits; 30 | 31 | pub use self::bits::*; 32 | 33 | mod version; 34 | 35 | pub use self::version::*; 36 | 37 | mod paged_bytes; 38 | 39 | pub use self::paged_bytes::*; 40 | 41 | mod doc_id_set_builder; 42 | 43 | pub use self::doc_id_set_builder::*; 44 | 45 | mod context; 46 | 47 | pub use self::context::*; 48 | 49 | mod counter; 50 | 51 | pub use self::counter::*; 52 | 53 | mod bytes_ref; 54 | 55 | pub use self::bytes_ref::*; 56 | 57 | mod bit_set; 58 | 59 | pub use self::bit_set::*; 60 | 61 | mod bit_util; 62 | 63 | pub use self::bit_util::*; 64 | 65 | mod byte_block_pool; 66 | 67 | pub use self::byte_block_pool::*; 68 | 69 | mod byte_slice_reader; 70 | 71 | pub use self::byte_slice_reader::*; 72 | 73 | mod bytes_ref_hash; 74 | 75 | pub use self::bytes_ref_hash::*; 76 | 77 | mod doc_id_set; 78 | 79 | pub use self::doc_id_set::*; 80 | 81 | mod int_block_pool; 82 | 83 | pub use self::int_block_pool::*; 84 | 85 | mod ints_ref; 86 | 87 | pub use self::ints_ref::*; 88 | 89 | mod math; 90 | 91 | pub use self::math::*; 92 | 93 | mod selector; 94 | 95 | pub use self::selector::*; 96 | 97 | mod small_float; 98 | 99 | pub use self::small_float::*; 100 | 101 | mod sorter; 102 | 103 | pub use self::sorter::*; 104 | 105 | mod string_util; 106 | 107 | pub use self::string_util::*; 108 | 109 | mod compression; 110 | 111 | pub use self::compression::*; 112 | 
113 | mod disi; 114 | 115 | pub use self::disi::*; 116 | 117 | use std::ops::Deref; 118 | 119 | use core::codec::doc_values::NumericDocValues; 120 | 121 | use error::Result; 122 | 123 | // a iterator that can be used over and over by call reset 124 | pub trait ReusableIterator: Iterator { 125 | fn reset(&mut self); 126 | } 127 | 128 | pub fn fill_slice(array: &mut [T], value: T) { 129 | for i in array { 130 | *i = value; 131 | } 132 | } 133 | 134 | pub fn over_size(size: usize) -> usize { 135 | let mut size = size; 136 | let mut extra = size >> 3; 137 | if extra < 3 { 138 | // for very small arrays, where constant overhead of 139 | // realloc is presumably relatively high, we grow 140 | // faster 141 | extra = 3; 142 | } 143 | size += extra; 144 | size 145 | } 146 | 147 | pub const BM25_SIMILARITY_IDF: &str = "idf"; 148 | 149 | pub struct DerefWrapper(pub T); 150 | 151 | impl Deref for DerefWrapper { 152 | type Target = T; 153 | 154 | #[inline] 155 | fn deref(&self) -> &Self::Target { 156 | &self.0 157 | } 158 | } 159 | 160 | /// Abstraction over an array of longs. 161 | /// 162 | /// This class extends `NumericDocValues` so that we don't need to add another 163 | /// level of abstraction every time we want eg. to use the `PackedInts` 164 | /// utility classes to represent a `NumericDocValues` instance. 
165 | pub trait LongValues: NumericDocValues { 166 | fn get64(&self, index: i64) -> Result; 167 | 168 | fn get64_mut(&mut self, index: i64) -> Result { 169 | self.get64(index) 170 | } 171 | } 172 | 173 | pub trait CloneableLongValues: LongValues { 174 | fn cloned(&self) -> Box; 175 | 176 | fn cloned_lv(&self) -> Box; 177 | } 178 | 179 | impl CloneableLongValues for T { 180 | fn cloned(&self) -> Box { 181 | Box::new(self.clone()) 182 | } 183 | 184 | fn cloned_lv(&self) -> Box { 185 | Box::new(self.clone()) 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/core/util/packed/direct_monotonic_reader.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | use core::codec::doc_values::NumericDocValues; 15 | use core::store::io::{IndexInput, RandomAccessInput}; 16 | use core::util::{packed::DirectReader, DocId, LongValues}; 17 | use error::Result; 18 | 19 | use core::util::packed::direct_reader::DirectPackedReader; 20 | use std::sync::Arc; 21 | 22 | pub struct DirectMonotonicMeta { 23 | #[allow(dead_code)] 24 | num_values: i64, 25 | block_shift: i32, 26 | num_blocks: usize, 27 | mins: Arc>, 28 | avgs: Arc>, 29 | bpvs: Arc>, 30 | offsets: Arc>, 31 | } 32 | 33 | pub struct DirectMonotonicReader; 34 | 35 | impl DirectMonotonicReader { 36 | pub fn load_meta( 37 | meta_in: &mut dyn IndexInput, 38 | num_values: i64, 39 | block_shift: i32, 40 | ) -> Result { 41 | let mut num_blocks = num_values >> block_shift; 42 | if (num_blocks << block_shift) < num_values { 43 | num_blocks += 1; 44 | } 45 | let num_blocks = num_blocks as usize; 46 | 47 | let mut mins = vec![0i64; num_blocks]; 48 | let mut avgs = vec![0f32; num_blocks]; 49 | let mut bpvs = vec![0u8; num_blocks]; 50 | let mut offsets = vec![0i64; num_blocks]; 51 | 52 | for i in 0..num_blocks { 53 | mins[i] = meta_in.read_long()?; 54 | avgs[i] = f32::from_bits(meta_in.read_int()? as u32); 55 | offsets[i] = meta_in.read_long()?; 56 | bpvs[i] = meta_in.read_byte()?; 57 | } 58 | Ok(DirectMonotonicMeta { 59 | num_values, 60 | block_shift, 61 | num_blocks, 62 | mins: Arc::new(mins), 63 | avgs: Arc::new(avgs), 64 | bpvs: Arc::new(bpvs), 65 | offsets: Arc::new(offsets), 66 | }) 67 | } 68 | 69 | pub fn get_instance( 70 | meta: &DirectMonotonicMeta, 71 | data: &Arc, 72 | ) -> Result { 73 | let mut readers = Vec::with_capacity(meta.num_blocks); 74 | for i in 0..meta.num_blocks { 75 | let reader = if meta.bpvs[i] == 0 { 76 | None 77 | } else { 78 | Some(DirectReader::get_instance( 79 | Arc::clone(data), 80 | i32::from(meta.bpvs[i]), 81 | meta.offsets[i], 82 | )?) 
83 | }; 84 | readers.push(reader); 85 | } 86 | 87 | Ok(MixinMonotonicLongValues { 88 | readers: Arc::from(Box::from(readers)), 89 | block_shift: meta.block_shift, 90 | mins: Arc::clone(&meta.mins), 91 | avgs: Arc::clone(&meta.avgs), 92 | }) 93 | } 94 | } 95 | 96 | #[derive(Clone)] 97 | pub struct MixinMonotonicLongValues { 98 | readers: Arc<[Option]>, 99 | block_shift: i32, 100 | mins: Arc>, 101 | avgs: Arc>, 102 | } 103 | 104 | impl LongValues for MixinMonotonicLongValues { 105 | fn get64(&self, index: i64) -> Result { 106 | // we know all readers don't require context 107 | let block = ((index as u64) >> self.block_shift) as usize; 108 | let block_index: i64 = index & ((1 << self.block_shift) - 1); 109 | let delta = if let Some(ref reader) = self.readers[block] { 110 | reader.get64(block_index)? 111 | } else { 112 | 0 113 | }; 114 | Ok(self.mins[block] + (self.avgs[block] * block_index as f32) as i64 + delta) 115 | } 116 | } 117 | 118 | impl NumericDocValues for MixinMonotonicLongValues { 119 | fn get(&self, doc_id: DocId) -> Result { 120 | self.get64(i64::from(doc_id)) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/core/util/packed/direct_monotonic_writer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
use core::store::io::IndexOutput;
use core::util::packed::DirectWriter;

use error::{
    ErrorKind::{IllegalArgument, IllegalState},
    Result,
};

/// Smallest allowed block shift (blocks of at least `1 << 3` values).
pub const MIN_BLOCK_SHIFT: i32 = 3;
/// Largest allowed block shift (blocks of at most `1 << 30` values).
pub const MAX_BLOCK_SHIFT: i32 = 30;

/// Writes a monotonically non-decreasing sequence of `i64` values block by
/// block: for each block it records the minimum, an average increment and the
/// bit-packed non-negative residuals, so a reader can reconstruct
/// `min + avg * i + delta[i]`. Per-block headers go to `meta`, the packed
/// payload to `data`.
pub struct DirectMonotonicWriter<'a, O: IndexOutput> {
    // Per-block metadata output (min, avg bits, payload offset, bits-per-value).
    meta: &'a mut O,
    // Packed-delta payload output.
    data: &'a mut O,
    // Number of values the caller promised to add; enforced in finish().
    num_values: usize,
    // data.file_pointer() at construction; payload offsets written to `meta`
    // are relative to this.
    base_data_pointer: i64,
    // One block's worth of pending values. NOTE(review): the element type
    // parameter was lost in extraction — `vec![0i64; ...]` in new() implies
    // `Vec<i64>`; confirm against upstream.
    buffer: Vec,
    // Number of valid entries currently held in `buffer`.
    buffer_size: usize,
    // Total number of values added so far.
    count: usize,
    // Set by finish(); guards against a second finish().
    finished: bool,
    // Last value added, for the monotonicity check in add().
    previous: i64,
}

impl<'a, O: IndexOutput> DirectMonotonicWriter<'a, O> {
    /// Creates a writer for `num_values` values using blocks of
    /// `1 << block_shift` entries.
    ///
    /// NOTE(review): the Ok type parameter of the return type was lost in
    /// extraction (presumably `Result<DirectMonotonicWriter<'a, O>>`).
    pub fn new(
        meta: &'a mut O,
        data: &'a mut O,
        num_values: i64,
        block_shift: i32,
    ) -> Result> {
        if block_shift < MIN_BLOCK_SHIFT || block_shift > MAX_BLOCK_SHIFT {
            bail!(IllegalArgument(format!(
                "block_shift must be in [3-30], got {}",
                block_shift
            )));
        }

        // Remember where this writer starts within `data` so block payload
        // offsets can be stored relative to it.
        let base_data_pointer = data.file_pointer();

        Ok(DirectMonotonicWriter {
            meta,
            data,
            num_values: num_values as usize,
            base_data_pointer,
            buffer: vec![0i64; (1 << block_shift) as usize],
            buffer_size: 0,
            count: 0,
            finished: false,
            // Sentinel below any real value so the first add() always passes
            // the monotonicity check.
            previous: i64::min_value(),
        })
    }

    /// Appends `v`, which must be >= the previously added value.
    pub fn add(&mut self, v: i64) -> Result<()> {
        if v < self.previous {
            bail!(IllegalArgument(format!(
                "Values do not come in order: {}, {}",
                self.previous, v
            )));
        }

        // Encode a completed block before buffering the next value.
        if self.buffer_size == self.buffer.len() {
            self.flush()?;
        }

        self.buffer[self.buffer_size] = v;
        self.buffer_size += 1;
        self.previous = v;
        self.count += 1;
        Ok(())
    }

    /// Flushes the trailing partial block and seals the writer. Fails if the
    /// number of values added differs from the `num_values` promised at
    /// construction, or if called twice.
    pub fn finish(&mut self) -> Result<()> {
        if self.count != self.num_values {
            bail!(IllegalState(format!(
                "Wrong number of values added, expected: {}, got: {}",
                self.num_values, self.count
            )));
        }

        if self.finished {
            bail!(IllegalState("#finish has been called already".into()));
        }

        if self.buffer_size > 0 {
            self.flush()?;
        }

        self.finished = true;

        Ok(())
    }

    /// Convenience constructor mirroring Lucene's `getInstance`; delegates to
    /// new(). NOTE(review): return type parameter lost in extraction, as in
    /// new().
    pub fn get_instance(
        meta: &'a mut O,
        data: &'a mut O,
        num_values: i64,
        block_shift: i32,
    ) -> Result> {
        DirectMonotonicWriter::new(meta, data, num_values, block_shift)
    }

    /// Encodes the buffered block: subtracts the expected value
    /// `avg_inc * i` from each entry, then the block minimum, and bit-packs
    /// the remaining non-negative deltas.
    fn flush(&mut self) -> Result<()> {
        debug_assert!(self.buffer_size != 0);

        // Average increment across the block; max(1) guards a single-value
        // block against division by zero (yielding avg_inc == 0).
        let avg_inc = ((self.buffer[self.buffer_size - 1] - self.buffer[0]) as f64
            / (self.buffer_size - 1).max(1) as f64) as f32;
        for i in 0..self.buffer_size {
            let expected = (avg_inc * i as f32) as i64;
            self.buffer[i] -= expected;
        }

        let mut min: i64 = self.buffer[0];
        for i in 1..self.buffer_size {
            min = min.min(self.buffer[i]);
        }

        let mut max_delta = 0;
        for i in 0..self.buffer_size {
            self.buffer[i] -= min;
            // use | will change nothing when it comes to computing required bits
            // but has the benefit of working fine with negative values too
            // (in case of overflow)
            max_delta |= self.buffer[i];
        }

        // Block header: min, raw bits of the f32 average, and the payload
        // offset relative to base_data_pointer.
        self.meta.write_long(min)?;
        self.meta.write_int(avg_inc.to_bits() as i32)?;
        self.meta
            .write_long(self.data.file_pointer() - self.base_data_pointer)?;

        if max_delta == 0 {
            // Every delta is zero: record 0 bits-per-value, skip the payload.
            self.meta.write_byte(0u8)?;
        } else {
            // NOTE(review): the turbofish type argument was lost in extraction
            // (presumably `DirectWriter::<O>::unsigned_bits_required`).
            let bits_required = DirectWriter::::unsigned_bits_required(max_delta);
            let mut writer =
                DirectWriter::get_instance(self.data, self.buffer_size as i64, bits_required)?;
            for i in 0..self.buffer_size {
                writer.add(self.buffer[i])?;
            }
            writer.finish()?;
            self.meta.write_byte(bits_required as u8)?;
        }
        self.buffer_size = 0;

        Ok(())
    }
}
--------------------------------------------------------------------------------
/src/core/util/packed/mod.rs:
-------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | mod direct_monotonic_reader; 15 | 16 | pub use self::direct_monotonic_reader::*; 17 | 18 | mod direct_monotonic_writer; 19 | 20 | pub use self::direct_monotonic_writer::*; 21 | 22 | mod direct_reader; 23 | 24 | pub use self::direct_reader::*; 25 | 26 | mod direct_writer; 27 | 28 | pub use self::direct_writer::*; 29 | 30 | mod monotonic_block_packed_reader; 31 | 32 | pub use self::monotonic_block_packed_reader::*; 33 | 34 | mod monotonic_block_packed_writer; 35 | 36 | pub use self::monotonic_block_packed_writer::*; 37 | 38 | mod packed_misc; 39 | 40 | pub use self::packed_misc::*; 41 | 42 | mod packed_ints_null_reader; 43 | 44 | pub use self::packed_ints_null_reader::*; 45 | 46 | mod paged_mutable; 47 | 48 | pub use self::paged_mutable::*; 49 | 50 | mod packed_long_values; 51 | 52 | pub use self::packed_long_values::*; 53 | 54 | mod block_packed_writer; 55 | 56 | pub use self::block_packed_writer::*; 57 | 58 | mod elias_fano_encoder; 59 | 60 | pub use self::elias_fano_encoder::*; 61 | 62 | mod elias_fano_decoder; 63 | 64 | pub use self::elias_fano_decoder::*; 65 | 66 | mod packed_simd; 67 | 68 | pub use self::packed_simd::*; 69 | -------------------------------------------------------------------------------- /src/core/util/packed/monotonic_block_packed_reader.rs: 
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

use core::codec::doc_values::NumericDocValues;
use core::store::io::IndexInput;
use core::util::packed::PackedIntsNullReader;
use core::util::packed::{self, Reader, ReaderEnum};
use core::util::{DocId, LongValues};
use error::ErrorKind::{CorruptIndex, IllegalArgument};
use error::Result;

use std::sync::Arc;

/// Provides random access to a stream written with MonotonicBlockPackedWriter
#[derive(Clone)]
pub struct MonotonicBlockPackedReader {
    // Shared immutable state, so Clone is a refcount bump.
    // NOTE(review): the Arc type parameter was lost in extraction (presumably
    // `Arc<MonotonicBlockPackedReaderInner>`) — confirm against upstream.
    inner: Arc,
}

struct MonotonicBlockPackedReaderInner {
    // log2(block_size); `index >> block_shift` selects the block.
    block_shift: usize,
    // block_size - 1; `index & block_mask` is the offset inside the block.
    block_mask: usize,
    // Total number of stored values.
    value_count: usize,
    // Per-block header data. NOTE(review): the Vec element type parameters
    // were lost in extraction — the initialisers in new() imply
    // `Vec<i64>`, `Vec<f32>` and a Vec of reader enums respectively; confirm.
    min_values: Vec,
    averages: Vec,
    sub_readers: Vec,
    #[allow(dead_code)]
    sum_bpv: i64,
}

impl MonotonicBlockPackedReader {
    /// Expected (approximate) value at `index`: origin + average * index.
    pub fn expected(origin: i64, average: f32, index: i32) -> i64 {
        origin + (average * index as f32) as i64
    }

    /// Reads the per-block headers (zlong min, f32 average bits, vint
    /// bits-per-value) and packed payloads from `input`.
    /// NOTE(review): the Ok type parameter of the return type was lost in
    /// extraction (presumably `Result<Self>`).
    pub fn new(
        input: &mut dyn IndexInput,
        packed_ints_version: i32,
        block_size: usize,
        value_count: usize,
        direct: bool,
    ) -> Result {
        let block_shift =
            packed::check_block_size(block_size, packed::MIN_BLOCK_SIZE, packed::MAX_BLOCK_SIZE);
        let block_mask = block_size - 1;
        let num_blocks = packed::num_blocks(value_count, block_size);
        let mut min_values = vec![0_i64; num_blocks];
        let mut averages = vec![0.0_f32; num_blocks];
        let mut sub_readers = Vec::new();
        let mut sum_bpv: i64 = 0;

        for i in 0..num_blocks {
            min_values[i] = input.read_zlong()?;
            averages[i] = f32::from_bits(input.read_int()? as u32);
            let bits_per_value = input.read_vint()?;
            sum_bpv += i64::from(bits_per_value);
            if bits_per_value > 64 {
                bail!(CorruptIndex("bits_per_value > 64".to_owned()));
            }
            if bits_per_value == 0 {
                // 0 bits per value: no payload follows; every delta is zero.
                sub_readers.push(ReaderEnum::PackedIntsNull(PackedIntsNullReader::new(
                    block_size,
                )));
            } else {
                // The final block may hold fewer than block_size values.
                let left = value_count - i * block_size;
                let size = ::std::cmp::min(left, block_size);
                if direct {
                    unimplemented!();
                } else {
                    let one_reader = packed::get_reader_no_header(
                        input,
                        packed::Format::Packed,
                        packed_ints_version,
                        size,
                        bits_per_value,
                    )?;
                    sub_readers.push(one_reader);
                }
            }
        }

        let inner = MonotonicBlockPackedReaderInner {
            block_shift,
            block_mask,
            value_count,
            min_values,
            averages,
            sub_readers,
            sum_bpv,
        };

        Ok(Self {
            inner: Arc::new(inner),
        })
    }

    /// Returns the number of values
    pub fn size(&self) -> usize {
        self.inner.value_count
    }
}

impl LongValues for MonotonicBlockPackedReader {
    // NOTE(review): the return type parameter was lost in extraction
    // (presumably `Result<i64>`).
    fn get64(&self, index: i64) -> Result {
        if !(index >= 0 && index < self.inner.value_count as i64) {
            bail!(IllegalArgument(format!("index {} out of range", index)))
        }
        let block = (index >> self.inner.block_shift) as usize;
        let idx = (index & (self.inner.block_mask as i64)) as i32;
        // Reconstruct: expected(min, avg, idx) + stored delta for that slot.
        let val = Self::expected(
            self.inner.min_values[block],
            self.inner.averages[block],
            idx,
        ) + self.inner.sub_readers[block].get(idx as usize);
        Ok(val)
    }
}

impl
NumericDocValues for MonotonicBlockPackedReader {
    // Doc-id access widens to i64 and delegates to get64().
    fn get(&self, doc_id: DocId) -> Result {
        self.get64(i64::from(doc_id))
    }
}
--------------------------------------------------------------------------------
/src/core/util/packed/monotonic_block_packed_writer.rs:
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

use core::store::io::DataOutput;
use core::util::bit_util::BitsRequired;
use core::util::packed::MonotonicBlockPackedReader;
use core::util::packed::{AbstractBlockPackedWriter, BaseBlockPackedWriter};
use error::Result;

/// Block-wise writer for monotonically non-decreasing `i64` sequences.
/// Each block is stored as (zlong min, f32 average bits, vint bits-per-value,
/// packed non-negative deltas) so MonotonicBlockPackedReader can reconstruct
/// `expected(min, avg, i) + delta[i]`.
pub struct MonotonicBlockPackedWriter {
    // Shared buffering/packing machinery; this type only adds the monotonic
    // per-block transform in flush().
    base_writer: BaseBlockPackedWriter,
}

impl MonotonicBlockPackedWriter {
    pub fn new(block_size: usize) -> MonotonicBlockPackedWriter {
        MonotonicBlockPackedWriter {
            base_writer: BaseBlockPackedWriter::new(block_size),
        }
    }
}

impl AbstractBlockPackedWriter for MonotonicBlockPackedWriter {
    /// Appends `l` (must be non-negative), flushing the current block to
    /// `out` first when it is full.
    fn add(&mut self, l: i64, out: &mut impl DataOutput) -> Result<()> {
        debug_assert!(l >= 0);
        self.base_writer.check_not_finished()?;
        if self.base_writer.off == self.base_writer.values.len() {
            self.flush(out)?;
        }
        self.base_writer.values[self.base_writer.off] = l;
        self.base_writer.off += 1;
        self.base_writer.ord += 1;
        Ok(())
    }

    /// Flushes the trailing partial block and marks the writer finished.
    fn finish(&mut self, out: &mut impl DataOutput) -> Result<()> {
        self.base_writer.check_not_finished()?;
        if self.base_writer.off > 0 {
            self.flush(out)?;
        }
        self.base_writer.finished = true;
        Ok(())
    }

    /// Encodes the buffered block to `out`.
    fn flush(&mut self, out: &mut impl DataOutput) -> Result<()> {
        debug_assert!(self.base_writer.off > 0);
        // Average increment across the block (0 for a single-value block).
        let avg = if self.base_writer.off == 1 {
            0f32
        } else {
            (self.base_writer.values[self.base_writer.off - 1] - self.base_writer.values[0]) as f32
                / (self.base_writer.off - 1) as f32
        };
        let mut min = self.base_writer.values[0];
        // adjust min so that all deltas will be positive
        for i in 1..self.base_writer.off {
            let actual = self.base_writer.values[i];
            let expected = MonotonicBlockPackedReader::expected(min, avg, i as i32);
            if expected > actual {
                min -= expected - actual;
            }
        }

        // Rewrite values in place as non-negative deltas from the expected
        // line, tracking the largest delta to size the packed encoding.
        let mut max_delta = 0i64;
        for i in 0..self.base_writer.off {
            self.base_writer.values[i] -= MonotonicBlockPackedReader::expected(min, avg, i as i32);
            max_delta = max_delta.max(self.base_writer.values[i]);
        }

        out.write_zlong(min)?;
        out.write_int(avg.to_bits() as i32)?;
        if max_delta == 0 {
            // All deltas zero: record 0 bits per value and write no payload.
            out.write_vint(0)?;
        } else {
            let bits_required = max_delta.bits_required() as i32;
            out.write_vint(bits_required)?;
            self.base_writer.write_values(bits_required, out)?;
        }

        self.base_writer.off = 0;
        Ok(())
    }

    fn reset(&mut self) {
        self.base_writer.reset();
    }
}
--------------------------------------------------------------------------------
/src/core/util/packed/packed_ints_null_reader.rs:
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use core::util::packed::Reader; 15 | 16 | pub struct PackedIntsNullReader { 17 | value_count: usize, 18 | } 19 | 20 | impl PackedIntsNullReader { 21 | pub fn new(value_count: usize) -> PackedIntsNullReader { 22 | PackedIntsNullReader { value_count } 23 | } 24 | } 25 | 26 | impl Reader for PackedIntsNullReader { 27 | fn get(&self, _doc_id: usize) -> i64 { 28 | 0 29 | } 30 | 31 | // FIXME: usize-> docId 32 | fn bulk_get(&self, index: usize, output: &mut [i64], len: usize) -> usize { 33 | assert!(index < self.value_count); 34 | let len = ::std::cmp::min(len, self.value_count - index); 35 | unsafe { 36 | let slice = output.as_mut_ptr(); 37 | ::std::ptr::write_bytes(slice, 0, len); 38 | } 39 | len 40 | } 41 | fn size(&self) -> usize { 42 | self.value_count 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/core/util/small_float.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

/// Lossy compression of an `f32` into one byte, mirroring Lucene's
/// SmallFloat "315" encoding (the shifts below carve out 3 mantissa bits and
/// apply a zero-point of 15 on the exponent).
pub struct SmallFloat;
impl SmallFloat {
    /// Compresses `f` to one byte: non-positive inputs map to 0, tiny
    /// positive values to 1, and out-of-range values saturate at 255.
    pub fn float_to_byte315(f: f32) -> u8 {
        let bits = f.to_bits() as i32;
        // Arithmetic shift keeps sign + exponent + top 3 mantissa bits,
        // matching Java's `bits >> (24 - 3)` on floatToRawIntBits.
        let small_float = (bits >> (24 - 3)) as i32;
        if small_float <= ((63 - 15) << 3) as i32 {
            // Underflow range: 0 for anything <= 0, else the smallest code 1.
            return if bits <= 0 { 0u8 } else { 1u8 };
        }
        if small_float >= ((63 - 15) << 3) as i32 + 0x100 {
            // Overflow range: saturate at the largest encodable byte.
            return 255u8;
        }
        (small_float - ((63 - 15) << 3) as i32) as u8
    }

    /// Inverse of float_to_byte315 (lossy: only encodable points round-trip).
    pub fn byte315_to_float(b: u8) -> f32 {
        if b == 0 {
            0f32
        } else {
            // Re-expand the compressed exponent/mantissa into f32 bit layout.
            let mut bits = u32::from(b) << (24 - 3);
            bits += (63 - 15) << 24;
            f32::from_bits(bits)
        }
    }
}

#[cfg(test)]
pub mod tests {
    extern crate rand;

    use super::*;

    // Field-by-field reference decoder used to cross-check byte315_to_float.
    fn origin_byte_to_float(b: u8) -> f32 {
        if b == 0 {
            return 0f32;
        }
        let mantissa = b & 7;
        let exponent = (b >> 3) & 31;
        let bits = ((u32::from(exponent) + (63 - 15)) << 24) | ((u32::from(mantissa)) << 21) as u32;
        f32::from_bits(bits as u32)
    }

    // Field-by-field reference encoder used to cross-check float_to_byte315.
    fn origin_float_to_byte(f: f32) -> u8 {
        if f < 0.0f32 {
            return 0u8;
        }

        let bits = f.to_bits() as i32;
        let mut mantissa = (bits & 0xff_ffff) >> 21 as i32;
        let mut exponent = (((bits >> 24) & 0x7f) - 63) + 15;

        if exponent > 31 {
            // Overflow: clamp to the largest representable exponent/mantissa.
            exponent = 31;
            mantissa = 7;
        }

        if exponent < 0 || (exponent == 0 && mantissa == 0) {
            // Underflow: smallest non-zero code.
            exponent = 0;
            mantissa = 1;
        }
        ((exponent << 3) | mantissa) as u8
    }

    #[test]
    fn test_float_to_byte315() {
        let min_value = 1.4e-45f32;
        let positive_infinity = 1.0f32 / 0.0f32;
        let negative_infinity = -1.0f32 / 0.0f32;
        let max_value = 3.402_823_5e+38f32;

        assert_eq!(1, origin_float_to_byte(5.812_381_7E-10f32));
        assert_eq!(1, SmallFloat::float_to_byte315(5.812_381_7E-10f32));

        assert_eq!(0, SmallFloat::float_to_byte315(0f32));
        assert_eq!(1, SmallFloat::float_to_byte315(min_value));
        assert_eq!(255, SmallFloat::float_to_byte315(max_value));
        assert_eq!(255, SmallFloat::float_to_byte315(positive_infinity));

        assert_eq!(0, SmallFloat::float_to_byte315(-min_value));
        assert_eq!(0, SmallFloat::float_to_byte315(-max_value));
        assert_eq!(0, SmallFloat::float_to_byte315(negative_infinity));

        // Fuzz against the reference encoder over random bit patterns.
        let num = 100_000;
        for _ in 0..num {
            // NOTE(review): the turbofish type argument was lost in extraction
            // (presumably `rand::random::<u32>()`, matching the annotation).
            let m: u32 = rand::random::();
            let f = f32::from_bits(m);
            if f.is_nan() {
                continue;
            }
            let b1 = origin_float_to_byte(f);
            let b2 = SmallFloat::float_to_byte315(f);
            assert_eq!(b1, b2);
        }
    }

    #[test]
    fn test_byte315_to_float() {
        for i in 0..256 {
            let f1 = origin_byte_to_float(i as u8);
            let f2 = SmallFloat::byte315_to_float(i as u8);
            assert!((f1 - f2) < ::std::f32::EPSILON);
        }
    }
}
--------------------------------------------------------------------------------
/src/core/util/string_util.rs:
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | use rand::{thread_rng, Rng}; 15 | 16 | /// length in bytes of an ID 17 | pub const ID_LENGTH: usize = 16; 18 | 19 | /// Generates a non-cryptographic globally unique id. 20 | pub fn random_id() -> [u8; ID_LENGTH] { 21 | let mut id = [0u8; ID_LENGTH]; 22 | thread_rng().fill(&mut id); 23 | id 24 | } 25 | 26 | pub fn id2str(id: &[u8]) -> String { 27 | let strs: Vec = id.iter().map(|b| format!("{:02X}", b)).collect(); 28 | strs.join("") 29 | } 30 | 31 | pub fn bytes_subtract(bytes_per_dim: usize, dim: usize, a: &[u8], b: &[u8], result: &mut [u8]) { 32 | let start = dim * bytes_per_dim; 33 | let end = start + bytes_per_dim; 34 | let mut borrow = 0; 35 | let mut i = end - 1; 36 | while i >= start { 37 | let mut diff: i32 = (a[i] as u32 as i32) - (b[i] as u32 as i32) - borrow; 38 | if diff < 0 { 39 | diff += 256; 40 | borrow = 1; 41 | } else { 42 | borrow = 0; 43 | } 44 | 45 | result[i - start] = diff as u8; 46 | i -= 1; 47 | } 48 | 49 | if borrow != 0 { 50 | panic!("a i32 { 57 | let len = left.len().min(right.len()); 58 | for i in 0..len { 59 | if left[i] != right[i] { 60 | return i as i32; 61 | } 62 | } 63 | 64 | len as i32 65 | } 66 | 67 | /// Returns the length of {@code currentTerm} needed for use as a sort key. 68 | /// so that {@link BytesRef#compareTo(BytesRef)} still returns the same result. 69 | /// This method assumes currentTerm comes after priorTerm. 
70 | pub fn sort_key_length(prior_term: &[u8], current_term: &[u8]) -> usize { 71 | let current_term_offset = 0usize; 72 | let prior_term_offset = 0usize; 73 | let limit = prior_term.len().min(current_term.len()); 74 | 75 | for i in 0..limit { 76 | if prior_term[prior_term_offset + i] != current_term[current_term_offset + i] { 77 | return i + 1; 78 | } 79 | } 80 | 81 | current_term.len().min(1 + prior_term.len()) 82 | } 83 | 84 | #[cfg(test)] 85 | mod tests { 86 | use super::*; 87 | 88 | #[test] 89 | fn test_id2str() { 90 | let v = vec![65u8, 97u8, 4u8, 127u8]; 91 | let strv = id2str(&v[..]); 92 | assert_eq!("4161047F", strv); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Zhizhesihai (Beijing) Technology Limited. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | 14 | extern crate error_chain; 15 | extern crate serde_json; 16 | 17 | use core::index; 18 | use core::search; 19 | use core::search::collector; 20 | 21 | use std::borrow::Cow; 22 | use std::sync::PoisonError; 23 | 24 | error_chain! 
{
    types {
        Error, ErrorKind, ResultExt, Result;
    }
    errors {
        Poisoned {
            description("a thread holding the locked panicked and poisoned the lock")
        }

        IllegalState(desc: String) {
            description(desc)
            display("Illegal state: {}", desc)
        }

        IllegalArgument(desc: String) {
            description(desc)
            display("Illegal argument: {}", desc)
        }

        UnexpectedEOF(errmsg: String) {
            description(errmsg)
            display("Unexpected EOF: {}", errmsg)
        }

        CorruptIndex(errmsg: String) {
            description(errmsg)
            display("Corrupt Index: {}", errmsg)
        }

        // NOTE(review): the trailing comma after description(errmsg) is kept
        // byte-for-byte from the original — confirm it is accepted by the
        // error_chain! grammar before normalising it away.
        UnsupportedOperation(errmsg: Cow<'static, str>) {
            description(errmsg),
            display("Unsupported Operation: {}", errmsg)
        }

        AlreadyClosed(errmsg: String) {
            description(errmsg)
            display("Already Closed: {}", errmsg)
        }

        IOError(errmsg: String) {
            description(errmsg)
            display("IO Error: {}", errmsg)
        }

        RuntimeError(errmsg: String) {
            description(errmsg)
            display("Runtime Error: {}", errmsg)
        }
    }

    // std / third-party error types that convert into this Error via From.
    foreign_links {
        FmtError(::std::fmt::Error);
        IoError(::std::io::Error);
        FromUtf8Err(::std::string::FromUtf8Error);
        Utf8Error(::std::str::Utf8Error);
        NumError(::std::num::ParseIntError);
        ParseFloatError(::std::num::ParseFloatError);
        SerdeJsonError(self::serde_json::Error);
        NulError(::std::ffi::NulError);
        TimeError(::std::time::SystemTimeError);
    }

    // Other error_chain-based error types in this crate, chained in.
    links {
        Collector(collector::Error, collector::ErrorKind);
        Search(search::Error, search::ErrorKind);
        Index(index::Error, index::ErrorKind);
    }
}

/// Lets `?` convert a poisoned-lock error from any `Mutex`/`RwLock` guard
/// into this crate's `Error` (as `ErrorKind::Poisoned`).
///
/// NOTE(review): the generic parameters on this impl were destroyed in
/// extraction (`impl From> for Error` / `fn from(_: PoisonError)` is not
/// valid Rust); reconstructed as the only form that type-checks against
/// `std::sync::PoisonError<T>` — confirm against upstream.
impl<T> From<PoisonError<T>> for Error {
    fn from(_: PoisonError<T>) -> Error {
        ErrorKind::Poisoned.into()
    }
}
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
// Copyright 2019 Zhizhesihai (Beijing) Technology Limited.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

// Deep macro expansion (e.g. error_chain!) needs more than the default limit.
#![recursion_limit = "1024"]
// Optional old-style clippy plugin support behind the "clippy" feature.
#![cfg_attr(feature = "clippy", feature(plugin))]
#![cfg_attr(feature = "clippy", plugin(clippy))]
#![cfg_attr(not(feature = "clippy"), allow(unknown_lints))]
// Unstable features this crate relies on; these pin the crate to the nightly
// toolchain recorded in the repository's rust-toolchain file.
#![feature(exact_size_is_empty)]
#![feature(drain_filter)]
#![feature(hashmap_internals)]
#![feature(integer_atomics)]
#![feature(vec_remove_item)]
#![feature(specialization)]
#![allow(clippy::cast_lossless)]
#![feature(fn_traits)]
#![feature(maybe_uninit_ref)]
#![feature(maybe_uninit_extra)]
#![feature(in_band_lifetimes)]
#![feature(vec_into_raw_parts)]
#![feature(core_intrinsics)]
#![feature(stmt_expr_attributes)]

// Crates imported with #[macro_use] export macros used throughout the crate
// (2015-edition style macro imports).
#[macro_use]
extern crate error_chain;
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
extern crate rand;
extern crate regex;
extern crate serde;
#[macro_use]
extern crate serde_derive;
extern crate serde_json;

extern crate alloc;
extern crate byteorder;
extern crate bytes;
extern crate crc;
extern crate crossbeam;
extern crate fasthash;
extern crate flate2;
extern crate memmap;
extern crate num_cpus;
extern crate num_traits;
extern crate smallvec;
extern crate thread_local;
extern crate unicode_reader;
#[macro_use]
extern crate crunchy;

// Public API surface: the library proper and its error types.
pub mod core;
pub mod error;
--------------------------------------------------------------------------------