├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── dotnet-core.yml │ └── publish-docs.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── LunrCore.sln ├── LunrCore.sln.DotSettings ├── LunrCore ├── Assets │ └── LunrCore.png ├── AsyncEnumerableExtensions.cs ├── Builder.cs ├── Clause.cs ├── Document.cs ├── EnglishStemmer.cs ├── EnglishStopWordFilter.cs ├── Extensions │ ├── DictionaryExtensions.cs │ └── StringBuilderExtensions.cs ├── Field.cs ├── FieldMatchMetadata.cs ├── FieldMatches.cs ├── FieldReference.cs ├── FieldTermFrequencies.cs ├── ITokenizer.cs ├── Index.cs ├── InvertedIndex.cs ├── InvertedIndexEntry.cs ├── Lexeme.cs ├── LexemeType.cs ├── LunrCore.csproj ├── MatchData.cs ├── Pipeline.cs ├── PipelineFunctionRegistry.cs ├── Query.cs ├── QueryLexer.cs ├── QueryParser.cs ├── QueryParserException.cs ├── QueryPresence.cs ├── QueryString.cs ├── QueryWildcard.cs ├── Result.cs ├── Serialization │ ├── IndexJsonConverter.cs │ ├── InvertedIndexEntryJsonConverter.cs │ ├── InvertedIndexJsonConverter.cs │ ├── JsonConverterExtensions.cs │ ├── SliceConverter.cs │ └── VectorJsonConverter.cs ├── Set.cs ├── Slice.cs ├── StemmerBase.cs ├── StopWordFilterBase.cs ├── TermFrequencies.cs ├── Token.cs ├── TokenMetadata.cs ├── TokenSet.cs ├── TokenSetIdProvider.cs ├── TokenizeDelegate.cs ├── Tokenizer.cs ├── Trimmer.cs ├── Util.cs └── Vector.cs ├── LunrCoreLmdb ├── Assets │ └── LunrCoreLmdb.png ├── DelegatedIndex.cs ├── DeserializeContext.cs ├── IReadOnlyIndex.cs ├── KeyBuilder.cs ├── LmdbBuilder.cs ├── LmdbIndex.cs ├── LunrCoreLmdb.csproj ├── SerializationExtensions.cs └── SerializeContext.cs ├── LunrCoreLmdbPerf ├── BlockCopyVsLinqConcat.cs ├── DelegatedIndexExtensions.cs ├── InterpolateVsAdd.cs ├── LunrCoreLmdbPerf.csproj ├── Program.cs ├── SearchBenchmarkBase.cs ├── SearchBenchmarkLmdb.cs ├── SearchBenchmarkWrappedIndex.cs └── SpanVsGetPinnableReference.cs ├── LunrCoreLmdbTests ├── LmdbBuilderTests.cs ├── 
LunrCoreLmdbTests.csproj ├── SearchTests.cs ├── SerializationTests.cs ├── TempDirectory.cs └── TempDirectoryCollection.cs ├── LunrCorePerf ├── BuilderBenchmark.cs ├── LunrCorePerf.csproj ├── PipelineBenchmark.cs ├── Program.cs ├── QueryParserBenchmark.cs ├── SearchBenchmark.cs ├── StemmerBenchmark.cs ├── TokenSetBenchmark.cs ├── TokenizerBenchmark.cs ├── VectorBenchmark.cs ├── Words.cs └── fixtures │ └── words.txt ├── LunrCoreTests ├── BuilderTests.cs ├── EnglishStopWordFilterTests.cs ├── FieldReferenceTests.cs ├── LunrCoreTests.csproj ├── MatchDataTests.cs ├── MultipleMandatoryFieldsTest.cs ├── PipelineTests.cs ├── QueryLexerTests.cs ├── QueryParserTests.cs ├── QueryTests.cs ├── SearchTests.cs ├── SerializationTest.cs ├── SetTests.cs ├── StemmerTests.cs ├── StopWordFilterTests.cs ├── TestHelpers.cs ├── TokenSetTests.cs ├── TokenTests.cs ├── TokenizerTests.cs ├── TrimmerTests.cs ├── VectorTests.cs └── fixtures │ └── stemming_vocab.json ├── README.md ├── docs ├── README.md ├── getting-started │ └── README.md └── requirements.txt └── mkdocs.yml /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [bleroy] 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '[BUG] ' 5 | labels: '' 6 | assignees: bleroy 7 | 8 | --- 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior. 12 | 13 | **Expected behavior** 14 | A clear and concise description of what you expected to happen. 
15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '[FEATURE] ' 5 | labels: '' 6 | assignees: bleroy 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 18 | -------------------------------------------------------------------------------- /.github/workflows/dotnet-core.yml: -------------------------------------------------------------------------------- 1 | name: .NET Core 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Setup .NET Core 17 | uses: actions/setup-dotnet@v1 18 | with: 19 | dotnet-version: 6.0.101 20 | - name: Checkout lmdb 21 | uses: actions/checkout@v2 22 | with: 23 | repository: LMDB/lmdb 24 | ref: mdb.master 25 | path: lmdb 26 | - name: Build lmdb 27 | working-directory: ${{ runner.workspace }}/lunr-core/lmdb/libraries/liblmdb/ 28 | run: make 29 | - name: Install dependencies 30 | run: dotnet restore 31 | - name: Build 32 | run: dotnet build --configuration Release --no-restore 33 | - name: Test 34 | run: LD_LIBRARY_PATH=${{ runner.workspace }}/lunr-core/lmdb/libraries/liblmdb/:$LD_LIBRARY_PATH dotnet test --no-restore --verbosity normal 35 | -------------------------------------------------------------------------------- /.github/workflows/publish-docs.yml: 
-------------------------------------------------------------------------------- 1 | name: Publish docs via GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | # Only rebuild website when docs have changed 9 | - 'docs/**' 10 | 11 | jobs: 12 | build: 13 | name: Deploy docs 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout master 17 | uses: actions/checkout@v3 18 | 19 | - name: Set up Python 3.11 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: 3.11 23 | 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -r docs/requirements.txt 28 | 29 | - name: Deploy docs 30 | run: mkdocs gh-deploy --force -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vs 2 | bin 3 | obj 4 | /*.sln.DotSettings.* 5 | .idea 6 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at bertrandleroy@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Bertrand Le Roy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 
all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LunrCore.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30223.230 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCore", "LunrCore\LunrCore.csproj", "{35E0C04E-9508-408D-B1EB-61402BEAECBA}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCoreTests", "LunrCoreTests\LunrCoreTests.csproj", "{7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}" 9 | EndProject 10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Metadata", "Metadata", "{333885CB-3ADF-4462-8910-27A16B6C5F55}" 11 | ProjectSection(SolutionItems) = preProject 12 | CODE_OF_CONDUCT.md = CODE_OF_CONDUCT.md 13 | LICENSE = LICENSE 14 | README.md = README.md 15 | EndProjectSection 16 | EndProject 17 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCorePerf", "LunrCorePerf\LunrCorePerf.csproj", "{863FDAF5-53CD-4D58-911C-B55AF19148BA}" 18 | EndProject 19 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCoreLmdb", "LunrCoreLmdb\LunrCoreLmdb.csproj", "{E5799A1F-31B5-4E14-8C49-18CE73793FEC}" 20 | EndProject 21 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCoreLmdbTests", 
"LunrCoreLmdbTests\LunrCoreLmdbTests.csproj", "{2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}" 22 | EndProject 23 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LunrCoreLmdbPerf", "LunrCoreLmdbPerf\LunrCoreLmdbPerf.csproj", "{41BB51FD-462C-4AAB-9B4D-127FD784B566}" 24 | EndProject 25 | Global 26 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 27 | Debug|Any CPU = Debug|Any CPU 28 | Release|Any CPU = Release|Any CPU 29 | EndGlobalSection 30 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 31 | {35E0C04E-9508-408D-B1EB-61402BEAECBA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 32 | {35E0C04E-9508-408D-B1EB-61402BEAECBA}.Debug|Any CPU.Build.0 = Debug|Any CPU 33 | {35E0C04E-9508-408D-B1EB-61402BEAECBA}.Release|Any CPU.ActiveCfg = Release|Any CPU 34 | {35E0C04E-9508-408D-B1EB-61402BEAECBA}.Release|Any CPU.Build.0 = Release|Any CPU 35 | {7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 36 | {7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}.Debug|Any CPU.Build.0 = Debug|Any CPU 37 | {7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}.Release|Any CPU.ActiveCfg = Release|Any CPU 38 | {7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}.Release|Any CPU.Build.0 = Release|Any CPU 39 | {863FDAF5-53CD-4D58-911C-B55AF19148BA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 40 | {863FDAF5-53CD-4D58-911C-B55AF19148BA}.Debug|Any CPU.Build.0 = Debug|Any CPU 41 | {863FDAF5-53CD-4D58-911C-B55AF19148BA}.Release|Any CPU.ActiveCfg = Release|Any CPU 42 | {863FDAF5-53CD-4D58-911C-B55AF19148BA}.Release|Any CPU.Build.0 = Release|Any CPU 43 | {E5799A1F-31B5-4E14-8C49-18CE73793FEC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 44 | {E5799A1F-31B5-4E14-8C49-18CE73793FEC}.Debug|Any CPU.Build.0 = Debug|Any CPU 45 | {E5799A1F-31B5-4E14-8C49-18CE73793FEC}.Release|Any CPU.ActiveCfg = Release|Any CPU 46 | {E5799A1F-31B5-4E14-8C49-18CE73793FEC}.Release|Any CPU.Build.0 = Release|Any CPU 47 | {2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 48 | 
{2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}.Debug|Any CPU.Build.0 = Debug|Any CPU 49 | {2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}.Release|Any CPU.ActiveCfg = Release|Any CPU 50 | {2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}.Release|Any CPU.Build.0 = Release|Any CPU 51 | {41BB51FD-462C-4AAB-9B4D-127FD784B566}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 52 | {41BB51FD-462C-4AAB-9B4D-127FD784B566}.Debug|Any CPU.Build.0 = Debug|Any CPU 53 | {41BB51FD-462C-4AAB-9B4D-127FD784B566}.Release|Any CPU.ActiveCfg = Release|Any CPU 54 | {41BB51FD-462C-4AAB-9B4D-127FD784B566}.Release|Any CPU.Build.0 = Release|Any CPU 55 | EndGlobalSection 56 | GlobalSection(SolutionProperties) = preSolution 57 | HideSolutionNode = FALSE 58 | EndGlobalSection 59 | GlobalSection(ExtensibilityGlobals) = postSolution 60 | SolutionGuid = {684039DE-9AA9-47A8-B12B-354F9147B73E} 61 | EndGlobalSection 62 | EndGlobal 63 | -------------------------------------------------------------------------------- /LunrCore.sln.DotSettings: -------------------------------------------------------------------------------- 1 |  2 | True 3 | True 4 | True 5 | True -------------------------------------------------------------------------------- /LunrCore/Assets/LunrCore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleroy/lunr-core/8ba2fa163a8ccd19efa95428b77a028b6ce6e183/LunrCore/Assets/LunrCore.png -------------------------------------------------------------------------------- /LunrCore/AsyncEnumerableExtensions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Runtime.CompilerServices; 4 | using System.Threading; 5 | using System.Threading.Tasks; 6 | 7 | namespace Lunr 8 | { 9 | /// 10 | /// A set of internal simple extensions to work with asynchronous enumerable 11 | /// without importing System.Linq.Async. 
12 | /// 13 | public static class AsyncEnumerableExtensions 14 | { 15 | /// 16 | /// Applies the provided selector on each item in the source enumeration. 17 | /// 18 | /// The source enumeration. 19 | /// The selector to apply on each item in the enumeration. 20 | /// A cancellation token. 21 | /// The async enumeration of results from applying the selector. 22 | public static async IAsyncEnumerable Select( 23 | this IAsyncEnumerable source, 24 | Func selector, 25 | [EnumeratorCancellation] CancellationToken cancellationToken) 26 | { 27 | await foreach (TSource sourceItem in source.WithCancellation(cancellationToken)) 28 | { 29 | if (cancellationToken.IsCancellationRequested) yield break; 30 | yield return selector(sourceItem); 31 | } 32 | } 33 | 34 | /// 35 | /// Creates an async enumerable from a regular enumerable. 36 | /// 37 | /// The enumerable. 38 | /// A cancellation token. 39 | /// The async enumerable. 40 | public static async IAsyncEnumerable ToAsyncEnumerable( 41 | this IEnumerable source, 42 | [EnumeratorCancellation] CancellationToken cancellationToken) 43 | { 44 | foreach (T item in source) 45 | { 46 | if (cancellationToken.IsCancellationRequested) yield break; 47 | yield return await new ValueTask(item).ConfigureAwait(false); 48 | } 49 | } 50 | 51 | /// 52 | /// Builds a list from an async enumerable. 53 | /// This enumerates the whole thing, so use with caution, 54 | /// there's probably a reason why that was async enumerable. 55 | /// 56 | /// The async enumerable. 57 | /// A cancellation token. 58 | /// 59 | public static async ValueTask> ToList( 60 | this IAsyncEnumerable source, 61 | CancellationToken? cancellationToken = null) 62 | { 63 | var result = new List(); 64 | await foreach (T item in source) 65 | { 66 | if (cancellationToken?.IsCancellationRequested ?? 
false) 67 | { 68 | return result; 69 | } 70 | result.Add(item); 71 | } 72 | return result; 73 | } 74 | 75 | /// 76 | /// Tests if an async enumerable has any elements satisfying a condition. 77 | /// 78 | /// The enumerable. 79 | /// 80 | /// An optional predicate that an element of the enumerable must satisfy. 81 | /// If this is not provided, any element will do. 82 | /// 83 | /// True if any element satisfy the condition. 84 | public static async ValueTask Any( 85 | this IAsyncEnumerable source, 86 | Func? predicate = null) 87 | { 88 | await foreach(T item in source) 89 | { 90 | if (predicate is null || predicate(item)) 91 | { 92 | return true; 93 | } 94 | } 95 | return false; 96 | } 97 | 98 | /// 99 | /// An empty async enumerable. 100 | /// 101 | /// An empty async enumerable of the specified type. 102 | public static async IAsyncEnumerable Empty() 103 | { 104 | await Task.CompletedTask.ConfigureAwait(false); 105 | yield break; 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /LunrCore/Clause.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Diagnostics; 4 | using System.Linq; 5 | 6 | namespace Lunr 7 | { 8 | /// 9 | /// A single clause in a `Query` contains a term and details on how to 10 | /// match that term against an `Index`. 11 | /// 12 | [DebuggerDisplay("{" + nameof(DebuggerDisplay) + ",nq}")] 13 | public sealed class Clause 14 | { 15 | public static readonly Clause Empty = new Clause(); 16 | 17 | /// 18 | /// Builds a new clause. 19 | /// 20 | /// The term to search for. 21 | /// Any boost that should be applied when matching this clause. 22 | /// Whether the term should have fuzzy matching applied, and how fuzzy the match should be. 23 | /// Whether the term should be passed through the search pipeline. 24 | /// Whether the term should have wildcards appended or prepended. 
/// The term's presence in any matching documents.
/// The fields in an index this clause should be matched against.
public Clause(
    string term = "",
    double boost = 1,
    int editDistance = 0,
    bool usePipeline = true,
    QueryWildcard wildcard = QueryWildcard.None,
    QueryPresence presence = QueryPresence.Optional,
    IEnumerable<string>? fields = null)
{
    Fields = fields ?? Array.Empty<string>();
    Boost = boost;
    EditDistance = editDistance;
    UsePipeline = usePipeline;
    Wildcard = wildcard;
    Presence = presence;

    // Decorate the term with the requested wildcards, skipping a wildcard
    // that is already present. The length checks matter: indexing an empty
    // term (the default) used to throw IndexOutOfRangeException as soon as
    // a wildcard flag was set.
    // The two steps run in sequence on purpose, so that an empty term with
    // both flags set ends up as a single "*" rather than "**".
    string decorated = term;
    if ((wildcard & QueryWildcard.Leading) != 0
        && (decorated.Length == 0 || decorated[0] != Query.Wildcard))
    {
        decorated = "*" + decorated;
    }
    if ((wildcard & QueryWildcard.Trailing) != 0
        && (decorated.Length == 0 || decorated[decorated.Length - 1] != Query.Wildcard))
    {
        decorated += "*";
    }
    Term = decorated;
}

/// <summary>
/// Builds a new clause from a variable-length list of field names.
/// All arguments are forwarded unchanged to the
/// <see cref="IEnumerable{T}"/>-based constructor.
/// </summary>
/// <param name="term">The term to search for.</param>
/// <param name="boost">Any boost that should be applied when matching this clause.</param>
/// <param name="editDistance">Whether the term should have fuzzy matching applied, and how fuzzy the match should be.</param>
/// <param name="usePipeline">Whether the term should be passed through the search pipeline.</param>
/// <param name="wildcard">Whether the term should have wildcards appended or prepended.</param>
/// <param name="presence">The term's presence in any matching documents.</param>
/// <param name="fields">The fields in an index this clause should be matched against.</param>
public Clause(
    string term = "",
    double boost = 1,
    int editDistance = 0,
    bool usePipeline = true,
    QueryWildcard wildcard = QueryWildcard.None,
    QueryPresence presence = QueryPresence.Optional,
    params string[] fields)
    : this(
        term,
        boost,
        editDistance,
        usePipeline,
        wildcard,
        presence,
        (IEnumerable<string>)fields) { }

/// <summary>
/// The fields in an index this clause should be matched against.
/// Empty when no fields were supplied to the constructor.
/// </summary>
public IEnumerable<string> Fields { get; }

///
/// Any boost that should be applied when matching this clause.
///
public double Boost { get; }

/// <summary>
/// Fuzzy-matching tolerance for the term, as an edit distance.
/// The constructor default is 0.
/// </summary>
public int EditDistance { get; }

/// <summary>
/// Whether the term should be passed through the search pipeline.
/// </summary>
public bool UsePipeline { get; }

/// <summary>
/// Flags telling whether a wildcard is prepended and/or appended to the term.
/// </summary>
public QueryWildcard Wildcard { get; }

/// <summary>
/// The term's presence in any matching documents.
/// </summary>
public QueryPresence Presence { get; }

/// <summary>
/// The term to search for, as stored by the constructor
/// (that is, including any wildcard the constructor added).
/// </summary>
public string Term { get; }

/// <summary>
/// Creates a copy of this clause that searches for a different term.
/// Every other setting, including the wildcard flags, is carried over.
/// </summary>
/// <param name="term">The new term.</param>
/// <returns>The new clause.</returns>
public Clause WithTerm(string term)
{
    return new Clause(term, Boost, EditDistance, UsePipeline, Wildcard, Presence, Fields);
}

/// <summary>
/// Creates a copy of this clause with a different presence.
/// </summary>
/// <param name="presence">The new presence.</param>
/// <returns>The new clause.</returns>
public Clause WithPresence(QueryPresence presence)
{
    return new Clause(Term, Boost, EditDistance, UsePipeline, Wildcard, presence, Fields);
}

/// <summary>
/// Creates a copy of this clause with a different edit distance.
/// </summary>
/// <param name="editDistance">The new edit distance.</param>
/// <returns>The new clause.</returns>
public Clause WithEditDistance(int editDistance)
{
    return new Clause(Term, Boost, editDistance, UsePipeline, Wildcard, Presence, Fields);
}

/// <summary>
/// Creates a copy of this clause with a different boost.
/// </summary>
/// <param name="boost">The new boost.</param>
/// <returns>The new clause.</returns>
public Clause WithBoost(double boost)
{
    return new Clause(Term, boost, EditDistance, UsePipeline, Wildcard, Presence, Fields);
}

///
/// Creates a clone of this clause with the specified pipeline usage.
///
/// The new pipeline usage.
145 | /// the new clause. 146 | public Clause WithUsePipeline(bool usePipeline) 147 | => new Clause(Term, Boost, EditDistance, usePipeline, Wildcard, Presence, Fields); 148 | 149 | /// 150 | /// Creates a clone of this clause with the specified list of fields appended. 151 | /// 152 | /// The list of fields to append. 153 | /// the new clause. 154 | public Clause WithFields(IEnumerable fields) 155 | => new Clause(Term, Boost, EditDistance, UsePipeline, Wildcard, Presence, Fields.Concat(fields).ToArray()); 156 | 157 | /// 158 | /// Creates a clone of this clause with the specified list of fields appended. 159 | /// 160 | /// The list of fields to append. 161 | /// the new clause. 162 | public Clause WithFields(params string[] fields) 163 | => new Clause(Term, Boost, EditDistance, UsePipeline, Wildcard, Presence, Fields.Concat(fields).ToArray()); 164 | 165 | private string DebuggerDisplay => (Fields.Any() ? string.Join(", ", Fields) + ":" : "") + 166 | (Presence switch { QueryPresence.Required => "+", QueryPresence.Prohibited => "-", _ => "" }) + 167 | ((Wildcard & QueryWildcard.Leading) == 0 ? "" : "*") + 168 | Term + 169 | (Boost == 1 ? "" : "^" + Boost) + 170 | (EditDistance == 0 ? "" : "~" + EditDistance) + 171 | ((Wildcard & QueryWildcard.Trailing) == 0 ? "" : "*") + 172 | (UsePipeline ? 
" (use pipeline)" : ""); 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /LunrCore/Document.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Lunr 4 | { 5 | public sealed class Document : Dictionary 6 | { 7 | public Document() 8 | { } 9 | 10 | public Document(IDictionary dict) : base(dict) { } 11 | 12 | public double Boost { get; set; } = 1; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /LunrCore/EnglishStemmer.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Globalization; 3 | using System.Text.RegularExpressions; 4 | 5 | namespace Lunr 6 | { 7 | public sealed class EnglishStemmer : StemmerBase 8 | { 9 | private static readonly CultureInfo culture = CultureInfo.CreateSpecificCulture("en"); 10 | 11 | private static readonly Dictionary step2list = new Dictionary 12 | { 13 | { "ational", "ate" }, 14 | { "tional", "tion" }, 15 | { "enci", "ence" }, 16 | { "anci", "ance" }, 17 | { "izer", "ize" }, 18 | { "bli", "ble" }, 19 | { "alli", "al" }, 20 | { "entli", "ent" }, 21 | { "eli", "e" }, 22 | { "ousli", "ous" }, 23 | { "ization", "ize" }, 24 | { "ation", "ate" }, 25 | { "ator", "ate" }, 26 | { "alism", "al" }, 27 | { "iveness", "ive" }, 28 | { "fulness", "ful" }, 29 | { "ousness", "ous" }, 30 | { "aliti", "al" }, 31 | { "iviti", "ive" }, 32 | { "biliti", "ble" }, 33 | { "logi", "log" } 34 | }; 35 | 36 | private static readonly Dictionary step3list = new Dictionary 37 | { 38 | { "icate", "ic" }, 39 | { "ative", "" }, 40 | { "alize", "al" }, 41 | { "iciti", "ic" }, 42 | { "ical", "ic" }, 43 | { "ful", "" }, 44 | { "ness", "" } 45 | }; 46 | 47 | private const string c = "[^aeiou]"; // consonant 48 | private const string v = "[aeiouy]"; // vowel 49 | private const string C = c + 
"[^aeiouy]*"; // consonant sequence 50 | private const string V = v + "[aeiou]*"; // vowel sequence 51 | private const string mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 52 | private const string meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 53 | private const string mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 54 | private const string s_v = "^(" + C + ")?" + v; // vowel in stem 55 | 56 | private static readonly Regex re_mgr0 = new Regex(mgr0); 57 | private static readonly Regex re_mgr1 = new Regex(mgr1); 58 | private static readonly Regex re_meq1 = new Regex(meq1); 59 | private static readonly Regex re_s_v = new Regex(s_v); 60 | 61 | private static readonly Regex re_1a = new Regex("^(.+?)(ss|i)es$"); 62 | private static readonly Regex re2_1a = new Regex("^(.+?)([^s])s$"); 63 | private static readonly Regex re_1b = new Regex("^(.+?)eed$"); 64 | private static readonly Regex re2_1b = new Regex("^(.+?)(ed|ing)$"); 65 | private static readonly Regex re_1b_2 = new Regex(".$"); 66 | private static readonly Regex re2_1b_2 = new Regex("(at|bl|iz)$"); 67 | private static readonly Regex re3_1b_2 = new Regex("([^aeiouylsz])\\1$"); 68 | private static readonly Regex re4_1b_2 = new Regex("^" + C + v + "[^aeiouwxy]$"); 69 | 70 | private static readonly Regex re_1c = new Regex("^(.+?[^aeiou])y$"); 71 | private static readonly Regex re_2 = new Regex("^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$"); 72 | 73 | private static readonly Regex re_3 = new Regex("^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$"); 74 | 75 | private static readonly Regex re_4 = new Regex("^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$"); 76 | private static readonly Regex re2_4 = new Regex("^(.+?)(s|t)(ion)$"); 77 | 78 | private static readonly Regex re_5 = new Regex("^(.+?)e$"); 79 | private static readonly Regex re_5_1 = new 
Regex("ll$"); 80 | private static readonly Regex re3_5 = new Regex("^" + C + v + "[^aeiouwxy]$"); 81 | 82 | public override string Stem(string w) 83 | { 84 | if (w.Length < 3) return w; 85 | 86 | char firstch = w[0]; 87 | if (firstch == 'y') 88 | { 89 | w = char.ToUpper(firstch, culture) + w.Substring(1); 90 | } 91 | 92 | // Step 1a 93 | Regex re = re_1a; 94 | Regex re2 = re2_1a; 95 | 96 | if (re.IsMatch(w)) { w = re.Replace(w, "$1$2"); } 97 | else if (re2.IsMatch(w)) { w = re2.Replace(w, "$1$2"); } 98 | 99 | // Step 1b 100 | re = re_1b; 101 | re2 = re2_1b; 102 | if (re.IsMatch(w)) 103 | { 104 | GroupCollection fp = re.Match(w).Groups; 105 | re = re_mgr0; 106 | if (re.IsMatch(fp[1].Value)) 107 | { 108 | re = re_1b_2; 109 | w = re.Replace(w, ""); 110 | } 111 | } 112 | else if (re2.IsMatch(w)) 113 | { 114 | GroupCollection fp = re2.Match(w).Groups; 115 | string stem = fp[1].Value; 116 | re2 = re_s_v; 117 | if (re2.IsMatch(stem)) 118 | { 119 | w = stem; 120 | re2 = re2_1b_2; 121 | Regex re3 = re3_1b_2; 122 | Regex re4 = re4_1b_2; 123 | if (re2.IsMatch(w)) { w += "e"; } 124 | else if (re3.IsMatch(w)) { re = re_1b_2; w = re.Replace(w, ""); } 125 | else if (re4.IsMatch(w)) { w += "e"; } 126 | } 127 | } 128 | 129 | // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say) 130 | re = re_1c; 131 | if (re.IsMatch(w)) 132 | { 133 | GroupCollection fp = re.Match(w).Groups; 134 | string stem = fp[1].Value; 135 | w = stem + "i"; 136 | } 137 | 138 | // Step 2 139 | re = re_2; 140 | if (re.IsMatch(w)) 141 | { 142 | GroupCollection fp = re.Match(w).Groups; 143 | string stem = fp[1].Value; 144 | string suffix = fp[2].Value; 145 | re = re_mgr0; 146 | if (re.IsMatch(stem)) 147 | { 148 | w = stem + step2list[suffix]; 149 | } 150 | } 151 | 152 | // Step 3 153 | re = re_3; 154 | if (re.IsMatch(w)) 155 | { 156 | GroupCollection fp = re.Match(w).Groups; 157 | string stem = fp[1].Value; 158 | string 
suffix = fp[2].Value; 159 | re = re_mgr0; 160 | if (re.IsMatch(stem)) 161 | { 162 | w = stem + step3list[suffix]; 163 | } 164 | } 165 | 166 | // Step 4 167 | re = re_4; 168 | re2 = re2_4; 169 | if (re.IsMatch(w)) 170 | { 171 | GroupCollection fp = re.Match(w).Groups; 172 | string stem = fp[1].Value; 173 | re = re_mgr1; 174 | if (re.IsMatch(stem)) 175 | { 176 | w = stem; 177 | } 178 | } 179 | else if (re2.IsMatch(w)) 180 | { 181 | GroupCollection fp = re2.Match(w).Groups; 182 | string stem = fp[1].Value + fp[2].Value; 183 | re2 = re_mgr1; 184 | if (re2.IsMatch(stem)) 185 | { 186 | w = stem; 187 | } 188 | } 189 | 190 | // Step 5 191 | re = re_5; 192 | if (re.IsMatch(w)) 193 | { 194 | GroupCollection fp = re.Match(w).Groups; 195 | string stem = fp[1].Value; 196 | re = re_mgr1; 197 | re2 = re_meq1; 198 | Regex re3 = re3_5; 199 | if (re.IsMatch(stem) || (re2.IsMatch(stem) && !(re3.IsMatch(stem)))) 200 | { 201 | w = stem; 202 | } 203 | } 204 | 205 | re = re_5_1; 206 | re2 = re_mgr1; 207 | if (re.IsMatch(w) && re2.IsMatch(w)) 208 | { 209 | re = re_1b_2; 210 | w = re.Replace(w, ""); 211 | } 212 | 213 | // and turn initial Y back to y 214 | if (firstch == 'y') 215 | { 216 | w = char.ToLower(firstch, culture) + w.Substring(1); 217 | } 218 | 219 | return w; 220 | } 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /LunrCore/EnglishStopWordFilter.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace Lunr 4 | { 5 | public sealed class EnglishStopWordFilter : StopWordFilterBase 6 | { 7 | private static readonly Set _stopWords = new Set( 8 | new [] 9 | { 10 | "a", 11 | "able", 12 | "about", 13 | "across", 14 | "after", 15 | "all", 16 | "almost", 17 | "also", 18 | "am", 19 | "among", 20 | "an", 21 | "and", 22 | "any", 23 | "are", 24 | "as", 25 | "at", 26 | "be", 27 | "because", 28 | "been", 29 | "but", 30 | "by", 31 | "can", 32 | "cannot", 33 | "could", 34 | 
"dear", 35 | "did", 36 | "do", 37 | "does", 38 | "either", 39 | "else", 40 | "ever", 41 | "every", 42 | "for", 43 | "from", 44 | "get", 45 | "got", 46 | "had", 47 | "has", 48 | "have", 49 | "he", 50 | "her", 51 | "hers", 52 | "him", 53 | "his", 54 | "how", 55 | "however", 56 | "i", 57 | "if", 58 | "in", 59 | "into", 60 | "is", 61 | "it", 62 | "its", 63 | "just", 64 | "least", 65 | "let", 66 | "like", 67 | "likely", 68 | "may", 69 | "me", 70 | "might", 71 | "most", 72 | "must", 73 | "my", 74 | "neither", 75 | "no", 76 | "nor", 77 | "not", 78 | "of", 79 | "off", 80 | "often", 81 | "on", 82 | "only", 83 | "or", 84 | "other", 85 | "our", 86 | "own", 87 | "rather", 88 | "said", 89 | "say", 90 | "says", 91 | "she", 92 | "should", 93 | "since", 94 | "so", 95 | "some", 96 | "than", 97 | "that", 98 | "the", 99 | "their", 100 | "them", 101 | "then", 102 | "there", 103 | "these", 104 | "they", 105 | "this", 106 | "tis", 107 | "to", 108 | "too", 109 | "twas", 110 | "us", 111 | "wants", 112 | "was", 113 | "we", 114 | "were", 115 | "what", 116 | "when", 117 | "where", 118 | "which", 119 | "while", 120 | "who", 121 | "whom", 122 | "why", 123 | "will", 124 | "with", 125 | "would", 126 | "yet", 127 | "you", 128 | "your" 129 | }, 130 | StringComparer.OrdinalIgnoreCase); 131 | 132 | protected override ISet StopWords => _stopWords; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /LunrCore/Extensions/DictionaryExtensions.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Runtime.InteropServices; 3 | 4 | namespace Lunr; 5 | 6 | internal static class DictionaryExtensions 7 | { 8 | public static void Increment(this Dictionary dic, string fieldName, int amount = 1) 9 | { 10 | #if NET6_0_OR_GREATER 11 | ref int value = ref CollectionsMarshal.GetValueRefOrAddDefault(dic, fieldName, out _); 12 | 13 | value += amount; 14 | #else 15 | if 
(dic.ContainsKey(fieldName)) 16 | { 17 | dic[fieldName] += amount; 18 | } 19 | else 20 | { 21 | dic.Add(fieldName, amount); 22 | } 23 | #endif 24 | 25 | } 26 | 27 | public static void Increment(this Dictionary dic, string fieldName, double amount = 1) 28 | { 29 | #if NET6_0_OR_GREATER 30 | ref double value = ref CollectionsMarshal.GetValueRefOrAddDefault(dic, fieldName, out _); 31 | 32 | value += amount; 33 | #else 34 | if (dic.ContainsKey(fieldName)) 35 | { 36 | dic[fieldName] += amount; 37 | } 38 | else 39 | { 40 | dic.Add(fieldName, amount); 41 | } 42 | #endif 43 | 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /LunrCore/Extensions/StringBuilderExtensions.cs: -------------------------------------------------------------------------------- 1 | #if NETSTANDARD2_0 2 | 3 | using System; 4 | using System.Text; 5 | 6 | namespace Lunr; 7 | 8 | internal static class StringBuilderExtensions 9 | { 10 | public static void Append(this StringBuilder sb, ReadOnlySpan text) 11 | { 12 | sb.Append(text.ToString()); 13 | } 14 | } 15 | 16 | #endif -------------------------------------------------------------------------------- /LunrCore/Field.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Diagnostics; 3 | using System.Threading.Tasks; 4 | 5 | namespace Lunr 6 | { 7 | /// 8 | /// A field of indeterminate type. 9 | /// 10 | [DebuggerDisplay("{" + nameof(DebuggerDisplay) + ",nq}")] 11 | public abstract class Field 12 | { 13 | protected Field(string name, double boost = 1) 14 | { 15 | if (name is "") throw new InvalidOperationException("Can't create a field with an empty name."); 16 | if (name.IndexOf('/') != -1) throw new InvalidOperationException($"Can't create a field with a '/' character in its name \"{name}\"."); 17 | 18 | Name = name; 19 | Boost = boost; 20 | } 21 | 22 | /// 23 | /// The name of the field. 
24 | /// 25 | public string Name { get; } 26 | 27 | /// 28 | /// Boost applied to all terms within this field. 29 | /// 30 | public double Boost { get; } 31 | 32 | public abstract ValueTask ExtractValue(Document doc); 33 | 34 | private string DebuggerDisplay => Boost != 1 ? $"{Name} x{Boost}" : Name; 35 | } 36 | 37 | /// 38 | /// Represents an index field. 39 | /// 40 | public sealed class Field : Field 41 | { 42 | public Field(string name, double boost = 1, Func>? extractor = null) : base(name, boost) 43 | => Extractor = extractor ?? (doc => new ValueTask((T)doc[name])); 44 | 45 | /// 46 | /// Function to extract a field from a document. 47 | /// 48 | public Func> Extractor { get; } 49 | 50 | public override async ValueTask ExtractValue(Document doc) 51 | => await Extractor(doc).ConfigureAwait(false); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /LunrCore/FieldMatchMetadata.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Lunr 4 | { 5 | /// 6 | /// The metadata associated with a token match on a field. 7 | /// The keys are the metadata entry names, the values are lists of values. 8 | /// 9 | public sealed class FieldMatchMetadata : Dictionary> 10 | { 11 | public FieldMatchMetadata() 12 | { } 13 | 14 | public FieldMatchMetadata(int capacity) : base(capacity) { } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /LunrCore/FieldMatches.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Lunr 4 | { 5 | /// 6 | /// Represents a set of matches for a field. 7 | /// The key is a token, and the value is the metadata associated with this token match for this field. 
8 | /// 9 | public sealed class FieldMatches : Dictionary 10 | { 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /LunrCore/FieldReference.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace Lunr 4 | { 5 | public sealed class FieldReference 6 | { 7 | public const char Joiner = '/'; 8 | 9 | private string? _stringValue; 10 | 11 | public FieldReference(string documentReference, string fieldName, string? stringValue = null) 12 | { 13 | DocumentReference = documentReference; 14 | FieldName = fieldName; 15 | _stringValue = stringValue; 16 | } 17 | 18 | public string DocumentReference { get; } 19 | public string FieldName { get; } 20 | 21 | public static FieldReference FromString(string s) 22 | { 23 | int n = s.IndexOf(Joiner); 24 | 25 | if (n == -1) throw new InvalidOperationException($"Malformed field reference string: \"{s}\"."); 26 | 27 | return new FieldReference(s.Substring(n + 1), s.Substring(0, n), s); 28 | } 29 | 30 | public override string ToString() 31 | => _stringValue ??= FieldName + Joiner + DocumentReference; 32 | 33 | public override bool Equals(object? 
obj) 34 | => obj is FieldReference otherRef 35 | && otherRef.FieldName.Equals(FieldName, StringComparison.Ordinal) 36 | && otherRef.DocumentReference.Equals(DocumentReference, StringComparison.Ordinal); 37 | 38 | public override int GetHashCode() => (DocumentReference, FieldName).GetHashCode(); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /LunrCore/FieldTermFrequencies.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Lunr 4 | { 5 | public sealed class FieldTermFrequencies : Dictionary 6 | { 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /LunrCore/ITokenizer.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Globalization; 4 | 5 | namespace Lunr 6 | { 7 | public interface ITokenizer 8 | { 9 | /// 10 | /// A function for splitting a string into tokens ready to be inserted into 11 | /// the search index. 12 | /// 13 | /// This tokenizer will convert its parameter to a string by calling `ToString` and 14 | /// then will split this string on white space, punctuation and separators. 15 | /// 16 | /// The object to tokenize. 17 | /// Optional metadata can be passed to the tokenizer; this metadata will be cloned and 18 | /// added as metadata to every token that is created from the object to tokenize. 19 | /// The culture to use to tokenize. 20 | /// An optional function that returns true if a character is a separator. 21 | /// The list of tokens extracted from the string. 22 | IEnumerable Tokenize( 23 | object obj, 24 | TokenMetadata metadata, 25 | CultureInfo culture, 26 | Func? 
separator = null); 27 | } 28 | } -------------------------------------------------------------------------------- /LunrCore/InvertedIndex.cs: -------------------------------------------------------------------------------- 1 | using Lunr.Serialization; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text.Json.Serialization; 5 | 6 | namespace Lunr 7 | { 8 | /// 9 | /// Inverted index. 10 | /// term -> field -> document -> metadataKey -> metadataValue[] 11 | /// 12 | [JsonConverter(typeof(InvertedIndexJsonConverter))] 13 | public sealed class InvertedIndex : Dictionary 14 | { 15 | public InvertedIndex() 16 | { } 17 | 18 | public InvertedIndex(IEnumerable<(string term, InvertedIndexEntry entry)> entries) 19 | : base(entries.ToDictionary(e => e.term, e => e.entry)) { } 20 | 21 | public InvertedIndex(int capacity) 22 | : base(capacity) { } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /LunrCore/InvertedIndexEntry.cs: -------------------------------------------------------------------------------- 1 | using Lunr.Serialization; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text.Json.Serialization; 5 | 6 | namespace Lunr 7 | { 8 | /// 9 | /// Inverted index entry. 
10 | /// field -> document -> metadataKey -> metadataValue[] 11 | /// 12 | [JsonConverter(typeof(InvertedIndexEntryJsonConverter))] 13 | public sealed class InvertedIndexEntry : Dictionary 14 | { 15 | public InvertedIndexEntry() 16 | { } 17 | 18 | public InvertedIndexEntry(int capacity) 19 | : base(capacity) { } 20 | 21 | public InvertedIndexEntry(IEnumerable<(string term, FieldMatches occurrences)> entries) 22 | : base(entries.ToDictionary(e => e.term, e => e.occurrences)) { } 23 | 24 | public int Index { get; set; } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /LunrCore/Lexeme.cs: -------------------------------------------------------------------------------- 1 | using System.Diagnostics; 2 | 3 | namespace Lunr 4 | { 5 | [DebuggerDisplay("{" + nameof(DebuggerDisplay) + ",nq}")] 6 | public sealed class Lexeme 7 | { 8 | public static readonly Lexeme Empty 9 | = new Lexeme(LexemeType.Empty, "", 0, 0); 10 | 11 | public Lexeme(LexemeType type, string value, int start, int end) 12 | { 13 | Type = type; 14 | Value = value; 15 | Start = start; 16 | End = end; 17 | } 18 | 19 | public LexemeType Type { get; } 20 | public string Value { get; } 21 | public int Start { get; } 22 | public int End { get; } 23 | 24 | private string DebuggerDisplay => $"{Type}: {Value} ({Start}-{End})"; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /LunrCore/LexemeType.cs: -------------------------------------------------------------------------------- 1 | namespace Lunr 2 | { 3 | public enum LexemeType 4 | { 5 | EOS, 6 | Field, 7 | Term, 8 | EditDistance, 9 | Boost, 10 | Presence, 11 | Empty 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /LunrCore/LunrCore.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | netstandard2.0;net5.0;net6.0 5 | Lunr 6 | 2.3.10.0 7 | Bertrand Le Roy 8 | 
Decent Consulting 9 | A .NET Core port of Oliver Nightingale's lunr.js library, a lightweight full-text indexing library that is "a bit like Solr, but much smaller and not as bright." Icon adapted from https://commons.wikimedia.org/wiki/File:Internal_Structure_of_the_Moon.JPG by Iqbal Mahmud under Creative Commons Attribution Share Alike 4.0 International 10 | 2020 Bertrand Le Roy 11 | https://github.com/bleroy/lunr-core 12 | LunrCore.png 13 | LICENSE 14 | https://github.com/bleroy/lunr-core 15 | true 16 | true 17 | 10 18 | enable 19 | true 20 | snupkg 21 | true 22 | true 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /LunrCore/MatchData.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Lunr 4 | { 5 | /// 6 | /// Contains and collects metadata about a matching document. 7 | /// A single instance of `MatchData` is returned as part of every index result. 8 | /// 9 | public sealed class MatchData 10 | { 11 | public static readonly MatchData Empty = new MatchData("", "", new FieldMatchMetadata()); 12 | 13 | /// Constructs a `MatchData`. 14 | /// The term this match data is associated with. 15 | /// The field in which the term was found. 16 | /// The metadata recorded about this term in this field. 17 | public MatchData( 18 | string term, 19 | string field, 20 | FieldMatchMetadata metadata) 21 | { 22 | Term = term; 23 | Field = field; 24 | 25 | // Cloning the metadata to prevent the original being mutated during match data combination. 26 | // Metadata is kept in an array within the inverted index. 
27 | var clonedMetadata = new FieldMatchMetadata(capacity: metadata.Count); 28 | 29 | foreach((string key, IEnumerable value) in metadata) 30 | { 31 | clonedMetadata.Add(key, new List(value)); 32 | } 33 | 34 | Posting = new InvertedIndexEntry 35 | { 36 | { 37 | term, 38 | new FieldMatches 39 | { 40 | { field, clonedMetadata } 41 | } 42 | } 43 | }; 44 | } 45 | 46 | /// 47 | /// The term this match data is associated with. 48 | /// 49 | public string Term { get; } 50 | 51 | /// 52 | /// The field in which the term was found. 53 | /// 54 | public string Field { get; } 55 | 56 | /// 57 | /// A cloned collection of metadata associated with this document. 58 | /// 59 | public InvertedIndexEntry Posting { get; } 60 | 61 | /// 62 | /// An instance of `MatchData` will be created for every term that matches a 63 | /// document. However only one instance is required in an index result. This 64 | /// method combines metadata from another instance of `MatchData` with this 65 | /// object's metadata. 66 | /// 67 | /// Another instance of match data to merge with this one. 
68 | public void Combine(MatchData otherMatchData) 69 | { 70 | IEnumerable terms = otherMatchData.Posting.Keys; 71 | 72 | foreach(string term in terms) 73 | { 74 | IEnumerable fields = otherMatchData.Posting[term].Keys; 75 | if (!Posting.ContainsKey(term)) 76 | { 77 | Posting.Add(term, new FieldMatches()); 78 | } 79 | Dictionary thisTermEntry = Posting[term]; 80 | foreach (string field in fields) 81 | { 82 | IEnumerable keys = otherMatchData.Posting[term][field].Keys; 83 | if (!thisTermEntry.ContainsKey(field)) 84 | { 85 | thisTermEntry.Add(field, new FieldMatchMetadata(capacity: otherMatchData.Posting[term][field].Keys.Count)); 86 | } 87 | FieldMatchMetadata thisFieldEntry = thisTermEntry[field]; 88 | foreach(string key in keys) 89 | { 90 | IList otherData = otherMatchData.Posting[term][field][key]; 91 | if (!thisFieldEntry.ContainsKey(key)) 92 | { 93 | thisFieldEntry.Add(key, new List(otherData)); 94 | } 95 | else 96 | { 97 | thisFieldEntry[key] = Concat(thisFieldEntry[key], otherData); 98 | } 99 | } 100 | } 101 | } 102 | } 103 | 104 | /// 105 | /// Add metadata for a term/field pair to this instance of match data. 106 | /// 107 | /// The term this match data is associated with. 108 | /// The field in which the term was found. 109 | /// The metadata recorded about this term in this field. 
110 | public void Add(string term, string field, FieldMatchMetadata metadata) 111 | { 112 | if (!Posting.ContainsKey(term)) 113 | { 114 | Posting.Add(term, new FieldMatches 115 | { 116 | { 117 | field, 118 | metadata 119 | } 120 | }); 121 | return; 122 | } 123 | 124 | FieldMatches termMetadata = Posting[term]; 125 | if (!termMetadata.ContainsKey(field)) 126 | { 127 | termMetadata.Add(field, metadata); 128 | return; 129 | } 130 | 131 | foreach(string key in metadata.Keys) 132 | { 133 | FieldMatchMetadata fieldMetadata = termMetadata[field]; 134 | if (fieldMetadata.ContainsKey(key)) 135 | { 136 | fieldMetadata[key] = Concat(fieldMetadata[key], metadata[key]); 137 | } 138 | else 139 | { 140 | fieldMetadata[key] = metadata[key]; 141 | } 142 | } 143 | } 144 | 145 | private static IList Concat(IList a, IList b) 146 | { 147 | var result = new object?[a.Count + b.Count]; 148 | int position = 0; 149 | 150 | for (int i = 0; i < a.Count; i++) 151 | { 152 | result[position] = a[i]; 153 | 154 | position++; 155 | } 156 | 157 | for (int i = 0; i < b.Count; i++) 158 | { 159 | result[position] = b[i]; 160 | 161 | position++; 162 | } 163 | 164 | return result; 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /LunrCore/PipelineFunctionRegistry.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Linq; 3 | 4 | namespace Lunr 5 | { 6 | /// 7 | /// A registry of named pipeline functions. 
8 | /// 9 | public sealed class PipelineFunctionRegistry : Dictionary 10 | { 11 | public PipelineFunctionRegistry() 12 | { } 13 | 14 | public PipelineFunctionRegistry(IDictionary dictionary) : base(dictionary) { } 15 | 16 | public PipelineFunctionRegistry(params (string name, Pipeline.Function function)[] functions) 17 | : base(functions.ToDictionary(kv => kv.name, kv => kv.function)) { } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /LunrCore/Query.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Linq; 3 | 4 | namespace Lunr 5 | { 6 | /// 7 | /// A `Query` provides a programmatic way of defining queries to be performed 8 | /// against an `Index`. 9 | /// 10 | /// Prefer constructing a `Query` using the `Index.Query` method 11 | /// so the query object is pre-initialized with the right index fields. 12 | /// 13 | public class Query 14 | { 15 | /// 16 | /// Builds a new query. 17 | /// 18 | /// An array of all available fields in an `Index`. 19 | public Query(params string[] allFields) 20 | { 21 | AllFields = allFields; 22 | } 23 | 24 | /// 25 | /// Builds a new query. 26 | /// 27 | /// A sequence of all available fields in an `Index`; it is copied into a new list. 28 | internal Query(IEnumerable allFields) 29 | { 30 | AllFields = new List(allFields); 31 | } 32 | 33 | public static readonly char Wildcard = '*'; 34 | 35 | /// 36 | /// An array of all available fields. 37 | /// 38 | public IList AllFields { get; } 39 | 40 | /// 41 | /// A list of query clauses. 42 | /// 43 | public IList Clauses { get; } = new List(); 44 | 45 | /// 46 | /// A negated query is one in which every clause has a presence of 47 | /// prohibited. These queries require some special processing to return 48 | /// the expected results. 
49 | /// 50 | public bool IsNegated => Clauses.All(clause => clause.Presence == QueryPresence.Prohibited); 51 | 52 | /// 53 | /// Adds a `Clause` to this query. 54 | /// Unless the clause contains the fields to be matched, all fields will be matched. 55 | /// In addition, a default boost of 1 is applied to the clause. 56 | /// 57 | /// The clause to add to this query. 58 | /// The query. 59 | public Query AddClause(Clause clause) 60 | { 61 | Clauses.Add(!clause.Fields.Any() ? clause.WithFields(AllFields) : clause); 62 | return this; 63 | } 64 | 65 | /// 66 | /// Adds multiple terms using copies of a single clause to this query. 67 | /// Unless the clause contains the fields to be matched, all fields will be matched. 68 | /// In addition, a default boost of 1 is applied to the clause. 69 | /// 70 | /// The clause to copy with terms to add to this query. 71 | /// The terms to add with common parameters defined by the clause. 72 | /// The query. 73 | public Query AddTerms(Clause clause, params string[] terms) 74 | { 75 | foreach(string term in terms) 76 | { 77 | AddClause(clause.WithTerm(term)); 78 | } 79 | return this; 80 | } 81 | 82 | /// 83 | /// Adds a term to the current query; under the covers this will add a `Clause` 84 | /// to the list of clauses that make up this query. 85 | /// 86 | /// The term is used as is, i.e. no tokenization will be performed by this method. 87 | /// Instead, conversion to a token or token-like string should be done before calling this method. 88 | /// 89 | /// The term to add to the query. 90 | /// An optional boost for the term. 91 | /// The maximum edit distance from the term. 92 | /// Set to false to bypass the pipeline. 93 | /// An optional wildcard. 94 | /// The type of presence for this term. 95 | /// An optional list of fields to look for the term in. 96 | /// The query. 
97 | public Query AddTerm( 98 | string term = "", 99 | double boost = 1, 100 | int editDistance = 0, 101 | bool usePipeline = true, 102 | QueryWildcard wildcard = QueryWildcard.None, 103 | QueryPresence presence = QueryPresence.Optional, 104 | IEnumerable? fields = null) 105 | => AddClause(new Clause(term, boost, editDistance, usePipeline, wildcard, presence, fields)); 106 | 107 | /// 108 | /// Adds multiple terms to the current query; under the covers this will add one `Clause` per term 109 | /// to the list of clauses that make up this query. 110 | /// 111 | /// Each term is used as is, i.e. no tokenization will be performed by this method. 112 | /// Instead, conversion to a token or token-like string should be done before calling this method. 113 | /// 114 | /// The terms to add to the query. 115 | /// The query. 116 | public Query AddTerms(params string[] terms) 117 | => AddTerms((IEnumerable)terms); 118 | 119 | /// 120 | /// Adds multiple terms to the current query; under the covers this will add one `Clause` per term 121 | /// to the list of clauses that make up this query. 122 | /// 123 | /// Each term is used as is, i.e. no tokenization will be performed by this method. 124 | /// Instead, conversion to a token or token-like string should be done before calling this method. 125 | /// 126 | /// The terms to add to the query. 127 | /// The query. 128 | public Query AddTerms(IEnumerable terms) 129 | { 130 | foreach (string term in terms) 131 | { 132 | AddTerm(term); 133 | } 134 | return this; 135 | } 136 | 137 | /// 138 | /// Adds multiple terms to the current query; under the covers this will add one `Clause` per term 139 | /// to the list of clauses that make up this query. 140 | /// 141 | /// Each term is used as is, i.e. no tokenization will be performed by this method. 142 | /// Instead, conversion to a token or token-like string should be done before calling this method. 143 | /// 144 | /// The terms to add to the query. 145 | /// The query. 
146 | public Query AddTerms(IEnumerable terms) 147 | => AddTerms(terms.Select(t => t.String)); 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /LunrCore/QueryLexer.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace Lunr 6 | { 7 | public class QueryLexer 8 | { 9 | /// 10 | /// A lex is a function that takes a lexer and returns the next lex. 11 | /// 12 | /// The lexer. 13 | /// The next lex 14 | private delegate Lex Lex(QueryLexer lexer); 15 | 16 | private readonly string _str; 17 | private readonly int _length; 18 | private readonly IList _escapeCharPositions = new List(); 19 | private int _pos; 20 | private int _start; 21 | 22 | public QueryLexer(string str) 23 | { 24 | _str = str; 25 | _length = str.Length; 26 | } 27 | 28 | public IList Lexemes { get; } = new List(); 29 | 30 | private int Width => _pos - _start; 31 | private bool HasMore => _pos < _length; 32 | 33 | public void Run() 34 | { 35 | Lex state = LexText; 36 | 37 | while (state != LexPastEOS) 38 | { 39 | state = state(this); 40 | } 41 | } 42 | 43 | private static Lex LexPastEOS(QueryLexer lexer) => throw new InvalidOperationException("End of string should never be called"); 44 | 45 | private static Lex LexField(QueryLexer lexer) 46 | { 47 | lexer.Backup(); 48 | lexer.Emit(LexemeType.Field); 49 | lexer.Ignore(); 50 | return LexText; 51 | } 52 | 53 | private static Lex LexTerm(QueryLexer lexer) 54 | { 55 | if (lexer.Width > 1) 56 | { 57 | lexer.Backup(); 58 | lexer.Emit(LexemeType.Term); 59 | } 60 | 61 | lexer.Ignore(); 62 | 63 | if (lexer.HasMore) return LexText; 64 | 65 | return LexPastEOS; 66 | } 67 | 68 | private static Lex LexEditDistance(QueryLexer lexer) 69 | { 70 | lexer.Ignore(); 71 | lexer.AcceptDigitRun(); 72 | lexer.Emit(LexemeType.EditDistance); 73 | return LexText; 74 | } 75 | 76 | private static Lex 
LexBoost(QueryLexer lexer) 77 | { 78 | lexer.Ignore(); 79 | lexer.AcceptDigitRun(); 80 | lexer.Emit(LexemeType.Boost); 81 | return LexText; 82 | } 83 | 84 | private static Lex LexEOS(QueryLexer lexer) 85 | { 86 | if (lexer.Width > 0) 87 | { 88 | lexer.Emit(LexemeType.Term); 89 | } 90 | return LexPastEOS; 91 | } 92 | 93 | private static Lex LexText(QueryLexer lexer) 94 | { 95 | while(true) 96 | { 97 | (bool EOS, char ch) = lexer.Next(); 98 | 99 | if (EOS) return LexEOS; 100 | 101 | // Escape character is '\' 102 | if (ch == '\\') 103 | { 104 | lexer.EscapeCharacter(); 105 | continue; 106 | } 107 | 108 | if (ch == ':') return LexField; 109 | 110 | if (ch == '~') 111 | { 112 | lexer.Backup(); 113 | if (lexer.Width > 0) lexer.Emit(LexemeType.Term); 114 | return LexEditDistance; 115 | } 116 | 117 | if (ch == '^') 118 | { 119 | lexer.Backup(); 120 | if (lexer.Width > 0) lexer.Emit(LexemeType.Term); 121 | return LexBoost; 122 | } 123 | 124 | // "+" indicates term presence is required 125 | // checking for length to ensure that only 126 | // leading "+" are considered 127 | if (ch == '+' && lexer.Width == 1) 128 | { 129 | lexer.Emit(LexemeType.Presence); 130 | return LexText; 131 | } 132 | 133 | // "-" indicates term presence is prohibited 134 | // checking for length to ensure that only 135 | // leading "-" are considered 136 | if (ch == '-' && lexer.Width == 1) 137 | { 138 | lexer.Emit(LexemeType.Presence); 139 | return LexText; 140 | } 141 | 142 | if (ch.IsLunrSeparator()) 143 | { 144 | return LexTerm; 145 | } 146 | } 147 | } 148 | 149 | private string SliceString() 150 | { 151 | var subSlices = new StringBuilder(); 152 | int sliceStart = _start; 153 | 154 | foreach (int escapeCharPosition in _escapeCharPositions) 155 | { 156 | int sliceEnd = escapeCharPosition; 157 | subSlices.Append(_str.AsSpan(sliceStart, sliceEnd - sliceStart)); 158 | sliceStart = sliceEnd + 1; 159 | } 160 | 161 | subSlices.Append(_str.AsSpan(sliceStart, Math.Min(_pos, _length) - sliceStart)); // a trailing '\' leaves _pos at _length + 1 (EscapeCharacter skips unconditionally); clamp so AsSpan does not throw 162 | 
_escapeCharPositions.Clear(); 163 | 164 | return subSlices.ToString(); 165 | } 166 | 167 | private void Emit(LexemeType type) 168 | { 169 | Lexemes.Add(new Lexeme(type, SliceString(), _start, _pos)); 170 | _start = _pos; 171 | } 172 | 173 | private void EscapeCharacter() 174 | { 175 | _escapeCharPositions.Add(_pos - 1); 176 | _pos++; 177 | } 178 | 179 | private (bool EOS, char nextChar) Next() 180 | => _pos >= _length ? (true, char.MinValue) : (false, _str[_pos++]); 181 | 182 | private void Ignore() 183 | { 184 | if (_start == _pos) _pos++; 185 | _start = _pos; 186 | } 187 | 188 | private void Backup() => _pos--; 189 | 190 | private void AcceptDigitRun() 191 | { 192 | char ch; 193 | bool EOS; 194 | 195 | do (EOS, ch) = Next(); 196 | while (char.IsDigit(ch)); 197 | 198 | if (!EOS) Backup(); 199 | } 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /LunrCore/QueryParserException.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace Lunr 4 | { 5 | public sealed class QueryParserException : Exception 6 | { 7 | public QueryParserException(string message, int start, int end) : base(message) 8 | { 9 | Start = start; 10 | End = end; 11 | } 12 | 13 | public int Start { get; } 14 | public int End { get; } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /LunrCore/QueryPresence.cs: -------------------------------------------------------------------------------- 1 | namespace Lunr 2 | { 3 | /// 4 | /// What kind of presence a term must have in matching documents. 5 | /// 6 | public enum QueryPresence 7 | { 8 | /// 9 | /// Term's presence in a document is optional, this is the default value. 10 | /// 11 | Optional = 0, 12 | /// 13 | /// Term's presence in a document is required, documents that do not contain this term will not be returned. 
14 | /// 15 | Required = 1, 16 | /// 17 | /// Term's presence in a document is prohibited, documents that do contain this term will not be returned. 18 | /// 19 | Prohibited = 2 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /LunrCore/QueryString.cs: -------------------------------------------------------------------------------- 1 | namespace Lunr 2 | { 3 | /// 4 | /// Although lunr provides the ability to create queries using `Query`, it also provides a simple 5 | /// query language which itself is parsed into an instance of `Query`. 6 | /// 7 | /// For programmatically building queries it is advised to directly use `Query`; the query language 8 | /// is best used for human entered text rather than program generated text. 9 | /// 10 | /// At its simplest, a query can just be a single term, e.g. `hello`. Multiple terms are also supported 11 | /// and will be combined with OR, e.g. `hello world` will match documents that contain either 'hello' 12 | /// or 'world', though those that contain both will rank higher in the results. 13 | /// 14 | /// Wildcards can be included in terms to match one or more unspecified characters; these wildcards can 15 | /// be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding 16 | /// wildcards will increase the number of documents that will be found but can also have a negative 17 | /// impact on query performance, especially with wildcards at the beginning of a term. 18 | /// 19 | /// Terms can be restricted to specific fields, e.g. `title:hello`: only documents with the term 20 | /// hello in the title field will match this query. Using a field not present in the index will lead 21 | /// to an error being thrown. 22 | /// 23 | /// Modifiers can also be added to terms; lunr supports edit distance and boost modifiers on terms. A term 24 | /// boost will make documents matching that term score higher, e.g. `foo^5`. 
Edit distance is also supported 25 | /// to provide fuzzy matching, e.g. 'hello~2' will match documents containing terms within an edit distance of 2 of 'hello'. 26 | /// Avoid large values for edit distance to improve query performance. 27 | /// 28 | /// Each term also supports a presence modifier. By default a term's presence in a document is optional, however 29 | /// this can be changed to either required or prohibited. For a term's presence to be required in a document the 30 | /// term should be prefixed with a '+', e.g. `+foo bar` is a search for documents that must contain 'foo' and 31 | /// optionally contain 'bar'. Conversely a leading '-' sets the term's presence to prohibited, i.e. it must not 32 | /// appear in a document, e.g. `-foo bar` is a search for documents that do not contain 'foo' but may contain 'bar'. 33 | /// 34 | /// To escape special characters the backslash character '\' can be used; this allows searches to include 35 | /// characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead 36 | /// of attempting to apply an edit distance of 2 to the search term "foo". 37 | /// 38 | /// Simple single term query: "hello" 39 | /// Multiple term query: "hello world" 40 | /// Term scoped to a field: "title:hello" 41 | /// Term with a boost of 10: "hello^10" 42 | /// Term with an edit distance of 2: "hello~2" 43 | /// Terms with presence modifiers: "-foo +bar baz" 44 | public readonly struct QueryString 45 | { 46 | /// 47 | /// Constructs a query string. 48 | /// 49 | /// The string value of the query string. 50 | public QueryString(string value) => Value = value; 51 | 52 | /// 53 | /// The string value of the query string. 
54 | /// 55 | public string Value { get; } 56 | 57 | public static implicit operator string(QueryString qs) => qs.Value; 58 | public static implicit operator QueryString(string s) => new QueryString(s); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /LunrCore/QueryWildcard.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace Lunr 4 | { 5 | /// 6 | /// What kind of automatic wildcard insertion will be used when constructing a query clause. 7 | /// 8 | /// This allows wildcards to be added to the beginning and end of a term without having to manually do any string 9 | /// concatenation. 10 | /// 11 | /// The wildcards can be combined to select both leading and trailing wildcards. 12 | /// 13 | [Flags] 14 | public enum QueryWildcard 15 | { 16 | None = 0, 17 | Leading = 1, 18 | Trailing = 2, 19 | Both = 3 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /LunrCore/Result.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Diagnostics; 3 | 4 | namespace Lunr 5 | { 6 | /// 7 | /// A result contains details of a document matching a search query. 8 | /// 9 | [DebuggerDisplay("{" + nameof(DebuggerDisplay) + ",nq}")] 10 | public class Result 11 | { 12 | /// 13 | /// Builds a new result. 14 | /// 15 | /// The reference of the document this result represents. 16 | /// A number between 0 and 1 representing how similar this document is to the query. 17 | /// Contains metadata about this match including which term(s) caused the match. 18 | public Result(string documentReference, double score, MatchData matchData) 19 | { 20 | DocumentReference = documentReference; 21 | Score = score; 22 | MatchData = matchData; 23 | } 24 | 25 | /// 26 | /// The reference of the document this result represents. 
    /// <summary>
    /// JSON converter that reads and writes an <c>Index</c> as a single JSON object with the
    /// properties "version", "fields", "fieldVectors", "invertedIndex" and "pipeline".
    /// </summary>
    // NOTE(review): generic type arguments (on JsonConverter, Dictionary, IEnumerable, ReadValue,
    // ReadArray, ...) are absent in this listing — presumably lost in extraction; confirm against
    // the repository before relying on the exact signatures.
    internal sealed class IndexJsonConverter : JsonConverter
    {
        /// <summary>
        /// The lunr.js version that this version of the library is designed to be compatible with.
        /// </summary>
        private static readonly string VersionString = "2.3.9";
        private static readonly Version Version = Version.Parse(VersionString);

        /// <summary>
        /// Reads an index from a JSON object.
        /// </summary>
        /// <exception cref="JsonException">
        /// The reader is not positioned on a start-object token, or one of
        /// invertedIndex / fieldVectors / pipeline / fields is missing from the object.
        /// </exception>
        public override Index Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
        {
            InvertedIndex? invertedIndex = null;
            Dictionary? fieldVectors = null;
            Pipeline? pipeline = null;
            IEnumerable? fields = null;

            var tokenSetBuilder = new TokenSet.Builder();

            if (reader.TokenType != JsonTokenType.StartObject)
            {
                throw new JsonException("An index can only be deserialized from an object.");
            }
            // NOTE(review): the result of Read() is ignored here, whereas the sibling converters
            // call ReadOrThrow() at the same point — confirm whether that difference is intended.
            reader.Read();
            while (reader.AdvanceTo(JsonTokenType.PropertyName, JsonTokenType.EndObject) != JsonTokenType.EndObject)
            {
                string propertyName = reader.ReadValue(options);
                switch (propertyName)
                {
                    case "version":
                        var parsedVersion = Version.Parse(reader.ReadValue(options));
                        // A major/minor mismatch is only reported through Debug.Write; loading continues.
                        if (parsedVersion.Major != Version.Major || parsedVersion.Minor != Version.Minor)
                        {
                            System.Diagnostics.Debug.Write($"Version mismatch when loading serialised index. Current version of Lunr '{VersionString}' does not match serialized index '{parsedVersion}'");
                        }

                        break;
                    case "invertedIndex":
                        invertedIndex = reader.ReadValue(options);
                        break;
                    case "fieldVectors":
                        fieldVectors = reader.ReadDictionaryFromKeyValueSequence(options);
                        break;
                    case "pipeline":
                        pipeline = new Pipeline(reader.ReadArray(options));
                        break;
                    case "fields":
                        fields = reader.ReadArray(options);
                        break;
                    // Any other property name has no case here; presumably the next AdvanceTo call
                    // skips its value — TODO confirm against the AdvanceTo helper.
                }
            }
            if (invertedIndex is null) throw new JsonException("Serialized index is missing invertedIndex.");
            if (fieldVectors is null) throw new JsonException("Serialized index is missing fieldVectors.");
            if (pipeline is null) throw new JsonException("Serialized index is missing a pipeline.");
            if (fields is null) throw new JsonException("Serialized index is missing a list of fields.");

            // The token set is not part of the serialized form; it is rebuilt from the inverted index terms.
            foreach (string term in invertedIndex.Keys)
            {
                tokenSetBuilder.Insert(term);
            }
            tokenSetBuilder.Finish();

            return new Index(invertedIndex, fieldVectors, tokenSetBuilder.Root, fields, pipeline);
        }

        /// <summary>
        /// Writes an index as a JSON object: the version string, the field names, the field vectors
        /// as an array of [field, vector] pairs, the inverted index, and the saved pipeline entries.
        /// </summary>
        public override void Write(Utf8JsonWriter writer, Index value, JsonSerializerOptions options)
        {
            writer.WriteStartObject();
            writer.WriteString("version", VersionString);
            writer.WritePropertyName("fields");
            writer.WriteStartArray();
            foreach (string field in value.Fields)
            {
                writer.WriteStringValue(field);
            }
            writer.WriteEndArray();
            writer.WritePropertyName("fieldVectors");
            writer.WriteStartArray();
            foreach ((string field, Vector vector) in value.FieldVectors)
            {
                // Each entry is a two-element array: the field reference followed by its vector.
                writer.WriteStartArray();
                writer.WriteStringValue(field);
                writer.WriteValue(vector, options);
                writer.WriteEndArray();
            }
            writer.WriteEndArray();
            writer.WriteProperty("invertedIndex", value.InvertedIndex, options);
            writer.WritePropertyName("pipeline");
            writer.WriteStartArray();
            foreach (string fun in value.Pipeline.Save())
            {
                writer.WriteStringValue(fun);
            }
            writer.WriteEndArray();
            writer.WriteEndObject();
        }
    }
} 25 | else 26 | { 27 | var fieldMatches = new FieldMatches(); 28 | reader.AdvanceTo(JsonTokenType.StartObject); 29 | while (reader.AdvanceTo(JsonTokenType.PropertyName, JsonTokenType.EndObject) != JsonTokenType.EndObject) 30 | { 31 | string token = reader.ReadValue(options); 32 | var metadata = new FieldMatchMetadata(); 33 | reader.AdvanceTo(JsonTokenType.StartObject); 34 | while (reader.AdvanceTo(JsonTokenType.PropertyName, JsonTokenType.EndObject) != JsonTokenType.EndObject) 35 | { 36 | string metadataName = reader.ReadValue(options); 37 | reader.AdvanceTo(JsonTokenType.StartArray); 38 | reader.ReadOrThrow(); 39 | var data = new List(); 40 | while (reader.TokenType != JsonTokenType.EndArray) 41 | { 42 | // Special-case known metadata 43 | if (metadataName is "position") 44 | { 45 | // Position is serialized as an array of slices. 46 | while (reader.AdvanceTo(JsonTokenType.StartArray, JsonTokenType.EndArray) != JsonTokenType.EndArray) 47 | { 48 | data.Add(JsonSerializer.Deserialize(ref reader, options)); 49 | reader.ReadOrThrow(); 50 | } 51 | } 52 | else 53 | { 54 | data.Add(reader.ReadObject(options)); 55 | } 56 | } 57 | reader.ReadOrThrow(); 58 | metadata.Add(metadataName, data); 59 | } 60 | reader.ReadOrThrow(); 61 | fieldMatches.Add(token, metadata); 62 | } 63 | reader.ReadOrThrow(); 64 | result.Add(fieldName, fieldMatches); 65 | } 66 | } 67 | reader.ReadOrThrow(); 68 | return result; 69 | } 70 | 71 | public override void Write(Utf8JsonWriter writer, InvertedIndexEntry value, JsonSerializerOptions options) 72 | { 73 | writer.WriteStartObject(); 74 | writer.WriteNumber("_index", value.Index); 75 | foreach((string field, FieldMatches occurrences) in value) 76 | { 77 | writer.WritePropertyName(field); 78 | writer.WriteStartObject(); 79 | foreach ((string doc, FieldMatchMetadata metadata) in occurrences) 80 | { 81 | writer.WritePropertyName(doc); 82 | writer.WriteStartObject(); 83 | foreach((string key, IList data) in metadata) 84 | { 85 | 
    /// <summary>
    /// JSON converter that reads and writes an <c>InvertedIndex</c> as an array of
    /// two-element arrays, each holding a term followed by its inverted index entry.
    /// </summary>
    // NOTE(review): generic type arguments (on JsonConverter and ReadValue) are absent in this
    // listing — presumably lost in extraction; confirm against the repository.
    internal sealed class InvertedIndexJsonConverter : JsonConverter
    {
        /// <summary>
        /// Reads the inverted index from a JSON array of [term, entry] pairs.
        /// </summary>
        /// <exception cref="JsonException">The reader is not positioned on a start-array token.</exception>
        public override InvertedIndex Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
        {
            if (reader.TokenType != JsonTokenType.StartArray)
            {
                throw new JsonException("An inverted index can only be deserialized from an array.");
            }
            var serializedVectors = new List<(string term, InvertedIndexEntry posting)>();
            reader.ReadOrThrow();
            // Each iteration consumes one inner [term, entry] array; the loop ends at the outer EndArray.
            while (reader.AdvanceTo(JsonTokenType.StartArray, JsonTokenType.EndArray) != JsonTokenType.EndArray)
            {
                reader.AdvanceTo(JsonTokenType.String);
                serializedVectors.Add((
                    reader.ReadValue(options),
                    reader.ReadValue(options)));
            }
            return new InvertedIndex(serializedVectors);
        }

        /// <summary>
        /// Writes the inverted index as an array of [term, entry] pairs, ordered by term
        /// using ordinal string comparison so the output order is stable.
        /// </summary>
        public override void Write(Utf8JsonWriter writer, InvertedIndex value, JsonSerializerOptions options)
        {
            writer.WriteStartArray();
            foreach((string term, InvertedIndexEntry entry) in value.OrderBy(kvp => kvp.Key, StringComparer.Ordinal))
            {
                writer.WriteStartArray();
                writer.WriteValue(term, options);
                writer.WriteValue(entry, options);
                writer.WriteEndArray();
            }
            writer.WriteEndArray();
        }
    }
    /// <summary>
    /// JSON converter that reads and writes a <c>Slice</c> as a two-element array: start, then length.
    /// </summary>
    // NOTE(review): generic type arguments (on JsonConverter and ReadValue) are absent in this
    // listing — presumably lost in extraction; confirm against the repository.
    internal sealed class SliceConverter : JsonConverter
    {
        /// <summary>
        /// Reads a slice from a JSON array holding two integers.
        /// </summary>
        /// <exception cref="JsonException">
        /// The reader is not on a start-array token, or is not on an end-array token
        /// after the two values have been read.
        /// </exception>
        public override Slice Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
        {
            if (reader.TokenType != JsonTokenType.StartArray)
            {
                throw new JsonException("A slice can only be deserialized from an array containing two integers.");
            }
            reader.AdvanceTo(JsonTokenType.Number);

            int start = reader.ReadValue(options);
            int length = reader.ReadValue(options);

            // Anything other than the closing bracket here means the array held more than two items.
            if (reader.TokenType != JsonTokenType.EndArray)
            {
                throw new JsonException("A slice can only be deserialized from an array containing two integers.");
            }

            return new Slice(start, length);
        }

        /// <summary>
        /// Writes the slice as the array [Start, Length].
        /// </summary>
        public override void Write(Utf8JsonWriter writer, Slice value, JsonSerializerOptions options)
        {
            writer.WriteStartArray();
            writer.WriteValue(value.Start, options);
            writer.WriteValue(value.Length, options);
            writer.WriteEndArray();
        }
    }
JsonException("A vector can only be deserialized from an array."); 16 | } 17 | while (reader.AdvanceTo(JsonTokenType.Number, JsonTokenType.EndArray) != JsonTokenType.EndArray) 18 | { 19 | int index = reader.ReadValue(options); 20 | double value = reader.ReadValue(options); 21 | result.Insert(index, value); 22 | } 23 | reader.ReadOrThrow(); 24 | return result; 25 | } 26 | 27 | public override void Write(Utf8JsonWriter writer, Vector value, JsonSerializerOptions options) 28 | { 29 | writer.WriteStartArray(); 30 | foreach (double component in value.Save()) 31 | { 32 | writer.WriteNumberValue(component); 33 | } 34 | writer.WriteEndArray(); 35 | } 36 | } 37 | } -------------------------------------------------------------------------------- /LunrCore/Set.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Lunr 4 | { 5 | public interface ISet 6 | { 7 | bool Contains(T item); 8 | 9 | ISet Intersect(ISet other); 10 | 11 | ISet Union(ISet other); 12 | } 13 | 14 | public sealed class Set : ISet 15 | { 16 | public static ISet Empty = new EmptySet(); 17 | public static ISet Complete = new CompleteSet(); 18 | 19 | private readonly HashSet _innerSet; 20 | 21 | public Set(IEnumerable elements) 22 | => _innerSet = new HashSet(elements); 23 | 24 | public Set(IEnumerable elements, IEqualityComparer comparer) 25 | => _innerSet = new HashSet(elements, comparer); 26 | 27 | public Set(params T[] elements) : this((IEnumerable)elements) { } 28 | 29 | public bool Contains(T item) => _innerSet.Contains(item); 30 | 31 | public ISet Intersect(ISet other) 32 | { 33 | if (other is EmptySet) return Empty; 34 | if (other is CompleteSet) return this; 35 | if (other is Set otherSet) 36 | { 37 | var result = new HashSet(_innerSet); 38 | result.IntersectWith(otherSet._innerSet); 39 | return new Set(result); 40 | } 41 | return other.Intersect(this); 42 | } 43 | 44 | public ISet Union(ISet other) 45 | { 46 | if 
(other is EmptySet) return this; 47 | if (other is CompleteSet) return Complete; 48 | if (other is Set otherSet) 49 | { 50 | var result = new HashSet(_innerSet); 51 | result.UnionWith(otherSet._innerSet); 52 | return new Set(result); 53 | } 54 | return other.Union(this); 55 | } 56 | 57 | private class EmptySet : ISet 58 | { 59 | public bool Contains(TEmpty item) => false; 60 | 61 | public ISet Intersect(ISet other) => Set.Empty; 62 | 63 | public ISet Union(ISet other) => other; 64 | } 65 | 66 | private class CompleteSet : ISet 67 | { 68 | public bool Contains(TComplete item) => true; 69 | 70 | public ISet Intersect(ISet other) => other; 71 | 72 | public ISet Union(ISet other) => Set.Complete; 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /LunrCore/Slice.cs: -------------------------------------------------------------------------------- 1 | using Lunr.Serialization; 2 | using System; 3 | using System.Text.Json.Serialization; 4 | 5 | namespace Lunr 6 | { 7 | /// 8 | /// References a slice of text. 9 | /// 10 | [JsonConverter(typeof(SliceConverter))] 11 | public class Slice 12 | { 13 | public Slice(int start, int length) 14 | { 15 | if (start < 0) 16 | { 17 | throw new ArgumentOutOfRangeException(nameof(start), $"{nameof(start)} should be positive."); 18 | } 19 | if (length < 0) 20 | { 21 | throw new ArgumentOutOfRangeException(nameof(length), $"{nameof(length)} should be positive."); 22 | } 23 | Start = start; 24 | Length = length; 25 | } 26 | 27 | /// 28 | /// The start index of the slice. 29 | /// 30 | public int Start { get; } 31 | 32 | /// 33 | /// The length of the slice. 34 | /// 35 | public int Length { get; } 36 | 37 | public void Deconstruct(out int start, out int length) => (start, length) = (Start, Length); 38 | 39 | public override bool Equals(object? 
    /// <summary>
    /// Base class for stop word filters: a token whose string is in <see cref="StopWords"/>
    /// is dropped, any other token is passed through unchanged.
    /// </summary>
    // NOTE(review): generic type arguments (on ISet, IAsyncEnumerable, ValueTask) are absent in
    // this listing — presumably lost in extraction; confirm against the repository.
    public abstract class StopWordFilterBase
    {
        /// <summary>
        /// The words to filter out, supplied by the derived class.
        /// </summary>
        protected abstract ISet StopWords { get; }

        /// <summary>
        /// Yields nothing when cancellation was requested or the token is a stop word;
        /// otherwise yields the token itself. The <c>i</c> and <c>tokens</c> parameters are not used.
        /// </summary>
        private async IAsyncEnumerable StopFilterImplementation(
            Token token,
            int i,
            IAsyncEnumerable
            tokens,
            [EnumeratorCancellation] CancellationToken cancellationToken)
        {
            if (cancellationToken.IsCancellationRequested || IsStopWord(token.String)) yield break;
            // Awaiting an already-completed ValueTask gives this async iterator an await without doing real async work.
            yield return await new ValueTask(token);
        }

        /// <summary>
        /// Tests whether <paramref name="word"/> is in <see cref="StopWords"/>. Virtual, so derived classes can override the test.
        /// </summary>
        public virtual bool IsStopWord(string word) => StopWords.Contains(word);

        /// <summary>
        /// The filter exposed as a <c>Pipeline.Function</c>.
        /// </summary>
        public Pipeline.Function FilterFunction => StopFilterImplementation;
    }
    /// <summary>
    /// Wraps a token string together with its metadata.
    /// </summary>
    // NOTE(review): the type arguments of the Func parameters below are absent in this listing —
    // presumably lost in extraction, which is why the two Update overloads look identical here.
    // Judging by the call sites, the first takes the string and the second takes the string plus
    // the metadata; confirm against the repository.
    public sealed class Token
    {
        /// <summary>
        /// Creates a new token from a string.
        /// </summary>
        /// <param name="tokenString">The token string; null is stored as the empty string.</param>
        /// <param name="metadata">Metadata associated with this token; null creates empty metadata.</param>
        public Token(string tokenString, TokenMetadata? metadata = null)
        {
            String = tokenString ?? "";
            Metadata = metadata ?? new TokenMetadata();
        }

        /// <summary>
        /// Creates a new token from a string.
        /// </summary>
        /// <param name="tokenString">The token string.</param>
        /// <param name="metadata">Metadata associated with this token, as key/value pairs.</param>
        public Token(string tokenString, params (string key, object? value)[] metadata)
            : this(tokenString, new TokenMetadata(metadata)) { }

        /// <summary>
        /// The string token being wrapped.
        /// </summary>
        public string String { get; private set; }

        /// <summary>
        /// Metadata associated with this token.
        /// </summary>
        public TokenMetadata Metadata { get; }

        /// <summary>
        /// Applies the given function to the wrapped string token.
        /// </summary>
        /// <param name="transformation">A transformation on the token string.</param>
        /// <returns>The same token (not a clone), but its string has been mutated.</returns>
        public Token Update(Func transformation)
        {
            String = transformation(String);
            return this;
        }

        /// <summary>
        /// Applies the given function to the wrapped string token; the function also receives the metadata.
        /// </summary>
        /// <param name="transformation">A transformation on the token string, given the string and the metadata.</param>
        /// <returns>The same token (not a clone), but its string has been mutated.</returns>
        public Token Update(Func transformation)
        {
            String = transformation(String, Metadata);
            return this;
        }

        /// <summary>
        /// Clones this token, optionally applying a transformation to the token string.
        /// The clone receives a new TokenMetadata built from this token's metadata.
        /// </summary>
        /// <param name="transformation">An optional transformation to apply to the token string.</param>
        /// <returns>A clone of the token.</returns>
        public Token Clone(Func? transformation = null)
            => new Token(transformation is null ? String : transformation(String), new TokenMetadata(Metadata));

        /// <summary>Returns the wrapped string.</summary>
        public override string ToString() => String;

        /// <summary>Converts a token to its wrapped string.</summary>
        public static implicit operator string(Token token) => token.String;

        /// <summary>
        /// Two tokens are equal when their strings are equal; metadata is not compared.
        /// </summary>
        // NOTE(review): equality and the hash code depend on String, which Update mutates, so a
        // token's hash can change after Update — relevant if tokens are used as hash keys.
        public override bool Equals(object? obj)
            => obj switch
            {
                null => false,
                Token t => String.Equals(t.String),
                _ => false
            };

        /// <summary>Hash code of the wrapped string, consistent with <see cref="Equals(object)"/>.</summary>
        public override int GetHashCode() => String.GetHashCode();
    }
value)> data) : base(data.ToDictionary(d => d.key, d => d.value)) { } 14 | public TokenMetadata(IDictionary data) : base(data) { } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /LunrCore/TokenSetIdProvider.cs: -------------------------------------------------------------------------------- 1 | using System.Threading; 2 | 3 | namespace Lunr 4 | { 5 | public class TokenSetIdProvider 6 | { 7 | private int _counter = 0; 8 | 9 | public static readonly TokenSetIdProvider Instance = new TokenSetIdProvider(); 10 | 11 | public TokenSetIdProvider() { } 12 | 13 | public int Next() 14 | { 15 | Interlocked.Increment(ref _counter); 16 | return _counter; 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /LunrCore/TokenizeDelegate.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Globalization; 3 | 4 | namespace Lunr 5 | { 6 | public delegate IEnumerable TokenizeDelegate(object obj, TokenMetadata metadata, CultureInfo culture); 7 | } 8 | -------------------------------------------------------------------------------- /LunrCore/Trimmer.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Runtime.CompilerServices; 3 | using System.Text.RegularExpressions; 4 | using System.Threading; 5 | using System.Threading.Tasks; 6 | 7 | namespace Lunr 8 | { 9 | public abstract class TrimmerBase 10 | { 11 | public abstract string Trim(string s); 12 | 13 | private async IAsyncEnumerable TrimImplementation( 14 | Token token, 15 | int i, 16 | IAsyncEnumerable 17 | tokens, 18 | [EnumeratorCancellation] CancellationToken cancellationToken) 19 | { 20 | if (cancellationToken.IsCancellationRequested) yield break; 21 | yield return await new ValueTask(token.Clone(Trim)); 22 | } 23 | 24 | public Pipeline.Function 
FilterFunction => TrimImplementation; 25 | } 26 | 27 | public sealed class Trimmer : TrimmerBase 28 | { 29 | private static readonly Regex _trimStartExpression = new Regex(@"^\W+"); 30 | private static readonly Regex _trimEndExpression = new Regex(@"\W+$"); 31 | 32 | public override string Trim(string s) 33 | => _trimEndExpression.Replace(_trimStartExpression.Replace(s, ""), ""); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /LunrCore/Util.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | 4 | namespace Lunr 5 | { 6 | /// 7 | /// A bunch of helpers functions. 8 | /// 9 | public static class Util 10 | { 11 | /// 12 | /// A function to calculate the inverse document frequency for 13 | /// a posting.This is shared between the builder and the index. 14 | /// 15 | /// The posting for a given term. 16 | /// The total number of documents. 17 | /// The inverse document frequency. 18 | public static double InverseDocumentFrequency(InvertedIndexEntry posting, int documentCount) 19 | { 20 | int documentsWithTerm = 0; 21 | 22 | foreach ((string fieldName, FieldMatches value) in posting) 23 | { 24 | if (fieldName is "_index") continue; // Ignore the term index, its not a field 25 | documentsWithTerm += value.Count; 26 | } 27 | 28 | double x = (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5); 29 | 30 | return Math.Log(1 + Math.Abs(x)); 31 | } 32 | 33 | /// 34 | /// An extension method that enables the deconstruction of dictionary entries. 35 | /// 36 | /// 37 | /// ```cs 38 | /// foreach ((string key, Foo value) in someDictionaryOfFoos) 39 | /// { 40 | /// // Do something with `key` and `value`... 41 | /// } 42 | /// ``` 43 | /// 44 | /// The key value pair to deconstruct. 45 | /// The deconstructed key. 46 | /// The deconstructed value. 
47 | public static void Deconstruct(this KeyValuePair kvp, out TKey key, out TValue value) 48 | { 49 | key = kvp.Key; 50 | value = kvp.Value; 51 | } 52 | 53 | /// 54 | /// Formats a date like ECMA-269 specifies its ToString(). 55 | /// 56 | /// The DateTime to format. 57 | /// The formatted string. 58 | internal static string ToEcmaString(this DateTime dt) 59 | { 60 | string timeZoneString = dt.ToString("zzz"); 61 | return dt.ToString("ddd MMM dd yyyy HH:mm:ss") + " GMT" + timeZoneString.Substring(0, 3) + timeZoneString.Substring(4); 62 | } 63 | 64 | internal static readonly Func IsLunrSeparatorFunc = IsLunrSeparator; 65 | 66 | /// 67 | /// Tests if a character is whitespace or a hyphen. 68 | /// 69 | /// 70 | /// True if ch is whitespace or a hyphen. 71 | internal static bool IsLunrSeparator(this char ch) => char.IsWhiteSpace(ch) || ch is '-'; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /LunrCore/Vector.cs: -------------------------------------------------------------------------------- 1 | using Lunr.Serialization; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text.Json.Serialization; 6 | 7 | namespace Lunr 8 | { 9 | /// 10 | /// A vector is used to construct the vector space of documents and queries.These 11 | /// vectors support operations to determine the similarity between two documents or 12 | /// a document and a query. 13 | /// 14 | /// Normally no parameters are required for initializing a vector, but in the case of 15 | /// loading a previously dumped vector the raw elements can be provided to the constructor. 16 | /// 17 | /// For performance with large numbers of dimensions, this is implemented as a list of index and value. 
18 | /// 19 | [JsonConverter(typeof(VectorJsonConverter))] 20 | public class Vector 21 | { 22 | private readonly List<(int index, double value)> _elements; 23 | private double _magnitude = 0; 24 | 25 | public int Count => _elements.Count; 26 | 27 | public Vector(params (int index, double value)[] elements) 28 | => _elements = new List<(int, double)>(elements); 29 | 30 | public Vector() => _elements = new List<(int, double)>(); 31 | 32 | public Vector(int capacity) => _elements = new List<(int, double)>(capacity); 33 | 34 | /// 35 | /// Inserts an element at an index within the vector. 36 | /// Does not allow duplicates, will throw an error if there is already an entry 37 | /// for this index. 38 | /// 39 | /// The index at which the element should be inserted. 40 | /// The value to be inserted into the vector. 41 | public void Insert(int index, double value) 42 | => Upsert(index, value, (_, __) => throw new InvalidOperationException($"Duplicate index {index}.")); 43 | 44 | /// 45 | /// Calculates the magnitude of this vector. 46 | /// 47 | public double Magnitude 48 | => _magnitude == 0 ? _magnitude = Math.Sqrt(_elements.Sum(static el => el.value * el.value)) : _magnitude; 49 | 50 | /// 51 | /// Calculates the dot product of this vector and another vector. 52 | /// 53 | /// The vector to compute the dot product with. 54 | /// The dot product of the two vectors. 55 | public double Dot(Vector other) 56 | { 57 | int i = 0, j = 0; 58 | double dotProduct = 0; 59 | 60 | while (i < _elements.Count && j < other._elements.Count) 61 | { 62 | (int index, double value) = _elements[i]; 63 | (int otherIndex, double otherValue) = other._elements[j]; 64 | 65 | if (index < otherIndex) 66 | { 67 | i++; 68 | } 69 | else if (index > otherIndex) 70 | { 71 | j++; 72 | } 73 | else 74 | { 75 | dotProduct += value * otherValue; 76 | i++; j++; 77 | } 78 | } 79 | return dotProduct; 80 | } 81 | 82 | /// 83 | /// Calculates the similarity between this vector and another vector. 
84 | /// 85 | /// The other vector to calculate the similarity with. 86 | /// The similarity with the other vector 87 | public double Similarity(Vector other) => Magnitude == 0 ? 0 : Dot(other) / Magnitude; 88 | 89 | /// 90 | /// Saves the contents of the vector for serialization. 91 | /// 92 | /// The alternating list of indices and values. 93 | public IEnumerable Save() 94 | { 95 | foreach((int index, double value) in _elements) 96 | { 97 | yield return index; 98 | yield return value; 99 | } 100 | } 101 | 102 | /// 103 | /// Calculates the position within the vector to insert a given index. 104 | /// 105 | /// This is used internally by insert and upsert.If there are duplicate indexes then 106 | /// the position is returned as if the value for that index were to be updated, but it 107 | /// is the callers responsibility to check whether there is a duplicate at that index. 108 | /// 109 | /// Performs a binary search to find the insert point for a new element. 110 | /// 111 | /// The new index to insert. 112 | /// The position where to insert the new coordinate. 113 | public int PositionForIndex(int index) 114 | { 115 | // For an empty vector the tuple can be inserted at the beginning 116 | if (_elements.Count is 0) 117 | { 118 | return 0; 119 | } 120 | 121 | int start = 0, 122 | end = _elements.Count, 123 | sliceLength = end - start, 124 | pivotPoint = sliceLength >> 1, 125 | pivotIndex = _elements[pivotPoint].index; 126 | 127 | while (sliceLength > 1) 128 | { 129 | if (pivotIndex < index) 130 | { 131 | start = pivotPoint; 132 | } 133 | 134 | if (pivotIndex > index) 135 | { 136 | end = pivotPoint; 137 | } 138 | 139 | if (pivotIndex == index) 140 | { 141 | break; 142 | } 143 | 144 | sliceLength = end - start; 145 | pivotPoint = start + (sliceLength >> 1); 146 | pivotIndex = _elements[pivotPoint].index; 147 | } 148 | 149 | return pivotIndex >= index ? 
pivotPoint : pivotPoint + 1; 150 | } 151 | 152 | /// 153 | /// Inserts or updates an existing index within the vector. 154 | /// 155 | /// The index at which the element should be inserted. 156 | /// The value to be inserted into the vector. 157 | /// 158 | /// A function that is called for updates, the existing value and the 159 | /// requested value are passed as arguments. It returns the new resolved value. 160 | /// 161 | public void Upsert(int insertIdx, double value, Func conflictResolutionFunction) 162 | { 163 | _magnitude = 0; 164 | 165 | int position = PositionForIndex(insertIdx); 166 | 167 | if (position == _elements.Count) 168 | { 169 | _elements.Add((insertIdx, value)); 170 | } 171 | else if (_elements[position].index == insertIdx) 172 | { 173 | _elements[position] = (insertIdx, conflictResolutionFunction(_elements[position].value, value)); 174 | } 175 | else 176 | { 177 | _elements.Insert(position, (insertIdx, value)); 178 | } 179 | } 180 | 181 | /// 182 | /// Inserts or updates an existing index within the vector. 183 | /// 184 | /// The index at which the element should be inserted. 185 | /// The value to be inserted into the vector. 186 | public void Upsert(int index, double value) 187 | => Upsert(index, value, (existing, requested) => requested); 188 | 189 | /// 190 | /// Converts the vector to an array of the elements within the vector. 191 | /// 192 | /// The array of elements. 
193 | public double[] ToArray() 194 | { 195 | var result = new double[_elements.Count]; 196 | 197 | for (int i = 0; i < result.Length; i++) 198 | { 199 | result[i] = _elements[i].value; 200 | } 201 | 202 | return result; 203 | } 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /LunrCoreLmdb/Assets/LunrCoreLmdb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleroy/lunr-core/8ba2fa163a8ccd19efa95428b77a028b6ce6e183/LunrCoreLmdb/Assets/LunrCoreLmdb.png -------------------------------------------------------------------------------- /LunrCoreLmdb/DeserializeContext.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Buffers.Binary; 3 | using System.Runtime.Serialization; 4 | using System.Text; 5 | 6 | namespace LunrCoreLmdb 7 | { 8 | /* Sequential reader over a serialized buffer: every Read* method decodes one value from the front of the span and advances the span past the bytes it consumed. Multi-byte values are always decoded as little-endian. */ public sealed class DeserializeContext 9 | { 10 | public int Version { get; } 11 | 12 | /* Reads the leading format version and throws SerializationException when it is newer than SerializeContext.FormatVersion. */ public DeserializeContext(ref ReadOnlySpan buffer) 13 | { 14 | Version = ReadInt32(ref buffer); 15 | if(Version > SerializeContext.FormatVersion) 16 | throw new SerializationException("Tried to read an object with a version that is too new"); 17 | } 18 | 19 | /* Reads a 4-byte little-endian integer and advances the buffer by 4 bytes. */ public int ReadInt32(ref ReadOnlySpan buffer) 20 | { 21 | /* The data is produced with BinaryWriter, which always writes little-endian, so the host byte order must not influence decoding. */ var value = BinaryPrimitives.ReadInt32LittleEndian(buffer); 22 | buffer = buffer.Slice(sizeof(int)); 23 | return value; 24 | } 25 | 26 | /* Reads an 8-byte little-endian IEEE 754 double and advances the buffer by 8 bytes. */ public double ReadDouble(ref ReadOnlySpan buffer) 27 | { 28 | var value = BitConverter.Int64BitsToDouble(
BinaryPrimitives.ReadInt64LittleEndian(buffer)); /* always little-endian, matching BinaryWriter's output */ 29 | buffer = buffer.Slice(sizeof(long)); 30 | return value; 31 | } 32 | 33 | /* Reads a single byte as a character. NOTE(review): one byte per character means only code points up to U+00FF can be represented. */ public char ReadChar(ref ReadOnlySpan buffer) 34 | { 35 | var value = (char) buffer[0]; 36 | buffer = buffer.Slice(1); 37 | return value; 38 | } 39 | 40 | /* Reads one byte; only the value 1 is treated as true. */ public bool ReadBoolean(ref ReadOnlySpan buffer) 41 | { 42 | var value = buffer[0] == 1; 43 | buffer = buffer.Slice(1); 44 | return value; 45 | } 46 | 47 | /* Reads an Int32 character count followed by that many single-byte characters. */ public string ReadString(ref ReadOnlySpan buffer) 48 | { 49 | var length = ReadInt32(ref buffer); 50 | var sb = new StringBuilder(length); 51 | for (var i = 0; i < length; i++) 52 | { 53 | var c = ReadChar(ref buffer); 54 | sb.Append(c); 55 | } 56 | return sb.ToString(); 57 | } 58 | 59 | /* Reads an Int32 length prefix and returns a slice of that many bytes taken from the buffer (no copy is made here). */ public ReadOnlySpan ReadBytes(ref ReadOnlySpan buffer) 60 | { 61 | var length = ReadInt32(ref buffer); 62 | var value = buffer.Slice(0, length); 63 | buffer = buffer.Slice(length); 64 | return value; 65 | } 66 | 67 | } 68 | } -------------------------------------------------------------------------------- /LunrCoreLmdb/IReadOnlyIndex.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Lunr; 3 | 4 | namespace LunrCoreLmdb 5 | { 6 | public interface IReadOnlyIndex 7 | { 8 | /// 9 | /// An index of term/field to document reference. 10 | /// 11 | InvertedIndexEntry? GetInvertedIndexEntryByKey(string key); 12 | 13 | /// 14 | /// Field vectors. 15 | /// 16 | IEnumerable GetFieldVectorKeys(); 17 | 18 | /// 19 | /// Field vectors. 20 | /// 21 | Vector? GetFieldVectorByKey(string key); 22 | 23 | /// 24 | /// A set of all corpus tokens. 25 | /// 26 | TokenSet IntersectTokenSets(TokenSet other); 27 | 28 | /// 29 | /// The names of indexed document fields.
30 | /// 31 | IEnumerable GetFields(); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /LunrCoreLmdb/KeyBuilder.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Text; 3 | 4 | namespace LunrCoreLmdb 5 | { 6 | internal static class KeyBuilder 7 | { 8 | #region Fields 9 | 10 | private static readonly byte[] FieldPrefix = Encoding.UTF8.GetBytes("F:"); 11 | public static byte[] GetAllFieldsKey() => FieldPrefix; 12 | public static byte[] BuildFieldKey(string field) => FieldPrefix.Concat(Encoding.UTF8.GetBytes(field)); 13 | 14 | #endregion 15 | 16 | #region Field Vectors 17 | 18 | private static readonly byte[] FieldVectorKeyPrefix = Encoding.UTF8.GetBytes("K:"); 19 | public static byte[] GetAllFieldVectorKeys() => FieldVectorKeyPrefix; 20 | public static ReadOnlySpan BuildFieldVectorKeyKey(string key) => FieldVectorKeyPrefix.Concat(Encoding.UTF8.GetBytes(key)); 21 | 22 | private static readonly byte[] FieldVectorValuePrefix = Encoding.UTF8.GetBytes("V:"); 23 | public static byte[] BuildFieldVectorValueKey(string key) => FieldVectorValuePrefix.Concat(Encoding.UTF8.GetBytes(key)); 24 | 25 | #endregion 26 | 27 | #region Inverted Indices 28 | 29 | private static readonly byte[] InvertedIndexEntryPrefix = Encoding.UTF8.GetBytes("E:"); 30 | 31 | public static byte[] BuildInvertedIndexEntryKey(string key) => InvertedIndexEntryPrefix.Concat(Encoding.UTF8.GetBytes(key)); 32 | 33 | #endregion 34 | 35 | #region TokenSet 36 | 37 | private static readonly byte[] TokenSetWordPrefix = Encoding.UTF8.GetBytes("T:"); 38 | public static byte[] BuildTokenSetWordKey(string word) => TokenSetWordPrefix.Concat(Encoding.UTF8.GetBytes(word)); 39 | public static byte[] BuildAllTokenSetWordKeys() => TokenSetWordPrefix; 40 | 41 | #endregion 42 | 43 | private static byte[] Concat(this byte[] left, byte[] right) 44 | { 45 | var buffer = new byte[left.Length + right.Length]; 46 | 
Buffer.BlockCopy(left, 0, buffer, 0, left.Length); 47 | Buffer.BlockCopy(right, 0, buffer, left.Length, right.Length); 48 | return buffer; 49 | } 50 | } 51 | } -------------------------------------------------------------------------------- /LunrCoreLmdb/LunrCoreLmdb.csproj: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | Lunr 5 | 2.3.10.0 6 | Daniel Crenna, Bertrand Le Roy 7 | Decent Consulting 8 | A .NET Core, LMDB-backed port of Oliver Nightingale's lunr.js library, a lightweight full-text indexing library that is "a bit like Solr, but much smaller and not as bright." Icon adapted from https://commons.wikimedia.org/wiki/File:Internal_Structure_of_the_Moon.JPG by Iqbal Mahmud under Creative Commons Attribution Share Alike 4.0 International 9 | 2020 Bertrand Le Roy 10 | https://github.com/bleroy/lunr-core 11 | LunrCoreLmdb.png 12 | LICENSE 13 | https://github.com/bleroy/lunr-core 14 | true 15 | true 16 | netstandard2.0;net5.0 17 | 9 18 | true 19 | enable 20 | true 21 | true 22 | snupkg 23 | true 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /LunrCoreLmdb/SerializeContext.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Runtime.InteropServices; 5 | using System.Text; 6 | using Lunr; 7 | 8 | namespace LunrCoreLmdb 9 | { 10 | /* Thin wrapper over a BinaryWriter that stamps the stream with a format version and writes primitive values in sequence. */ public sealed class SerializeContext 11 | { 12 | public const int FormatVersion = 1; 13 | 14 | private readonly BinaryWriter _bw; 15 | 16 | /* Stores the writer, rejects versions newer than FormatVersion, then writes the version as the first value of the stream. */ public SerializeContext(BinaryWriter bw, int version = FormatVersion) 17 | { 18 | _bw = bw; 19 | /* Validate the requested version argument: the Version property is not assigned yet at this point and would always read 0. */ if (version > FormatVersion) 20 | throw new Exception("Tried to write an object with a version that is too new"); 21 | Version = version; 22 | _bw.Write(Version); 23 | } 24 | 25 | public int Version { get; } 26 | 27 | public
void Write(int value) => _bw.Write(value); 28 | 29 | public void Write(double value) => _bw.Write(value); 30 | 31 | /* Writes the flag and returns it unchanged. */ public bool Write(bool value) 32 | { 33 | _bw.Write(value); 34 | return value; 35 | } 36 | 37 | /* Writes an Int32 length prefix followed by the raw bytes. */ public void Write(byte[] value) 38 | { 39 | _bw.Write(value.Length); 40 | _bw.Write(value); 41 | } 42 | 43 | /* Writes an Int32 character count followed by one byte per character. NOTE(review): the (byte) cast truncates each char, so characters above U+00FF are not preserved. */ public void Write(string value) 44 | { 45 | _bw.Write(value.Length); 46 | foreach (var c in value) 47 | _bw.Write((byte) c); 48 | } 49 | 50 | /* Per-type converter pairs: the first delegate turns a boxed value into bytes, the second turns bytes back into a boxed value. */ internal static readonly Dictionary, Func deserialize)> KnownTypes; 51 | 52 | static SerializeContext() 53 | { 54 | KnownTypes = new Dictionary, Func deserialize)> 55 | { 56 | {typeof(short), (v => BitConverter.GetBytes((short) v), b => BitConverter.ToInt16(b, 0))}, 57 | {typeof(int), (v => BitConverter.GetBytes((int) v), b => BitConverter.ToInt32(b, 0))}, 58 | {typeof(long), (v => BitConverter.GetBytes((long) v), b => BitConverter.ToInt64(b, 0))}, 59 | {typeof(ushort), (v => BitConverter.GetBytes((ushort) v), b => BitConverter.ToUInt16(b, 0))}, 60 | {typeof(uint), (v => BitConverter.GetBytes((uint) v), b => BitConverter.ToUInt32(b, 0))}, 61 | {typeof(ulong), (v => BitConverter.GetBytes((ulong) v), b => BitConverter.ToUInt64(b, 0))}, 62 | /* A boxed float/double can only be unboxed to its own type; unboxing to int throws InvalidCastException and would not produce the byte count ToSingle/ToDouble expect. */ {typeof(float), (v => BitConverter.GetBytes((float) v), b => BitConverter.ToSingle(b, 0))}, 63 | {typeof(double), (v => BitConverter.GetBytes((double) v), b => BitConverter.ToDouble(b, 0))}, 64 | {typeof(bool), (v => BitConverter.GetBytes((bool) v), b => BitConverter.ToBoolean(b, 0))}, 65 | {typeof(char), (v => BitConverter.GetBytes((char) v), b => BitConverter.ToChar(b, 0))}, 66 | {typeof(string), (v => Encoding.UTF8.GetBytes((string) v), b => Encoding.UTF8.GetString(b))}, 67 | /* A Slice is stored as the UTF-8 text "start/length". */ {typeof(Slice), (v => 68 | { 69 | var (start, length) = (Slice) v; 70 | return Encoding.UTF8.GetBytes(start + "/" + length); 71 | }, b => 72 | { 73 | var value = Encoding.UTF8.GetString(b); 74 | var tokens = value.Split(new[] {"/"}, StringSplitOptions.RemoveEmptyEntries); 75 | return new
Slice(Convert.ToInt32(tokens[0]), Convert.ToInt32(tokens[1])); 76 | })} 77 | }; 78 | } 79 | 80 | public static void AddKnownType(Func typeToMemory, Func memoryToType) => KnownTypes[typeof(T)] = (v => typeToMemory((T) v), b => memoryToType(b)!); 81 | } 82 | } -------------------------------------------------------------------------------- /LunrCoreLmdbPerf/BlockCopyVsLinqConcat.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Linq; 3 | using System.Text; 4 | using BenchmarkDotNet.Attributes; 5 | using BenchmarkDotNet.Engines; 6 | 7 | namespace LunrCoreLmdbPerf 8 | { 9 | [SimpleJob(RunStrategy.Throughput)] 10 | public class BlockCopyVsLinqConcat 11 | { 12 | private byte[] _left; 13 | private byte[] _right; 14 | 15 | [GlobalSetup] 16 | public void GlobalSetUp() 17 | { 18 | _left = Encoding.UTF8.GetBytes("T:"); 19 | _right = Encoding.UTF8.GetBytes("word"); 20 | } 21 | 22 | [Benchmark] 23 | public void BlockCopy() 24 | { 25 | var buffer = new byte[_left.Length + _right.Length]; 26 | Buffer.BlockCopy(_left, 0, buffer, 0, _left.Length); 27 | Buffer.BlockCopy(_right, 0, buffer, _left.Length, _right.Length); 28 | } 29 | 30 | [Benchmark] 31 | public void LinqConcat() 32 | { 33 | var buffer = _left.Concat(_right).ToArray(); 34 | } 35 | } 36 | } -------------------------------------------------------------------------------- /LunrCoreLmdbPerf/DelegatedIndexExtensions.cs: -------------------------------------------------------------------------------- 1 | #nullable enable 2 | 3 | using System.Collections.Generic; 4 | using Lunr; 5 | using LunrCoreLmdb; 6 | 7 | namespace LunrCoreLmdbPerf 8 | { 9 | internal static class DelegatedIndexExtensions 10 | { 11 | /// This wraps a standard in-memory index, so that it can be used as a delegated index. 
12 | public static DelegatedIndex AsDelegated(this Index index) => new DelegatedIndex(new ReadOnlyIndex(index), index.Pipeline); 13 | 14 | internal class ReadOnlyIndex : IReadOnlyIndex 15 | { 16 | private readonly Index _index; 17 | 18 | public ReadOnlyIndex(Index index) => _index = index; 19 | public InvertedIndexEntry? GetInvertedIndexEntryByKey(string key) => _index.InvertedIndex[key]; 20 | public IEnumerable GetFieldVectorKeys() => _index.FieldVectors.Keys; 21 | public Vector? GetFieldVectorByKey(string key) => _index.FieldVectors[key]; 22 | public TokenSet IntersectTokenSets(TokenSet other) => _index.TokenSet.Intersect(other); 23 | public IEnumerable GetFields() => _index.Fields; 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /LunrCoreLmdbPerf/InterpolateVsAdd.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | using BenchmarkDotNet.Attributes; 3 | using BenchmarkDotNet.Engines; 4 | 5 | namespace LunrCoreLmdbPerf 6 | { 7 | [SimpleJob(RunStrategy.Throughput)] 8 | public class InterpolateVsAdd 9 | { 10 | [Benchmark] 11 | public void Interpolate() 12 | { 13 | const int a = 1; 14 | const int b = 2; 15 | Encoding.UTF8.GetBytes($"{a}/{b}"); 16 | } 17 | 18 | [Benchmark] 19 | public void Add() 20 | { 21 | const int a = 1; 22 | const int b = 2; 23 | Encoding.UTF8.GetBytes(a + "/" + b); 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /LunrCoreLmdbPerf/LunrCoreLmdbPerf.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | net6.0 6 | true 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /LunrCoreLmdbPerf/Program.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Running; 2 | 3 | namespace 
LunrCoreLmdbPerf 4 | { 5 | class Program 6 | { 7 | static void Main(string[] args) 8 | { 9 | BenchmarkRunner.Run(typeof(Program).Assembly); 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /LunrCoreLmdbPerf/SearchBenchmarkBase.cs: -------------------------------------------------------------------------------- 1 | using System.Threading.Tasks; 2 | using BenchmarkDotNet.Attributes; 3 | using Lunr; 4 | using LunrCoreLmdb; 5 | 6 | namespace LunrCoreLmdbPerf 7 | { 8 | public abstract class SearchBenchmarkBase 9 | { 10 | protected DelegatedIndex Index; 11 | 12 | protected readonly Document[] Documents = { 13 | new Document 14 | { 15 | { "id", "a" }, 16 | { "title", "Mr. Green kills Colonel Mustard" }, 17 | { "body", "Mr. Green killed Colonel Mustard in the study with the candlestick. Mr. Green is not a very nice fellow." }, 18 | { "wordCount", 19 } 19 | }, 20 | new Document 21 | { 22 | { "id", "b" }, 23 | { "title", "Plumb waters plant" }, 24 | { "body", "Professor Plumb has a green plant in his study" }, 25 | { "wordCount", 9 } 26 | }, 27 | new Document 28 | { 29 | { "id", "c" }, 30 | { "title", "Scarlett helps Professor" }, 31 | { "body", "Miss Scarlett watered Professor Plumbs green plant while he was away from his office last week." 
}, 32 | { "wordCount", 16 } 33 | } 34 | }; 35 | 36 | [Benchmark] 37 | public async Task SearchSingleTerm() 38 | { 39 | await foreach (Result _ in Index.Search("green")) { } 40 | } 41 | 42 | [Benchmark] 43 | public async Task SearchMultipleTerms() 44 | { 45 | await foreach (Result _ in Index.Search("green plant")) { } 46 | } 47 | 48 | [Benchmark] 49 | public async Task SearchTrailingWildcard() 50 | { 51 | await foreach (Result _ in Index.Search("pl*")) { } 52 | } 53 | 54 | [Benchmark] 55 | public async Task SearchLeadingWildcard() 56 | { 57 | await foreach (Result _ in Index.Search("*ant")) { } 58 | } 59 | 60 | [Benchmark] 61 | public async Task SearchContainedWildcard() 62 | { 63 | await foreach (Result _ in Index.Search("p*t")) { } 64 | } 65 | 66 | [Benchmark] 67 | public async Task SearchWithField() 68 | { 69 | await foreach (Result _ in Index.Search("title:plant")) { } 70 | } 71 | 72 | [Benchmark] 73 | public async Task SearchWithEditDistance() 74 | { 75 | await foreach (Result _ in Index.Search("plint~2")) { } 76 | } 77 | 78 | [Benchmark] 79 | public async Task SearchTypeAhead() 80 | { 81 | await foreach (Result _ in Index.Query(q => 82 | { 83 | q.AddTerm("pl", boost: 100, usePipeline: true); 84 | q.AddTerm("pl", boost: 10, usePipeline: false, wildcard: QueryWildcard.Trailing); 85 | q.AddTerm("pl", boost: 1, editDistance: 1); 86 | })) { } 87 | } 88 | 89 | [Benchmark] 90 | public async Task SearchNegatedQuery() 91 | { 92 | await foreach (Result _ in Index.Search("-plant")) { } 93 | } 94 | 95 | [Benchmark] 96 | public async Task SearchRequiredTerm() 97 | { 98 | await foreach (Result _ in Index.Search("green +plant")) { } 99 | } 100 | 101 | public async Task PlainIndex() 102 | { 103 | var index = await Lunr.Index.Build(config: async builder => 104 | { 105 | builder.ReferenceField = "id"; 106 | 107 | builder 108 | .AddField("title") 109 | .AddField("body", boost: 10); 110 | 111 | foreach (Document doc in Documents) 112 | { 113 | await builder.Add(doc); 114 | } 115 
| }); 116 | 117 | return index; 118 | } 119 | } 120 | } -------------------------------------------------------------------------------- /LunrCoreLmdbPerf/SearchBenchmarkLmdb.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Diagnostics; 3 | using System.IO; 4 | using System.Threading.Tasks; 5 | using BenchmarkDotNet.Attributes; 6 | using LunrCoreLmdb; 7 | 8 | namespace LunrCoreLmdbPerf 9 | { 10 | public class SearchBenchmarkLmdb : SearchBenchmarkBase 11 | { 12 | private string _path; 13 | private LmdbIndex _lmdb; 14 | 15 | [GlobalSetup] 16 | public async Task GlobalSetup() 17 | { 18 | _path = Guid.NewGuid().ToString(); 19 | 20 | var plain = await PlainIndex(); 21 | 22 | _lmdb = new LmdbIndex(_path); 23 | 24 | foreach (var field in plain.Fields) 25 | _lmdb.AddField(field); 26 | 27 | foreach (var (k, v) in plain.FieldVectors) 28 | _lmdb.AddFieldVector(k, v); 29 | 30 | foreach (var (k, v) in plain.InvertedIndex) 31 | _lmdb.AddInvertedIndexEntry(k, v); 32 | 33 | Index = new DelegatedIndex(_lmdb, plain.Pipeline); 34 | } 35 | 36 | [GlobalCleanup] 37 | public void GlobalCleanup() 38 | { 39 | Index.Dispose(); 40 | 41 | try 42 | { 43 | Directory.Delete(_path, recursive: true); 44 | } 45 | catch (Exception exception) 46 | { 47 | Trace.TraceError(exception.ToString()); 48 | } 49 | } 50 | } 51 | } -------------------------------------------------------------------------------- /LunrCoreLmdbPerf/SearchBenchmarkWrappedIndex.cs: -------------------------------------------------------------------------------- 1 | using System.Threading.Tasks; 2 | using BenchmarkDotNet.Attributes; 3 | using LunrCoreLmdb; 4 | 5 | namespace LunrCoreLmdbPerf 6 | { 7 | public class SearchBenchmarkWrappedIndex : SearchBenchmarkBase 8 | { 9 | [GlobalSetup] 10 | public async Task Setup() 11 | { 12 | Index = (await PlainIndex()).AsDelegated(); 13 | } 14 | } 15 | 16 | 17 | } 
-------------------------------------------------------------------------------- /LunrCoreLmdbPerf/SpanVsGetPinnableReference.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Buffers.Binary; 3 | using System.Collections.Generic; 4 | using System.IO; 5 | using System.Linq; 6 | using BenchmarkDotNet.Attributes; 7 | using Lunr; 8 | using LunrCoreLmdb; 9 | 10 | namespace LunrCoreLmdbPerf 11 | { 12 | public class SpanVsGetPinnableReference 13 | { 14 | private byte[] _buffer; 15 | 16 | [GlobalSetup] 17 | public void GlobalSetUp() 18 | { 19 | _buffer = VectorFrom(4, 5, 6).Serialize().ToArray(); 20 | } 21 | 22 | [Benchmark] 23 | public void GetPinnableReference() 24 | { 25 | var span = _buffer.AsSpan(); 26 | 27 | unsafe 28 | { 29 | fixed(byte* buf = &span.GetPinnableReference()) 30 | { 31 | var ms = new UnmanagedMemoryStream(buf, _buffer.Length); 32 | var br = new BinaryReader(ms); 33 | 34 | var count = br.ReadInt32(); 35 | var values = new List<(int, double)>(); 36 | for (var i = 0; i < count; i++) 37 | { 38 | var index = br.ReadDouble(); 39 | var value = br.ReadDouble(); 40 | values.Add(((int) index, value)); 41 | } 42 | 43 | var vector = new Vector(values.ToArray()); 44 | } 45 | } 46 | } 47 | 48 | [Benchmark] 49 | public void Span() 50 | { 51 | var span = _buffer.AsSpan(); 52 | 53 | var count = BinaryPrimitives.ReadInt32LittleEndian(span); 54 | span = span[4..]; 55 | 56 | var values = new List<(int, double)>(); 57 | for (var i = 0; i < count; i++) 58 | { 59 | var index = BitConverter.Int64BitsToDouble(BinaryPrimitives.ReadInt64LittleEndian(span)); 60 | span = span[8..]; 61 | 62 | var value = BitConverter.Int64BitsToDouble(BinaryPrimitives.ReadInt64LittleEndian(span)); 63 | span = span[8..]; 64 | 65 | values.Add(((int) index, value)); 66 | } 67 | 68 | var vector = new Vector(values.ToArray()); 69 | } 70 | 71 | private static Vector VectorFrom(params double[] elements) 72 | => new 
Vector(elements.Select((el, i) => (i, el)).ToArray()); 73 | } 74 | } -------------------------------------------------------------------------------- /LunrCoreLmdbTests/LunrCoreLmdbTests.csproj: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | net6.0 5 | enable 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | runtime; build; native; contentfiles; analyzers; buildtransitive 14 | all 15 | 16 | 17 | runtime; build; native; contentfiles; analyzers; buildtransitive 18 | all 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /LunrCoreLmdbTests/SerializationTests.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Linq; 3 | using System.Threading; 4 | using Lunr; 5 | using LunrCoreLmdb; 6 | using Xunit; 7 | using Index = Lunr.Index; 8 | 9 | namespace LunrCoreLmdbTests 10 | { 11 | [Collection(nameof(TempDirectory))] 12 | public class SerializationTests : IDisposable 13 | { 14 | private readonly TempDirectory _tempDir; 15 | 16 | public SerializationTests(TempDirectory tempDir) 17 | { 18 | _tempDir = tempDir; 19 | } 20 | 21 | /* Adds a field, reads it back, removes it, and checks that the field list is then empty. */ [Fact] 22 | public void Can_persist_fields() 23 | { 24 | const string field = "Field"; 25 | 26 | using var index = new LmdbIndex(_tempDir.NewDirectory()); 27 | 28 | var addedField = index.AddField(field); 29 | Assert.True(addedField); 30 | 31 | var fields = index.GetFields(); 32 | Assert.NotNull(fields); 33 | Assert.Equal(field, fields.Single()); 34 | 35 | var removedField = index.RemoveField(field); 36 | Assert.True(removedField); 37 | 38 | var noFields = index.GetFields(); 39 | /* Check the list fetched after removal, not the earlier snapshot. */ Assert.NotNull(noFields); 40 | Assert.Empty(noFields); 41 | } 42 | 43 | [Fact] 44 | public void Can_persist_vectors() 45 | { 46 | const string key = "Key"; 47 | 48 | using var index = new LmdbIndex(_tempDir.NewDirectory()); 49 | 50 | Vector vector = VectorFrom(4, 5, 6); 51 |
Assert.Equal(Math.Sqrt(77), vector.Magnitude); 52 | 53 | var addedVector = index.AddFieldVector(key, vector, CancellationToken.None); 54 | Assert.True(addedVector); 55 | 56 | var getByKey = index.GetFieldVectorByKey(key); 57 | Assert.NotNull(getByKey); 58 | Assert.Equal(Math.Sqrt(77), getByKey?.Magnitude); 59 | 60 | var getKeys = index.GetFieldVectorKeys().ToList(); 61 | Assert.Single(getKeys); 62 | /* xUnit expects (expected, actual); the constant key is the expected value. */ Assert.Equal(key, getKeys[0]); 63 | 64 | var removedVector = index.RemoveFieldVector(key, CancellationToken.None); 65 | Assert.True(removedVector); 66 | 67 | var noVector = index.GetFieldVectorByKey(key); 68 | Assert.Null(noVector); 69 | 70 | var noVectorKeys = index.GetFieldVectorKeys(CancellationToken.None); 71 | Assert.Empty(noVectorKeys); 72 | } 73 | 74 | /* Builds a one-document in-memory index, stores its first inverted-index entry in LMDB, reads it back, and intersects the stored token set with the in-memory one. */ [Fact] 75 | public void Can_persist_inverted_index_entries() 76 | { 77 | using var lmdb = new LmdbIndex(_tempDir.NewDirectory()); 78 | 79 | var builder = new Builder(); 80 | builder.AddField("title"); 81 | builder.Add(new Document 82 | { 83 | { "id", "id" }, 84 | { "title", "test" }, 85 | { "body", "missing" } 86 | }).ConfigureAwait(false).GetAwaiter().GetResult(); 87 | Index index = builder.Build(); 88 | 89 | var firstKey = index.InvertedIndex.Keys.FirstOrDefault() ??
throw new InvalidOperationException(); 90 | Assert.NotNull(firstKey); 91 | 92 | var added = lmdb.AddInvertedIndexEntry(firstKey, index.InvertedIndex[firstKey], CancellationToken.None); 93 | Assert.True(added); 94 | 95 | var getInvertedIndexEntry = lmdb.GetInvertedIndexEntryByKey(firstKey); 96 | Assert.NotNull(getInvertedIndexEntry); 97 | 98 | var tokenSet = lmdb.IntersectTokenSets(index.TokenSet); 99 | Assert.Single(tokenSet.Edges); 100 | } 101 | 102 | [Fact] 103 | public void Can_round_trip_vectors() 104 | { 105 | Vector original = VectorFrom(4, 5, 6); 106 | Assert.Equal(Math.Sqrt(77), original.Magnitude); 107 | 108 | var buffer = original.Serialize(); 109 | var deserialized = buffer.DeserializeFieldVector(); 110 | 111 | Assert.NotSame(original, deserialized); 112 | Assert.Equal(Math.Sqrt(77), deserialized.Magnitude); 113 | } 114 | 115 | [Fact] 116 | public void Can_round_trip_inverted_indexes() 117 | { 118 | var builder = new Builder(); 119 | builder.AddField("title"); 120 | builder.Add(new Document 121 | { 122 | { "id", "id" }, 123 | { "title", "test" }, 124 | { "body", "missing" } 125 | }).ConfigureAwait(false).GetAwaiter().GetResult(); 126 | Index index = builder.Build(); 127 | 128 | var original = index.InvertedIndex; 129 | var deserialized = original.Serialize().DeserializeInvertedIndex(); 130 | 131 | AssertInvertedIndex(original, deserialized); 132 | } 133 | 134 | [Fact] 135 | public void Can_round_trip_index_with_multiple_occurrences_and_position_metadata() 136 | { 137 | var builder = new Builder(); 138 | builder.AllowMetadata("position"); 139 | builder.AddField("body"); 140 | builder.Add(new Document 141 | { 142 | { "id", "id" }, 143 | { "body", "test test2 test" } 144 | }).ConfigureAwait(false).GetAwaiter().GetResult(); 145 | Index index = builder.Build(); 146 | string json = index.ToJson(); 147 | 148 | Index deserialized = Index.LoadFromJson(json); 149 | AssertInvertedIndex(index.InvertedIndex, deserialized.InvertedIndex); 150 | } 151 | 152 | [Fact] 153 
| public void Can_round_trip_inverted_index_entries() 154 | { 155 | var builder = new Builder(); 156 | builder.AddField("title"); 157 | builder.Add(new Document 158 | { 159 | { "id", "id" }, 160 | { "title", "test" }, 161 | { "body", "missing" } 162 | }).ConfigureAwait(false).GetAwaiter().GetResult(); 163 | Index index = builder.Build(); 164 | 165 | foreach (var (_, original) in index.InvertedIndex) 166 | { 167 | var buffer = original.Serialize(); 168 | var deserialized = buffer.DeserializeInvertedIndexEntry(); 169 | 170 | AssertInvertedIndexEntry(original, deserialized); 171 | } 172 | } 173 | 174 | [Fact] 175 | public void Can_round_trip_token_set() 176 | { 177 | var builder = new Builder(); 178 | builder.AddField("title"); 179 | builder.Add(new Document 180 | { 181 | { "id", "id" }, 182 | { "title", "test" }, 183 | { "body", "missing" } 184 | }).ConfigureAwait(false).GetAwaiter().GetResult(); 185 | Index index = builder.Build(); 186 | 187 | var original = index.TokenSet; 188 | var deserialized = original.Serialize().DeserializeTokenSet(); 189 | 190 | Assert.NotSame(original, deserialized); 191 | Assert.Equal(original.ToEnumeration(), deserialized.ToEnumeration()); 192 | } 193 | 194 | private static void AssertInvertedIndex(InvertedIndex left, InvertedIndex right) 195 | { 196 | Assert.Equal(left.Count, right.Count); 197 | 198 | var all = left.Zip(right, (entriesLeft, entriesRight) => 199 | { 200 | Assert.Equal(entriesLeft.Key, entriesRight.Key); 201 | AssertInvertedIndexEntry(entriesLeft.Value, entriesRight.Value); 202 | return true; 203 | }).ToList(); 204 | 205 | Assert.All(all, Assert.True); 206 | } 207 | 208 | private static void AssertInvertedIndexEntry(InvertedIndexEntry left, InvertedIndexEntry right) 209 | { 210 | Assert.Equal(left.Index, right.Index); 211 | Assert.Equal(left.Count, right.Count); 212 | 213 | var allFieldMatches = left.Zip(right, (fieldMatchesLeft, fieldMatchesRight) => 214 | { 215 | Assert.Equal(fieldMatchesLeft.Key, fieldMatchesRight.Key); 
216 | Assert.Equal(fieldMatchesLeft.Value.Count, fieldMatchesRight.Value.Count); 217 | 218 | var allFieldMatchMetadata = fieldMatchesLeft.Value.Zip(fieldMatchesRight.Value, (fieldMatchMetadataLeft, fieldMatchMetadataRight) => 219 | { 220 | Assert.Equal(fieldMatchMetadataLeft.Key, fieldMatchMetadataRight.Key); 221 | return true; 222 | }).ToList(); 223 | 224 | Assert.All(allFieldMatchMetadata, Assert.True); 225 | return true; 226 | }).ToList(); 227 | 228 | Assert.All(allFieldMatches, Assert.True); 229 | } 230 | 231 | private static Vector VectorFrom(params double[] elements) 232 | => new Vector(elements.Select((el, i) => (i, el)).ToArray()); 233 | 234 | public void Dispose() 235 | { 236 | _tempDir.Dispose(); 237 | } 238 | } 239 | } -------------------------------------------------------------------------------- /LunrCoreLmdbTests/TempDirectory.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Diagnostics; 3 | using System.IO; 4 | 5 | namespace LunrCoreLmdbTests 6 | { 7 | public class TempDirectory : IDisposable 8 | { 9 | private readonly string _directory; 10 | 11 | public TempDirectory() 12 | { 13 | _directory = Path.Combine(Directory.GetCurrentDirectory(), "lmdb"); 14 | } 15 | 16 | public void Dispose() 17 | { 18 | try 19 | { 20 | if (Directory.Exists(_directory)) 21 | { 22 | Directory.Delete(_directory, true); 23 | } 24 | } 25 | catch (Exception e) 26 | { 27 | Trace.TraceError(e.ToString()); 28 | } 29 | } 30 | 31 | public string NewDirectory() 32 | { 33 | var path = Path.Combine(_directory, Guid.NewGuid().ToString()); 34 | Directory.CreateDirectory(path); 35 | return path; 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /LunrCoreLmdbTests/TempDirectoryCollection.cs: -------------------------------------------------------------------------------- 1 | using Xunit; 2 | 3 | namespace LunrCoreLmdbTests 4 | { 5 | 
[CollectionDefinition(nameof(TempDirectory))] 6 | public class TempDirectoryCollection : ICollectionFixture { } 7 | } -------------------------------------------------------------------------------- /LunrCorePerf/BuilderBenchmark.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Lunr; 3 | using System.Threading.Tasks; 4 | 5 | namespace LunrCorePerf 6 | { 7 | public class BuilderBenchmark 8 | { 9 | private readonly Document[] _documents = new[] 10 | { 11 | new Document 12 | { 13 | { "id", "a" }, 14 | { "title", "Mr. Green kills Colonel Mustard" }, 15 | { "body", "Mr. Green killed Colonel Mustard in the study with the candlestick. Mr. Green is not a very nice fellow." }, 16 | { "wordCount", 19 } 17 | }, 18 | new Document 19 | { 20 | { "id", "b" }, 21 | { "title", "Plumb waters plant" }, 22 | { "body", "Professor Plumb has a green plant in his study" }, 23 | { "wordCount", 9 } 24 | }, 25 | new Document 26 | { 27 | { "id", "c" }, 28 | { "title", "Scarlett helps Professor" }, 29 | { "body", "Miss Scarlett watered Professor Plumbs green plant while he was away from his office last week." 
}, 30 | { "wordCount", 16 } 31 | } 32 | }; 33 | 34 | [Benchmark] 35 | public async Task AddDocuments() 36 | => await Index.Build(config: async builder => 37 | { 38 | builder.ReferenceField = "id"; 39 | 40 | builder 41 | .AddField("title") 42 | .AddField("body", boost: 10); 43 | 44 | foreach (Document doc in _documents) 45 | { 46 | await builder.Add(doc); 47 | } 48 | }); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /LunrCorePerf/LunrCorePerf.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | PreserveNewest 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /LunrCorePerf/PipelineBenchmark.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Lunr; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Threading; 6 | using System.Threading.Tasks; 7 | 8 | namespace LunrCorePerf 9 | { 10 | public class PipelineBenchmark 11 | { 12 | private static readonly string[] _first1000Words = Words.First(1000); 13 | private Pipeline _tokenToTokenPipeline; 14 | private Pipeline _tokenToTokenArrayPipeline; 15 | private IEnumerable _fewTokens; 16 | private IEnumerable _manyTokens; 17 | 18 | [GlobalSetup] 19 | public void Setup() 20 | { 21 | _fewTokens = BuildTokens(50); 22 | _manyTokens = BuildTokens(1000); 23 | _tokenToTokenPipeline = new Pipeline(Pipeline.BuildFunction(TokenToToken)); 24 | _tokenToTokenArrayPipeline = new Pipeline(Pipeline.BuildFunction(TokenToTokenArray)); 25 | } 26 | 27 | private static IEnumerable BuildTokens(int count) 28 | => _first1000Words.Take(count).Select(word => new Token(word)); 29 | 30 | private static Token[] TokenToTokenArray(Token token) => new[] { token, token }; 31 | 32 | private static Token TokenToToken(Token 
token) => token; 33 | 34 | [Benchmark] 35 | public async Task FewTokensTokenToTokenPipeline() 36 | { 37 | var cToken = new CancellationToken(); 38 | await foreach(Token _ in _tokenToTokenPipeline.Run(_fewTokens.ToAsyncEnumerable(cToken), cToken)) { } 39 | } 40 | 41 | [Benchmark] 42 | public async Task ManyTokensTokenToTokenPipeline() 43 | { 44 | var cToken = new CancellationToken(); 45 | await foreach (Token _ in _tokenToTokenPipeline.Run(_manyTokens.ToAsyncEnumerable(cToken), cToken)) { } 46 | } 47 | 48 | [Benchmark] 49 | public async Task FewTokensTokenToTokenArrayPipeline() 50 | { 51 | var cToken = new CancellationToken(); 52 | await foreach (Token _ in _tokenToTokenArrayPipeline.Run(_fewTokens.ToAsyncEnumerable(cToken), cToken)) { } 53 | } 54 | 55 | [Benchmark] 56 | public async Task ManyTokensTokenToTokenArrayPipeline() 57 | { 58 | var cToken = new CancellationToken(); 59 | await foreach (Token _ in _tokenToTokenArrayPipeline.Run(_manyTokens.ToAsyncEnumerable(cToken), cToken)) { } 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /LunrCorePerf/Program.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Running; 2 | 3 | namespace LunrCorePerf 4 | { 5 | class Program 6 | { 7 | static void Main(string[] args) 8 | { 9 | BenchmarkRunner.Run(typeof(Program).Assembly); 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /LunrCorePerf/QueryParserBenchmark.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Lunr; 3 | 4 | namespace LunrCorePerf 5 | { 6 | public class QueryParserBenchmark 7 | { 8 | private void Parse(string queryString) 9 | { 10 | var query = new Query("title", "body"); 11 | var parser = new QueryParser(queryString, query); 12 | 13 | parser.Parse(); 14 | } 15 | 16 | [Benchmark] 17 | public void 
ParseSimpleQuery() 18 | { 19 | Parse("foo bar"); 20 | } 21 | 22 | [Benchmark] 23 | public void ParseFieldQuery() 24 | { 25 | Parse("title:foo bar"); 26 | } 27 | 28 | [Benchmark] 29 | public void ParseModifierQuery() 30 | { 31 | Parse("foo~2 bar"); 32 | } 33 | 34 | [Benchmark] 35 | public void ParseComplexQuery() 36 | { 37 | Parse("title:foo~2^6 bar"); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /LunrCorePerf/SearchBenchmark.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Lunr; 3 | using System.Threading.Tasks; 4 | 5 | namespace LunrCorePerf 6 | { 7 | public class SearchBenchmark 8 | { 9 | private Index _index; 10 | 11 | private readonly Document[] _documents = new[] 12 | { 13 | new Document 14 | { 15 | { "id", "a" }, 16 | { "title", "Mr. Green kills Colonel Mustard" }, 17 | { "body", "Mr. Green killed Colonel Mustard in the study with the candlestick. Mr. Green is not a very nice fellow." }, 18 | { "wordCount", 19 } 19 | }, 20 | new Document 21 | { 22 | { "id", "b" }, 23 | { "title", "Plumb waters plant" }, 24 | { "body", "Professor Plumb has a green plant in his study" }, 25 | { "wordCount", 9 } 26 | }, 27 | new Document 28 | { 29 | { "id", "c" }, 30 | { "title", "Scarlett helps Professor" }, 31 | { "body", "Miss Scarlett watered Professor Plumbs green plant while he was away from his office last week." 
}, 32 | { "wordCount", 16 } 33 | } 34 | }; 35 | 36 | [GlobalSetup] 37 | public async Task Setup() 38 | { 39 | _index = await Index.Build(config: async builder => 40 | { 41 | builder.ReferenceField = "id"; 42 | 43 | builder 44 | .AddField("title") 45 | .AddField("body", boost: 10); 46 | 47 | foreach (Document doc in _documents) 48 | { 49 | await builder.Add(doc); 50 | } 51 | }); 52 | } 53 | 54 | [Benchmark] 55 | public async Task SearchSingleTerm() 56 | { 57 | await foreach (Result _ in _index.Search("green")) { } 58 | } 59 | 60 | [Benchmark] 61 | public async Task SearchMultipleTerms() 62 | { 63 | await foreach (Result _ in _index.Search("green plant")) { } 64 | } 65 | 66 | [Benchmark] 67 | public async Task SearchTrailingWildcard() 68 | { 69 | await foreach (Result _ in _index.Search("pl*")) { } 70 | } 71 | 72 | [Benchmark] 73 | public async Task SearchLeadingWildcard() 74 | { 75 | await foreach (Result _ in _index.Search("*ant")) { } 76 | } 77 | 78 | [Benchmark] 79 | public async Task SearchContainedWildcard() 80 | { 81 | await foreach (Result _ in _index.Search("p*t")) { } 82 | } 83 | 84 | [Benchmark] 85 | public async Task SearchWithField() 86 | { 87 | await foreach (Result _ in _index.Search("title:plant")) { } 88 | } 89 | 90 | [Benchmark] 91 | public async Task SearchWithEditDistance() 92 | { 93 | await foreach (Result _ in _index.Search("plint~2")) { } 94 | } 95 | 96 | [Benchmark] 97 | public async Task SearchTypeAhead() 98 | { 99 | await foreach (Result _ in _index.Query(q => 100 | { 101 | q.AddTerm("pl", boost: 100, usePipeline: true); 102 | q.AddTerm("pl", boost: 10, usePipeline: false, wildcard: QueryWildcard.Trailing); 103 | q.AddTerm("pl", boost: 1, editDistance: 1); 104 | })) { } 105 | } 106 | 107 | [Benchmark] 108 | public async Task SearchNegatedQuery() 109 | { 110 | await foreach (Result _ in _index.Search("-plant")) { } 111 | } 112 | 113 | [Benchmark] 114 | public async Task SearchRequiredTerm() 115 | { 116 | await foreach (Result _ in 
_index.Search("green +plant")) { } 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /LunrCorePerf/StemmerBenchmark.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Lunr; 3 | using System.Linq; 4 | 5 | namespace LunrCorePerf 6 | { 7 | public class StemmerBenchmark 8 | { 9 | private readonly StemmerBase _stemmer = new EnglishStemmer(); 10 | private readonly string[] _words = Words.First(1000).OrderBy(w => w).ToArray(); 11 | 12 | [Benchmark] 13 | public void StemEnglishWords() 14 | { 15 | foreach(string word in _words) 16 | { 17 | _stemmer.Stem(word); 18 | } 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /LunrCorePerf/TokenSetBenchmark.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Lunr; 3 | using System.Linq; 4 | 5 | namespace LunrCorePerf 6 | { 7 | public class TokenSetBenchmark 8 | { 9 | private TokenSet _tokenSet = TokenSet.FromArray(new[] 10 | { 11 | "january", "february", "march", "april", 12 | "may", "june", "july", "august", 13 | "september", "october", "november", "december" 14 | }.OrderBy(s => s)); 15 | private TokenSet _noWildCard = TokenSet.FromString("september"); 16 | private TokenSet _withWildCard = TokenSet.FromString("*ber"); 17 | 18 | private readonly string[] _words = Words.First(1000).OrderBy(w => w).ToArray(); 19 | 20 | [Benchmark] 21 | public void FromArray() 22 | { 23 | var _ = TokenSet.FromArray(_words); 24 | } 25 | 26 | [Benchmark] 27 | public void FromStringNoWildcard() 28 | { 29 | var _ = TokenSet.FromString("javascript"); 30 | } 31 | 32 | [Benchmark] 33 | public void FromStringWithWildcard() 34 | { 35 | var _ = TokenSet.FromString("java*cript"); 36 | } 37 | 38 | [Benchmark] 39 | public void FromFuzzyString() 40 | { 41 | var _ = 
TokenSet.FromFuzzyString("javascript", 2); 42 | } 43 | 44 | [Benchmark] 45 | public void ToArray() 46 | { 47 | foreach (string _ in _tokenSet.ToEnumeration()) { } 48 | } 49 | 50 | [Benchmark] 51 | public void ToStringToken() 52 | { 53 | string _ = _tokenSet.ToString(); 54 | } 55 | 56 | [Benchmark] 57 | public void IntersectNoWildcard() 58 | { 59 | TokenSet _ = _tokenSet.Intersect(_noWildCard); 60 | } 61 | 62 | [Benchmark] 63 | public void IntersectWithWildcard() 64 | { 65 | TokenSet _ = _tokenSet.Intersect(_withWildCard); 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /LunrCorePerf/TokenizerBenchmark.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Lunr; 3 | 4 | namespace LunrCorePerf 5 | { 6 | public class TokenizerBenchmark 7 | { 8 | private readonly string _lorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, " + 9 | "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " + 10 | "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris " + 11 | "nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in " + 12 | "reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla " + 13 | "pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa " + 14 | "qui officia deserunt mollit anim id est laborum"; 15 | 16 | [Benchmark] 17 | public void TokenizeLipsum() 18 | { 19 | foreach (Token _ in new Tokenizer().Tokenize(_lorem)) { } 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /LunrCorePerf/VectorBenchmark.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Lunr; 3 | using System; 4 | 5 | namespace LunrCorePerf 6 | { 7 | /* Benchmarks Vector.Magnitude, Vector.Dot and Vector.Similarity on two vectors of 1000 inserted elements each. */ public class VectorBenchmark 8 | { 9 | private readonly Vector _v1 = new Vector(); 10 | private readonly Vector _v2 = new Vector(); 11 | /* Fixed seed: Setup() draws every index and value from this generator, so each run measures identical vectors and results stay comparable between runs. */ private readonly Random _rnd = new Random(42); 12 | 13 | [GlobalSetup] 14 | public void Setup() 15 | { 16 | for (int i = 0; i < 1000; i++) 17 | { 18 | int index = _rnd.Next(i * 100, i * 100 + 100); 19 | double val = _rnd.NextDouble() * 100; 20 | _v1.Insert(index, val); 21 | } 22 | 23 | for (int i = 0; i < 1000; i++) 24 | { 25 | int index = _rnd.Next(i * 100, i * 100 + 100); 26 | double val = _rnd.NextDouble() * 100; 27 | _v2.Insert(index, val); 28 | } 29 | } 30 | 31 | [Benchmark] 32 | public void Magnitude() 33 | { 34 | double _ = _v1.Magnitude; 35 | } 36 | 37 | [Benchmark] 38 | public void Dot() 39 | { 40 | double _ = _v1.Dot(_v2); 41 | } 42 | 43 | [Benchmark] 44 | public void Similarity() 45 | { 46 | double _ = _v1.Similarity(_v2); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /LunrCorePerf/Words.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.IO; 3 | using System.Linq; 4 | 5 | namespace LunrCorePerf 6 | { 7 | public static class Words 8 | { 9 | public static string[] First(int n) 10 | => Lines(n).ToArray(); 11 | 12 | private static IEnumerable<string> Lines(int limit = 0) 13 | { 14 | int count = 0; 15 | using 
FileStream file = File.OpenRead(Path.Combine("fixtures", "words.txt")); 16 | using var reader = new StreamReader(file, true); 17 | while (!reader.EndOfStream && count++ < limit) 18 | { 19 | yield return reader.ReadLine(); 20 | } 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /LunrCoreTests/EnglishStopWordFilterTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System.Threading.Tasks; 3 | using Xunit; 4 | 5 | namespace LunrCoreTests 6 | { 7 | public class EnglishStopWordFilterTests 8 | { 9 | private readonly StopWordFilterBase filter = new EnglishStopWordFilter(); 10 | 11 | [Fact] 12 | public async Task EnglishStopWordFilterIgnoresCase() 13 | { 14 | string[] stopWords = new[] { "the", "The", "THE" }; 15 | 16 | foreach (string word in stopWords) 17 | { 18 | Assert.True(filter.IsStopWord(word)); 19 | Assert.Empty(await filter.FilterFunction.BasicallyRun(word)); 20 | } 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /LunrCoreTests/FieldReferenceTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System; 3 | using Xunit; 4 | 5 | namespace LunrCoreTests 6 | { 7 | public class FieldReferenceTests 8 | { 9 | [Fact] 10 | public void FieldReferenceToStringCombinesDocumentReferenceAndFieldName() 11 | { 12 | var fieldRef = new FieldReference("123", "title"); 13 | 14 | Assert.Equal("title/123", fieldRef.ToString()); 15 | } 16 | 17 | [Fact] 18 | public void FieldReferenceFromStringSplitsTheStringIntoParts() 19 | { 20 | var fieldRef = FieldReference.FromString("title/123"); 21 | 22 | Assert.Equal("title", fieldRef.FieldName); 23 | Assert.Equal("123", fieldRef.DocumentReference); 24 | } 25 | 26 | [Fact] 27 | public void FromStringLeavesJoinCharacterInDocRef() 28 | { 29 | var fieldRef = 
FieldReference.FromString("title/http://example.com/123"); 30 | 31 | Assert.Equal("title", fieldRef.FieldName); 32 | Assert.Equal("http://example.com/123", fieldRef.DocumentReference); 33 | } 34 | 35 | [Fact] 36 | public void FromStringWithoutJoinCharacterThrows() 37 | { 38 | Assert.Throws(() => 39 | { 40 | FieldReference.FromString("docRefOnly"); 41 | }); 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /LunrCoreTests/LunrCoreTests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | net6.0 5 | enable 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | all 14 | runtime; build; native; contentfiles; analyzers; buildtransitive 15 | 16 | 17 | all 18 | runtime; build; native; contentfiles; analyzers; buildtransitive 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | PreserveNewest 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /LunrCoreTests/MatchDataTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System.Collections.Generic; 3 | using Xunit; 4 | 5 | namespace LunrCoreTests 6 | { 7 | public class MatchDataTests 8 | { 9 | [Fact] 10 | public void MatchDataCombines() 11 | { 12 | var match = new MatchData( 13 | "foo", 14 | "title", 15 | new FieldMatchMetadata { 16 | { "position", new List { 1 } } 17 | }); 18 | match.Combine(new MatchData( 19 | "bar", 20 | "title", 21 | new FieldMatchMetadata { 22 | { "position", new List { 2 } } 23 | })); 24 | match.Combine(new MatchData( 25 | "baz", 26 | "body", 27 | new FieldMatchMetadata { 28 | { "position", new List { 3 } } 29 | })); 30 | match.Combine(new MatchData( 31 | "baz", 32 | "body", 33 | new FieldMatchMetadata { 34 | { "position", new List { 4 } } 35 | })); 36 | 37 | Assert.Equal( 38 | new[] { "foo", "bar", "baz" }, 39 | match.Posting.Keys); 40 | 41 | Assert.Equal( 42 | new object[] { 1 }, 43 | 
match.Posting["foo"]["title"]["position"]); 44 | Assert.Equal( 45 | new object[] { 2 }, 46 | match.Posting["bar"]["title"]["position"]); 47 | Assert.Equal( 48 | new object[] { 3, 4 }, 49 | match.Posting["baz"]["body"]["position"]); 50 | } 51 | 52 | [Fact] 53 | public void CombineDoesntMutateDataSource() 54 | { 55 | var metadata = new FieldMatchMetadata 56 | { 57 | { "foo", new object[] { 1 } } 58 | }; 59 | var matchData1 = new MatchData("foo", "title", metadata); 60 | var matchData2 = new MatchData("foo", "title", metadata); 61 | 62 | matchData1.Combine(matchData2); 63 | 64 | Assert.Equal( 65 | new object[] { 1 }, 66 | metadata["foo"]); 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /LunrCoreTests/MultipleMandatoryFieldsTest.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Diagnostics; 5 | using System.IO; 6 | using System.Linq; 7 | using System.Threading.Tasks; 8 | using Xunit; 9 | 10 | namespace LunrCoreTests 11 | { 12 | public class MultipleMandatoryFieldsTest 13 | { 14 | [Fact] 15 | public async Task SearchShouldNotThrowExceptionWhenNoResult() 16 | { 17 | var index = await Lunr.Index.Build( 18 | async builder => 19 | { 20 | builder 21 | .AddField("ref") 22 | .AddField("lastname", 3) 23 | .AddField("firstname", 2); 24 | 25 | builder.ReferenceField = "ref"; 26 | 27 | await builder.Add(new Lunr.Document() 28 | { 29 | { "ref", "0001" }, 30 | { "lastname", "Wonderland" }, 31 | { "firstname", "Alice" } 32 | }); 33 | 34 | await builder.Add(new Lunr.Document() 35 | { 36 | { "ref", "0002" }, 37 | { "lastname", "Sponge" }, 38 | { "firstname", "Bob" } 39 | }); 40 | } 41 | ); 42 | 43 | // Lunr throws ArgumentException. This patch fixes the exception. 
44 | var results = index.Search("+Alice +abc"); 45 | int count = 0; 46 | await foreach (var result in results) 47 | { 48 | count += 1; 49 | } 50 | Assert.Equal(0, count); 51 | 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /LunrCoreTests/QueryTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System.Linq; 3 | using Xunit; 4 | 5 | namespace LunrCoreTests 6 | { 7 | public class QueryTests 8 | { 9 | private static readonly string[] _allFields = new[] { "title", "body" }; 10 | 11 | [Fact] 12 | public void SingleStringTerm() 13 | { 14 | Query query = new Query(_allFields).AddTerm("foo"); 15 | 16 | Assert.Equal("foo", query.Clauses.Single().Term); 17 | } 18 | 19 | [Fact] 20 | public void SingleTokenTerm() 21 | { 22 | Query query = new Query(_allFields).AddTerm(new Token("foo")); 23 | 24 | Assert.Equal("foo", query.Clauses.Single().Term); 25 | } 26 | 27 | [Fact] 28 | public void MultipleStringTerms() 29 | { 30 | Query query = new Query(_allFields).AddTerms("foo", "bar"); 31 | 32 | Assert.Equal( 33 | new[] { "foo", "bar" }, 34 | query.Clauses.Select(c => c.Term)); 35 | Assert.True(query.Clauses.All(c => c.UsePipeline)); 36 | } 37 | 38 | [Fact] 39 | public void MultipleStringTermsWithOptions() 40 | { 41 | Query query = new Query(_allFields) 42 | .AddTerms(new Clause(usePipeline: false), "foo", "bar"); 43 | 44 | Assert.Equal( 45 | new[] { "foo", "bar" }, 46 | query.Clauses.Select(c => c.Term)); 47 | Assert.True(query.Clauses.All(c => !c.UsePipeline)); 48 | } 49 | 50 | [Fact] 51 | public void MultipleTokenTerms() 52 | { 53 | Query query = new Query(_allFields) 54 | .AddTerms(new Tokenizer().Tokenize("foo bar")); 55 | 56 | Assert.Equal( 57 | new[] { "foo", "bar" }, 58 | query.Clauses.Select(c => c.Term)); 59 | } 60 | 61 | [Fact] 62 | public void ClauseDefaults() 63 | { 64 | Query query = new Query(_allFields) 65 | .AddClause(new Clause(term: "foo")); 66 
| 67 | Clause clause = query.Clauses.Single(); 68 | 69 | Assert.Equal("foo", clause.Term); 70 | Assert.Equal(_allFields, clause.Fields); 71 | Assert.Equal(1, clause.Boost); 72 | Assert.True(clause.UsePipeline); 73 | } 74 | 75 | [Fact] 76 | public void SpecifiedClause() 77 | { 78 | Query query = new Query(_allFields) 79 | .AddClause(new Clause( 80 | term: "foo", 81 | boost: 10, 82 | fields: new[] { "title" }, 83 | usePipeline: false)); 84 | 85 | Clause clause = query.Clauses.Single(); 86 | 87 | Assert.Equal("foo", clause.Term); 88 | Assert.Equal("title", clause.Fields.Single()); 89 | Assert.Equal(10, clause.Boost); 90 | Assert.False(clause.UsePipeline); 91 | } 92 | 93 | [Fact] 94 | public void NoWildcards() 95 | { 96 | Query query = new Query(_allFields) 97 | .AddClause(new Clause( 98 | term: "foo", 99 | wildcard: QueryWildcard.None)); 100 | 101 | Clause clause = query.Clauses.Single(); 102 | 103 | Assert.Equal("foo", clause.Term); 104 | } 105 | 106 | [Fact] 107 | public void LeadingWildcard() 108 | { 109 | Query query = new Query(_allFields) 110 | .AddClause(new Clause( 111 | term: "foo", 112 | wildcard: QueryWildcard.Leading)); 113 | 114 | Clause clause = query.Clauses.Single(); 115 | 116 | Assert.Equal("*foo", clause.Term); 117 | } 118 | 119 | [Fact] 120 | public void TrailingWildcard() 121 | { 122 | Query query = new Query(_allFields) 123 | .AddClause(new Clause( 124 | term: "foo", 125 | wildcard: QueryWildcard.Trailing)); 126 | 127 | Clause clause = query.Clauses.Single(); 128 | 129 | Assert.Equal("foo*", clause.Term); 130 | } 131 | 132 | [Fact] 133 | public void LeadingAndTrailingWildcards() 134 | { 135 | Query query = new Query(_allFields) 136 | .AddClause(new Clause( 137 | term: "foo", 138 | wildcard: QueryWildcard.Both)); 139 | 140 | Clause clause = query.Clauses.Single(); 141 | 142 | Assert.Equal("*foo*", clause.Term); 143 | } 144 | 145 | [Fact] 146 | public void ExistingWildcards() 147 | { 148 | Query query = new Query(_allFields) 149 | .AddClause(new 
Clause( 150 | term: "*foo*", 151 | wildcard: QueryWildcard.Both)); 152 | 153 | Clause clause = query.Clauses.Single(); 154 | 155 | Assert.Equal("*foo*", clause.Term); 156 | } 157 | 158 | [Fact] 159 | public void AllProhibitedIsNegated() 160 | { 161 | Query query = new Query(_allFields) 162 | .AddClause(new Clause( 163 | term: "foo", 164 | presence: QueryPresence.Prohibited)) 165 | .AddClause(new Clause( 166 | term: "bar", 167 | presence: QueryPresence.Prohibited)); 168 | 169 | Assert.True(query.IsNegated); 170 | } 171 | 172 | [Fact] 173 | public void SomeProhibitedIsNotNegated() 174 | { 175 | Query query = new Query(_allFields) 176 | .AddClause(new Clause( 177 | term: "foo", 178 | presence: QueryPresence.Prohibited)) 179 | .AddClause(new Clause( 180 | term: "bar", 181 | presence: QueryPresence.Required)); 182 | 183 | Assert.False(query.IsNegated); 184 | } 185 | 186 | [Fact] 187 | public void NoneProhibitedIsNotNegated() 188 | { 189 | Query query = new Query(_allFields) 190 | .AddClause(new Clause( 191 | term: "foo", 192 | presence: QueryPresence.Optional)) 193 | .AddClause(new Clause( 194 | term: "bar", 195 | presence: QueryPresence.Required)); 196 | 197 | Assert.False(query.IsNegated); 198 | } 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /LunrCoreTests/SetTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using Xunit; 3 | 4 | namespace LunrCoreTests 5 | { 6 | public class SetTests 7 | { 8 | [Fact] 9 | public void CompleteSetContainsEverything() 10 | { 11 | Assert.True(Set.Complete.Contains("foo")); 12 | } 13 | 14 | [Fact] 15 | public void EmptySetContainsNothing() 16 | { 17 | Assert.False(Set.Empty.Contains("foo")); 18 | } 19 | 20 | [Fact] 21 | public void SetContainsItsElements() 22 | { 23 | Assert.True(new Set("foo").Contains("foo")); 24 | } 25 | 26 | [Fact] 27 | public void SetDoesNotContainsNonElements() 28 | { 29 | Assert.False(new 
Set("foo").Contains("bar")); 30 | } 31 | 32 | [Fact] 33 | public void CompleteSetUnionPopulatedSetContainsEverything() 34 | { 35 | ISet union = Set.Complete.Union(new Set("foo")); 36 | Assert.True(union.Contains("foo")); 37 | Assert.True(union.Contains("bar")); 38 | } 39 | 40 | [Fact] 41 | public void EmptySetUnionPopulatedSetIsThePopulatedSet() 42 | { 43 | ISet union = Set.Empty.Union(new Set("foo")); 44 | Assert.True(union.Contains("foo")); 45 | Assert.False(union.Contains("bar")); 46 | } 47 | 48 | [Fact] 49 | public void UnionContainsElementsFromBothSets() 50 | { 51 | ISet union = new Set("bar") 52 | .Union(new Set("foo")); 53 | 54 | Assert.True(union.Contains("foo")); 55 | Assert.True(union.Contains("bar")); 56 | Assert.False(union.Contains("baz")); 57 | } 58 | 59 | [Fact] 60 | public void UnionWithEmptySetContainsAllElements() 61 | { 62 | ISet union = new Set("bar") 63 | .Union(Set.Empty); 64 | 65 | Assert.True(union.Contains("bar")); 66 | Assert.False(union.Contains("baz")); 67 | } 68 | 69 | [Fact] 70 | public void UnionWithCompleteSetContainsEverything() 71 | { 72 | ISet union = new Set("bar") 73 | .Union(Set.Complete); 74 | 75 | Assert.True(union.Contains("foo")); 76 | Assert.True(union.Contains("bar")); 77 | Assert.True(union.Contains("baz")); 78 | } 79 | 80 | [Fact] 81 | public void CompleteSetInterPopulatedSetIsThatSet() 82 | { 83 | ISet inter = Set.Complete 84 | .Intersect(new Set("foo")); 85 | 86 | Assert.True(inter.Contains("foo")); 87 | Assert.False(inter.Contains("bar")); 88 | } 89 | 90 | [Fact] 91 | public void EmptySetInterPopulatedSetIsEmpty() 92 | { 93 | ISet inter = Set.Empty 94 | .Intersect(new Set("foo")); 95 | 96 | Assert.False(inter.Contains("foo")); 97 | } 98 | 99 | [Fact] 100 | public void IntersectionOfTwoNonOverlappingSetsIsEmpty() 101 | { 102 | ISet inter = new Set("bar") 103 | .Intersect(new Set("foo")); 104 | 105 | Assert.False(inter.Contains("foo")); 106 | Assert.False(inter.Contains("bar")); 107 | } 108 | 109 | [Fact] 110 | public 
void IntersectionOfTwoOverlappingSetsContainsIntersectionElements() 111 | { 112 | ISet inter = new Set("foo", "bar") 113 | .Intersect(new Set("foo")); 114 | 115 | Assert.True(inter.Contains("foo")); 116 | Assert.False(inter.Contains("bar")); 117 | } 118 | 119 | [Fact] 120 | public void IntersectionOfPopulatedSetWithEmptySetIsEmpty() 121 | { 122 | ISet inter = new Set("foo") 123 | .Intersect(Set.Empty); 124 | 125 | Assert.False(inter.Contains("foo")); 126 | } 127 | 128 | [Fact] 129 | public void IntersectionOfCompleteSetWithPopulatedSetContainsSetElements() 130 | { 131 | ISet inter = new Set("foo") 132 | .Intersect(Set.Complete); 133 | 134 | Assert.True(inter.Contains("foo")); 135 | Assert.False(inter.Contains("bar")); 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /LunrCoreTests/StemmerTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Text.Json; 6 | using System.Threading.Tasks; 7 | using Xunit; 8 | 9 | namespace LunrCoreTests 10 | { 11 | public class StemmerTests 12 | { 13 | /* Runs every word of fixtures/stemming_vocab.json through the English stemmer function and checks the single resulting token against the expected value stored for that word. */ [Fact] 14 | public async Task StemmerReducesWordsToTheirStem() 15 | { 16 | Dictionary<string, string> testData = JsonSerializer 17 | .Deserialize<Dictionary<string, string>>( 18 | File.ReadAllText(Path.Combine("fixtures", "stemming_vocab.json")))!; 19 | 20 | foreach((string word, string expected) in testData) 21 | { 22 | string result = (await new EnglishStemmer() 23 | .StemmerFunction 24 | .BasicallyRun(word)) 25 | .Single(); 26 | Assert.Equal(expected, result); 27 | } 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /LunrCoreTests/StopWordFilterTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System.Collections.Generic; 3 | using System.Threading; 4 | using System.Threading.Tasks; 5 | using 
Xunit; 6 | 7 | namespace LunrCoreTests 8 | { 9 | public class StopWordFilterTests 10 | { 11 | private readonly StopWordFilterBase filter = new EnglishStopWordFilter(); 12 | 13 | [Fact] 14 | public async Task StopWordFilterFiltersStopWords() 15 | { 16 | string[] stopWords = new[] { "the", "and", "but", "than", "when" }; 17 | 18 | foreach (string word in stopWords) 19 | { 20 | Assert.True(filter.IsStopWord(word)); 21 | Assert.Empty(await filter.FilterFunction.BasicallyRun(word)); 22 | } 23 | } 24 | 25 | [Fact] 26 | public async Task StopWordFilterIgnoresNonStopWords() 27 | { 28 | string[] nonStopWords = new[] { "interesting", "words", "pass", "through" }; 29 | 30 | foreach (string word in nonStopWords) 31 | { 32 | Assert.False(filter.IsStopWord(word)); 33 | Assert.Equal(new[] { word }, await filter.FilterFunction.BasicallyRun(word)); 34 | } 35 | } 36 | 37 | // Note: the lunr.js library has other tests here that are more 38 | // implementation specific and that I'm not replicating here. 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /LunrCoreTests/TestHelpers.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Threading; 6 | using System.Threading.Tasks; 7 | 8 | namespace LunrCoreTests 9 | { 10 | public static class TestHelpers 11 | { 12 | /* Invokes a single pipeline function on one token built from `token` (with index `i` and the optional surrounding `tokens`) and returns the String of every token the function yields, in order. */ public async static Task<string[]> BasicallyRun( 13 | this Pipeline.Function fun, 14 | string token, 15 | int i = 0, 16 | string[]? tokens = null!) 17 | { 18 | var result = new List<string>(); 19 | var cancellationToken = new CancellationToken(); 20 | await foreach(Token t in fun( 21 | new Token(token), 22 | i, 23 | tokens is null ? 
24 | AsyncEnumerableExtensions.Empty<Token>() : 25 | tokens 26 | .Select(s => new Token(s)) 27 | .ToAsyncEnumerable(cancellationToken), 28 | cancellationToken)) 29 | { 30 | result.Add(t.String); 31 | } 32 | return result.ToArray(); 33 | } 34 | 35 | /* Wraps a token-to-token delegate as a pipeline function that yields exactly the delegate's result. */ public static Pipeline.Function ToPipelineFunction(this Func<Token, Token> fun) 36 | => ( 37 | Token token, 38 | int i, 39 | IAsyncEnumerable<Token> tokens, 40 | CancellationToken cancellationToken) 41 | => new Token[] { fun(token) }.ToAsyncEnumerable(cancellationToken); 42 | 43 | /* Wraps an index callback as a pipeline function: calls the action with the token index, then yields the token unchanged. */ public static Pipeline.Function ToPipelineFunction(this Action<int> action) 44 | => ( 45 | Token token, 46 | int i, 47 | IAsyncEnumerable<Token> tokens, 48 | CancellationToken cancellationToken) 49 | => 50 | { 51 | action(i); 52 | return new Token[] { token }.ToAsyncEnumerable(cancellationToken); 53 | }; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /LunrCoreTests/TokenTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System.Collections.Generic; 3 | using Xunit; 4 | 5 | namespace LunrCoreTests 6 | { 7 | public class TokenTests 8 | { 9 | [Fact] 10 | public void TokenConvertsToString() 11 | { 12 | var token = new Token("foo"); 13 | Assert.Equal("foo", token); 14 | Assert.Equal("foo", token.ToString()); 15 | Assert.Equal("foo", token.String); 16 | } 17 | 18 | [Fact] 19 | public void CanAttachArbitraryData() 20 | { 21 | var token = new Token("foo", ("length", 3)); 22 | Assert.Equal(3, token.Metadata["length"]); 23 | } 24 | 25 | [Fact] 26 | public void CanUpdateTheTokenValue() 27 | { 28 | var token = new Token("foo"); 29 | 30 | token.Update(s => s.ToUpperInvariant()); 31 | 32 | Assert.Equal("FOO", token); 33 | } 34 | 35 | [Fact] 36 | public void MetadataIsYieldedWhenUpdating() 37 | { 38 | var metadata = new TokenMetadata { { "bar", true } }; 39 | var token = new Token("foo", metadata); 40 | TokenMetadata? 
yieldedMetadata = null; 41 | 42 | token.Update((s, md) => 43 | { 44 | yieldedMetadata = md; 45 | return s; 46 | }); 47 | 48 | Assert.Equal(metadata, yieldedMetadata); 49 | } 50 | 51 | [Fact] 52 | public void CloneClonesValues() 53 | { 54 | var token = new Token("foo", ("bar", true)); 55 | Assert.Equal(token.ToString(), token.Clone().ToString()); 56 | } 57 | 58 | [Fact] 59 | public void CloneClonesMetadata() 60 | { 61 | var token = new Token("foo", ("bar", true)); 62 | Assert.Equal(token.Metadata, token.Clone().Metadata); 63 | } 64 | 65 | [Fact] 66 | public void CloneAndModify() 67 | { 68 | var token = new Token("foo", ("bar", true)); 69 | Token clone = token.Clone(s => s.ToUpperInvariant()); 70 | 71 | Assert.Equal("FOO", clone); 72 | Assert.Equal("foo", token); 73 | Assert.Equal(token.Metadata, clone.Metadata); 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /LunrCoreTests/TokenizerTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Globalization; 5 | using System.Linq; 6 | using Xunit; 7 | 8 | namespace LunrCoreTests 9 | { 10 | public class TokenizerTests // xUnit facts covering Tokenizer.Tokenize: produced token strings and per-token metadata 11 | { 12 | [Fact] 13 | public void SplittingIntoTokens() 14 | { 15 | IEnumerable<string> tokens = new Tokenizer() 16 | .Tokenize("foo bar baz") 17 | .Select(t => t.String); 18 | 19 | Assert.Equal(new[] { "foo", "bar", "baz" }, tokens); 20 | } 21 | 22 | [Fact] 23 | public void DownCasesTokens() 24 | { 25 | IEnumerable<string> tokens = new Tokenizer() 26 | .Tokenize("Foo BAR BAZ") 27 | .Select(t => t.String); 28 | 29 | Assert.Equal(new[] { "foo", "bar", "baz" }, tokens); 30 | } 31 | 32 | [Fact] 33 | public void ArrayOfStrings() 34 | { 35 | IEnumerable<string> tokens = new Tokenizer() 36 | .Tokenize(new[] { "foo", "bar", "baz" }) 37 | .Select(t => t.String); 38 | 39 | Assert.Equal(new[] { "foo", "bar", "baz" }, tokens); 40 | } 41 | 42 | [Fact] 43 | public
void NullIsConvertedToEmptyString() 44 | { 45 | IEnumerable<string> tokens = new Tokenizer() 46 | .Tokenize(new[] { "foo", null, "baz" }!) 47 | .Select(t => t.String); 48 | 49 | Assert.Equal(new[] { "foo", "", "baz" }, tokens); 50 | } 51 | 52 | [Fact] 53 | public void MultipleWhitespaceIsStripped() 54 | { 55 | IEnumerable<string> tokens = new Tokenizer() 56 | .Tokenize(" foo bar baz ") 57 | .Select(t => t.String); 58 | 59 | Assert.Equal(new[] { "foo", "bar", "baz" }, tokens); 60 | } 61 | 62 | [Fact] 63 | public void HandlingNullArguments() 64 | { 65 | Assert.Empty(new Tokenizer().Tokenize("")); 66 | } 67 | 68 | [Fact] 69 | public void ConvertingADateToTokens() 70 | { 71 | var date = new DateTime(2013, 1, 1, 12, 0, 0, DateTimeKind.Utc); 72 | 73 | // setting explicit culture to avoid culture differences on OSes to fail the test 74 | CultureInfo.CurrentCulture = new CultureInfo("en-US"); 75 | 76 | // NOTE: slicing here to prevent asserting on parts 77 | // of the date that might be affected by the timezone 78 | // the test is running in. 
79 | IEnumerable<string> tokenizedDateSlice = new Tokenizer() 80 | .Tokenize(date) 81 | .Take(4) 82 | .Select(t => t.ToString()); 83 | 84 | Assert.Equal(new[] { "tue", "jan", "01", "2013" }, tokenizedDateSlice); 85 | } 86 | 87 | [Fact] 88 | public void ConvertingANumberToTokens() 89 | { 90 | Assert.Equal("41", new Tokenizer().Tokenize(41).First().String); 91 | } 92 | 93 | [Fact] 94 | public void ConvertingABooleanToTokens() 95 | { 96 | Assert.Equal("false", new Tokenizer().Tokenize(false).First().String); 97 | Assert.Equal("true", new Tokenizer().Tokenize(true).First().String); 98 | } 99 | 100 | [Fact] 101 | public void ConvertingAnObjectToTokens() 102 | { 103 | Assert.Equal( 104 | new[] { "custom", "object" }, 105 | new Tokenizer() 106 | .Tokenize(new CustomTestObject()) 107 | .Select(t => t.ToString())); 108 | } 109 | 110 | private class CustomTestObject 111 | { 112 | public override string ToString() => "custom object"; 113 | } 114 | 115 | [Fact] 116 | public void SplitsStringsWithHyphens() 117 | { 118 | Assert.Equal( 119 | new[] { "foo", "bar" }, 120 | new Tokenizer() 121 | .Tokenize("foo-bar") 122 | .Select(t => t.ToString())); 123 | } 124 | 125 | [Fact] 126 | public void SplitsStringsWithHyphensAndSpaces() 127 | { 128 | Assert.Equal( 129 | new[] { "foo", "bar" }, 130 | new Tokenizer() 131 | .Tokenize("foo - bar") 132 | .Select(t => t.ToString())); 133 | } 134 | 135 | [Fact] 136 | public void TrackingTheTokenIndex() 137 | { 138 | IEnumerable<Token> tokens = new Tokenizer().Tokenize("foo bar"); 139 | Assert.Equal( 140 | new[] { 0, 1 }, 141 | tokens.Select(t => (int)(t.Metadata["index"]??-1))); 142 | } 143 | 144 | [Fact] 145 | public void TrackingTheTokenPosition() 146 | { 147 | IEnumerable<Token> tokens = new Tokenizer().Tokenize("foo bar"); 148 | Assert.Equal( 149 | new[] { new Slice(0, 3), new Slice(4, 3) }, 150 | tokens.Select(t => (Slice?)t.Metadata["position"])); 151 | } 152 | 153 | [Fact] 154 | public void TrackingTheTokenPositionWithAdditionalLeftHandWhiteSpace() 155 | { 156 
| IEnumerable<Token> tokens = new Tokenizer().Tokenize(" foo bar"); 157 | Assert.Equal( 158 | new[] { new Slice(1, 3), new Slice(5, 3) }, 159 | tokens.Select(t => (Slice?)t.Metadata["position"])); 160 | } 161 | 162 | [Fact] 163 | public void TrackingTheTokenPositionWithAdditionalRightHandWhiteSpace() 164 | { 165 | IEnumerable<Token> tokens = new Tokenizer().Tokenize("foo bar "); 166 | Assert.Equal( 167 | new[] { new Slice(0, 3), new Slice(4, 3) }, 168 | tokens.Select(t => (Slice?)t.Metadata["position"])); 169 | } 170 | 171 | [Fact] 172 | public void ProvidingAdditionalMetadata() 173 | { 174 | IEnumerable<Token> tokens = new Tokenizer().Tokenize( 175 | "foo bar", 176 | new TokenMetadata { { "hurp", "durp" } }); 177 | Assert.Equal( 178 | new[] { "durp", "durp" }, 179 | tokens.Select(t => (string?)t.Metadata["hurp"])); 180 | } 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /LunrCoreTests/TrimmerTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using Xunit; 3 | 4 | namespace LunrCoreTests 5 | { 6 | public class TrimmerTests // xUnit theory checking Trimmer.Trim against the sample inputs below 7 | { 8 | [Theory] 9 | [InlineData("hello", "hello")] // word 10 | [InlineData("hello.", "hello")] // full stop 11 | [InlineData("it's", "it's")] // inner apostrophe 12 | [InlineData("james'", "james")] // trailing apostrophe 13 | [InlineData("stop!'", "stop")] // exclamation mark 14 | [InlineData("first,'", "first")] // comma 15 | [InlineData("[tag]'", "tag")] // brackets 16 | public void CheckTrim(string str, string expected) 17 | { 18 | Assert.Equal(expected, new Trimmer().Trim(str)); 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /LunrCoreTests/VectorTests.cs: -------------------------------------------------------------------------------- 1 | using Lunr; 2 | using System; 3 | using System.Linq; 4 | using Xunit; 5 | 6 | namespace LunrCoreTests 7 | { 8 | public class VectorTests // xUnit facts covering Vector magnitude, dot product, similarity, Insert/Upsert and PositionForIndex 9 | { 10 | 
[Fact] 11 | public void CalculatesMagnitudeOfAVector() 12 | { 13 | Vector vector = VectorFrom(4, 5, 6); 14 | Assert.Equal(Math.Sqrt(77), vector.Magnitude); 15 | } 16 | 17 | [Fact] 18 | public void CalculatesDotProductOfTwoVectors() 19 | { 20 | Vector v1 = VectorFrom(1, 3, -5), 21 | v2 = VectorFrom(4, -2, -1); 22 | 23 | Assert.Equal(3, v1.Dot(v2)); 24 | } 25 | 26 | [Fact] 27 | public void CalculatesTheSimilarityBetweenTwoVectors() 28 | { 29 | Vector v1 = VectorFrom(1, 3, -5), 30 | v2 = VectorFrom(4, -2, -1); 31 | 32 | Assert.InRange(v1.Similarity(v2), 0.49, 0.51); 33 | } 34 | 35 | [Fact] 36 | public void EmptyVectorSimilarityIsZero() 37 | { 38 | Vector empty = new Vector(), 39 | v1 = VectorFrom(1); 40 | 41 | Assert.Equal(0, empty.Similarity(v1)); 42 | Assert.Equal(0, v1.Similarity(empty)); 43 | } 44 | 45 | [Fact] 46 | public void NonOverlappingVectorsAreNotSimilar() 47 | { 48 | var v1 = new Vector((1, 1)); 49 | var v2 = new Vector((2, 1)); 50 | 51 | Assert.Equal(0, v1.Similarity(v2)); 52 | Assert.Equal(0, v2.Similarity(v1)); 53 | } 54 | 55 | [Fact] 56 | public void InsertInvalidatesMagnitudeCache() 57 | { 58 | Vector vector = VectorFrom(4, 5, 6); 59 | 60 | Assert.Equal(Math.Sqrt(77), vector.Magnitude); 61 | 62 | vector.Insert(3, 7); 63 | 64 | Assert.Equal(Math.Sqrt(126), vector.Magnitude); 65 | } 66 | 67 | [Fact] 68 | public void InsertKeepsItemsInTheIndexSpecifiedOrder() 69 | { 70 | var vector = new Vector(); 71 | 72 | vector.Insert(2, 4); 73 | vector.Insert(1, 5); 74 | vector.Insert(0, 6); 75 | 76 | Assert.Equal(new[] { 6.0, 5.0, 4.0 }, vector.ToArray()); 77 | } 78 | 79 | [Fact] 80 | public void InsertFailsWhenDuplicateEntry() 81 | { 82 | Vector vector = VectorFrom(4, 5, 6); 83 | Assert.Throws<InvalidOperationException>(() => // NOTE(review): index 0 is already populated by VectorFrom; exception type restored from context - confirm against Vector.Insert 84 | { 85 | vector.Insert(0, 44); 86 | }); 87 | } 88 | 89 | [Fact] 90 | public void UpsertInvalidatesMagnitudeCache() 91 | { 92 | Vector vector = VectorFrom(4, 5, 6); 93 | 94 | Assert.Equal(Math.Sqrt(77), vector.Magnitude); 95 | 96 | vector.Upsert(3, 7); 97 | 98 | 
Assert.Equal(Math.Sqrt(126), vector.Magnitude); 99 | } 100 | 101 | [Fact] 102 | public void UpsertKeepsItemsInTheIndexSpecifiedOrder() 103 | { 104 | var vector = new Vector(); 105 | 106 | vector.Upsert(2, 4); 107 | vector.Upsert(1, 5); 108 | vector.Upsert(0, 6); 109 | 110 | Assert.Equal(new[] { 6.0, 5.0, 4.0 }, vector.ToArray()); 111 | } 112 | 113 | [Fact] 114 | public void UpsertCallsFnForValueOnDuplicate() 115 | { 116 | Vector vector = VectorFrom(4, 5, 6); 117 | vector.Upsert(0, 4, (current, passed) => current + passed); 118 | Assert.Equal(new[] { 8.0, 5.0, 6.0 }, vector.ToArray()); 119 | } 120 | 121 | [Fact] 122 | public void PositionForIndex() 123 | { 124 | var vector = new Vector( 125 | (1, 'a'), 126 | (2, 'b'), 127 | (4, 'c'), 128 | (7, 'd'), 129 | (11, 'e')); 130 | 131 | // At the beginning 132 | Assert.Equal(0, vector.PositionForIndex(0)); 133 | // At the end 134 | Assert.Equal(5, vector.PositionForIndex(20)); 135 | // Consecutive 136 | Assert.Equal(2, vector.PositionForIndex(3)); 137 | // Non-consecutive gap after 138 | Assert.Equal(3, vector.PositionForIndex(5)); 139 | // Non-consecutive gap before 140 | Assert.Equal(3, vector.PositionForIndex(6)); 141 | // Non-consecutive gaps before and after 142 | Assert.Equal(4, vector.PositionForIndex(9)); 143 | // Duplicate at the beginning 144 | Assert.Equal(0, vector.PositionForIndex(1)); 145 | // Duplicate at the end 146 | Assert.Equal(4, vector.PositionForIndex(11)); 147 | // Duplicate consecutive 148 | Assert.Equal(2, vector.PositionForIndex(4)); 149 | } 150 | 151 | private static Vector VectorFrom(params double[] elements) 152 | => new Vector(elements.Select((el, i) => (i, el)).ToArray()); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /LunrCoreTests/fixtures/stemming_vocab.json: -------------------------------------------------------------------------------- 1 | 
{"consign":"consign","consigned":"consign","consigning":"consign","consignment":"consign","consist":"consist","consisted":"consist","consistency":"consist","consistent":"consist","consistently":"consist","consisting":"consist","consists":"consist","consolation":"consol","consolations":"consol","consolatory":"consolatori","console":"consol","consoled":"consol","consoles":"consol","consolidate":"consolid","consolidated":"consolid","consolidating":"consolid","consoling":"consol","consols":"consol","consonant":"conson","consort":"consort","consorted":"consort","consorting":"consort","conspicuous":"conspicu","conspicuously":"conspicu","conspiracy":"conspiraci","conspirator":"conspir","conspirators":"conspir","conspire":"conspir","conspired":"conspir","conspiring":"conspir","constable":"constabl","constables":"constabl","constance":"constanc","constancy":"constanc","constant":"constant","knack":"knack","knackeries":"knackeri","knacks":"knack","knag":"knag","knave":"knave","knaves":"knave","knavish":"knavish","kneaded":"knead","kneading":"knead","knee":"knee","kneel":"kneel","kneeled":"kneel","kneeling":"kneel","kneels":"kneel","knees":"knee","knell":"knell","knelt":"knelt","knew":"knew","knick":"knick","knif":"knif","knife":"knife","knight":"knight","knights":"knight","knit":"knit","knits":"knit","knitted":"knit","knitting":"knit","knives":"knive","knob":"knob","knobs":"knob","knock":"knock","knocked":"knock","knocker":"knocker","knockers":"knocker","knocking":"knock","knocks":"knock","knopp":"knopp","knot":"knot","knots":"knot","lay":"lay","try":"tri"} 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lunr-core 2 | 3 | Lunr-core is a small, full text search library for use in small applications. 4 | 5 | It's a port of [lunr.js](https://lunrjs.com/guides/getting_started.html) to .NET Core. 
6 | Lunr is a bit like Solr, but much smaller and not as bright. 7 | 8 | ![.NET Core](https://github.com/bleroy/lunr-core/workflows/.NET%20Core/badge.svg) 9 | 10 | ## TODO / up for grabs 11 | 12 | * Multilingual support (lunr has optional support that remains to be ported) 13 | * Documentation (adapted from [lunr docs](https://lunrjs.com/guides/getting_started.html)) 14 | 15 | ## Example 16 | 17 | A very simple search index can be created using the following: 18 | 19 | ```csharp 20 | var index = await Index.Build(async builder => 21 | { 22 | builder 23 | .AddField("title") 24 | .AddField("body"); 25 | 26 | await builder.Add(new Document 27 | { 28 | { "title", "Twelfth-Night" }, 29 | { "body", "If music be the food of love, play on: Give me excess of it…" }, 30 | { "author", "William Shakespeare" }, 31 | { "id", "1" }, 32 | }); 33 | }); 34 | ``` 35 | 36 | Then searching is as simple as: 37 | 38 | ```csharp 39 | await foreach (Result result in index.Search("love")) 40 | { 41 | // do something with that result 42 | } 43 | ``` 44 | 45 | This returns a list of matching documents with a [score](https://lunrjs.com/guides/searching.html#scoring) of how closely they match the search query, as well as any associated metadata about the match: 46 | 47 | ```csharp 48 | new List<Result> 49 | { 50 | new Result( 51 | documentReference: "1", 52 | score: 0.3535533905932737, 53 | matchData: new MatchData( 54 | term: "love", 55 | field: "body" 56 | ) 57 | ) 58 | } 59 | ``` 60 | 61 | 62 | 63 | ## Description 64 | 65 | Lunr-core is a small, full-text search library for use in small applications. 66 | It indexes documents and provides a simple search interface for retrieving documents that best match text queries. 67 | It is 100% compatible with [lunr.js](https://lunrjs.com/guides/getting_started.html), meaning that an index file prepared on the server with lunr-core can be used on the client using lunr.js. 
68 | 69 | ## Why 70 | 71 | Lunr-core is suitable for small applications that require a simple search engine but without the overhead of a full-scale search engine such as Lucene. 72 | Its compatibility with lunr.js also opens up some interesting client-side search scenarios. 73 | 74 | 80 | 81 | ## Features 82 | 83 | * Soon: Full text search support for 14 languages 84 | * Boost terms at query time or boost entire documents at index time 85 | * Scope searches to specific fields 86 | * Fuzzy term matching with wildcards or edit distance 87 | * No runtime dependencies beyond SDK, BCL AsyncInterfaces and System.Text.Json 88 | 89 | 94 | 95 | ## Credits 96 | 97 | * Original code by [Oliver Nightingale](https://github.com/olivernn) and contributors, ported to .NET Core by [Bertrand Le Roy](https://github.com/bleroy). 98 | * Icon adapted from https://commons.wikimedia.org/wiki/File:Internal_Structure_of_the_Moon.JPG by Iqbal Mahmud under Creative Commons Attribution Share Alike 4.0 International. 99 | * Perf tests use a [word list by Sindre Sorhus](https://github.com/sindresorhus/word-list). 100 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # lunr-core 2 | A port of [lunr.js](https://lunrjs.com/guides/getting_started.html) to .NET Core. 3 | Lunr is a bit like Solr, but much smaller and not as bright. 
4 | 5 | ![.NET Core](https://github.com/bleroy/lunr-core/workflows/.NET%20Core/badge.svg) 6 | 7 | ## TODO / up for grabs 8 | 9 | * Multilingual support (lunr has optional support that remains to be ported) 10 | * Documentation (adapted from [lunr docs](https://lunrjs.com/guides/getting_started.html)) 11 | 12 | ## Example 13 | 14 | A very simple search index can be created using the following: 15 | 16 | ```csharp 17 | var index = await Index.Build(async builder => 18 | { 19 | builder 20 | .AddField("title") 21 | .AddField("body"); 22 | 23 | await builder.Add(new Document 24 | { 25 | { "title", "Twelfth-Night" }, 26 | { "body", "If music be the food of love, play on: Give me excess of it…" }, 27 | { "author", "William Shakespeare" }, 28 | { "id", "1" }, 29 | }); 30 | }); 31 | ``` 32 | 33 | Then searching is as simple as: 34 | 35 | ```csharp 36 | await foreach (Result result in index.Search("love")) 37 | { 38 | // do something with that result 39 | } 40 | ``` 41 | 42 | This returns a list of matching documents with a [score](https://lunrjs.com/guides/searching.html#scoring) of how closely they match the search query, as well as any associated metadata about the match: 43 | 44 | ```csharp 45 | new List<Result> 46 | { 47 | new Result( 48 | documentReference: "1", 49 | score: 0.3535533905932737, 50 | matchData: new MatchData( 51 | term: "love", 52 | field: "body" 53 | ) 54 | ) 55 | } 56 | ``` 57 | 58 | 59 | 60 | ## Description 61 | 62 | Lunr-core is a small, full-text search library for use in small applications. 63 | It indexes documents and provides a simple search interface for retrieving documents that best match text queries. 64 | It is 100% compatible with [lunr.js](https://lunrjs.com/guides/getting_started.html), meaning that an index file prepared on the server with lunr-core can be used on the client using lunr.js. 
65 | 66 | ## Why 67 | 68 | Lunr-core is suitable for small applications that require a simple search engine but without the overhead of a full-scale search engine such as Lucene. 69 | Its compatibility with lunr.js also opens up some interesting client-side search scenarios. 70 | 71 | 77 | 78 | ## Features 79 | 80 | * Soon: Full text search support for 14 languages 81 | * Boost terms at query time or boost entire documents at index time 82 | * Scope searches to specific fields 83 | * Fuzzy term matching with wildcards or edit distance 84 | * No runtime dependencies beyond SDK, BCL AsyncInterfaces and System.Text.Json 85 | 86 | 91 | 92 | ## Credits 93 | 94 | * Original code by [Oliver Nightingale](https://github.com/olivernn) and contributors, ported to .NET Core by [Bertrand Le Roy](https://github.com/bleroy). 95 | * Icon adapted from https://commons.wikimedia.org/wiki/File:Internal_Structure_of_the_Moon.JPG by Iqbal Mahmud under Creative Commons Attribution Share Alike 4.0 International. 96 | * Perf tests use a [word list by Sindre Sorhus](https://github.com/sindresorhus/word-list). 97 | -------------------------------------------------------------------------------- /docs/getting-started/README.md: -------------------------------------------------------------------------------- 1 | # Getting started with lunr-core 2 | 3 | In this article, we are going to see ... 
-------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs>=1.5.1 2 | mkdocs-material>=9.1.21 3 | mkdocs-git-authors-plugin>=0.7.2 4 | mkdocs-git-revision-date-localized-plugin>=1.2.0 5 | pymdown-extensions>=10.1.0 6 | mkdocs-exclude>=1.0.2 7 | mdx_truly_sane_lists>=1.2 -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: lunr-core 2 | theme: 3 | name: material 4 | 5 | extra: 6 | social: 7 | - icon: fontawesome/brands/github-alt 8 | link: https://github.com/bleroy/lunr-core 9 | 10 | # Repository 11 | repo_name: bleroy/lunr-core 12 | repo_url: https://github.com/bleroy/lunr-core 13 | edit_uri: edit/main/ 14 | 15 | # Extensions 16 | markdown_extensions: 17 | - markdown.extensions.admonition 18 | - markdown.extensions.codehilite 19 | - markdown.extensions.def_list 20 | - markdown.extensions.footnotes 21 | - markdown.extensions.meta 22 | - pymdownx.b64 23 | - pymdownx.caret 24 | - pymdownx.details 25 | - pymdownx.emoji 26 | - pymdownx.magiclink 27 | - pymdownx.smartsymbols 28 | - pymdownx.snippets: 29 | check_paths: true 30 | - pymdownx.superfences 31 | - pymdownx.tabbed: 32 | alternate_style: false 33 | - pymdownx.tasklist 34 | - pymdownx.tilde 35 | - toc: 36 | permalink: true 37 | 38 | plugins: 39 | - search 40 | - git-authors 41 | - git-revision-date-localized 42 | 43 | # Page tree 44 | nav: 45 | - About: README.md 46 | - Getting started: 47 | - Introduction: getting-started/README.md 48 | --------------------------------------------------------------------------------