├── .github
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
└── workflows
│ ├── dotnet-core.yml
│ └── publish-docs.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── LICENSE
├── LunrCore.sln
├── LunrCore.sln.DotSettings
├── LunrCore
├── Assets
│ └── LunrCore.png
├── AsyncEnumerableExtensions.cs
├── Builder.cs
├── Clause.cs
├── Document.cs
├── EnglishStemmer.cs
├── EnglishStopWordFilter.cs
├── Extensions
│ ├── DictionaryExtensions.cs
│ └── StringBuilderExtensions.cs
├── Field.cs
├── FieldMatchMetadata.cs
├── FieldMatches.cs
├── FieldReference.cs
├── FieldTermFrequencies.cs
├── ITokenizer.cs
├── Index.cs
├── InvertedIndex.cs
├── InvertedIndexEntry.cs
├── Lexeme.cs
├── LexemeType.cs
├── LunrCore.csproj
├── MatchData.cs
├── Pipeline.cs
├── PipelineFunctionRegistry.cs
├── Query.cs
├── QueryLexer.cs
├── QueryParser.cs
├── QueryParserException.cs
├── QueryPresence.cs
├── QueryString.cs
├── QueryWildcard.cs
├── Result.cs
├── Serialization
│ ├── IndexJsonConverter.cs
│ ├── InvertedIndexEntryJsonConverter.cs
│ ├── InvertedIndexJsonConverter.cs
│ ├── JsonConverterExtensions.cs
│ ├── SliceConverter.cs
│ └── VectorJsonConverter.cs
├── Set.cs
├── Slice.cs
├── StemmerBase.cs
├── StopWordFilterBase.cs
├── TermFrequencies.cs
├── Token.cs
├── TokenMetadata.cs
├── TokenSet.cs
├── TokenSetIdProvider.cs
├── TokenizeDelegate.cs
├── Tokenizer.cs
├── Trimmer.cs
├── Util.cs
└── Vector.cs
├── LunrCoreLmdb
├── Assets
│ └── LunrCoreLmdb.png
├── DelegatedIndex.cs
├── DeserializeContext.cs
├── IReadOnlyIndex.cs
├── KeyBuilder.cs
├── LmdbBuilder.cs
├── LmdbIndex.cs
├── LunrCoreLmdb.csproj
├── SerializationExtensions.cs
└── SerializeContext.cs
├── LunrCoreLmdbPerf
├── BlockCopyVsLinqConcat.cs
├── DelegatedIndexExtensions.cs
├── InterpolateVsAdd.cs
├── LunrCoreLmdbPerf.csproj
├── Program.cs
├── SearchBenchmarkBase.cs
├── SearchBenchmarkLmdb.cs
├── SearchBenchmarkWrappedIndex.cs
└── SpanVsGetPinnableReference.cs
├── LunrCoreLmdbTests
├── LmdbBuilderTests.cs
├── LunrCoreLmdbTests.csproj
├── SearchTests.cs
├── SerializationTests.cs
├── TempDirectory.cs
└── TempDirectoryCollection.cs
├── LunrCorePerf
├── BuilderBenchmark.cs
├── LunrCorePerf.csproj
├── PipelineBenchmark.cs
├── Program.cs
├── QueryParserBenchmark.cs
├── SearchBenchmark.cs
├── StemmerBenchmark.cs
├── TokenSetBenchmark.cs
├── TokenizerBenchmark.cs
├── VectorBenchmark.cs
├── Words.cs
└── fixtures
│ └── words.txt
├── LunrCoreTests
├── BuilderTests.cs
├── EnglishStopWordFilterTests.cs
├── FieldReferenceTests.cs
├── LunrCoreTests.csproj
├── MatchDataTests.cs
├── MultipleMandatoryFieldsTest.cs
├── PipelineTests.cs
├── QueryLexerTests.cs
├── QueryParserTests.cs
├── QueryTests.cs
├── SearchTests.cs
├── SerializationTest.cs
├── SetTests.cs
├── StemmerTests.cs
├── StopWordFilterTests.cs
├── TestHelpers.cs
├── TokenSetTests.cs
├── TokenTests.cs
├── TokenizerTests.cs
├── TrimmerTests.cs
├── VectorTests.cs
└── fixtures
│ └── stemming_vocab.json
├── README.md
├── docs
├── README.md
├── getting-started
│ └── README.md
└── requirements.txt
└── mkdocs.yml
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [bleroy]
4 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: '[BUG] '
5 | labels: ''
6 | assignees: bleroy
7 |
8 | ---
9 |
10 | **To Reproduce**
11 | Steps to reproduce the behavior.
12 |
13 | **Expected behavior**
14 | A clear and concise description of what you expected to happen.
15 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: '[FEATURE] '
5 | labels: ''
6 | assignees: bleroy
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Additional context**
17 | Add any other context or screenshots about the feature request here.
18 |
--------------------------------------------------------------------------------
/.github/workflows/dotnet-core.yml:
--------------------------------------------------------------------------------
1 | name: .NET Core
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | build:
11 |
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v2
16 | - name: Setup .NET Core
17 | uses: actions/setup-dotnet@v1
18 | with:
19 | dotnet-version: 6.0.101
20 | - name: Checkout lmdb
21 | uses: actions/checkout@v2
22 | with:
23 | repository: LMDB/lmdb
24 | ref: mdb.master
25 | path: lmdb
26 | - name: Build lmdb
27 | working-directory: ${{ runner.workspace }}/lunr-core/lmdb/libraries/liblmdb/
28 | run: make
29 | - name: Install dependencies
30 | run: dotnet restore
31 | - name: Build
32 | run: dotnet build --configuration Release --no-restore
33 | - name: Test
34 | run: LD_LIBRARY_PATH=${{ runner.workspace }}/lunr-core/lmdb/libraries/liblmdb/:$LD_LIBRARY_PATH dotnet test --no-restore --verbosity normal
35 |
--------------------------------------------------------------------------------
/.github/workflows/publish-docs.yml:
--------------------------------------------------------------------------------
1 | name: Publish docs via GitHub Pages
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | paths:
8 | # Only rebuild website when docs have changed
9 | - 'docs/**'
10 |
11 | jobs:
12 | build:
13 | name: Deploy docs
14 | runs-on: ubuntu-latest
15 | steps:
16 | - name: Checkout master
17 | uses: actions/checkout@v3
18 |
19 | - name: Set up Python 3.11
20 | uses: actions/setup-python@v4
21 | with:
22 | python-version: 3.11
23 |
24 | - name: Install dependencies
25 | run: |
26 | python -m pip install --upgrade pip
27 | pip install -r docs/requirements.txt
28 |
29 | - name: Deploy docs
30 | run: mkdocs gh-deploy --force
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vs
2 | bin
3 | obj
4 | /*.sln.DotSettings.*
5 | .idea
6 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at bertrandleroy@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Bertrand Le Roy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LunrCore.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.30223.230
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCore", "LunrCore\LunrCore.csproj", "{35E0C04E-9508-408D-B1EB-61402BEAECBA}"
7 | EndProject
8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCoreTests", "LunrCoreTests\LunrCoreTests.csproj", "{7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}"
9 | EndProject
10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Metadata", "Metadata", "{333885CB-3ADF-4462-8910-27A16B6C5F55}"
11 | ProjectSection(SolutionItems) = preProject
12 | CODE_OF_CONDUCT.md = CODE_OF_CONDUCT.md
13 | LICENSE = LICENSE
14 | README.md = README.md
15 | EndProjectSection
16 | EndProject
17 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCorePerf", "LunrCorePerf\LunrCorePerf.csproj", "{863FDAF5-53CD-4D58-911C-B55AF19148BA}"
18 | EndProject
19 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCoreLmdb", "LunrCoreLmdb\LunrCoreLmdb.csproj", "{E5799A1F-31B5-4E14-8C49-18CE73793FEC}"
20 | EndProject
21 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LunrCoreLmdbTests", "LunrCoreLmdbTests\LunrCoreLmdbTests.csproj", "{2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}"
22 | EndProject
23 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LunrCoreLmdbPerf", "LunrCoreLmdbPerf\LunrCoreLmdbPerf.csproj", "{41BB51FD-462C-4AAB-9B4D-127FD784B566}"
24 | EndProject
25 | Global
26 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
27 | Debug|Any CPU = Debug|Any CPU
28 | Release|Any CPU = Release|Any CPU
29 | EndGlobalSection
30 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
31 | {35E0C04E-9508-408D-B1EB-61402BEAECBA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
32 | {35E0C04E-9508-408D-B1EB-61402BEAECBA}.Debug|Any CPU.Build.0 = Debug|Any CPU
33 | {35E0C04E-9508-408D-B1EB-61402BEAECBA}.Release|Any CPU.ActiveCfg = Release|Any CPU
34 | {35E0C04E-9508-408D-B1EB-61402BEAECBA}.Release|Any CPU.Build.0 = Release|Any CPU
35 | {7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
36 | {7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}.Debug|Any CPU.Build.0 = Debug|Any CPU
37 | {7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}.Release|Any CPU.ActiveCfg = Release|Any CPU
38 | {7BA2CEE1-BE48-48EF-910C-A78A24F6D9C5}.Release|Any CPU.Build.0 = Release|Any CPU
39 | {863FDAF5-53CD-4D58-911C-B55AF19148BA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
40 | {863FDAF5-53CD-4D58-911C-B55AF19148BA}.Debug|Any CPU.Build.0 = Debug|Any CPU
41 | {863FDAF5-53CD-4D58-911C-B55AF19148BA}.Release|Any CPU.ActiveCfg = Release|Any CPU
42 | {863FDAF5-53CD-4D58-911C-B55AF19148BA}.Release|Any CPU.Build.0 = Release|Any CPU
43 | {E5799A1F-31B5-4E14-8C49-18CE73793FEC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
44 | {E5799A1F-31B5-4E14-8C49-18CE73793FEC}.Debug|Any CPU.Build.0 = Debug|Any CPU
45 | {E5799A1F-31B5-4E14-8C49-18CE73793FEC}.Release|Any CPU.ActiveCfg = Release|Any CPU
46 | {E5799A1F-31B5-4E14-8C49-18CE73793FEC}.Release|Any CPU.Build.0 = Release|Any CPU
47 | {2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
48 | {2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}.Debug|Any CPU.Build.0 = Debug|Any CPU
49 | {2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}.Release|Any CPU.ActiveCfg = Release|Any CPU
50 | {2EF25270-1D0D-4450-ADBB-BCFD08FB9BB6}.Release|Any CPU.Build.0 = Release|Any CPU
51 | {41BB51FD-462C-4AAB-9B4D-127FD784B566}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
52 | {41BB51FD-462C-4AAB-9B4D-127FD784B566}.Debug|Any CPU.Build.0 = Debug|Any CPU
53 | {41BB51FD-462C-4AAB-9B4D-127FD784B566}.Release|Any CPU.ActiveCfg = Release|Any CPU
54 | {41BB51FD-462C-4AAB-9B4D-127FD784B566}.Release|Any CPU.Build.0 = Release|Any CPU
55 | EndGlobalSection
56 | GlobalSection(SolutionProperties) = preSolution
57 | HideSolutionNode = FALSE
58 | EndGlobalSection
59 | GlobalSection(ExtensibilityGlobals) = postSolution
60 | SolutionGuid = {684039DE-9AA9-47A8-B12B-354F9147B73E}
61 | EndGlobalSection
62 | EndGlobal
63 |
--------------------------------------------------------------------------------
/LunrCore.sln.DotSettings:
--------------------------------------------------------------------------------
1 |
2 | True
3 | True
4 | True
5 | True
--------------------------------------------------------------------------------
/LunrCore/Assets/LunrCore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bleroy/lunr-core/8ba2fa163a8ccd19efa95428b77a028b6ce6e183/LunrCore/Assets/LunrCore.png
--------------------------------------------------------------------------------
/LunrCore/AsyncEnumerableExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Runtime.CompilerServices;
4 | using System.Threading;
5 | using System.Threading.Tasks;
6 |
7 | namespace Lunr
8 | {
9 | /// <summary>
10 | /// A set of simple extensions to work with asynchronous enumerables
11 | /// without importing System.Linq.Async.
12 | /// </summary>
13 | public static class AsyncEnumerableExtensions
14 | {
15 | /// <summary>
16 | /// Applies the provided selector on each item in the source enumeration.
17 | /// </summary>
18 | /// <param name="source">The source enumeration.</param>
19 | /// <param name="selector">The selector to apply on each item in the enumeration.</param>
20 | /// <param name="cancellationToken">A cancellation token; it is also handed to the source enumeration.</param>
21 | /// <returns>The async enumeration of results from applying the selector.</returns>
22 | public static async IAsyncEnumerable Select(
23 | this IAsyncEnumerable source,
24 | Func selector,
25 | [EnumeratorCancellation] CancellationToken cancellationToken)
26 | {
27 | await foreach (TSource sourceItem in source.WithCancellation(cancellationToken))
28 | {
29 | if (cancellationToken.IsCancellationRequested) yield break; // end the sequence quietly rather than throwing from this method
30 | yield return selector(sourceItem);
31 | }
32 | }
33 |
34 | /// <summary>
35 | /// Creates an async enumerable from a regular enumerable.
36 | /// </summary>
37 | /// <param name="source">The enumerable.</param>
38 | /// <param name="cancellationToken">A cancellation token, polled before each item is yielded.</param>
39 | /// <returns>The async enumerable.</returns>
40 | public static async IAsyncEnumerable ToAsyncEnumerable(
41 | this IEnumerable source,
42 | [EnumeratorCancellation] CancellationToken cancellationToken)
43 | {
44 | foreach (T item in source)
45 | {
46 | if (cancellationToken.IsCancellationRequested) yield break; // end the sequence quietly rather than throwing
47 | yield return await new ValueTask(item).ConfigureAwait(false); // the ValueTask wraps an already-available item
48 | }
49 | }
50 |
51 | /// <summary>
52 | /// Builds a list from an async enumerable.
53 | /// This enumerates the whole thing, so use with caution,
54 | /// there's probably a reason why that was async enumerable.
55 | /// </summary>
56 | /// <param name="source">The async enumerable.</param>
57 | /// <param name="cancellationToken">A cancellation token; when omitted, enumeration always runs to the end.</param>
58 | /// <returns>The list of enumerated items; if cancellation is requested, the items collected so far are returned.</returns>
59 | public static async ValueTask> ToList(
60 | this IAsyncEnumerable source,
61 | CancellationToken? cancellationToken = null)
62 | {
63 | var result = new List();
64 | await foreach (T item in source) // note: the token is only polled below, it is not passed on to the source
65 | {
66 | if (cancellationToken?.IsCancellationRequested ?? false)
67 | {
68 | return result; // partial list: the current item and everything after it are dropped
69 | }
70 | result.Add(item);
71 | }
72 | return result;
73 | }
74 |
75 | /// <summary>
76 | /// Tests if an async enumerable has any elements satisfying a condition.
77 | /// </summary>
78 | /// <param name="source">The enumerable.</param>
79 | /// <param name="predicate">
80 | /// An optional predicate that an element of the enumerable must satisfy.
81 | /// If this is not provided, any element will do.
82 | /// </param>
83 | /// <returns>True if any element satisfies the condition.</returns>
84 | public static async ValueTask Any(
85 | this IAsyncEnumerable source,
86 | Func? predicate = null)
87 | {
88 | await foreach(T item in source)
89 | {
90 | if (predicate is null || predicate(item))
91 | {
92 | return true; // stop at the first match; the rest of the source is not enumerated
93 | }
94 | }
95 | return false;
96 | }
97 |
98 | /// <summary>
99 | /// An empty async enumerable.
100 | /// </summary>
101 | /// <returns>An empty async enumerable of the specified type.</returns>
102 | public static async IAsyncEnumerable Empty()
103 | {
104 | await Task.CompletedTask.ConfigureAwait(false); // no-op await on a completed task; presumably only here so the async iterator contains an await
105 | yield break;
106 | }
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/LunrCore/Clause.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Diagnostics;
4 | using System.Linq;
5 |
6 | namespace Lunr
7 | {
8 | /// <summary>
9 | /// A single clause in a `Query` contains a term and details on how to
10 | /// match that term against an `Index`.
11 | /// </summary>
12 | [DebuggerDisplay("{" + nameof(DebuggerDisplay) + ",nq}")]
13 | public sealed class Clause
14 | {
15 | public static readonly Clause Empty = new Clause();
16 |
17 | /// <summary>
18 | /// Builds a new clause, adding the wildcards requested by <paramref name="wildcard"/> to the term unless they are already present.
19 | /// </summary>
20 | /// <param name="term">The term to search for.</param>
21 | /// <param name="boost">Any boost that should be applied when matching this clause.</param>
22 | /// <param name="editDistance">Whether the term should have fuzzy matching applied, and how fuzzy the match should be.</param>
23 | /// <param name="usePipeline">Whether the term should be passed through the search pipeline.</param>
24 | /// <param name="wildcard">Whether the term should have wildcards appended or prepended.</param>
25 | /// <param name="presence">The term's presence in any matching documents.</param>
26 | /// <param name="fields">The fields in an index this clause should be matched against.</param>
27 | public Clause(
28 | string term = "",
29 | double boost = 1,
30 | int editDistance = 0,
31 | bool usePipeline = true,
32 | QueryWildcard wildcard = QueryWildcard.None,
33 | QueryPresence presence = QueryPresence.Optional,
34 | IEnumerable<string>? fields = null)
35 | {
36 | Fields = fields ?? Array.Empty<string>();
37 | Boost = boost;
38 | EditDistance = editDistance;
39 | UsePipeline = usePipeline;
40 | Wildcard = wildcard;
41 | Presence = presence;
42 | if ((wildcard & QueryWildcard.Leading) != 0 && (term.Length == 0 || term[0] != Query.Wildcard)) term = "*" + term; // length guard: the default empty term must never be indexed
43 | if ((wildcard & QueryWildcard.Trailing) != 0 && (term.Length == 0 || term[term.Length - 1] != Query.Wildcard)) term += "*"; // checks the already-prefixed term, so an empty term becomes "*" and not "**"
44 | Term = term;
45 | }
46 |
47 | /// <summary>
48 | /// Builds a new clause; same as the other constructor, with the fields given as a parameter array.
49 | /// </summary>
50 | /// <param name="term">The term to search for.</param>
51 | /// <param name="boost">Any boost that should be applied when matching this clause.</param>
52 | /// <param name="editDistance">Whether the term should have fuzzy matching applied, and how fuzzy the match should be.</param>
53 | /// <param name="usePipeline">Whether the term should be passed through the search pipeline.</param>
54 | /// <param name="wildcard">Whether the term should have wildcards appended or prepended.</param>
55 | /// <param name="presence">The term's presence in any matching documents.</param>
56 | /// <param name="fields">The fields in an index this clause should be matched against.</param>
57 | public Clause(
58 | string term = "",
59 | double boost = 1,
60 | int editDistance = 0,
61 | bool usePipeline = true,
62 | QueryWildcard wildcard = QueryWildcard.None,
63 | QueryPresence presence = QueryPresence.Optional,
64 | params string[] fields)
65 | : this(
66 | term,
67 | boost,
68 | editDistance,
69 | usePipeline,
70 | wildcard,
71 | presence,
72 | (IEnumerable<string>)fields) { }
73 |
74 | /// <summary>
75 | /// The fields in an index this clause should be matched against.
76 | /// </summary>
77 | public IEnumerable<string> Fields { get; }
78 |
79 | /// <summary>
80 | /// Any boost that should be applied when matching this clause.
81 | /// </summary>
82 | public double Boost { get; }
83 |
84 | /// <summary>
85 | /// Whether the term should have fuzzy matching applied, and how fuzzy the match should be.
86 | /// </summary>
87 | public int EditDistance { get; }
88 |
89 | /// <summary>
90 | /// Whether the term should be passed through the search pipeline.
91 | /// </summary>
92 | public bool UsePipeline { get; }
93 |
94 | /// <summary>
95 | /// Whether the term should have wildcards appended or prepended.
96 | /// </summary>
97 | public QueryWildcard Wildcard { get; }
98 |
99 | /// <summary>
100 | /// The term's presence in any matching documents.
101 | /// </summary>
102 | public QueryPresence Presence { get; }
103 |
104 | /// <summary>
105 | /// The term to search for, including any wildcard added by the constructor.
106 | /// </summary>
107 | public string Term { get; }
108 |
109 | /// <summary>
110 | /// Creates a clone of this clause with the specified term.
111 | /// </summary>
112 | /// <param name="term">The new term.</param>
113 | /// <returns>the new clause.</returns>
114 | public Clause WithTerm(string term)
115 | => new Clause(term, Boost, EditDistance, UsePipeline, Wildcard, Presence, Fields);
116 |
117 | /// <summary>
118 | /// Creates a clone of this clause with the specified presence.
119 | /// </summary>
120 | /// <param name="presence">The new presence.</param>
121 | /// <returns>the new clause.</returns>
122 | public Clause WithPresence(QueryPresence presence)
123 | => new Clause(Term, Boost, EditDistance, UsePipeline, Wildcard, presence, Fields);
124 |
125 | /// <summary>
126 | /// Creates a clone of this clause with the specified edit distance.
127 | /// </summary>
128 | /// <param name="editDistance">The new edit distance.</param>
129 | /// <returns>the new clause.</returns>
130 | public Clause WithEditDistance(int editDistance)
131 | => new Clause(Term, Boost, editDistance, UsePipeline, Wildcard, Presence, Fields);
132 |
133 | /// <summary>
134 | /// Creates a clone of this clause with the specified boost.
135 | /// </summary>
136 | /// <param name="boost">The new boost.</param>
137 | /// <returns>the new clause.</returns>
138 | public Clause WithBoost(double boost)
139 | => new Clause(Term, boost, EditDistance, UsePipeline, Wildcard, Presence, Fields);
140 |
141 | /// <summary>
142 | /// Creates a clone of this clause with the specified pipeline usage.
143 | /// </summary>
144 | /// <param name="usePipeline">The new pipeline usage.</param>
145 | /// <returns>the new clause.</returns>
146 | public Clause WithUsePipeline(bool usePipeline)
147 | => new Clause(Term, Boost, EditDistance, usePipeline, Wildcard, Presence, Fields);
148 |
149 | /// <summary>
150 | /// Creates a clone of this clause with the specified list of fields appended.
151 | /// </summary>
152 | /// <param name="fields">The list of fields to append.</param>
153 | /// <returns>the new clause.</returns>
154 | public Clause WithFields(IEnumerable<string> fields)
155 | => new Clause(Term, Boost, EditDistance, UsePipeline, Wildcard, Presence, Fields.Concat(fields).ToArray());
156 |
157 | /// <summary>
158 | /// Creates a clone of this clause with the specified list of fields appended.
159 | /// </summary>
160 | /// <param name="fields">The list of fields to append.</param>
161 | /// <returns>the new clause.</returns>
162 | public Clause WithFields(params string[] fields)
163 | => new Clause(Term, Boost, EditDistance, UsePipeline, Wildcard, Presence, Fields.Concat(fields).ToArray());
164 |
165 | private string DebuggerDisplay => (Fields.Any() ? string.Join(", ", Fields) + ":" : "") + // debugger-only summary: fields, presence sign, term, boost, edit distance
166 | (Presence switch { QueryPresence.Required => "+", QueryPresence.Prohibited => "-", _ => "" }) +
167 | ((Wildcard & QueryWildcard.Leading) == 0 ? "" : "*") +
168 | Term +
169 | (Boost == 1 ? "" : "^" + Boost) +
170 | (EditDistance == 0 ? "" : "~" + EditDistance) +
171 | ((Wildcard & QueryWildcard.Trailing) == 0 ? "" : "*") +
172 | (UsePipeline ? " (use pipeline)" : "");
173 | }
174 | }
175 |
--------------------------------------------------------------------------------
/LunrCore/Document.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 |
3 | namespace Lunr
4 | {
5 | public sealed class Document : Dictionary // a dictionary of entries that additionally carries a Boost value
6 | {
7 | public Document() // creates an empty document
8 | { }
9 |
10 | public Document(IDictionary dict) : base(dict) { } // copies the entries of dict via the base Dictionary constructor
11 |
12 | public double Boost { get; set; } = 1; // defaults to 1; presumably a relevance multiplier for this document (confirm against the builder)
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/LunrCore/EnglishStemmer.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 | using System.Globalization;
3 | using System.Text.RegularExpressions;
4 |
5 | namespace Lunr
6 | {
7 | public sealed class EnglishStemmer : StemmerBase // rule-based English suffix stripper; step names and m-measure notation appear to follow the Porter algorithm
8 | {
9 | private static readonly CultureInfo culture = CultureInfo.CreateSpecificCulture("en"); // only used for the first-letter case flips in Stem
10 |
11 | private static readonly Dictionary step2list = new Dictionary // step 2: matched suffix -> replacement
12 | {
13 | { "ational", "ate" },
14 | { "tional", "tion" },
15 | { "enci", "ence" },
16 | { "anci", "ance" },
17 | { "izer", "ize" },
18 | { "bli", "ble" },
19 | { "alli", "al" },
20 | { "entli", "ent" },
21 | { "eli", "e" },
22 | { "ousli", "ous" },
23 | { "ization", "ize" },
24 | { "ation", "ate" },
25 | { "ator", "ate" },
26 | { "alism", "al" },
27 | { "iveness", "ive" },
28 | { "fulness", "ful" },
29 | { "ousness", "ous" },
30 | { "aliti", "al" },
31 | { "iviti", "ive" },
32 | { "biliti", "ble" },
33 | { "logi", "log" }
34 | };
35 |
36 | private static readonly Dictionary step3list = new Dictionary // step 3: matched suffix -> replacement (empty string removes the suffix)
37 | {
38 | { "icate", "ic" },
39 | { "ative", "" },
40 | { "alize", "al" },
41 | { "iciti", "ic" },
42 | { "ical", "ic" },
43 | { "ful", "" },
44 | { "ness", "" }
45 | };
46 |
47 | private const string c = "[^aeiou]"; // consonant
48 | private const string v = "[aeiouy]"; // vowel
49 | private const string C = c + "[^aeiouy]*"; // consonant sequence
50 | private const string V = v + "[aeiou]*"; // vowel sequence
51 | private const string mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
52 | private const string meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
53 | private const string mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
54 | private const string s_v = "^(" + C + ")?" + v; // vowel in stem
55 |
56 | private static readonly Regex re_mgr0 = new Regex(mgr0);
57 | private static readonly Regex re_mgr1 = new Regex(mgr1);
58 | private static readonly Regex re_meq1 = new Regex(meq1);
59 | private static readonly Regex re_s_v = new Regex(s_v);
60 |
61 | private static readonly Regex re_1a = new Regex("^(.+?)(ss|i)es$"); // step 1a: "sses"/"ies" endings
62 | private static readonly Regex re2_1a = new Regex("^(.+?)([^s])s$"); // step 1a: single trailing "s" not preceded by "s"
63 | private static readonly Regex re_1b = new Regex("^(.+?)eed$");
64 | private static readonly Regex re2_1b = new Regex("^(.+?)(ed|ing)$");
65 | private static readonly Regex re_1b_2 = new Regex(".$"); // matches the last character; used with Replace to drop it
66 | private static readonly Regex re2_1b_2 = new Regex("(at|bl|iz)$");
67 | private static readonly Regex re3_1b_2 = new Regex("([^aeiouylsz])\\1$"); // doubled final letter other than a vowel, y, l, s or z
68 | private static readonly Regex re4_1b_2 = new Regex("^" + C + v + "[^aeiouwxy]$");
69 |
70 | private static readonly Regex re_1c = new Regex("^(.+?[^aeiou])y$");
71 | private static readonly Regex re_2 = new Regex("^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$"); // alternatives mirror the keys of step2list
72 |
73 | private static readonly Regex re_3 = new Regex("^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$"); // alternatives mirror the keys of step3list
74 |
75 | private static readonly Regex re_4 = new Regex("^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$");
76 | private static readonly Regex re2_4 = new Regex("^(.+?)(s|t)(ion)$");
77 |
78 | private static readonly Regex re_5 = new Regex("^(.+?)e$");
79 | private static readonly Regex re_5_1 = new Regex("ll$");
80 | private static readonly Regex re3_5 = new Regex("^" + C + v + "[^aeiouwxy]$");
81 |
82 | public override string Stem(string w) // returns the stem of w by applying steps 1a through 5 in order; each step works on the previous step's output
83 | {
84 | if (w.Length < 3) return w; // words shorter than three characters are returned unchanged
85 |
86 | char firstch = w[0];
87 | if (firstch == 'y') // an initial y is upper-cased so the lower-case vowel class does not match it
88 | {
89 | w = char.ToUpper(firstch, culture) + w.Substring(1);
90 | }
91 |
92 | // Step 1a
93 | Regex re = re_1a;
94 | Regex re2 = re2_1a;
95 |
96 | if (re.IsMatch(w)) { w = re.Replace(w, "$1$2"); } // keeps the stem plus "ss"/"i", dropping "es"
97 | else if (re2.IsMatch(w)) { w = re2.Replace(w, "$1$2"); } // drops the trailing "s"
98 |
99 | // Step 1b
100 | re = re_1b;
101 | re2 = re2_1b;
102 | if (re.IsMatch(w))
103 | {
104 | GroupCollection fp = re.Match(w).Groups;
105 | re = re_mgr0;
106 | if (re.IsMatch(fp[1].Value))
107 | {
108 | re = re_1b_2;
109 | w = re.Replace(w, ""); // drops the last character, turning "eed" into "ee"
110 | }
111 | }
112 | else if (re2.IsMatch(w))
113 | {
114 | GroupCollection fp = re2.Match(w).Groups;
115 | string stem = fp[1].Value;
116 | re2 = re_s_v;
117 | if (re2.IsMatch(stem)) // "ed"/"ing" is only removed when the remaining stem matches the vowel-in-stem pattern
118 | {
119 | w = stem;
120 | re2 = re2_1b_2;
121 | Regex re3 = re3_1b_2;
122 | Regex re4 = re4_1b_2;
123 | if (re2.IsMatch(w)) { w += "e"; } // "at"/"bl"/"iz" endings get an "e" appended
124 | else if (re3.IsMatch(w)) { re = re_1b_2; w = re.Replace(w, ""); } // a doubled final letter loses one copy
125 | else if (re4.IsMatch(w)) { w += "e"; } // whole word matching re4_1b_2 gets an "e" appended
126 | }
127 | }
128 |
129 | // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
130 | re = re_1c;
131 | if (re.IsMatch(w))
132 | {
133 | GroupCollection fp = re.Match(w).Groups;
134 | string stem = fp[1].Value;
135 | w = stem + "i";
136 | }
137 |
138 | // Step 2
139 | re = re_2;
140 | if (re.IsMatch(w))
141 | {
142 | GroupCollection fp = re.Match(w).Groups;
143 | string stem = fp[1].Value;
144 | string suffix = fp[2].Value;
145 | re = re_mgr0;
146 | if (re.IsMatch(stem)) // only rewrite when the stem has m>0
147 | {
148 | w = stem + step2list[suffix];
149 | }
150 | }
151 |
152 | // Step 3
153 | re = re_3;
154 | if (re.IsMatch(w))
155 | {
156 | GroupCollection fp = re.Match(w).Groups;
157 | string stem = fp[1].Value;
158 | string suffix = fp[2].Value;
159 | re = re_mgr0;
160 | if (re.IsMatch(stem)) // only rewrite when the stem has m>0
161 | {
162 | w = stem + step3list[suffix];
163 | }
164 | }
165 |
166 | // Step 4
167 | re = re_4;
168 | re2 = re2_4;
169 | if (re.IsMatch(w))
170 | {
171 | GroupCollection fp = re.Match(w).Groups;
172 | string stem = fp[1].Value;
173 | re = re_mgr1;
174 | if (re.IsMatch(stem)) // the suffix is removed only when the stem has m>1
175 | {
176 | w = stem;
177 | }
178 | }
179 | else if (re2.IsMatch(w))
180 | {
181 | GroupCollection fp = re2.Match(w).Groups;
182 | string stem = fp[1].Value + fp[2].Value; // keeps the "s"/"t" that precedes "ion"
183 | re2 = re_mgr1;
184 | if (re2.IsMatch(stem))
185 | {
186 | w = stem;
187 | }
188 | }
189 |
190 | // Step 5
191 | re = re_5;
192 | if (re.IsMatch(w))
193 | {
194 | GroupCollection fp = re.Match(w).Groups;
195 | string stem = fp[1].Value;
196 | re = re_mgr1;
197 | re2 = re_meq1;
198 | Regex re3 = re3_5;
199 | if (re.IsMatch(stem) || (re2.IsMatch(stem) && !(re3.IsMatch(stem)))) // drop the final "e" when m>1, or when m=1 and the stem does not match re3_5
200 | {
201 | w = stem;
202 | }
203 | }
204 |
205 | re = re_5_1;
206 | re2 = re_mgr1;
207 | if (re.IsMatch(w) && re2.IsMatch(w)) // a word ending in "ll" with m>1 loses its last "l"
208 | {
209 | re = re_1b_2;
210 | w = re.Replace(w, "");
211 | }
212 |
213 | // and turn initial Y back to y
214 | if (firstch == 'y')
215 | {
216 | w = char.ToLower(firstch, culture) + w.Substring(1);
217 | }
218 |
219 | return w;
220 | }
221 | }
222 | }
223 |
--------------------------------------------------------------------------------
/LunrCore/EnglishStopWordFilter.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace Lunr
4 | {
/// <summary>
/// Stop-word filter for English. The word list below is handed to the base class
/// through <see cref="StopWords"/>; the set is built with
/// <see cref="StringComparer.OrdinalIgnoreCase"/>.
/// </summary>
public sealed class EnglishStopWordFilter : StopWordFilterBase
{
    // NOTE(review): Set and ISet are declared elsewhere in the project and appear here
    // without type arguments - confirm whether this listing dropped them.
    private static readonly Set s_englishStopWords = new Set(
        new string[]
        {
            "a", "able", "about", "across", "after", "all", "almost", "also",
            "am", "among", "an", "and", "any", "are", "as", "at",
            "be", "because", "been", "but", "by",
            "can", "cannot", "could",
            "dear", "did", "do", "does",
            "either", "else", "ever", "every",
            "for", "from",
            "get", "got",
            "had", "has", "have", "he", "her", "hers", "him", "his", "how", "however",
            "i", "if", "in", "into", "is", "it", "its",
            "just",
            "least", "let", "like", "likely",
            "may", "me", "might", "most", "must", "my",
            "neither", "no", "nor", "not",
            "of", "off", "often", "on", "only", "or", "other", "our", "own",
            "rather",
            "said", "say", "says", "she", "should", "since", "so", "some",
            "than", "that", "the", "their", "them", "then", "there", "these", "they", "this", "tis", "to", "too", "twas",
            "us",
            "wants", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with", "would",
            "yet", "you", "your"
        },
        StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// The shared, statically initialised set of English stop words.
    /// </summary>
    protected override ISet StopWords
    {
        get { return s_englishStopWords; }
    }
}
134 | }
135 |
--------------------------------------------------------------------------------
/LunrCore/Extensions/DictionaryExtensions.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 | using System.Runtime.InteropServices;
3 |
4 | namespace Lunr;
5 |
/// <summary>
/// Helpers for treating a string-keyed dictionary as a table of running totals.
/// </summary>
internal static class DictionaryExtensions
{
    /// <summary>
    /// Adds <paramref name="amount"/> to the integer stored under <paramref name="fieldName"/>.
    /// When the key is not present yet, the entry is created with <paramref name="amount"/> as its value.
    /// </summary>
    /// <param name="dic">The dictionary holding the totals.</param>
    /// <param name="fieldName">The key whose total is updated.</param>
    /// <param name="amount">The amount to add; defaults to 1.</param>
    public static void Increment(this Dictionary<string, int> dic, string fieldName, int amount = 1)
    {
#if NET6_0_OR_GREATER
        // One hash lookup: a missing key is inserted with default(int) and then updated in place.
        ref int value = ref CollectionsMarshal.GetValueRefOrAddDefault(dic, fieldName, out _);

        value += amount;
#else
        // TryGetValue avoids the ContainsKey + indexer-read + indexer-write triple lookup.
        dic[fieldName] = dic.TryGetValue(fieldName, out int current) ? current + amount : amount;
#endif
    }

    /// <summary>
    /// Adds <paramref name="amount"/> to the double stored under <paramref name="fieldName"/>.
    /// When the key is not present yet, the entry is created with <paramref name="amount"/> as its value.
    /// </summary>
    /// <param name="dic">The dictionary holding the totals.</param>
    /// <param name="fieldName">The key whose total is updated.</param>
    /// <param name="amount">The amount to add; defaults to 1.</param>
    public static void Increment(this Dictionary<string, double> dic, string fieldName, double amount = 1)
    {
#if NET6_0_OR_GREATER
        // One hash lookup: a missing key is inserted with default(double) and then updated in place.
        ref double value = ref CollectionsMarshal.GetValueRefOrAddDefault(dic, fieldName, out _);

        value += amount;
#else
        // TryGetValue avoids the ContainsKey + indexer-read + indexer-write triple lookup.
        dic[fieldName] = dic.TryGetValue(fieldName, out double current) ? current + amount : amount;
#endif
    }
}
47 |
--------------------------------------------------------------------------------
/LunrCore/Extensions/StringBuilderExtensions.cs:
--------------------------------------------------------------------------------
1 | #if NETSTANDARD2_0
2 |
3 | using System;
4 | using System.Text;
5 |
6 | namespace Lunr;
7 |
/// <summary>
/// Adds an <c>Append</c> overload taking a span of characters to <see cref="StringBuilder"/>.
/// The surrounding file compiles this type only when NETSTANDARD2_0 is defined.
/// </summary>
internal static class StringBuilderExtensions
{
    /// <summary>
    /// Appends the characters of <paramref name="text"/> to <paramref name="sb"/>.
    /// </summary>
    /// <param name="sb">The builder to append to.</param>
    /// <param name="text">The characters to append.</param>
    public static void Append(this StringBuilder sb, ReadOnlySpan<char> text)
    {
        // The span is materialised as a string so the string overload of Append can be used.
        sb.Append(text.ToString());
    }
}
15 |
16 | #endif
--------------------------------------------------------------------------------
/LunrCore/Field.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Diagnostics;
3 | using System.Threading.Tasks;
4 |
5 | namespace Lunr
6 | {
7 | /// <summary>
8 | /// A field of indeterminate type.
9 | /// </summary>
10 | [DebuggerDisplay("{" + nameof(DebuggerDisplay) + ",nq}")]
11 | public abstract class Field
12 | {
/// <summary>
/// Validates the field name and stores the name and boost.
/// </summary>
/// <param name="name">The field name; it must not be empty and must not contain a '/' character.</param>
/// <param name="boost">The value stored in <see cref="Boost"/>; defaults to 1.</param>
/// <exception cref="InvalidOperationException">
/// <paramref name="name"/> is empty or contains a '/' character.
/// </exception>
protected Field(string name, double boost = 1)
{
    if (name == "")
    {
        throw new InvalidOperationException("Can't create a field with an empty name.");
    }

    bool containsSlash = name.IndexOf('/') >= 0;
    if (containsSlash)
    {
        throw new InvalidOperationException($"Can't create a field with a '/' character in its name \"{name}\".");
    }

    Boost = boost;
    Name = name;
}
21 |
22 | /// <summary>
23 | /// The name of the field.
24 | /// </summary>
25 | public string Name { get; }
26 |
27 | /// <summary>
28 | /// Boost applied to all terms within this field.
29 | /// </summary>
30 | public double Boost { get; }
31 |
32 | public abstract ValueTask