├── WebSearchDemo
├── Database
│ ├── Post.cs
│ └── DataContext.cs
├── appsettings.json
├── appsettings.Development.json
├── WebSearchDemo.csproj.user
├── Program.cs
├── Properties
│ └── launchSettings.json
├── WebSearchDemo.csproj
├── WebSearchDemo.xml
├── Controllers
│ └── HomeController.cs
└── Startup.cs
├── .gitignore
├── Masuit.LuceneEFCore.SearchEngine
├── Properties
│ └── PublishProfiles
│ │ ├── FolderProfile.pubxml.user
│ │ └── FolderProfile.pubxml
├── Masuit.LuceneEFCore.SearchEngine.csproj.user
├── Interfaces
│ ├── ILuceneSearchResult.cs
│ ├── IScoredSearchResult.cs
│ ├── ILuceneSearchResultCollection.cs
│ ├── ISearchResultCollection.cs
│ ├── IScoredSearchResultCollection.cs
│ ├── ILuceneIndexable.cs
│ ├── ILuceneIndexSearcher.cs
│ ├── ILuceneIndexer.cs
│ └── ISearchEngine.cs
├── JiebaAnalyzer
│ ├── Settings.cs
│ ├── JieBaAnalyzer.cs
│ └── JieBaTokenizer.cs
├── LuceneSearchResult.cs
├── ScoredSearchResult.cs
├── LuceneIndexerOptions.cs
├── LuceneIndexState.cs
├── LuceneSearchResultCollection.cs
├── ScoredSearchResultCollection.cs
├── SearchResultCollection.cs
├── LuceneIndexAttribute.cs
├── LuceneIndexChange.cs
├── Extensions
│ ├── ServiceCollectionExtension.cs
│ ├── StringHelpers.cs
│ └── DocumentExtension.cs
├── LuceneIndexChangeset.cs
├── Linq
│ └── LinqExtension.cs
├── KeywordsManager.cs
├── LuceneIndexableBaseEntity.cs
├── Masuit.LuceneEFCore.SearchEngine.csproj
├── SearchOptions.cs
├── LuceneIndexer.cs
├── LuceneIndexSearcher.cs
└── SearchEngine.cs
├── SECURITY.md
├── LICENSE
├── Masuit.LuceneEFCore.SearchEngine.sln
└── README.md
/WebSearchDemo/Database/Post.cs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ldqk0/Masuit.LuceneEFCore.SearchEngine/HEAD/WebSearchDemo/Database/Post.cs
--------------------------------------------------------------------------------
/WebSearchDemo/appsettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "Logging": {
3 | "LogLevel": {
4 | "Default": "Warning"
5 | }
6 | },
7 | "AllowedHosts": "*"
8 | }
9 |
--------------------------------------------------------------------------------
/WebSearchDemo/appsettings.Development.json:
--------------------------------------------------------------------------------
1 | {
2 | "Logging": {
3 | "LogLevel": {
4 | "Default": "Debug",
5 | "System": "Information",
6 | "Microsoft": "Information"
7 | }
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | */obj
2 | */bin
3 | /.vs
4 | /WebSearchDemo/lucene
5 | /Masuit.LuceneEFCore.SearchEngine.sln.DotSettings.user
6 | /Masuit.LuceneEFCore.SearchEngine/Masuit.LuceneEFCore.SearchEngine.xml
7 | Masuit.LuceneEFCore.SearchEngine.Test
8 | **/TestClass.cs
9 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Properties/PublishProfiles/FolderProfile.pubxml.user:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Masuit.LuceneEFCore.SearchEngine.csproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | <_LastSelectedProfileId>D:\Private\Masuit.LuceneEFCore.SearchEngine\Masuit.LuceneEFCore.SearchEngine\Properties\PublishProfiles\FolderProfile.pubxml
5 |
6 |
--------------------------------------------------------------------------------
/WebSearchDemo/WebSearchDemo.csproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | ProjectDebugger
5 |
6 |
7 | WebSearchDemo
8 |
9 |
--------------------------------------------------------------------------------
/WebSearchDemo/Program.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.AspNetCore.Hosting;
2 | using Microsoft.Extensions.Hosting;
3 |
4 | namespace WebSearchDemo;
5 |
6 | public class Program // Entry point of the WebSearchDemo application.
7 | {
8 | public static void Main(string[] args) // Builds the host from CreateWebHostBuilder and runs it.
9 | {
10 | CreateWebHostBuilder(args).Build().Run();
11 | }
12 |
13 | public static IHostBuilder CreateWebHostBuilder(string[] args) => // Default host builder with web-host defaults applied.
14 | Host.CreateDefaultBuilder(args).ConfigureWebHostDefaults(builder => builder.UseStartup()); // NOTE(review): the type argument of UseStartup appears stripped in this listing - confirm against the repository.
15 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneSearchResult.cs:
--------------------------------------------------------------------------------
1 | using Lucene.Net.Documents;
2 |
3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces
4 | {
5 | /// <summary>
6 | /// Search result: a Lucene document together with its match score.
7 | /// </summary>
8 | public interface ILuceneSearchResult
9 | {
10 | /// <summary>
11 | /// Match score of this result.
12 | /// </summary>
13 | float Score { get; set; }
14 |
15 | /// <summary>
16 | /// The matched Lucene document.
17 | /// </summary>
18 | Document Document { get; set; }
19 | }
20 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Interfaces/IScoredSearchResult.cs:
--------------------------------------------------------------------------------
1 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces
2 | {
3 | /// <summary>
4 | /// Result item: an entity together with its match score.
5 | /// </summary>
6 | /// <typeparam name="T">Type of the <see cref="Entity"/> property.</typeparam>
7 | public interface IScoredSearchResult
8 | {
9 | /// <summary>
10 | /// Match score of this result.
11 | /// </summary>
12 | float Score { get; set; }
13 |
14 | /// <summary>
15 | /// The matched entity.
16 | /// </summary>
17 | T Entity { get; set; }
18 | }
19 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Properties/PublishProfiles/FolderProfile.pubxml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 | Release
8 | Any CPU
9 | bin\Release\netstandard2.1\publish\
10 | FileSystem
11 |
12 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/JiebaAnalyzer/Settings.cs:
--------------------------------------------------------------------------------
1 | namespace Masuit.LuceneEFCore.SearchEngine;
2 |
3 | /// <summary>
4 | /// Global settings intended to be assigned before a JieBaAnalyzer is instantiated.
5 | /// </summary>
6 | public static class Settings
7 | {
8 | /// <summary>
9 | /// Whether to show log output; defaults to false.
10 | /// </summary>
11 | public static bool Log { get; set; } = false;
12 |
13 | /// <summary>
14 | /// Ignore-dictionary file, one word per line.
15 | /// </summary>
16 | public static string IgnoreDictFile { get; set; }
17 | /// <summary>
18 | /// User-defined dictionary file, one word per line.
19 | /// </summary>
20 | public static string UserDictFile { get; set; }
21 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneSearchResult.cs:
--------------------------------------------------------------------------------
1 | using Lucene.Net.Documents;
2 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
3 |
4 | namespace Masuit.LuceneEFCore.SearchEngine
5 | {
6 | /// <summary>
7 | /// Search result: a Lucene document together with its match score.
8 | /// </summary>
9 | public class LuceneSearchResult : ILuceneSearchResult
10 | {
11 | /// <summary>
12 | /// Match score of this result.
13 | /// </summary>
14 | public float Score { get; set; }
15 |
16 | /// <summary>
17 | /// The matched Lucene document.
18 | /// </summary>
19 | public Document Document { get; set; }
20 | }
21 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/ScoredSearchResult.cs:
--------------------------------------------------------------------------------
1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
2 |
3 | namespace Masuit.LuceneEFCore.SearchEngine
4 | {
5 | /// <summary>
6 | /// Search result item: an entity together with its match score.
7 | /// </summary>
8 | /// <typeparam name="T">Type of the <see cref="Entity"/> property.</typeparam>
9 | public class ScoredSearchResult : IScoredSearchResult
10 | {
11 | /// <summary>
12 | /// Match score of this result.
13 | /// </summary>
14 | public float Score { get; set; }
15 |
16 | /// <summary>
17 | /// The matched entity.
18 | /// </summary>
19 | public T Entity { get; set; }
20 | }
21 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneIndexerOptions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace Masuit.LuceneEFCore.SearchEngine
4 | {
5 | /// <summary>
6 | /// Indexer options.
7 | /// </summary>
8 | public class LuceneIndexerOptions
9 | {
10 | /// <summary>
11 | /// Path of the index.
12 | /// </summary>
13 | public string Path { get; set; }
14 |
15 | /// <summary>
16 | /// Generator for the IndexId index column: (Type entityType, any idValue) => string indexId. The default produces "{type.Name}:{id}".
17 | /// </summary>
18 | public static Func IndexIdGenerator = (type, id) => $"{type.Name}:{id}"; // NOTE(review): public static mutable field, so one generator is shared by every options instance.
19 |
20 | }
21 | }
--------------------------------------------------------------------------------
/WebSearchDemo/Database/DataContext.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.EntityFrameworkCore;
2 |
3 | namespace WebSearchDemo.Database
4 | {
5 | public class DataContext : DbContext // Database context of the demo application.
6 | {
7 | public DataContext(DbContextOptions options) : base(options) // Forwards the supplied options to DbContext.
8 | {
9 |
10 | }
11 |
12 | protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
13 | {
14 | base.OnConfiguring(optionsBuilder);
15 | optionsBuilder.UseQueryTrackingBehavior(QueryTrackingBehavior.TrackAll); // Queries track all returned entities.
16 | }
17 |
18 | public virtual DbSet Post { get; set; } // Entity set named Post; NOTE(review): the DbSet type argument appears stripped in this listing.
19 |
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneSearchResultCollection.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 |
3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces
4 | {
5 | /// <summary>
6 | /// Collection of search results.
7 | /// </summary>
8 | public interface ILuceneSearchResultCollection
9 | {
10 | /// <summary>
11 | /// Total number of hits.
12 | /// </summary>
13 | int TotalHits { get; set; }
14 |
15 | /// <summary>
16 | /// Elapsed time of the search (the unit is not visible from this declaration).
17 | /// </summary>
18 | long Elapsed { get; set; }
19 |
20 | /// <summary>
21 | /// The list of results.
22 | /// </summary>
23 | IList Results { get; set; }
24 | }
25 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Interfaces/ISearchResultCollection.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 |
3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces
4 | {
5 | /// <summary>
6 | /// Collection of search results.
7 | /// </summary>
8 | /// <typeparam name="T">Result element type (the generic parameter list appears stripped in this listing - confirm).</typeparam>
9 | public interface ISearchResultCollection
10 | {
11 | /// <summary>
12 | /// Total number of hits.
13 | /// </summary>
14 | int TotalHits { get; set; }
15 |
16 | /// <summary>
17 | /// Elapsed time of the search (the unit is not visible from this declaration).
18 | /// </summary>
19 | long Elapsed { get; set; }
20 |
21 | /// <summary>
22 | /// The list of results.
23 | /// </summary>
24 | IList Results { get; set; }
25 | }
26 | }
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | Use this section to tell people about which versions of your project are
6 | currently being supported with security updates.
7 |
8 | | Version | Supported |
9 | | ------- | ------------------ |
10 | | 5.1.x | :white_check_mark: |
11 | | 5.0.x | :x: |
12 | | 4.0.x | :white_check_mark: |
13 | | < 4.0 | :x: |
14 |
15 | ## Reporting a Vulnerability
16 |
17 | Use this section to tell people how to report a vulnerability.
18 |
19 | Tell them where to go, how often they can expect to get an update on a
20 | reported vulnerability, what to expect if the vulnerability is accepted or
21 | declined, etc.
22 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneIndexState.cs:
--------------------------------------------------------------------------------
1 | namespace Masuit.LuceneEFCore.SearchEngine
2 | {
3 | /// <summary>
4 | /// Index state enumeration.
5 | /// </summary>
6 | public enum LuceneIndexState
7 | {
8 | /// <summary>
9 | /// Added.
10 | /// </summary>
11 | Added,
12 |
13 | /// <summary>
14 | /// Removed.
15 | /// </summary>
16 | Removed,
17 |
18 | /// <summary>
19 | /// Updated.
20 | /// </summary>
21 | Updated,
22 |
23 | /// <summary>
24 | /// Not modified.
25 | /// </summary>
26 | Unchanged,
27 |
28 | /// <summary>
29 | /// No modification needed.
30 | /// </summary>
31 | NotSet
32 | }
33 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Interfaces/IScoredSearchResultCollection.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 |
3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces
4 | {
5 | /// <summary>
6 | /// Collection of scored search results.
7 | /// </summary>
8 | /// <typeparam name="T">Entity type of the results (the generic parameter list appears stripped in this listing - confirm).</typeparam>
9 | public interface IScoredSearchResultCollection
10 | {
11 | /// <summary>
12 | /// Total number of hits.
13 | /// </summary>
14 | int TotalHits { get; set; }
15 |
16 | /// <summary>
17 | /// Elapsed time of the search (the unit is not visible from this declaration).
18 | /// </summary>
19 | long Elapsed { get; set; }
20 |
21 | /// <summary>
22 | /// The list of results.
23 | /// </summary>
24 | IList> Results { get; set; }
25 | }
26 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneSearchResultCollection.cs:
--------------------------------------------------------------------------------
1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
2 | using System.Collections.Generic;
3 |
4 | namespace Masuit.LuceneEFCore.SearchEngine
5 | {
6 | /// <summary>
7 | /// Collection of search results.
8 | /// </summary>
9 | public class LuceneSearchResultCollection : ILuceneSearchResultCollection
10 | {
11 | /// <summary>
12 | /// The list of results; initialized to an empty list.
13 | /// </summary>
14 | public IList Results { get; set; } = new List();
15 |
16 | /// <summary>
17 | /// Elapsed time of the search (the unit is not visible from this declaration).
18 | /// </summary>
19 | public long Elapsed { get; set; }
20 |
21 | /// <summary>
22 | /// Total number of hits.
23 | /// </summary>
24 | public int TotalHits { get; set; }
25 | }
26 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/ScoredSearchResultCollection.cs:
--------------------------------------------------------------------------------
1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
2 | using System.Collections.Generic;
3 |
4 | namespace Masuit.LuceneEFCore.SearchEngine
5 | {
6 | /// <summary>
7 | /// Collection of scored search results.
8 | /// </summary>
9 | /// <typeparam name="T">Entity type of the results (the generic parameter list appears stripped in this listing - confirm).</typeparam>
10 | public class ScoredSearchResultCollection : IScoredSearchResultCollection
11 | {
12 | /// <summary>
13 | /// The list of results; initialized to an empty list.
14 | /// </summary>
15 | public IList> Results { get; set; } = new List>();
16 |
17 | /// <summary>
18 | /// Elapsed time of the search (the unit is not visible from this declaration).
19 | /// </summary>
20 | public long Elapsed { get; set; }
21 |
22 | /// <summary>
23 | /// Total number of hits.
24 | /// </summary>
25 | public int TotalHits { get; set; }
26 | }
27 | }
--------------------------------------------------------------------------------
/WebSearchDemo/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "iisSettings": {
3 | "windowsAuthentication": false,
4 | "anonymousAuthentication": true,
5 | "iisExpress": {
6 | "applicationUrl": "http://localhost:7993",
7 | "sslPort": 0
8 | }
9 | },
10 | "$schema": "http://json.schemastore.org/launchsettings.json",
11 | "profiles": {
12 | "IIS Express": {
13 | "commandName": "IISExpress",
14 | "launchBrowser": true,
15 | "launchUrl": "api/values",
16 | "environmentVariables": {
17 | "ASPNETCORE_ENVIRONMENT": "Development"
18 | }
19 | },
20 | "WebSearchDemo": {
21 | "commandName": "Project",
22 | "launchUrl": "api/values",
23 | "environmentVariables": {
24 | "ASPNETCORE_ENVIRONMENT": "Development"
25 | },
26 | "applicationUrl": "http://localhost:5000"
27 | }
28 | }
29 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/SearchResultCollection.cs:
--------------------------------------------------------------------------------
1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
2 | using System.Collections.Generic;
3 |
4 | namespace Masuit.LuceneEFCore.SearchEngine
5 | {
6 | /// <summary>
7 | /// Collection of search results.
8 | /// </summary>
9 | /// <typeparam name="T">Result element type (the generic parameter list appears stripped in this listing - confirm).</typeparam>
10 | public class SearchResultCollection : ISearchResultCollection
11 | {
12 | /// <summary>
13 | /// The list of result entities.
14 | /// </summary>
15 | public IList Results { get; set; }
16 |
17 | /// <summary>
18 | /// Elapsed time of the search (the unit is not visible from this declaration).
19 | /// </summary>
20 | public long Elapsed { get; set; }
21 |
22 | /// <summary>
23 | /// Total number of hits.
24 | /// </summary>
25 | public int TotalHits { get; set; }
26 |
27 | public SearchResultCollection() // Starts with an empty result list.
28 | {
29 | Results = new List();
30 | }
31 | }
32 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneIndexAttribute.cs:
--------------------------------------------------------------------------------
1 | using Lucene.Net.Documents;
2 | using System;
3 |
4 | namespace Masuit.LuceneEFCore.SearchEngine
5 | {
6 | /// <summary>
7 | /// Marks a property as indexable.
8 | /// </summary>
9 | [AttributeUsage(AttributeTargets.Property)]
10 | public class LuceneIndexAttribute : Attribute
11 | {
12 | /// <summary>
13 | /// Initializes the defaults: Store is Field.Store.YES and IsHtml is false.
14 | /// </summary>
15 | public LuceneIndexAttribute()
16 | {
17 | Store = Field.Store.YES;
18 | IsHtml = false;
19 | }
20 |
21 | /// <summary>
22 | /// Name of the index field.
23 | /// </summary>
24 | public string Name { get; set; }
25 |
26 | /// <summary>
27 | /// Whether the value is stored in the index.
28 | /// </summary>
29 | public Field.Store Store { get; set; }
30 |
31 | /// <summary>
32 | /// Whether the value is HTML.
33 | /// </summary>
34 | public bool IsHtml { get; set; }
35 | }
36 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 ldqk
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneIndexChange.cs:
--------------------------------------------------------------------------------
1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
2 |
3 | namespace Masuit.LuceneEFCore.SearchEngine
4 | {
5 | /// <summary>
6 | /// A single index change: an entity and its change state.
7 | /// </summary>
8 | public class LuceneIndexChange
9 | {
10 | /// <summary>
11 | /// The entity the change applies to.
12 | /// </summary>
13 | public ILuceneIndexable Entity { get; set; }
14 |
15 | /// <summary>
16 | /// The change state.
17 | /// </summary>
18 | public LuceneIndexState State { get; set; }
19 |
20 | /// <summary>
21 | /// Creates a change for the entity with state <see cref="LuceneIndexState.NotSet"/>.
22 | /// </summary>
23 | /// <param name="entity">The entity.</param>
24 | public LuceneIndexChange(ILuceneIndexable entity)
25 | {
26 | Entity = entity;
27 | State = LuceneIndexState.NotSet;
28 | }
29 |
30 | /// <summary>
31 | /// Creates a change for the entity with the given state.
32 | /// </summary>
33 | /// <param name="entity">The entity.</param>
34 | /// <param name="state">The change state.</param>
35 | public LuceneIndexChange(ILuceneIndexable entity, LuceneIndexState state)
36 | {
37 | Entity = entity;
38 | State = state;
39 | }
40 | }
41 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/JiebaAnalyzer/JieBaAnalyzer.cs:
--------------------------------------------------------------------------------
1 | using JiebaNet.Segmenter;
2 | using Lucene.Net.Analysis;
3 | using Lucene.Net.Analysis.Core;
4 | using Lucene.Net.Analysis.TokenAttributes;
5 | using System.IO;
6 |
7 | namespace Masuit.LuceneEFCore.SearchEngine;
8 |
9 | public class JieBaAnalyzer : Analyzer
10 | {
11 | private readonly TokenizerMode _mode;
12 | private readonly bool _defaultUserDict;
13 |
14 | /// <summary>
15 | /// Stores the tokenizer settings that CreateComponents passes to JieBaTokenizer.
16 | /// </summary>
17 | /// <param name="mode">Tokenizer mode handed to JieBaTokenizer.</param>
18 | /// <param name="defaultUserDict">Flag handed to JieBaTokenizer; defaults to false (its meaning is defined by the tokenizer).</param>
19 | public JieBaAnalyzer(TokenizerMode mode, bool defaultUserDict = false)
20 | {
21 | _mode = mode;
22 | _defaultUserDict = defaultUserDict;
23 | }
24 |
25 | protected override TokenStreamComponents CreateComponents(string filedName, TextReader reader) // Builds a JieBaTokenizer wrapped in a LowerCaseFilter; filedName is not used here.
26 | {
27 | var tokenizer = new JieBaTokenizer(reader, _mode, _defaultUserDict);
28 | var tokenstream = new LowerCaseFilter(Lucene.Net.Util.LuceneVersion.LUCENE_48, tokenizer);
29 | tokenstream.AddAttribute(); // NOTE(review): the attribute type arguments on this line and the next appear stripped in this listing.
30 | tokenstream.AddAttribute();
31 | return new TokenStreamComponents(tokenizer, tokenstream);
32 | }
33 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneIndexable.cs:
--------------------------------------------------------------------------------
1 | #if Guid
2 | using System;
3 | #endif
4 |
5 | using Lucene.Net.Documents;
6 | using Newtonsoft.Json;
7 | using System.ComponentModel.DataAnnotations;
8 | using System.ComponentModel.DataAnnotations.Schema;
9 |
10 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces
11 | {
12 | /// <summary>
13 | /// Contract for entities that need to be indexed.
14 | /// </summary>
15 | public interface ILuceneIndexable
16 | {
17 | /// <summary>
18 | /// Primary key id; its type (int, long, string or Guid) is selected by the Int/Long/String/Guid compilation symbols.
19 | /// </summary>
20 | [LuceneIndex(Name = "Id", Store = Field.Store.YES), Key]
21 | #if Int
22 | int Id { get; set; }
23 |
24 | #endif
25 | #if Long
26 | long Id { get; set; }
27 | #endif
28 | #if String
29 | string Id { get; set; }
30 | #endif
31 | #if Guid
32 | Guid Id { get; set; }
33 | #endif
34 |
35 | /// <summary>
36 | /// Index id; indexed under the name "IndexId" and marked JsonIgnore and NotMapped.
37 | /// </summary>
38 | [LuceneIndex(Name = "IndexId", Store = Field.Store.YES)]
39 | [JsonIgnore, NotMapped]
40 | internal string IndexId { get; set; }
41 |
42 | /// <summary>
43 | /// Converts the entity to a Lucene document.
44 | /// </summary>
45 | /// <returns>The Lucene document for this entity.</returns>
46 | Document ToDocument();
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/WebSearchDemo/WebSearchDemo.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | net8.0
4 | InProcess
5 | Debug;Release;String版本;Guid版本;Long版本
6 | false
7 | false
8 |
9 |
10 | D:\Private\Masuit.LuceneEFCore.SearchEngine\WebSearchDemo\WebSearchDemo.xml
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | PreserveNewest
23 |
24 |
25 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Extensions/ServiceCollectionExtension.cs:
--------------------------------------------------------------------------------
1 | using JiebaNet.Segmenter;
2 | using Lucene.Net.Analysis;
3 | using Lucene.Net.Store;
4 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
5 | using Microsoft.EntityFrameworkCore;
6 | using Microsoft.Extensions.DependencyInjection;
7 | using Microsoft.Extensions.DependencyInjection.Extensions;
8 | using Directory = Lucene.Net.Store.Directory;
9 |
10 | namespace Masuit.LuceneEFCore.SearchEngine.Extensions;
11 |
12 | public static class ServiceCollectionExtension
13 | {
14 | /// <summary>
15 | /// Registers the search engine services for dependency injection and returns the same service collection.
16 | /// </summary>
17 | /// <typeparam name="TContext">The DbContext type (the generic parameter list appears stripped in this listing).</typeparam>
18 | /// <param name="services">The service collection to register into.</param>
19 | /// <param name="option">Indexer options; registered as a singleton, and its Path is used to open the index directory.</param>
20 | public static IServiceCollection AddSearchEngine(this IServiceCollection services, LuceneIndexerOptions option) where TContext : DbContext
21 | {
22 | services.AddSingleton(option);
23 | services.AddMemoryCache();
24 | services.TryAddSingleton(s => FSDirectory.Open(option.Path)); // File-system index directory at option.Path.
25 | services.TryAddSingleton(s => new JieBaAnalyzer(TokenizerMode.Search)); // Analyzer in Search tokenizer mode.
26 | services.TryAddScoped(); // NOTE(review): the service/implementation type arguments of the scoped registrations appear stripped in this listing.
27 | services.TryAddScoped();
28 | services.TryAddScoped(typeof(ISearchEngine<>), typeof(SearchEngine<>));
29 | services.TryAddScoped, SearchEngine>();
30 | return services;
31 | }
32 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneIndexSearcher.cs:
--------------------------------------------------------------------------------
1 | using Lucene.Net.Documents;
2 | using System;
3 | using System.Collections.Generic;
4 |
5 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces
6 | {
7 | /// <summary>
8 | /// Index searcher.
9 | /// </summary>
10 | public interface ILuceneIndexSearcher
11 | {
12 | /// <summary>
13 | /// Segments a keyword string into words.
14 | /// </summary>
15 | /// <param name="keyword">The keyword text to segment.</param>
16 | /// <returns>The list of segmented keywords.</returns>
17 | List CutKeywords(string keyword);
18 |
19 | /// <summary>
20 | /// Searches for a single record.
21 | /// </summary>
22 | /// <param name="options">Search options.</param>
23 | /// <returns>The matching document.</returns>
24 | Document ScoredSearchSingle(SearchOptions options);
25 |
26 | /// <summary>
27 | /// Scored search.
28 | /// </summary>
29 | /// <param name="options">Search options.</param>
30 | /// <returns>The search result collection.</returns>
31 | ILuceneSearchResultCollection ScoredSearch(SearchOptions options);
32 |
33 | /// <summary>
34 | /// Scored search.
35 | /// </summary>
36 | /// <param name="keywords">Keywords.</param>
37 | /// <param name="fields">Fields the search is restricted to.</param>
38 | /// <param name="maximumNumberOfHits">Maximum number of hits to retrieve.</param>
39 | /// <param name="boosts">Per-field boosts used when searching multiple fields.</param>
40 | /// <param name="type">Document type.</param>
41 | /// <param name="sortBy">Sort field.</param>
42 | /// <param name="skip">How many results to skip.</param>
43 | /// <param name="take">How many results to take.</param>
44 | /// <returns>The search result collection.</returns>
45 | ILuceneSearchResultCollection ScoredSearch(string keywords, string fields, int maximumNumberOfHits, Dictionary boosts, Type type, string sortBy, int? skip, int? take);
46 | }
47 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Extensions/StringHelpers.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text.RegularExpressions;
5 |
6 | namespace Masuit.LuceneEFCore.SearchEngine.Extensions
7 | {
8 | public static class StringHelpers
9 | {
10 | /// <summary>
11 | /// Removes the specified characters from a string.
12 | /// </summary>
13 | /// <param name="s">Source string.</param>
14 | /// <param name="chars">Characters to remove.</param>
15 | /// <returns>The string without the given characters; string.Empty when <paramref name="s"/> is null or empty.</returns>
16 | internal static string RemoveCharacters(this string s, IEnumerable chars)
17 | {
18 | return string.IsNullOrEmpty(s) ? string.Empty : new string(s.Where(c => !chars.Contains(c)).ToArray());
19 | }
20 |
21 | /// <summary>
22 | /// Strips HTML tags ("<...>") and then "&...;" sequences from the text. No truncation is performed here.
23 | /// </summary>
24 | /// <param name="html">Source HTML. NOTE(review): not null-checked before Regex.Replace - confirm callers never pass null.</param>
25 | /// <returns>The remaining text. NOTE(review): "&[^;]+;" also removes any span from an ampersand to the next semicolon in ordinary text - confirm intended.</returns>
26 | internal static string RemoveHtmlTag(this string html)
27 | {
28 | var strText = Regex.Replace(html, "<[^>]+>", "");
29 | strText = Regex.Replace(strText, "&[^;]+;", "");
30 | return strText;
31 | }
32 |
33 | /// <summary>
34 | /// Adds multiple elements to a collection, one Add call per element.
35 | /// </summary>
36 | /// <typeparam name="T">Element type (generic arguments appear stripped in this listing).</typeparam>
37 | /// <param name="this">The collection to add to.</param>
38 | /// <param name="values">The elements to add.</param>
39 | public static void AddRange(this ICollection @this, IEnumerable values)
40 | {
41 | foreach (var obj in values)
42 | {
43 | @this.Add(obj);
44 | }
45 | }
46 |
47 | /// <summary>
48 | /// Removes the elements that match a condition.
49 | /// </summary>
50 | /// <typeparam name="T">Element type (generic arguments appear stripped in this listing).</typeparam>
51 | /// <param name="this">The collection to remove from.</param>
52 | /// <param name="where">Predicate selecting the elements to remove.</param>
53 | public static void RemoveWhere(this ICollection @this, Func @where)
54 | {
55 | foreach (var obj in @this.Where(where).ToList()) // ToList snapshots the matches so the collection is not modified while being enumerated.
56 | {
57 | @this.Remove(obj);
58 | }
59 | }
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneIndexChangeset.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 | using System.Linq;
3 |
4 | namespace Masuit.LuceneEFCore.SearchEngine
5 | {
6 | /// <summary>
7 | /// A set of index changes.
8 | /// </summary>
9 | public class LuceneIndexChangeset
10 | {
11 | /// <summary>
12 | /// The change entries.
13 | /// </summary>
14 | public IList Entries { get; set; }
15 |
16 | /// <summary>
17 | /// Whether any entry has the given state.
18 | /// </summary>
19 | /// <param name="state">The state to look for.</param>
20 | /// <returns>True when at least one entry has that state.</returns>
21 | private bool EntriesHaveState(LuceneIndexState state)
22 | {
23 | return Entries.Any(x => x.State == state);
24 | }
25 |
26 | /// <summary>
27 | /// Whether any entry is in the Added state.
28 | /// </summary>
29 | public bool HasAdds => EntriesHaveState(LuceneIndexState.Added);
30 |
31 | /// <summary>
32 | /// Whether any entry is in the Updated state.
33 | /// </summary>
34 | public bool HasUpdates => EntriesHaveState(LuceneIndexState.Updated);
35 |
36 | /// <summary>
37 | /// Whether any entry is in the Removed state.
38 | /// </summary>
39 | public bool HasDeletes => EntriesHaveState(LuceneIndexState.Removed);
40 |
41 | /// <summary>
42 | /// Whether the changeset is non-empty and contains an added, updated or removed entry.
43 | /// </summary>
44 | public bool HasChanges => Entries.Any() && (HasAdds || HasUpdates || HasDeletes);
45 |
46 | /// <summary>
47 | /// Creates an empty changeset.
48 | /// </summary>
49 | public LuceneIndexChangeset()
50 | {
51 | Entries = new List();
52 | }
53 |
54 | /// <summary>
55 | /// Creates a changeset containing a single change.
56 | /// </summary>
57 | /// <param name="change">The change to include.</param>
58 | public LuceneIndexChangeset(LuceneIndexChange change) => Entries = new List
59 | {
60 | change
61 | };
62 | }
63 | }
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneIndexer.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 |
3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces
4 | {
5 | public interface ILuceneIndexer
6 | {
7 | /// <summary>
8 | /// Adds an entity to the index.
9 | /// </summary>
10 | /// <param name="entity">The entity.</param>
11 | void Add(ILuceneIndexable entity);
12 |
13 | /// <summary>
14 | /// Creates the index.
15 | /// </summary>
16 | /// <param name="entities">The entities to index.</param>
17 | /// <param name="recreate">Whether to overwrite the existing index; defaults to true.</param>
18 | void CreateIndex(IEnumerable entities, bool recreate = true);
19 |
20 | /// <summary>
21 | /// Deletes an entity from the index.
22 | /// </summary>
23 | /// <param name="entity">The entity.</param>
24 | void Delete(ILuceneIndexable entity);
25 |
26 | /// <summary>
27 | /// Deletes entities from the index.
28 | /// </summary>
29 | /// <param name="entries">The entities.</param>
30 | void Delete(IList entries) where T : ILuceneIndexable;
31 |
32 | /// <summary>
33 | /// Deletes everything from the index.
34 | /// </summary>
35 | /// <param name="commit">Whether to commit; defaults to true.</param>
36 | void DeleteAll(bool commit = true);
37 |
38 | /// <summary>
39 | /// Updates the index for an entity.
40 | /// </summary>
41 | /// <param name="entity">The entity.</param>
42 | void Update(ILuceneIndexable entity);
43 |
44 | /// <summary>
45 | /// Updates the index from a single change.
46 | /// </summary>
47 | /// <param name="change">The change (an entity with its state).</param>
48 | void Update(LuceneIndexChange change);
49 |
50 | /// <summary>
51 | /// Updates the index from a changeset.
52 | /// </summary>
53 | /// <param name="changeset">The set of changes.</param>
54 | void Update(LuceneIndexChangeset changeset);
55 |
56 | /// <summary>
57 | /// Number of entries in the index.
58 | /// </summary>
59 | /// <returns>The count.</returns>
60 | int Count();
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Linq/LinqExtension.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq.Expressions;
4 |
5 | namespace Masuit.LuceneEFCore.SearchEngine.Linq
6 | {
7 | /// <summary>
8 | /// LINQ expression extensions.
9 | /// </summary>
10 | public static class LinqExtension
11 | {
12 | /// <summary>
13 | /// Combines two conditions with a logical AND (ExpressionType.AndAlso).
14 | /// </summary>
15 | /// <typeparam name="T">Type the conditions apply to (generic arguments appear stripped in this listing).</typeparam>
16 | /// <param name="left">Left condition.</param>
17 | /// <param name="right">Right condition.</param>
18 | /// <returns>The new combined expression.</returns>
19 | internal static Expression> And(this Expression> left, Expression> right)
20 | {
21 | return CombineLambdas(left, right, ExpressionType.AndAlso);
22 | }
23 |
24 | private static Expression> CombineLambdas(this Expression> left, Expression> right, ExpressionType expressionType) // Joins both lambda bodies with the given binary operator over left's parameter.
25 | {
26 | if (IsExpressionBodyConstant(left)) // A constant left body is dropped and right is returned as-is. NOTE(review): the constant's value is not inspected - confirm a constant false is never passed.
27 | {
28 | return right;
29 | }
30 |
31 | var visitor = new SubstituteParameterVisitor
32 | {
33 | Sub =
34 | {
35 | [right.Parameters[0]] = left.Parameters[0] // Map right's parameter onto left's so both bodies share one parameter.
36 | }
37 | };
38 |
39 | Expression body = Expression.MakeBinary(expressionType, left.Body, visitor.Visit(right.Body));
40 | return Expression.Lambda>(body, left.Parameters[0]);
41 | }
42 |
43 | private static bool IsExpressionBodyConstant(Expression> left) // True when the lambda body is a constant node.
44 | {
45 | return left.Body.NodeType == ExpressionType.Constant;
46 | }
47 |
48 | internal class SubstituteParameterVisitor : ExpressionVisitor // Replaces parameters according to the Sub map while visiting an expression.
49 | {
50 | public Dictionary Sub = new Dictionary();
51 |
52 | protected override Expression VisitParameter(ParameterExpression node)
53 | {
54 | return Sub.TryGetValue(node, out var newValue) ? newValue : node; // Parameters without a mapping are left unchanged.
55 | }
56 | }
57 | }
58 | }
--------------------------------------------------------------------------------
/WebSearchDemo/WebSearchDemo.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | WebSearchDemo
5 |
6 |
7 |
8 |
9 | 搜索
10 |
11 | 关键词
12 | 第几页
13 | 页大小
14 |
15 |
16 |
17 |
18 | 创建索引
19 |
20 |
21 |
22 |
23 | 添加索引
24 |
25 |
26 |
27 |
28 | 删除索引
29 |
30 |
31 |
32 |
33 | 更新索引库
34 |
35 |
36 |
37 |
38 |
39 | 文章
40 |
41 |
42 |
43 |
44 | 标题
45 |
46 |
47 |
48 |
49 | 作者
50 |
51 |
52 |
53 |
54 | 内容
55 |
56 |
57 |
58 |
59 | 发表时间
60 |
61 |
62 |
63 |
64 | 作者邮箱
65 |
66 |
67 |
68 |
69 | 标签
70 |
71 |
72 |
73 |
74 | 文章关键词
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Extensions/DocumentExtension.cs:
--------------------------------------------------------------------------------
1 | using Lucene.Net.Documents;
2 | using Newtonsoft.Json;
3 | using System;
4 | using System.ComponentModel;
5 | using System.Globalization;
6 |
7 | namespace Masuit.LuceneEFCore.SearchEngine.Extensions
8 | {
9 | public static class DocumentExtension
10 | {
11 | /// <summary>
12 | /// Gets a value from the document and converts it to the requested type: returned as-is when the type is assignable from string, converted via ConvertTo for value types, otherwise deserialized from JSON.
13 | /// </summary>
14 | /// <param name="doc">The Lucene document.</param>
15 | /// <param name="key">The key (field name).</param>
16 | /// <param name="t">The target type.</param>
17 | /// <returns>The converted value. NOTE(review): if doc.Get can return null, the JSON branch passes null to JsonConvert.DeserializeObject - confirm that is handled.</returns>
18 | internal static object Get(this Document doc, string key, Type t)
19 | {
20 | string value = doc.Get(key);
21 | return t switch
22 | {
23 | _ when t.IsAssignableFrom(typeof(string)) => value,
24 | _ when t.IsValueType => ConvertTo(value, t),
25 | _ => JsonConvert.DeserializeObject(value, t)
26 | };
27 | }
28 |
29 | /// <summary>
30 | /// Converts a string directly to the target type, trying enum parsing, nullable unwrapping, type converters and finally Convert.ChangeType.
31 | /// </summary>
32 | /// <param name="value">The string value to convert.</param>
33 | /// <param name="type">The target type.</param>
34 | /// <returns>The converted value, or null when <paramref name="value"/> is null.</returns>
35 | private static object ConvertTo(string value, Type type)
36 | {
37 | if (value == null)
38 | {
39 | return default;
40 | }
41 |
42 | if (value.GetType() == type)
43 | {
44 | return value;
45 | }
46 |
47 | if (type.IsEnum)
48 | {
49 | return Enum.Parse(type, value.ToString(CultureInfo.InvariantCulture));
50 | }
51 |
52 | if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(Nullable<>))
53 | {
54 | var underlyingType = Nullable.GetUnderlyingType(type);
55 | return underlyingType!.IsEnum ? Enum.Parse(underlyingType, value.ToString(CultureInfo.CurrentCulture)) : Convert.ChangeType(value, underlyingType); // NOTE(review): CurrentCulture here versus InvariantCulture in the enum branch above; this path also skips the type converters used below - confirm nullable targets such as Guid? are handled.
56 | }
57 |
58 | var converter = TypeDescriptor.GetConverter(value); // First try a converter obtained from the value itself.
59 | if (converter != null)
60 | {
61 | if (converter.CanConvertTo(type))
62 | {
63 | return converter.ConvertTo(value, type);
64 | }
65 | }
66 |
67 | converter = TypeDescriptor.GetConverter(type); // Then try a converter obtained from the target type.
68 | if (converter != null)
69 | {
70 | if (converter.CanConvertFrom(value.GetType()))
71 | {
72 | return converter.ConvertFrom(value);
73 | }
74 | }
75 |
76 | return Convert.ChangeType(value, type); // Last resort.
77 | }
78 | }
79 | }
--------------------------------------------------------------------------------
/WebSearchDemo/Controllers/HomeController.cs:
--------------------------------------------------------------------------------
1 | using Masuit.LuceneEFCore.SearchEngine;
2 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
3 | using Microsoft.AspNetCore.Mvc;
4 | using System.Collections.Generic;
5 | using WebSearchDemo.Database;
6 |
7 | namespace WebSearchDemo.Controllers
8 | {
/// <summary>
/// Demo endpoints that exercise the search engine: searching, building the index and
/// keeping it in sync with database changes.
/// </summary>
[Route("[controller]/[action]")]
public class HomeController : Controller
{
    private readonly ISearchEngine<DataContext> _searchEngine;

    public HomeController(ISearchEngine<DataContext> searchEngine)
    {
        _searchEngine = searchEngine;
    }

    /// <summary>
    /// Search posts.
    /// </summary>
    /// <param name="s">Keywords.</param>
    /// <param name="page">Page number (1-based).</param>
    /// <param name="size">Page size.</param>
    /// <returns>The scored search result, or 400 when no keywords were supplied.</returns>
    [HttpGet]
    public IActionResult Index(string s, int page, int size)
    {
        // SearchOptions rejects blank keywords with an ArgumentException; answer with a
        // client error instead of letting that surface as a server error.
        if (string.IsNullOrWhiteSpace(s))
        {
            return BadRequest("Search keywords must not be empty.");
        }

        var result = _searchEngine.ScoredSearch<Post>(new SearchOptions(s, page, size, typeof(Post)));
        return Ok(result);
    }

    /// <summary>
    /// Create the index.
    /// </summary>
    [HttpGet]
    public void CreateIndex()
    {
        //_searchEngine.CreateIndex(); // scan every table and index all eligible entities
        _searchEngine.CreateIndex(new List<string>() { nameof(Post) }); // index only the listed tables
    }

    /// <summary>
    /// Add a post and index it.
    /// </summary>
    /// <param name="p">The post to add.</param>
    [HttpPost]
    public void AddIndex(Post p)
    {
        // Add to the database and update the index.
        _searchEngine.Context.Post.Add(p);
        _searchEngine.SaveChanges();

        //_luceneIndexer.Add(p); // index only, without touching the database
    }

    /// <summary>
    /// Delete a post and remove it from the index.
    /// </summary>
    /// <param name="post">Carries the id of the post to delete.</param>
    [HttpDelete]
    public void DeleteIndex(Post post)
    {
        // Delete from the database and update the index.
        Post p = _searchEngine.Context.Post.Find(post.Id);
        if (p == null)
        {
            // Unknown id: nothing to delete (Remove(null) would throw).
            return;
        }

        _searchEngine.Context.Post.Remove(p);
        _searchEngine.SaveChanges();

        //_luceneIndexer.Delete(p); // remove from the index only
    }

    /// <summary>
    /// Update a post and re-index it.
    /// </summary>
    /// <param name="post">Carries the id of the post to update.</param>
    [HttpPatch]
    public void UpdateIndex(Post post)
    {
        // Update in the database and synchronise the index.
        Post p = _searchEngine.Context.Post.Find(post.Id);
        if (p == null)
        {
            // Unknown id: nothing to update (Update(null) would throw).
            return;
        }

        // update...
        _searchEngine.Context.Post.Update(p);
        _searchEngine.SaveChanges();

        //_luceneIndexer.Update(p); // update the index only
    }
}
87 | }
88 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.30709.132
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Masuit.LuceneEFCore.SearchEngine", "Masuit.LuceneEFCore.SearchEngine\Masuit.LuceneEFCore.SearchEngine.csproj", "{1A9A907B-8254-40EF-BB80-47B716919B07}"
7 | EndProject
8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WebSearchDemo", "WebSearchDemo\WebSearchDemo.csproj", "{12EF81D1-26CF-417C-A814-BB49A0F87AFE}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|Any CPU = Debug|Any CPU
13 | Guid版本|Any CPU = Guid版本|Any CPU
14 | Long版本|Any CPU = Long版本|Any CPU
15 | Release|Any CPU = Release|Any CPU
16 | String版本|Any CPU = String版本|Any CPU
17 | EndGlobalSection
18 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
19 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
20 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Debug|Any CPU.Build.0 = Debug|Any CPU
21 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Guid版本|Any CPU.ActiveCfg = Guid版本|Any CPU
22 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Guid版本|Any CPU.Build.0 = Guid版本|Any CPU
23 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Long版本|Any CPU.ActiveCfg = Long版本|Any CPU
24 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Long版本|Any CPU.Build.0 = Long版本|Any CPU
25 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Release|Any CPU.ActiveCfg = Release|Any CPU
26 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Release|Any CPU.Build.0 = Release|Any CPU
27 | {1A9A907B-8254-40EF-BB80-47B716919B07}.String版本|Any CPU.ActiveCfg = String版本|Any CPU
28 | {1A9A907B-8254-40EF-BB80-47B716919B07}.String版本|Any CPU.Build.0 = String版本|Any CPU
29 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
30 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Debug|Any CPU.Build.0 = Debug|Any CPU
31 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Guid版本|Any CPU.ActiveCfg = Guid版本|Any CPU
32 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Guid版本|Any CPU.Build.0 = Guid版本|Any CPU
33 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Long版本|Any CPU.ActiveCfg = Long版本|Any CPU
34 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Long版本|Any CPU.Build.0 = Long版本|Any CPU
35 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Release|Any CPU.ActiveCfg = Release|Any CPU
36 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Release|Any CPU.Build.0 = Release|Any CPU
37 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.String版本|Any CPU.ActiveCfg = String版本|Any CPU
38 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.String版本|Any CPU.Build.0 = String版本|Any CPU
39 | EndGlobalSection
40 | GlobalSection(SolutionProperties) = preSolution
41 | HideSolutionNode = FALSE
42 | EndGlobalSection
43 | GlobalSection(ExtensibilityGlobals) = postSolution
44 | SolutionGuid = {C5C40C6B-2856-4202-A102-44DD07D576E4}
45 | EndGlobalSection
46 | EndGlobal
47 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Interfaces/ISearchEngine.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.EntityFrameworkCore;
2 | using System.Collections.Generic;
3 | using System.Threading.Tasks;
4 |
5 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces
6 | {
/// <summary>
/// A search engine that couples an EF Core <see cref="DbContext"/> with a Lucene index.
/// </summary>
/// <typeparam name="TContext">The database context type.</typeparam>
public interface ISearchEngine<TContext> where TContext : DbContext
{
    /// <summary>
    /// The database context.
    /// </summary>
    TContext Context { get; }

    /// <summary>
    /// The indexer.
    /// </summary>
    ILuceneIndexer LuceneIndexer { get; }

    /// <summary>
    /// The index searcher.
    /// </summary>
    ILuceneIndexSearcher LuceneIndexSearcher { get; }

    /// <summary>
    /// Total number of entries in the index.
    /// </summary>
    int IndexCount { get; }

    /// <summary>
    /// Create the index.
    /// </summary>
    void CreateIndex();

    /// <summary>
    /// Create the index for the given tables only.
    /// </summary>
    /// <param name="tables">Names of the tables to index.</param>
    void CreateIndex(List<string> tables);

    /// <summary>
    /// Delete the index.
    /// </summary>
    void DeleteIndex();

    /// <summary>
    /// Save pending data changes and synchronise the index.
    /// </summary>
    /// <param name="index">Whether to update the index.</param>
    /// <returns>The result of saving the changes.</returns>
    int SaveChanges(bool index = true);

    /// <summary>
    /// Save pending data changes and synchronise the index.
    /// </summary>
    /// <param name="index">Whether to update the index.</param>
    /// <returns>The result of saving the changes.</returns>
    Task<int> SaveChangesAsync(bool index = true);

    /// <summary>
    /// Run a search; results are converted to their entity types before being returned.
    /// </summary>
    /// <param name="options">Search options.</param>
    IScoredSearchResultCollection<ILuceneIndexable> ScoredSearch(SearchOptions options);

    /// <summary>
    /// Run a search restricted to one entity type; results are converted to that type
    /// before being returned.
    /// </summary>
    /// <typeparam name="T">The entity type to search - note: must implement ILuceneIndexable.</typeparam>
    /// <param name="options">Search options.</param>
    IScoredSearchResultCollection<T> ScoredSearch<T>(SearchOptions options);

    /// <summary>
    /// Run a search; results are converted to their entity types, without score information.
    /// </summary>
    /// <param name="options">Search options.</param>
    ISearchResultCollection<ILuceneIndexable> Search(SearchOptions options);

    /// <summary>
    /// Run a search restricted to one entity type; results are converted to that type,
    /// without score information.
    /// </summary>
    /// <typeparam name="T">The entity type to search - note: must implement ILuceneIndexable.</typeparam>
    /// <param name="options">Search options.</param>
    ISearchResultCollection<T> Search<T>(SearchOptions options);

    /// <summary>
    /// Find the single best-matching record.
    /// </summary>
    /// <param name="options">Search options.</param>
    ILuceneIndexable SearchOne(SearchOptions options);

    /// <summary>
    /// Find the single best-matching record of type <typeparamref name="T"/>.
    /// </summary>
    /// <param name="options">Search options.</param>
    T SearchOne<T>(SearchOptions options) where T : class;

    /// <summary>
    /// Import a custom keyword dictionary.
    /// </summary>
    /// <param name="words">The words to import.</param>
    void ImportCustomerKeywords(IEnumerable<string> words);
}
104 | }
--------------------------------------------------------------------------------
/WebSearchDemo/Startup.cs:
--------------------------------------------------------------------------------
1 | using JiebaNet.Segmenter;
2 | using Masuit.LuceneEFCore.SearchEngine;
3 | using Masuit.LuceneEFCore.SearchEngine.Extensions;
4 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
5 | using Microsoft.AspNetCore.Builder;
6 | using Microsoft.AspNetCore.Hosting;
7 | using Microsoft.AspNetCore.Mvc;
8 | using Microsoft.EntityFrameworkCore;
9 | using Microsoft.Extensions.Configuration;
10 | using Microsoft.Extensions.DependencyInjection;
11 | using Microsoft.OpenApi.Models;
12 | using Newtonsoft.Json;
13 | using System;
14 | using System.Collections.Generic;
15 | using System.IO;
16 | using WebSearchDemo.Database;
17 |
18 | namespace WebSearchDemo
19 | {
/// <summary>
/// Application bootstrap for the search demo: registers the in-memory database, the
/// search engine and Swagger, then seeds data and builds the index at start-up.
/// </summary>
public class Startup
{
    public Startup(IConfiguration configuration)
    {
        Configuration = configuration;
    }

    public IConfiguration Configuration { get; }

    // This method gets called by the runtime. Use this method to add services to the container.
    public void ConfigureServices(IServiceCollection services)
    {
        services.AddDbContext<DataContext>(db =>
        {
            db.UseInMemoryDatabase("test");

            //db.UseSqlServer("Data Source=.;Initial Catalog=MyBlogs;Integrated Security=True");
        });
        services.AddSearchEngine<DataContext>(new LuceneIndexerOptions()
        {
            Path = "lucene"
        });
        services.AddSwaggerGen(c =>
        {
            c.SwaggerDoc("v1", new OpenApiInfo
            {
                Version = "v1",
                Title = "接口文档",
                Description = "HTTP API ",
                // The contact URL previously read "masuit.coom"; it now matches the license URL.
                Contact = new OpenApiContact { Name = "懒得勤快", Email = "admin@masuit.com", Url = new Uri("https://masuit.com") },
                License = new OpenApiLicense { Name = "懒得勤快", Url = new Uri("https://masuit.com") }
            });
            c.IncludeXmlComments(Path.Combine(AppContext.BaseDirectory, "WebSearchDemo.xml"));
        }); // configure swagger
        services.AddControllers();
        services.AddControllersWithViews().SetCompatibilityVersion(CompatibilityVersion.Latest);
    }

    // This method gets called by the runtime. Use this method to configure the HTTP request pipeline.
    public void Configure(IApplicationBuilder app, IHostingEnvironment env, DataContext db, ISearchEngine<DataContext> searchEngine)
    {
        if (env.IsDevelopment())
        {
            app.UseDeveloperExceptionPage();
        }

        // Register custom dictionary words with the segmenter (one instance is enough).
        var segmenter = new JiebaSegmenter();
        foreach (var word in new[] { "会声会影", "思杰马克丁", "TeamViewer" })
        {
            segmenter.AddWord(word);
        }

        // Seed the in-memory database from the bundled sample data, then rebuild the index.
        var postsJson = File.ReadAllText(Path.Combine(AppContext.BaseDirectory, "Posts.json"));
        db.Post.AddRange(JsonConvert.DeserializeObject<List<Post>>(postsJson));
        db.SaveChanges();
        searchEngine.DeleteIndex();
        searchEngine.CreateIndex(new List<string>()
        {
            nameof(Post)
        });
        app.UseSwagger().UseSwaggerUI(c =>
        {
            c.SwaggerEndpoint("/swagger/v1/swagger.json", "懒得勤快的博客,搜索引擎测试");
        }); // configure swagger
        app.UseRouting().UseEndpoints(endpoints =>
        {
            endpoints.MapControllers(); // attribute routing
            endpoints.MapControllerRoute("default", "{controller=Home}/{action=Index}/{id?}"); // default route
        });
    }
}
86 | }
87 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/KeywordsManager.cs:
--------------------------------------------------------------------------------
1 | using JiebaNet.Segmenter;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text.RegularExpressions;
5 | using TinyPinyin;
6 |
7 | namespace Masuit.LuceneEFCore.SearchEngine
8 | {
/// <summary>
/// Process-wide registry of custom dictionary words, their pinyin spellings and synonym pairs.
/// </summary>
public static class KeywordsManager
{
    /// <summary>
    /// Synonym pairs.
    /// </summary>
    internal static HashSet<(string key, string value)> SynonymWords { get; set; } = new();

    private static HashSet<(string key, string value)> Pinyins { get; set; } = new();
    private static ILookup<string, string> _pinyinsLookup;

    /// <summary>
    /// Pinyin spelling (full and initials) to the registered words it stands for.
    /// Built lazily and rebuilt after new pinyin entries are registered.
    /// </summary>
    internal static ILookup<string, string> PinyinsLookup => _pinyinsLookup ??= Pinyins.ToLookup(t => t.key, t => t.value);

    private static readonly JiebaSegmenter JiebaSegmenter = new();

    // Matches every character outside the \u4e00-\u9fa5 range; used to strip a word down
    // to the characters that are passed to the pinyin helper.
    private static readonly Regex NonChineseCharacters = new(@"[^\u4e00-\u9fa5]");

    /// <summary>
    /// Add a synonym pair.
    /// </summary>
    /// <param name="pair">Key and its synonym.</param>
    public static void AddSynonyms(KeyValuePair<string, string> pair)
    {
        SynonymWords.Add((pair.Key, pair.Value));
        AddWords(pair.Key, pair.Value);
    }

    /// <summary>
    /// Add a synonym pair.
    /// </summary>
    /// <param name="pair">Key and its synonym.</param>
    public static void AddSynonyms((string, string) pair)
    {
        SynonymWords.Add((pair.Item1, pair.Item2));
        AddWords(pair.Item1, pair.Item2);
    }

    /// <summary>
    /// Add one or more synonyms for <paramref name="key"/>.
    /// </summary>
    public static void AddSynonyms(string key, string value, params string[] values)
    {
        SynonymWords.Add((key, value));
        AddWords(key, value);
        foreach (var s in values)
        {
            SynonymWords.Add((key, s));
            AddWords(s);
        }
    }

    /// <summary>
    /// Add synonym pairs.
    /// </summary>
    /// <param name="pairs">Key/synonym tuples.</param>
    public static void AddSynonyms(IEnumerable<(string key, string value)> pairs)
    {
        foreach (var t in pairs)
        {
            SynonymWords.Add(t);
            AddWords(t.key, t.value);
        }
    }

    /// <summary>
    /// Add synonym pairs.
    /// </summary>
    /// <param name="pairs">Key/synonym pairs.</param>
    public static void AddSynonyms(IEnumerable<KeyValuePair<string, string>> pairs)
    {
        foreach (var pair in pairs)
        {
            AddWords(pair.Key, pair.Value);
            SynonymWords.Add((pair.Key, pair.Value));
        }
    }

    /// <summary>
    /// Register a keyword.
    /// </summary>
    /// <param name="word">The word to register.</param>
    public static void AddWords(string word)
    {
        RegisterWord(word);
    }

    /// <summary>
    /// Register keywords.
    /// </summary>
    /// <param name="words">The words to register.</param>
    public static void AddWords(IEnumerable<string> words)
    {
        foreach (var s in words)
        {
            RegisterWord(s);
        }
    }

    /// <summary>
    /// Register keywords.
    /// </summary>
    /// <param name="word">The first word to register.</param>
    /// <param name="words">Further words to register.</param>
    public static void AddWords(string word, params string[] words)
    {
        // The first word gets its pinyin indexed exactly like the others; previously it
        // was only handed to the segmenter.
        RegisterWord(word);
        foreach (var s in words)
        {
            RegisterWord(s);
        }
    }

    /// <summary>
    /// Adds <paramref name="word"/> to the segmenter dictionary and records its full pinyin
    /// and pinyin-initials spellings. Null or empty words are ignored.
    /// </summary>
    private static void RegisterWord(string word)
    {
        if (string.IsNullOrEmpty(word))
        {
            return;
        }

        JiebaSegmenter.AddWord(word);
        var pinyin = PinyinHelper.GetPinyin(NonChineseCharacters.Replace(word, ""));
        if (string.IsNullOrEmpty(pinyin))
        {
            return;
        }

        // Invariant lower-casing keeps the keys identical on every machine culture.
        // Empty segments are dropped so taking the first letter below cannot fail.
        var syllables = pinyin.ToLowerInvariant().Split(' ').Where(s => s.Length > 0).ToArray();
        if (syllables.Length == 0)
        {
            return;
        }

        var added = Pinyins.Add((string.Concat(syllables), word));
        added |= Pinyins.Add((new string(syllables.Select(s => s[0]).ToArray()), word));
        if (added)
        {
            // Drop the cached lookup so words registered after its first use become visible.
            _pinyinsLookup = null;
        }
    }
}
139 | }
140 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneIndexableBaseEntity.cs:
--------------------------------------------------------------------------------
1 | using Lucene.Net.Documents;
2 | using Masuit.LuceneEFCore.SearchEngine.Extensions;
3 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
4 | using Newtonsoft.Json;
5 | using System;
6 | using System.ComponentModel.DataAnnotations;
7 | using System.ComponentModel.DataAnnotations.Schema;
8 | using System.Reflection;
9 |
10 | namespace Masuit.LuceneEFCore.SearchEngine
11 | {
/// <summary>
/// Base class for entities that are to be indexed.
/// </summary>
public abstract class LuceneIndexableBaseEntity : ILuceneIndexable
{
    /// <summary>
    /// Primary key. Its type is selected at build time by the Int / Long / String / Guid
    /// compilation symbols.
    /// </summary>
    [LuceneIndex(Name = nameof(Id), Store = Field.Store.YES), Key]
#if Int
    [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
    public int Id { get; set; }

#endif
#if Long
    [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
    public long Id { get; set; }
#endif
#if String
    public string Id { get; set; }
#endif
#if Guid
    public Guid Id { get; set; }
#endif

    /// <summary>
    /// Unique id of the entity inside the index, produced by
    /// <c>LuceneIndexerOptions.IndexIdGenerator</c> from the runtime type and <see cref="Id"/>.
    /// The setter is intentionally a no-op.
    /// </summary>
    [LuceneIndex(Name = nameof(ILuceneIndexable.IndexId), Store = Field.Store.YES)]
    [NotMapped, JsonIgnore]
    string ILuceneIndexable.IndexId
    {
        get => LuceneIndexerOptions.IndexIdGenerator(GetType(), Id);

        set
        {
        }
    }

    /// <summary>
    /// Converts the entity into a Lucene document.
    /// </summary>
    /// <returns>
    /// A document holding the entity type, the untokenized <c>IndexId</c> and one field per
    /// <see cref="LuceneIndexAttribute"/> found on a non-null public property.
    /// </returns>
    public virtual Document ToDocument()
    {
        var doc = new Document();
        var type = GetType();
        if (type.Assembly.IsDynamic && type.FullName.Contains("Prox"))
        {
            // Dynamically generated proxy type: index under the real entity type.
            type = type.BaseType;
        }

        doc.Add(new StringField("Type", type.AssemblyQualifiedName, Field.Store.YES));

        // IndexId is implemented explicitly, so the public-property scan below does not
        // surface it. Write it here, stored and untokenized, so that a delete/update by
        // the exact "IndexId" term can find this document.
        var indexId = ((ILuceneIndexable)this).IndexId;
        if (indexId != null)
        {
            doc.Add(new Field(nameof(ILuceneIndexable.IndexId), indexId, new FieldType
            {
                IsStored = true,
                IsIndexed = true,
                IsTokenized = false
            }));
        }

        foreach (var propertyInfo in type.GetProperties())
        {
            // Already written above; never index it a second time (or tokenized).
            if (propertyInfo.Name == nameof(ILuceneIndexable.IndexId))
            {
                continue;
            }

            var propertyValue = propertyInfo.GetValue(this);
            if (propertyValue == null)
            {
                continue;
            }

            foreach (var attr in propertyInfo.GetCustomAttributes<LuceneIndexAttribute>())
            {
                string name = !string.IsNullOrEmpty(attr.Name) ? attr.Name : propertyInfo.Name;
                switch (propertyValue)
                {
                    case DateTime time:
                        // Invariant culture: the stored text must not depend on the
                        // calendar/digits of whatever culture the process runs under.
                        doc.Add(new StringField(name, time.ToString("yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture), attr.Store));
                        break;

                    case int num:
                        doc.Add(new Int32Field(name, num, attr.Store));
                        break;

                    case long num:
                        doc.Add(new Int64Field(name, num, attr.Store));
                        break;

                    case float num:
                        doc.Add(new SingleField(name, num, attr.Store));
                        break;

                    case double num:
                        doc.Add(new DoubleField(name, num, attr.Store));
                        break;

                    case Guid guid:
                        doc.Add(new StringField(name, guid.ToString(), attr.Store));
                        break;

                    default:
                        // Everything else is indexed as analyzed text, optionally with HTML tags stripped.
                        string value = attr.IsHtml ? propertyValue.ToString().RemoveHtmlTag() : propertyValue.ToString();
                        doc.Add(new TextField(name, value, attr.Store));
                        break;
                }
            }
        }

        return doc;
    }
}
129 | }
130 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/Masuit.LuceneEFCore.SearchEngine.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | netstandard2.1;net5;net6;net7;net8
4 | True
5 | 懒得勤快
6 | 懒得勤快
7 | 基于EntityFrameworkCore和Lucene.NET实现的全文检索搜索引擎,主键int版本
8 | 基于EntityFrameworkCore和Lucene.NET实现的全文检索搜索引擎
9 | 懒得勤快
10 | https://github.com/ldqk/Masuit.LuceneEFCore.SearchEngine
11 | Masuit.LuceneEFCore.SearchEngine_int
12 | 1.2.4
13 | Debug;Release;String版本;Guid版本;Long版本
14 | false
15 | false
16 | False
17 | true
18 | true
19 | snupkg
20 | latest
21 | 1.2
22 | 1.2
23 | true
24 | true
25 | snupkg
26 | README.md
27 | https://github.com/ldqk/Masuit.LuceneEFCore.SearchEngine
28 | lucene;efcore;EntityFramework;masuit
29 | Masuit.LuceneEFCore.SearchEngine
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 | True
40 | \
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 | TRACE;Int
66 | .\Masuit.LuceneEFCore.SearchEngine.xml
67 |
68 |
69 |
70 | TRACE;Int
71 | true
72 | .\Masuit.LuceneEFCore.SearchEngine.xml
73 |
74 |
75 |
76 | TRACE;Long
77 | true
78 | .\Masuit.LuceneEFCore.SearchEngine.xml
79 |
80 |
81 |
82 | TRACE;Guid
83 | true
84 | .\Masuit.LuceneEFCore.SearchEngine.xml
85 |
86 |
87 |
88 | TRACE;String
89 | true
90 | .\Masuit.LuceneEFCore.SearchEngine.xml
91 |
92 |
93 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/JiebaAnalyzer/JieBaTokenizer.cs:
--------------------------------------------------------------------------------
1 | using JiebaNet.Segmenter;
2 | using JiebaNet.Segmenter.Common;
3 | using Lucene.Net.Analysis;
4 | using Lucene.Net.Analysis.TokenAttributes;
5 | using System;
6 | using System.Collections.Generic;
7 | using System.IO;
8 | using System.Reflection;
9 | using System.Text.RegularExpressions;
10 | using Token = JiebaNet.Segmenter.Token;
11 |
12 | namespace Masuit.LuceneEFCore.SearchEngine;
13 |
/// <summary>
/// Lucene tokenizer that segments its input with Jieba and drops configured stop words.
/// </summary>
public class JieBaTokenizer : Tokenizer
{
    // Matches Chinese / non-single-byte characters; only used to pad the debug log output.
    private static readonly Regex WideCharacterPattern = new Regex(@"[\u4e00-\u9fa5]|[^\x00-\xff]");

    private string _inputText;
    private readonly string _dictPath = "Resources/dict.txt";

    private readonly JiebaSegmenter _segmenter;
    private TokenizerMode _mode;
    private ICharTermAttribute _termAtt;
    private IOffsetAttribute _offsetAtt;
    //private IPositionIncrementAttribute _posIncrAtt;
    private ITypeAttribute _typeAtt;
    private readonly List<Token> _wordList = new List<Token>();

    private IEnumerator<Token> _iter;

    /// <summary>
    /// Words that are removed from the token stream.
    /// </summary>
    public List<string> StopWords { get; } = new List<string>();

    /// <summary>
    /// Creates a tokenizer over <paramref name="input"/>.
    /// </summary>
    /// <param name="input">The text to tokenize.</param>
    /// <param name="mode">The Jieba tokenizer mode.</param>
    /// <param name="defaultUserDict">Whether to load the embedded dictionary.</param>
    public JieBaTokenizer(TextReader input, TokenizerMode mode, bool defaultUserDict = false) : base(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input)
    {
        _segmenter = new JiebaSegmenter();
        _mode = mode;
        if (defaultUserDict)
        {
            _segmenter.LoadUserDictForEmbedded(Assembly.GetCallingAssembly(), _dictPath);
        }

        if (!string.IsNullOrEmpty(Settings.IgnoreDictFile))
        {
            // Load stop words once, skipping blanks and duplicates.
            var seen = new HashSet<string>();
            foreach (var item in FileExtension.ReadAllLines(Settings.IgnoreDictFile))
            {
                if (!string.IsNullOrEmpty(item) && seen.Add(item))
                {
                    StopWords.Add(item);
                }
            }
        }

        if (!string.IsNullOrEmpty(Settings.UserDictFile))
        {
            _segmenter.LoadUserDict(Settings.UserDictFile);
        }

        Init();
    }

    #region private func
    /// <summary>Registers the attributes this tokenizer populates.</summary>
    private void Init()
    {
        _termAtt = AddAttribute<ICharTermAttribute>();
        _offsetAtt = AddAttribute<IOffsetAttribute>();
        //_posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
        _typeAtt = AddAttribute<ITypeAttribute>();
        AddAttribute<IPositionIncrementAttribute>();
    }

    private string ReadToEnd(TextReader input)
    {
        return input.ReadToEnd();
    }

    /// <summary>
    /// Returns the next segmented word as a Lucene token, or <c>null</c> when exhausted.
    /// </summary>
    private Lucene.Net.Analysis.Token Next()
    {
        if (!_iter.MoveNext())
        {
            return null;
        }

        var word = _iter.Current;
        var token = new Lucene.Net.Analysis.Token(word.Word, word.StartIndex, word.EndIndex);
        if (Settings.Log)
        {
            // Wide characters take two columns, so shrink the padding by their count.
            var offset = WideCharacterPattern.Matches(word.Word).Count;
            var len = 10;
            offset = offset > len ? 0 : offset;
            Console.WriteLine($"==分词:{word.Word.PadRight(len - offset, '=')}==起始位置:{word.StartIndex.ToString().PadLeft(3, '=')}==结束位置{word.EndIndex.ToString().PadLeft(3, '=')}");
        }

        return token;
    }
    #endregion

    /// <summary>
    /// Advances to the next token and publishes it through the term, offset and type attributes.
    /// </summary>
    /// <returns><c>true</c> when a token was produced; <c>false</c> at end of input.</returns>
    public sealed override bool IncrementToken()
    {
        ClearAttributes();

        var word = Next();
        if (word != null)
        {
            var buffer = word.ToString();
            _termAtt.SetEmpty().Append(buffer);
            _offsetAtt.SetOffset(CorrectOffset(word.StartOffset), CorrectOffset(word.EndOffset));
            _typeAtt.Type = word.Type;
            return true;
        }

        // NOTE(review): the stream ends and disposes itself here; normally the consumer
        // does that. Kept as-is because callers may rely on it - confirm before changing.
        End();
        Dispose();
        return false;
    }

    /// <summary>
    /// Reads the whole input, segments it and prepares the filtered word list for iteration.
    /// </summary>
    public override void Reset()
    {
        base.Reset();

        _inputText = ReadToEnd(m_input);
        RemoveStopWords(_segmenter.Tokenize(_inputText, _mode));

        _iter = _wordList.GetEnumerator();
    }

    /// <summary>
    /// Refills the word list with every token whose text is not a stop word.
    /// </summary>
    private void RemoveStopWords(IEnumerable<Token> words)
    {
        _wordList.Clear();

        // Snapshot into a set: one hash lookup per token instead of a list scan.
        var stopWords = new HashSet<string>(StopWords);
        foreach (var x in words)
        {
            if (!stopWords.Contains(x.Word))
            {
                _wordList.Add(x);
            }
        }
    }
}
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/SearchOptions.cs:
--------------------------------------------------------------------------------
1 | using Lucene.Net.Search;
2 | using Masuit.LuceneEFCore.SearchEngine.Extensions;
3 | using System;
4 | using System.Collections.Generic;
5 | using System.Linq;
6 | using System.Reflection;
7 |
8 | namespace Masuit.LuceneEFCore.SearchEngine
9 | {
/// <summary>
/// Search options.
/// </summary>
public class SearchOptions
{
    /// <summary>
    /// Keywords.
    /// </summary>
    public string Keywords { get; set; }

    /// <summary>
    /// Fields the search is restricted to.
    /// </summary>
    public List<string> Fields { get; set; }

    /// <summary>
    /// Maximum number of hits to retrieve.
    /// </summary>
    public int MaximumNumberOfHits { get; set; }

    /// <summary>
    /// Per-field boost used when searching several fields.
    /// </summary>
    private readonly Dictionary<string, float> _boosts;

    /// <summary>
    /// Per-field boost used when searching several fields. Every entry of <see cref="Fields"/>
    /// without an explicit boost (field names compared case-insensitively) is given 2.0.
    /// </summary>
    internal Dictionary<string, float> Boosts
    {
        get
        {
            foreach (var field in Fields)
            {
                // Ordinal, case-insensitive: independent of the current culture.
                var known = _boosts.Keys.Any(k => string.Equals(k, field, StringComparison.OrdinalIgnoreCase));
                if (!known)
                {
                    _boosts.Add(field, 2.0f);
                }
            }

            return _boosts;
        }
    }

    /// <summary>
    /// Sort fields.
    /// </summary>
    public List<SortField> OrderBy { get; set; }

    /// <summary>
    /// Number of results to skip.
    /// </summary>
    public int? Skip { get; set; }

    /// <summary>
    /// Number of results to take.
    /// </summary>
    public int? Take { get; set; }

    /// <summary>
    /// Document type.
    /// </summary>
    public Type Type { get; set; }

    /// <summary>
    /// Match threshold, 0-1; larger values give more precise results.
    /// </summary>
    public float Score { get; set; } = 0.5f;

    /// <summary>
    /// Filter.
    /// </summary>
    public Filter Filter { get; set; }

    /// <summary>
    /// Search options.
    /// </summary>
    /// <param name="keywords">Keywords.</param>
    /// <param name="fields">Comma-separated fields to search.</param>
    /// <param name="maximumNumberOfHits">Maximum number of hits to retrieve.</param>
    /// <param name="boosts">Per-field boost used when searching several fields.</param>
    /// <param name="type">Document type.</param>
    /// <param name="orderBy">Comma-separated sort fields.</param>
    /// <param name="skip">Number of results to skip.</param>
    /// <param name="take">Number of results to take.</param>
    /// <exception cref="ArgumentException"><paramref name="keywords"/> is null or blank.</exception>
    public SearchOptions(string keywords, string fields, int maximumNumberOfHits = 1000, Dictionary<string, float> boosts = null, Type type = null, string orderBy = null, int? skip = null, int? take = null)
    {
        if (string.IsNullOrWhiteSpace(keywords))
        {
            throw new ArgumentException("搜索关键词不能为空!");
        }

        Keywords = keywords;
        MaximumNumberOfHits = maximumNumberOfHits;
        Skip = skip;
        Take = take;
        _boosts = boosts ?? new Dictionary<string, float>();
        Type = type;
        Fields = new List<string>();
        OrderBy = new List<SortField>()
        {
            SortField.FIELD_SCORE
        };

        // Fields to search; empty entries (e.g. from "a,,b") are dropped.
        if (!string.IsNullOrEmpty(fields))
        {
            Fields.AddRange(fields.RemoveCharacters(" ").Split(',').Where(f => f.Length > 0));
        }

        // Sort rules, appended after the default sort by score.
        if (!string.IsNullOrEmpty(orderBy))
        {
            OrderBy.AddRange(orderBy.RemoveCharacters(" ").Split(',').Where(f => f.Length > 0).Select(sortField => new SortField(sortField, SortFieldType.STRING)));
        }
    }

    /// <summary>
    /// Search options for a paged search.
    /// </summary>
    /// <param name="keywords">Keywords.</param>
    /// <param name="page">Page number; values below 1 are treated as 1.</param>
    /// <param name="size">Page size; values below 1 are treated as 1.</param>
    /// <param name="fields">Comma-separated fields to search.</param>
    public SearchOptions(string keywords, int page, int size, string fields) : this(keywords, fields, int.MaxValue, null, null, null, null, null)
    {
        ApplyPaging(page, size);
    }

    /// <summary>
    /// Search options for a paged search over every indexed field of a type.
    /// </summary>
    /// <param name="keywords">Keywords.</param>
    /// <param name="page">Page number; values below 1 are treated as 1.</param>
    /// <param name="size">Page size; values below 1 are treated as 1.</param>
    /// <param name="t">The type whose <see cref="LuceneIndexAttribute"/> properties are searched.</param>
    /// <exception cref="ArgumentNullException"><paramref name="t"/> is null.</exception>
    public SearchOptions(string keywords, int page, int size, Type t) : this(keywords, GetIndexedFieldNames(t), int.MaxValue, null, null, null, null, null)
    {
        ApplyPaging(page, size);
    }

    /// <summary>
    /// Sets the boost of a field.
    /// </summary>
    /// <param name="field">The field name.</param>
    /// <param name="boost">The boost.</param>
    public void SetBoosts(string field, float boost)
    {
        _boosts[field] = boost;
    }

    /// <summary>
    /// Clamps page and size to at least 1 and derives <see cref="Skip"/> / <see cref="Take"/>.
    /// </summary>
    private void ApplyPaging(int page, int size)
    {
        page = Math.Max(page, 1);
        size = Math.Max(size, 1);

        // Multiply as long so a huge page*size saturates instead of wrapping negative.
        Skip = (int)Math.Min((long)(page - 1) * size, int.MaxValue);
        Take = size;
    }

    /// <summary>
    /// Builds the comma-separated list of index field names declared on <paramref name="t"/>.
    /// Uses the attribute's Name when set, otherwise the property name, so the list
    /// follows the same rule as the field names the documents are indexed under.
    /// </summary>
    private static string GetIndexedFieldNames(Type t)
    {
        if (t == null)
        {
            throw new ArgumentNullException(nameof(t));
        }

        var names = t.GetProperties()
            .SelectMany(p => p.GetCustomAttributes<LuceneIndexAttribute>()
                .Select(a => string.IsNullOrEmpty(a.Name) ? p.Name : a.Name))
            .Distinct();
        return string.Join(",", names);
    }
}
173 | }
174 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneIndexer.cs:
--------------------------------------------------------------------------------
1 | using Lucene.Net.Analysis;
2 | using Lucene.Net.Index;
3 | using Lucene.Net.Store;
4 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
5 | using System;
6 | using System.Collections.Generic;
7 | using System.Linq;
8 |
9 | namespace Masuit.LuceneEFCore.SearchEngine
10 | {
/// <summary>
/// Writes entities to, and removes them from, a Lucene index.
/// </summary>
public class LuceneIndexer : ILuceneIndexer
{
    /// <summary>
    /// Index directory.
    /// </summary>
    private readonly Directory _directory;

    /// <summary>
    /// Index analyzer.
    /// </summary>
    private readonly Analyzer _analyzer;

    /// <summary>
    /// Constructor.
    /// </summary>
    /// <param name="directory">Index directory.</param>
    /// <param name="analyzer">Index analyzer.</param>
    public LuceneIndexer(Directory directory, Analyzer analyzer)
    {
        _directory = directory;
        _analyzer = analyzer;
    }

    /// <summary>
    /// Add an entity to the index.
    /// </summary>
    /// <param name="entity">The entity.</param>
    public void Add(ILuceneIndexable entity)
    {
        Update(new LuceneIndexChange(entity, LuceneIndexState.Added));
    }

    /// <summary>
    /// Create the index.
    /// </summary>
    /// <param name="entities">The entities to index.</param>
    /// <param name="recreate">Whether to wipe the existing index first.</param>
    public void CreateIndex(IEnumerable<ILuceneIndexable> entities, bool recreate = true)
    {
        using var writer = CreateWriter();

        // Wipe and rebuild.
        if (recreate)
        {
            writer.DeleteAll();
            writer.Commit();
        }

        // Add every entity to the index.
        foreach (var entity in entities)
        {
            writer.AddDocument(entity.ToDocument());
        }

        writer.Flush(true, true);
    }

    /// <summary>
    /// Remove an entity from the index.
    /// </summary>
    /// <param name="entity">The entity.</param>
    public void Delete(ILuceneIndexable entity)
    {
        Update(new LuceneIndexChange(entity, LuceneIndexState.Removed));
    }

    /// <summary>
    /// Remove entities from the index.
    /// </summary>
    /// <param name="entries">The entities.</param>
    public void Delete<T>(IList<T> entries) where T : ILuceneIndexable
    {
        var set = new LuceneIndexChangeset
        {
            Entries = entries.Select(e => new LuceneIndexChange(e, LuceneIndexState.Removed)).ToList()
        };
        Update(set);
    }

    /// <summary>
    /// Remove everything from the index. Failures while deleting are written to the
    /// console and not rethrown.
    /// </summary>
    /// <param name="commit">Whether to commit explicitly.</param>
    public void DeleteAll(bool commit = true)
    {
        using var writer = CreateWriter();
        try
        {
            writer.DeleteAll();
            if (commit)
            {
                writer.Commit();
            }
            writer.Flush(true, true);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }

    /// <summary>
    /// Update an entity in the index.
    /// </summary>
    /// <param name="entity">The entity.</param>
    public void Update(ILuceneIndexable entity)
    {
        Update(new LuceneIndexChange(entity, LuceneIndexState.Updated));
    }

    /// <summary>
    /// Apply a single change to the index.
    /// </summary>
    /// <param name="change">The change.</param>
    public void Update(LuceneIndexChange change)
    {
        var changeset = new LuceneIndexChangeset(change);
        Update(changeset);
    }

    /// <summary>
    /// Apply a set of changes to the index. Documents are located for deletion solely by
    /// their IndexId term.
    /// </summary>
    /// <param name="changeset">The changes.</param>
    public void Update(LuceneIndexChangeset changeset)
    {
        using var writer = CreateWriter();
        foreach (var change in changeset.Entries)
        {
            switch (change.State)
            {
                case LuceneIndexState.Removed:
                    writer.DeleteDocuments(new Term("IndexId", change.Entity.IndexId));
                    break;

                case LuceneIndexState.Added:
                case LuceneIndexState.Updated:
                    // Delete-then-add so an entity never ends up indexed twice.
                    writer.DeleteDocuments(new Term("IndexId", change.Entity.IndexId));
                    writer.AddDocument(change.Entity.ToDocument());
                    break;
            }
        }

        writer.Flush(true, changeset.HasDeletes);
        writer.Commit();
    }

    /// <summary>
    /// Number of documents in the index.
    /// </summary>
    /// <returns>The document count, or 0 when no index exists yet.</returns>
    public int Count()
    {
        try
        {
            // Dispose the reader; leaving it open keeps the index files open on every call.
            using var reader = DirectoryReader.Open(_directory);
            return reader.NumDocs;
        }
        catch (IndexNotFoundException ex)
        {
            _directory.ClearLock("write.lock");
            Console.WriteLine(ex.Message);
            return 0;
        }
    }

    /// <summary>
    /// Opens an index writer on the configured directory and analyzer.
    /// </summary>
    private IndexWriter CreateWriter()
    {
        var config = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, _analyzer);
        return new IndexWriter(_directory, config);
    }
}
183 | }
184 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneIndexSearcher.cs:
--------------------------------------------------------------------------------
1 | using JiebaNet.Segmenter;
2 | using Lucene.Net.Analysis;
3 | using Lucene.Net.Documents;
4 | using Lucene.Net.Index;
5 | using Lucene.Net.QueryParsers.Classic;
6 | using Lucene.Net.Search;
7 | using Lucene.Net.Store;
8 | using Masuit.LuceneEFCore.SearchEngine.Extensions;
9 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
10 | using Masuit.LuceneEFCore.SearchEngine.Linq;
11 | using Microsoft.Extensions.Caching.Memory;
12 | using System;
13 | using System.Collections.Generic;
14 | using System.Diagnostics;
15 | using System.Linq;
16 | using System.Linq.Expressions;
17 | using System.Text.RegularExpressions;
18 | using TinyPinyin;
19 |
20 | namespace Masuit.LuceneEFCore.SearchEngine
21 | {
22 | public class LuceneIndexSearcher : ILuceneIndexSearcher
23 | {
24 | private readonly Directory _directory;
25 | private readonly Analyzer _analyzer;
26 | private readonly IMemoryCache _memoryCache;
27 |
28 | /// <summary>
29 | /// Creates a searcher that reads from the given index directory.
30 | /// </summary>
31 | /// <param name="directory">The index directory to search.</param>
32 | /// <param name="analyzer">The analyzer handed to the query parsers.</param>
33 | /// <param name="memoryCache">The memory cache used to store keyword segmentation results.</param>
34 | public LuceneIndexSearcher(Directory directory, Analyzer analyzer, IMemoryCache memoryCache)
35 | {
36 |     // Only the dependencies are stored here; the index is not opened by the constructor.
37 |     (_directory, _analyzer, _memoryCache) =
38 |         (directory, analyzer, memoryCache);
39 | }
40 |
41 | /// <summary>
42 | /// Splits a search phrase into candidate search terms: the phrase itself, quoted phrases, "-" exclusions, Chinese runs, English words, letter+digit tokens, Jieba tokens, synonyms and pinyin matches.
43 | /// </summary>
44 | /// <param name="keyword">The raw search phrase. NOTE(review): null is not checked and would fail on the length test below — confirm callers never pass null.</param>
45 | /// <returns>The phrase alone when it has two characters or fewer; otherwise at most ten candidates, longest first, taken from the memory cache when an entry exists.</returns>
46 | public List CutKeywords(string keyword)
47 | {
48 | if (keyword.Length <= 2) // very short phrases are returned unsegmented and are not cached
49 | {
50 | return new List
51 | {
52 | keyword
53 | };
54 | }
55 |
56 | keyword = keyword.Replace("AND ", "+").Replace("NOT ", "-").Replace("OR ", " "); // rewrite the operator words AND/NOT/OR as "+", "-" and a space
57 | return _memoryCache.GetOrCreate(keyword, entry => // the rewritten phrase is the cache key
58 | {
59 | entry.AbsoluteExpiration = DateTimeOffset.Now.AddHours(1); // the cached list expires one hour after creation
60 | var list = new HashSet
61 | {
62 | keyword
63 | };
64 | list.AddRange(Regex.Matches(keyword, @""".+""").Cast().Select(m =>
65 | {
66 | keyword = keyword.Replace(m.Value, ""); // remove the match from the phrase seen by the following passes
67 | return m.Value;
68 | }));// quoted phrases: terms that must be present
69 | list.AddRange(Regex.Matches(keyword, @"\s-.+\s?").Cast().Select(m =>
70 | {
71 | keyword = keyword.Replace(m.Value, ""); // remove the match from the phrase seen by the following passes
72 | return m.Value.Trim();
73 | }));// "-" prefixed terms: terms that must not be present
74 | list.AddRange(Regex.Matches(keyword, @"[\u4e00-\u9fa5]+").Cast().Select(m => m.Value));// runs of Chinese characters
75 | list.AddRange(Regex.Matches(keyword, @"\p{P}?[A-Z]*[a-z]*[\p{P}|\p{S}]*").Cast().Select(m => m.Value));// English words, optionally with adjacent punctuation or symbols
76 | list.AddRange(Regex.Matches(keyword, "([A-z]+)([0-9.]+)").Cast().SelectMany(m => m.Groups.Cast().Select(g => g.Value)));// letters followed by digits/dots; adds the whole match and each group. NOTE(review): [A-z] also matches the ASCII characters between 'Z' and 'a'
77 | list.AddRange(new JiebaSegmenter().Cut(keyword, true));// Jieba segmentation
78 | list.RemoveWhere(s => s.Length < 2); // drop candidates shorter than two characters
79 | list.AddRange(KeywordsManager.SynonymWords.Where(t => list.Contains(t.key) || list.Contains(t.value)).SelectMany(t => new[] { t.key, t.value })); // add both words of every synonym pair that contains an existing candidate
80 | var pinyins = new HashSet();
81 | foreach (var s in list.Select(s => Regex.Replace(s, @"\p{P}|\p{S}", "")).Distinct()) // distinct candidates with punctuation and symbols removed
82 | {
83 | if (!pinyins.Contains(s))
84 | {
85 | pinyins.AddRange(KeywordsManager.PinyinsLookup[PinyinHelper.GetPinyin(s)]); // entries registered under the pinyin of the candidate
86 | }
87 |
88 | var lower = s.ToLower();
89 | if (KeywordsManager.PinyinsLookup.Contains(lower)) // the lower-cased candidate may itself be a lookup key
90 | {
91 | pinyins.AddRange(KeywordsManager.PinyinsLookup[lower]);
92 | }
93 | }
94 |
95 | return list.Union(pinyins).OrderByDescending(s => s.Length).Take(10).Select(s => s.Trim('[', ']', '{', '}', '(', ')')).ToList(); // ten longest candidates, with enclosing brackets, braces and parentheses trimmed
96 | });
97 | }
98 |
99 | /// <summary>
100 | /// Builds a boolean query from the segmented keywords: quoted terms are required, "-" terms are excluded, all others are optional fuzzy matches.
101 | /// </summary>
102 | /// <param name="parser">The multi-field parser used to parse each individual term.</param>
103 | /// <param name="keywords">The raw search phrase; it is segmented with <see cref="CutKeywords"/>.</param>
104 | /// <returns>The combined query. A <c>ParseException</c> thrown by the fallback parse is not handled here.</returns>
105 | private BooleanQuery GetFuzzyquery(MultiFieldQueryParser parser, string keywords)
106 | {
107 |     var finalQuery = new BooleanQuery();
108 |     foreach (var term in CutKeywords(keywords))
109 |     {
110 |         try
111 |         {
112 |             if (term.StartsWith("\"", StringComparison.Ordinal))
113 |             {
114 |                 finalQuery.Add(parser.Parse(term.Trim('"')), Occur.MUST);
115 |             }
116 |             else if (term.StartsWith("-", StringComparison.Ordinal))
117 |             {
118 |                 // Strip the "-": parsing it would yield a purely negative sub-query, and MUST_NOT of that excludes nothing.
119 |                 finalQuery.Add(parser.Parse(term.TrimStart('-')), Occur.MUST_NOT);
120 |             }
121 |             else
122 |             {
123 |                 finalQuery.Add(parser.Parse(term.Replace("~", "") + "~"), Occur.SHOULD);
124 |             }
125 |         }
126 |         catch (ParseException)
127 |         {
128 |             // Fallback: retry the term as an optional clause with all punctuation and symbols removed.
129 |             finalQuery.Add(parser.Parse(Regex.Replace(term, @"\p{P}|\p{S}", "")), Occur.SHOULD);
130 |         }
131 |     }
132 |     return finalQuery;
133 | }
134 |
135 | /// <summary>
136 | /// Opens a reader on the index directory, runs the query described by the options and returns the filtered, paged hits.
137 | /// </summary>
138 | /// <param name="options">Search options (keywords, fields, boosts, sorting, filter, score threshold, type and paging).</param>
139 | /// <param name="safeSearch">When <c>true</c>, the keywords are escaped with <c>QueryParserBase.Escape</c> before parsing.</param>
140 | /// <returns>The matching documents with their scores; <c>TotalHits</c> is the filtered count before Skip/Take.</returns>
141 | private ILuceneSearchResultCollection PerformSearch(SearchOptions options, bool safeSearch)
142 | {
143 | // result collection that is filled and returned
144 | ILuceneSearchResultCollection results = new LuceneSearchResultCollection();
145 | using var reader = DirectoryReader.Open(_directory);
146 | var searcher = new IndexSearcher(reader);
147 | Query query;
148 |
149 | // safe search: escape the query-syntax characters in the keywords
150 | if (safeSearch)
151 | {
152 | options.Keywords = QueryParserBase.Escape(options.Keywords); // NOTE: this overwrites Keywords on the caller's options object
153 | }
154 |
155 | if (options.Fields.Count == 1)
156 | {
157 | // single-field search: the keywords are handed to the parser as-is
158 | var queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields[0], _analyzer);
159 | query = queryParser.Parse(options.Keywords);
160 | }
161 | else
162 | {
163 | // multi-field search: the keywords are segmented and combined by GetFuzzyquery
164 | var queryParser = new MultiFieldQueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields.ToArray(), _analyzer, options.Boosts);
165 | query = GetFuzzyquery(queryParser, options.Keywords);
166 | }
167 |
168 | // sort order and minimum-score predicate
169 | var sort = new Sort(options.OrderBy.ToArray());
170 | Expression> where = m => m.Score >= options.Score;
171 | if (options.Type != null)
172 | {
173 | // when a type is requested, keep only documents whose stored "Type" field equals its assembly-qualified name
174 | where = where.And(m => options.Type.AssemblyQualifiedName == searcher.Doc(m.Doc).Get("Type"));
175 | }
176 |
177 | var matches = searcher.Search(query, options.Filter, options.MaximumNumberOfHits, sort, true, true).ScoreDocs.Where(where.Compile()); // deferred filter over the returned ScoreDocs
178 | results.TotalHits = matches.Count(); // enumerates the filtered hits once to count them; NOTE(review): they are enumerated again by ToList() below
179 |
180 | // paging
181 | if (options.Skip.HasValue)
182 | {
183 | matches = matches.Skip(options.Skip.Value);
184 | }
185 | if (options.Take.HasValue)
186 | {
187 | matches = matches.Take(options.Take.Value);
188 | }
189 |
190 | var docs = matches.ToList();
191 |
192 | // build the result collection
193 | foreach (var match in docs)
194 | {
195 | var doc = searcher.Doc(match.Doc);
196 | results.Results.Add(new LuceneSearchResult()
197 | {
198 | Score = match.Score,
199 | Document = doc
200 | });
201 | }
202 |
203 | return results;
204 | }
205 |
206 | /// <summary>
207 | /// Searches for a single record; <c>MaximumNumberOfHits</c> on the supplied options is set to 1.
208 | /// </summary>
209 | /// <param name="options">Search options; note that the instance is modified.</param>
210 | /// <returns>The document of the first result, or <c>null</c> when the result list is empty.</returns>
211 | public Document ScoredSearchSingle(SearchOptions options)
212 | {
213 |     options.MaximumNumberOfHits = 1;
214 |     var results = ScoredSearch(options);
215 |     return results.Results.FirstOrDefault()?.Document; // TotalHits is counted before paging, so Results can be empty even when it is positive
216 | }
217 |
218 | /// <summary>
219 | /// Runs a scored search and records how long it took.
220 | /// </summary>
221 | /// <param name="options">Search options.</param>
222 | /// <returns>The search results, with <c>Elapsed</c> set to the elapsed milliseconds.</returns>
223 | public ILuceneSearchResultCollection ScoredSearch(SearchOptions options)
224 | {
225 |     var timer = Stopwatch.StartNew();
226 |     ILuceneSearchResultCollection found;
227 |     try
228 |     {
229 |         found = PerformSearch(options, false);
230 |     }
231 |     catch (ParseException)
232 |     {
233 |         found = PerformSearch(options, true); // the keywords could not be parsed: retry once with safe search enabled
234 |     }
235 |
236 |     timer.Stop();
237 |     found.Elapsed = timer.ElapsedMilliseconds;
238 |     return found;
239 | }
240 |
241 | /// <summary>
242 | /// Runs a scored search from individual arguments by packing them into a new SearchOptions instance.
243 | /// </summary>
244 | /// <param name="keywords">Search keywords.</param>
245 | /// <param name="fields">Fields the search is restricted to (a single string; presumably comma-separated — confirm against SearchOptions).</param>
246 | /// <param name="maximumNumberOfHits">Maximum number of hits to retrieve.</param>
247 | /// <param name="boosts">Per-field boosts used in multi-field searches.</param>
248 | /// <param name="type">Document type to restrict the results to.</param>
249 | /// <param name="sortBy">Sort rule.</param>
250 | /// <param name="skip">Number of results to skip.</param>
251 | /// <param name="take">Number of results to take.</param>
252 | /// <returns>The result of the options-based ScoredSearch overload for the constructed options.</returns>
253 | public ILuceneSearchResultCollection ScoredSearch(string keywords, string fields, int maximumNumberOfHits, Dictionary boosts, Type type, string sortBy, int? skip, int? take)
254 | {
255 | var options = new SearchOptions(keywords, fields, maximumNumberOfHits, boosts, type, sortBy, skip, take);
256 | return ScoredSearch(options);
257 | }
258 | }
259 | }
260 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### 基于EntityFrameworkCore和Lucene.NET实现的全文检索搜索引擎(码数鲁森库)
2 |

3 | **仅70KB的代码量!新手友好。** 基于EntityFrameworkCore和Lucene.NET实现的全文检索搜索引擎,可轻松实现高性能的全文检索,支持添加自定义词库,自定义同义词和同音词,搜索分词默认支持同音词搜索。可以轻松应用于任何基于EntityFrameworkCore的实体框架数据库。
4 | **`注意:该项目仅适用于单体项目的简单搜索场景,不适用于分布式应用以及复杂的搜索场景,分布式应用请考虑使用大型的搜索引擎中间件做支撑,如:ElasticSearch,或考虑数据库的正则表达式查询`**
5 |
6 | [官网页面](http://masuit.com/1437) | [实际应用案例体验](https://masuit.com/s?wd=会声会影+TeamViewer)
7 |
8 | 项目开发模式:日常代码积累+网络搜集
9 |
10 | [](https://github.com/996icu/996.ICU/blob/master/LICENSE) [](https://www.nuget.org/packages/Masuit.LuceneEFCore.SearchEngine_string) [](https://www.nuget.org/packages/Masuit.LuceneEFCore.SearchEngine_string)  
11 |
12 | ### 请注意:
13 | 一旦使用本开源项目以及引用了本项目或包含本项目代码的公司因为违反劳动法(包括但不限定非法裁员、超时用工、雇佣童工等)在任何法律诉讼中败诉的,一经发现,本项目作者有权利追讨本项目的使用费(**公司工商注册信息认缴金额的2-5倍作为本项目的授权费**),或者直接不允许使用任何包含本项目的源代码!任何性质的`外包公司`或`996公司`需要使用本类库,请联系作者进行商业授权!其他企业或个人可随意使用不受限。996那叫用人,也是废人。8小时工作制才可以让你有时间自我提升,将来有竞争力。反对996,人人有责!
14 |
15 | ⭐⭐⭐喜欢这个项目的话就Star、Fork、Follow素质三连关♂注一下吧⭐⭐⭐
16 |
17 | ## Stargazers over time
18 |
19 |
20 | ### 项目特点
21 | 1. 基于原生Lucene实现,轻量高效,毫秒级响应
22 | 2. 与EFCore无缝接入,配置代码少,可轻松接入现有项目
23 | 3. 支持添加自定义词库,支持同义词和同音词检索,支持添加自定义同义词和同音词
24 | 4. 不支持分布式应用,若你能解决分布式场景中索引库的同步问题,可以选择
25 |
26 | ### 为什么没有集成到Masuit.Tools这个库?
27 | 因为这个项目又引入了几个Lucene相关的库,如果集成到[Masuit.Tools](https://github.com/ldqk/Masuit.Tools "Masuit.Tools"),这必将给原来的项目增加了更多的引用包,使用过程中也有可能没有使用Lucene的场景,这就造成了项目更加的臃肿,所以做了个新的项目。
28 | ### 为什么有这个库?现成的ElasticSearch不好么?
29 | ES确实很好用,但我想的是还有很多的小站没必要上那么重量级的中间件,于是原生lucene库不失为一种好的选择,然而原生LuceneAPI的学习成本也相对较高,所以专门封装了这个库。
30 | ### 快速开始
31 | #### EntityFrameworkCore基架搭建
32 | 新建项目,并安装EntityFrameworkCore相关库以及全文检索包:
33 |
34 | 根据你的项目情况,选择对应的后缀版本,提供了4个主键版本的库,后缀为int的代表主键是基于int自增类型的,后缀为Guid的代表主键是基于Guid类型的...
35 | ```shell
36 | PM> Install-Package Masuit.LuceneEFCore.SearchEngine_int
37 | PM> Install-Package Masuit.LuceneEFCore.SearchEngine_long
38 | PM> Install-Package Masuit.LuceneEFCore.SearchEngine_string
39 | PM> Install-Package Masuit.LuceneEFCore.SearchEngine_Guid
40 | ```
41 | 按照套路我们需要首先搭建好EntityFrameworkCore的基架,即数据库上下文和实体对象;
42 |
43 | 准备数据库上下文对象:
44 | ```csharp
45 | public class DataContext : DbContext
46 | {
47 | public DataContext(DbContextOptions options) : base(options){}
48 | protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
49 | {
50 | base.OnConfiguring(optionsBuilder);
51 | optionsBuilder.UseQueryTrackingBehavior(QueryTrackingBehavior.TrackAll);
52 | }
53 | public virtual DbSet Post { get; set; }
54 | }
55 | ```
56 | 准备实体对象,这里开始需要注意了,要想这个库的数据被全文检索,需要符合两个条件:
57 | 1. 实体必须继承自LuceneIndexableBaseEntity;
58 | 2. 需要被检索的字段需要被LuceneIndexAttribute所标记。
59 | ```csharp
60 | ///
61 | /// 文章
62 | ///
63 | [Table("Post")]
64 | public class Post : LuceneIndexableBaseEntity
65 | {
66 | public Post()
67 | {
68 | PostDate = DateTime.Now;
69 | }
70 |
71 | ///
72 | /// 标题
73 | ///
74 | [Required(ErrorMessage = "文章标题不能为空!"), LuceneIndex]
75 | public string Title { get; set; }
76 |
77 | ///
78 | /// 作者
79 | ///
80 | [Required, MaxLength(24, ErrorMessage = "作者名最长支持24个字符!"), LuceneIndex]
81 | public string Author { get; set; }
82 |
83 | ///
84 | /// 内容
85 | ///
86 | [Required(ErrorMessage = "文章内容不能为空!"), LuceneIndex(IsHtml = true)]
87 | public string Content { get; set; }
88 |
89 | ///
90 | /// 发表时间
91 | ///
92 | public DateTime PostDate { get; set; }
93 |
94 | ///
95 | /// 作者邮箱
96 | ///
97 | [Required(ErrorMessage = "作者邮箱不能为空!"), LuceneIndex]
98 | public string Email { get; set; }
99 |
100 | ///
101 | /// 标签
102 | ///
103 | [StringLength(256, ErrorMessage = "标签最大允许255个字符"), LuceneIndex]
104 | public string Label { get; set; }
105 |
106 | ///
107 | /// 文章关键词
108 | ///
109 | [StringLength(256, ErrorMessage = "文章关键词最大允许255个字符"), LuceneIndex]
110 | public string Keyword { get; set; }
111 |
112 | }
113 | ```
114 | LuceneIndexAttribute对应的4个自定义参数:
115 | 1. Name:自定义索引字段名,默认为空;
116 | 2. Index:索引行为,默认为Field.Index.ANALYZED;
117 | 3. Store:是否被存储到索引库,默认为Field.Store.YES;
118 | 4. IsHtml:是否是html,默认为false,若标记为true,则在索引解析时会先清空其中的html标签。
119 | #### 为什么实体类要继承LuceneIndexableBaseEntity?
120 | LuceneIndexableBaseEntity源代码如下:
121 | ```csharp
122 | ///
123 | /// 需要被索引的实体基类
124 | ///
125 | public abstract class LuceneIndexableBaseEntity : ILuceneIndexable
126 | {
127 | ///
128 | /// 主键id
129 | ///
130 | [LuceneIndex(Name = "Id", Store = Field.Store.YES, Index = Field.Index.NOT_ANALYZED), Key]
131 | public int Id { get; set; }
132 |
133 | ///
134 | /// 索引唯一id
135 | ///
136 | [LuceneIndex(Name = "IndexId", Store = Field.Store.YES, Index = Field.Index.NOT_ANALYZED)]
137 | [NotMapped]
138 | public string IndexId
139 | {
140 | get => GetType().Name + ":" + Id;
141 | set
142 | {
143 | }
144 | }
145 |
146 | ///
147 | /// 转换成Lucene文档
148 | ///
149 | ///
150 | public virtual Document ToDocument()
151 | {
152 | // 将实体对象转换成Lucene文档的逻辑
153 | }
154 | }
155 | ```
156 | 实体继承自LuceneIndexableBaseEntity后,方便封装的Lucene可以直接调用ToDocument方法进行存储,同时,主键Id和IndexId需要参与Lucene索引文档的唯一标识(但IndexId不会生成到数据库)。
157 | #### 搜索引擎配置、创建索引、导入自定义词库等
158 | Startup.cs
159 | ```csharp
160 | public void ConfigureServices(IServiceCollection services)
161 | {
162 | // ...
163 | services.AddDbContext(db =>
164 | {
165 | db.UseSqlServer("Data Source=.;Initial Catalog=MyBlogs;Integrated Security=True");
166 | });// 配置数据库上下文
167 | services.AddSearchEngine(new LuceneIndexerOptions()
168 | {
169 | Path = "lucene"
170 | });// 依赖注入搜索引擎,并配置索引库路径
171 | // ...
172 | }
173 |
174 | public void Configure(IApplicationBuilder app, IHostingEnvironment env, ISearchEngine searchEngine, LuceneIndexerOptions luceneIndexerOptions)
175 | {
176 | // ...
177 | // 导入自定义词库,支持中英文词
178 | KeywordsManager.AddWords("面向对象编程语言");
179 | KeywordsManager.AddWords("懒得勤快");
180 | KeywordsManager.AddWords("码数科技");
181 | KeywordsManager.AddWords("Tree New Bee");
182 | KeywordsManager.AddWords("男♂能可贵");
183 |
184 | // 导入自定义同义词,支持中英文词
185 | KeywordsManager.AddSynonyms("RDM","Redis Desktop Manager");
186 | KeywordsManager.AddSynonyms("RDM","Remote Desktop Manager");
187 | KeywordsManager.AddSynonyms("VS","Visual Studio");
188 | KeywordsManager.AddSynonyms("Visual Studio","宇宙最强IDE");
189 | KeywordsManager.AddSynonyms("VS","Video Studio");
190 | KeywordsManager.AddSynonyms("难能可贵","男♂能可贵");
191 | // 提问:以上示例配置了近义词:VS->Visual Studio和Visual Studio->宇宙最强IDE?那么分词时VS是否能够找到间接近义词“宇宙最强IDE”?
192 | // 答案是不能,为什么不能?近义词查找并没有实现递归查找,为什么不做递归查找?因为近义词库是完全不可控的动态配置,如果做了递归查找,词库的配置不当很有可能造成死递归,所以,如果需要让VS和“宇宙最强IDE”同义,则需要再单独配置
193 |
194 | // 初始化索引库,建议结合定时任务使用,定期刷新索引库
195 | string lucenePath = Path.Combine(env.ContentRootPath, luceneIndexerOptions.Path);
196 | if (!Directory.Exists(lucenePath) || Directory.GetFiles(lucenePath).Length < 1)
197 | {
198 | // 创建索引
199 | Console.WriteLine("索引库不存在,开始自动创建Lucene索引库...");
200 | searchEngine.CreateIndex(new List()
201 | {
202 | nameof(DataContext.Post),
203 | });
204 | var list = searchEngine.Context.Post.Where(i => i.Status != Status.Pended).ToList(); // 删除不需要被索引的数据
205 | searchEngine.LuceneIndexer.Delete(list);
206 | Console.WriteLine("索引库创建完成!");
207 | }
208 | // ...
209 | }
210 |
211 | ```
212 | **同义词支持正向和反向查找,如配置了:`KeywordsManager.AddSynonyms("地大物博","弟大勿勃")`和`KeywordsManager.AddSynonyms("弟大勿勃","地大物博")`是等效的,只需要其中一条即可**
213 | HomeController.cs
214 | ```csharp
215 | [Route("[controller]/[action]")]
216 | public class HomeController : Controller
217 | {
218 | private readonly ISearchEngine _searchEngine;
219 | private readonly ILuceneIndexer _luceneIndexer;
220 | public HomeController(ISearchEngine searchEngine, ILuceneIndexer luceneIndexer)
221 | {
222 | _searchEngine = searchEngine;
223 | _luceneIndexer = luceneIndexer;
224 | }
225 |
226 | ///
227 | /// 搜索
228 | ///
229 | /// 关键词
230 | /// 第几页
231 | /// 页大小
232 | ///
233 | [HttpGet]
234 | public async Task Index(string s, int page, int size)
235 | {
236 | //var result = _searchEngine.ScoredSearch(new SearchOptions(s, page, size, "Title,Content,Email,Author"));
237 | var result = _searchEngine.ScoredSearch(new SearchOptions(s, page, size, typeof(Post)));
238 | return Ok(result);
239 | }
240 |
241 | ///
242 | /// 创建索引
243 | ///
244 | [HttpGet]
245 | public void CreateIndex()
246 | {
247 | //_searchEngine.CreateIndex();//扫描所有数据表,创建符合条件的库的索引
248 | _searchEngine.CreateIndex(new List() { nameof(Post) });//创建指定的数据表的索引
249 | }
250 |
251 | ///
252 | /// 添加索引
253 | ///
254 | [HttpPost]
255 | public void AddIndex(Post p)
256 | {
257 | // 添加到数据库并更新索引
258 | _searchEngine.Context.Post.Add(p);
259 | _searchEngine.SaveChanges();
260 |
261 | //_luceneIndexer.Add(p); //单纯的只添加索引库
262 | }
263 |
264 | ///
265 | /// 删除索引
266 | ///
267 | [HttpDelete]
268 | public void DeleteIndex(Post post)
269 | {
270 | //从数据库删除并更新索引库
271 | Post p = _searchEngine.Context.Post.Find(post.Id);
272 | _searchEngine.Context.Post.Remove(p);
273 | _searchEngine.SaveChanges();
274 |
275 | //_luceneIndexer.Delete(p);// 单纯的从索引库移除
276 | }
277 |
278 | ///
279 | /// 更新索引库
280 | ///
281 | ///
282 | [HttpPatch]
283 | public void UpdateIndex(Post post)
284 | {
285 | //从数据库更新并同步索引库
286 | Post p = _searchEngine.Context.Post.Find(post.Id);
287 | // update...
288 | _searchEngine.Context.Post.Update(p);
289 | _searchEngine.SaveChanges();
290 |
291 | //_luceneIndexer.Update(p);// 单纯的更新索引库
292 | }
293 | }
294 | ```
295 | #### 关于更新索引
296 | 要在执行任何CRUD操作后更新索引,只需调用ISearchEngine的SaveChanges()方法,而不是直接调用DataContext的SaveChanges()。ISearchEngine的SaveChanges()会先调用DataContext的SaveChanges()方法将数据保存到数据库,然后再同步更新索引库。如果直接调用DataContext的SaveChanges()方法,则只会保存到数据库,而不会更新索引库。
297 | #### 关于搜索结果
298 | 搜索返回IScoredSearchResultCollection,其中包括执行搜索所花费的时间(Elapsed)、命中总数(TotalHits)以及结果集(Results);结果集中的每一项都包含匹配到的实体对象(Entity)及其匹配度评分(Score)。
299 |
300 | 特别注意:单元测试中使用内存RAM目录进行索引和搜索,但这仅用于测试目的,真实生产环境应使用物理磁盘的目录。
301 |
302 | #### 演示项目
303 | [点击这里](/WebSearchDemo "demo")
304 | ### 推荐项目
305 | .NET万能框架:[Masuit.Tools](https://github.com/ldqk/Masuit.Tools "Masuit.Tools")
306 |
307 | 开源博客系统:[Masuit.MyBlogs](https://github.com/ldqk/Masuit.MyBlogs "Masuit.MyBlogs")
308 |
--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/SearchEngine.cs:
--------------------------------------------------------------------------------
1 | using JiebaNet.Segmenter;
2 | using Lucene.Net.Analysis;
3 | using Lucene.Net.Documents;
4 | using Lucene.Net.Store;
5 | using Masuit.LuceneEFCore.SearchEngine.Extensions;
6 | using Masuit.LuceneEFCore.SearchEngine.Interfaces;
7 | using Microsoft.EntityFrameworkCore;
8 | using Microsoft.Extensions.Caching.Memory;
9 | using System;
10 | using System.Collections.Generic;
11 | using System.Diagnostics;
12 | using System.Linq;
13 | using System.Linq.Expressions;
14 | using System.Reflection;
15 | using System.Threading.Tasks;
16 |
17 | namespace Masuit.LuceneEFCore.SearchEngine
18 | {
19 | ///
20 | /// 搜索引擎
21 | ///
22 | ///
23 | public class SearchEngine : ISearchEngine where TContext : DbContext
24 | {
25 | /// <summary>
26 | /// The database context whose tracked changes are saved and mirrored into the index.
27 | /// </summary>
28 | public TContext Context { get; }
29 |
30 | /// <summary>
31 | /// The indexer used to write to the Lucene index.
32 | /// </summary>
33 | public ILuceneIndexer LuceneIndexer { get; }
34 |
35 | /// <summary>
36 | /// The searcher used to query the Lucene index.
37 | /// </summary>
38 | public ILuceneIndexSearcher LuceneIndexSearcher { get; }
39 |
40 | /// <summary>
41 | /// The number of documents in the index, as returned by the indexer's Count().
42 | /// </summary>
43 | public int IndexCount => LuceneIndexer.Count();
44 |
45 | /// <summary>
46 | /// Creates the search engine and builds its indexer and index searcher over the same directory and analyzer.
47 | /// </summary>
48 | /// <param name="context">The database context.</param>
49 | /// <param name="directory">The Lucene index directory shared by the indexer and the searcher.</param>
50 | /// <param name="analyzer">The analyzer shared by the indexer and the searcher.</param>
51 | /// <param name="memoryCache">The memory cache passed to the index searcher.</param>
52 | public SearchEngine(TContext context, Directory directory, Analyzer analyzer, IMemoryCache memoryCache)
53 | {
54 | Context = context;
55 | LuceneIndexer = new LuceneIndexer(directory, analyzer);
56 | LuceneIndexSearcher = new LuceneIndexSearcher(directory, analyzer, memoryCache);
57 | }
58 |
59 | /// <summary>
60 | /// Inspects the context's change tracker and converts every pending change on an indexable entity into an index change.
61 | /// </summary>
62 | /// <remarks>
63 | /// Entries in the <c>Unchanged</c> state are skipped, as are entities that do not implement
64 | /// <c>ILuceneIndexable</c> or whose type exposes no <c>ToDocument</c> method. Any entity state
65 | /// other than Added, Deleted or Modified is recorded as <c>LuceneIndexState.Unchanged</c>.
66 | /// </remarks>
67 | /// <returns>A changeset holding one entry per qualifying tracked entity.</returns>
68 | private LuceneIndexChangeset GetChangeset()
69 | {
70 |     var changeset = new LuceneIndexChangeset();
71 |     var dirtyEntries = Context.ChangeTracker.Entries().Where(x => x.State != EntityState.Unchanged);
72 |     foreach (var entry in dirtyEntries)
73 |     {
74 |         var clrType = entry.Entity.GetType();
75 |         var isIndexable = typeof(ILuceneIndexable).IsAssignableFrom(clrType) && !(clrType.GetMethod("ToDocument") is null);
76 |         if (!isIndexable)
77 |         {
78 |             continue;
79 |         }
80 |
81 |         var change = new LuceneIndexChange(entry.Entity as ILuceneIndexable)
82 |         {
83 |             State = entry.State switch
84 |             {
85 |                 EntityState.Added => LuceneIndexState.Added,
86 |                 EntityState.Deleted => LuceneIndexState.Removed,
87 |                 EntityState.Modified => LuceneIndexState.Updated,
88 |                 _ => LuceneIndexState.Unchanged
89 |             }
90 |         };
91 |
92 |         changeset.Entries.Add(change);
93 |     }
94 |
95 |     return changeset;
96 | }
97 |
98 | /// <summary>
99 | /// Rebuilds a concrete entity instance from a Lucene document, using the type name stored in the document's "Type" field.
100 | /// </summary>
101 | /// <param name="doc">The document to convert. NOTE(review): null is not checked here and would fail on the first line.</param>
102 | /// <returns>A new instance whose selected properties were populated from the document's fields.</returns>
103 | private ILuceneIndexable GetConcreteFromDocument(Document doc)
104 | {
105 | var t = Type.GetType(doc.Get("Type")); // resolve the entity type from the stored type name; NOTE(review): an unresolvable name yields null, which is not handled
106 | var obj = Expression.Lambda>(Expression.New(t.GetConstructors()[0])).Compile()(); // instantiate through the first constructor returned by reflection; NOTE(review): presumably a parameterless one — confirm
107 | foreach (var p in t.GetProperties().Where(p => p.GetCustomAttributes().Any())) // only properties carrying a matching attribute (the attribute type is not visible in this copy — presumably LuceneIndexAttribute)
108 | {
109 | p.SetValue(obj, doc.Get(p.Name, p.PropertyType)); // read the field named after the property, requesting the property's type
110 | }
111 | return obj;
112 | }
113 |
114 | /// <summary>Saves the context's pending changes to the database and then mirrors them into the index.</summary>
115 | /// <param name="index">When <c>false</c>, the index is left untouched and only the database is updated.</param>
116 | /// <returns>The value returned by the context's <c>SaveChanges()</c>, or 0 when nothing was pending.</returns>
117 | /// <remarks>The index is updated after the database save has returned.</remarks>
118 | public int SaveChanges(bool index = true)
119 | {
120 |     if (!Context.ChangeTracker.HasChanges())
121 |     {
122 |         return 0;
123 |     }
124 |
125 |     // The changeset is collected from the change tracker before the context is saved.
126 |     var pending = GetChangeset();
127 |     var affected = Context.SaveChanges();
128 |     if (pending.HasChanges && index)
129 |     {
130 |         LuceneIndexer.Update(pending);
131 |     }
132 |
133 |     return affected;
134 | }
135 |
136 | /// <summary>
137 | /// Asynchronously saves the context's pending changes to the database and then mirrors them into the index.
138 | /// </summary>
139 | /// <param name="index">Whether the index should be updated after the save.</param>
140 | /// <returns>The value produced by the context's SaveChangesAsync(), or 0 when nothing was pending.</returns>
141 | public async Task SaveChangesAsync(bool index = true)
142 | {
143 | int result = 0;
144 |
145 | if (Context.ChangeTracker.HasChanges())
146 | {
147 | // collect the changeset from the change tracker before the context is saved
148 | var changes = GetChangeset();
149 | result = await Context.SaveChangesAsync();
150 | if (changes.HasChanges && index)
151 | {
152 | LuceneIndexer.Update(changes); // the index update itself is a synchronous call
153 | }
154 | }
155 |
156 | return result;
157 | }
158 |
159 | /// <summary>
160 | /// Scans the context's public properties and passes every IQueryable-typed one to the indexer's CreateIndex. Does nothing when no indexer is available.
161 | /// </summary>
162 | public void CreateIndex()
163 | {
164 | if (LuceneIndexer == null)
165 | {
166 | return;
167 | }
168 |
169 | var properties = Context.GetType().GetProperties();
170 | foreach (var pi in properties)
171 | {
172 | if (typeof(IQueryable).IsAssignableFrom(pi.PropertyType)) // entity sets are recognised by being assignable to IQueryable
173 | {
174 | var entities = Context.GetType().GetProperty(pi.Name).GetValue(Context, null) as IQueryable; // NOTE(review): the cast's generic argument may be missing in this copy; a failed cast yields null
175 | LuceneIndexer.CreateIndex(entities, false);
176 | }
177 | }
178 | }
179 |
180 | /// <summary>Creates the index for the named entity sets only. Does nothing when no indexer is available.</summary>
181 | /// <param name="tables">Names of the context properties (entity sets) to index; matched against the property name.</param>
182 | /// <remarks>NOTE(review): a null <c>tables</c> argument is not checked.</remarks>
183 | public void CreateIndex(List tables)
184 | {
185 | if (LuceneIndexer == null)
186 | {
187 | return;
188 | }
189 |
190 | var properties = Context.GetType().GetProperties();
191 | foreach (var pi in properties)
192 | {
193 | if (typeof(IQueryable).IsAssignableFrom(pi.PropertyType) && tables.Contains(pi.Name)) // IQueryable-typed property whose name was requested
194 | {
195 | var entities = Context.GetType().GetProperty(pi.Name).GetValue(Context, null) as IQueryable; // NOTE(review): the cast's generic argument may be missing in this copy; a failed cast yields null
196 | LuceneIndexer.CreateIndex(entities, false);
197 | }
198 | }
199 | }
200 |
201 | /// <summary>
202 | /// Deletes every document from the Lucene index.
203 | /// Nothing happens when no indexer is available.
204 | /// </summary>
205 | /// <remarks>Only the indexer is called; the database context is not touched.</remarks>
206 | public void DeleteIndex()
207 |     => LuceneIndexer?.DeleteAll();
208 |
209 | /// <summary>
210 | /// Runs a search restricted to one entity type and converts the hits to entities; no score information is returned.
211 | /// </summary>
212 | /// <typeparam name="T">The entity type to search for; it must implement ILuceneIndexable.</typeparam>
213 | /// <param name="options">Search options; <c>Type</c> is overwritten with <c>typeof(T)</c>.</param>
214 | /// <returns>The converted entities, the total hit count and the elapsed time (search plus conversion) in milliseconds.</returns>
215 | public ISearchResultCollection Search(SearchOptions options)
216 | {
217 | options.Type = typeof(T);
218 | var indexResults = LuceneIndexSearcher.ScoredSearch(options);
219 | ISearchResultCollection resultSet = new SearchResultCollection
220 | {
221 | TotalHits = indexResults.TotalHits
222 | };
223 |
224 | var sw = Stopwatch.StartNew(); // times only the document-to-entity conversion
225 | foreach (var indexResult in indexResults.Results)
226 | {
227 | var entity = (T)GetConcreteFromDocument(indexResult.Document);
228 | resultSet.Results.Add(entity);
229 | }
230 |
231 | sw.Stop();
232 | resultSet.Elapsed = indexResults.Elapsed + sw.ElapsedMilliseconds; // search time plus conversion time
233 | return resultSet;
234 | }
235 |
236 | /// <summary>
237 | /// Runs a search restricted to one entity type and converts the hits to entities, each paired with its score.
238 | /// </summary>
239 | /// <typeparam name="T">The entity type to search for; it must implement ILuceneIndexable.</typeparam>
240 | /// <param name="options">Search options; <c>Type</c> is overwritten unless T is ILuceneIndexable itself.</param>
241 | /// <returns>The scored entities, the total hit count and the elapsed time (search plus conversion) in milliseconds.</returns>
242 | public IScoredSearchResultCollection ScoredSearch(SearchOptions options)
243 | {
244 | // restrict the search to T unless T is the ILuceneIndexable interface itself
245 | if (typeof(T) != typeof(ILuceneIndexable))
246 | {
247 | options.Type = typeof(T);
248 | }
249 |
250 | var indexResults = LuceneIndexSearcher.ScoredSearch(options);
251 | IScoredSearchResultCollection results = new ScoredSearchResultCollection();
252 | results.TotalHits = indexResults.TotalHits;
253 | var sw = Stopwatch.StartNew(); // times only the document-to-entity conversion
254 | foreach (var indexResult in indexResults.Results)
255 | {
256 | IScoredSearchResult result = new ScoredSearchResult();
257 | result.Score = indexResult.Score;
258 | result.Entity = (T)GetConcreteFromDocument(indexResult.Document);
259 | results.Results.Add(result);
260 | }
261 |
262 | sw.Stop();
263 | results.Elapsed = indexResults.Elapsed + sw.ElapsedMilliseconds; // search time plus conversion time
264 | return results;
265 | }
266 |
267 | /// <summary>
268 | /// Runs a scored search and returns the hits converted to entities together with their scores.
269 | /// </summary>
270 | /// <param name="options">Search options.</param>
271 | /// <returns>The scored search results.</returns>
272 | public IScoredSearchResultCollection ScoredSearch(SearchOptions options)
273 | {
274 | return ScoredSearch(options); // NOTE(review): the generic type argument of this call appears to be missing in this copy (presumably ILuceneIndexable) — confirm against the repository
275 | }
276 |
277 | /// <summary>
278 | /// Runs a search and returns the hits converted to entities, without score information.
279 | /// </summary>
280 | /// <param name="options">Search options.</param>
281 | /// <returns>The search results.</returns>
282 | public ISearchResultCollection Search(SearchOptions options)
283 | {
284 | return Search(options); // NOTE(review): the generic type argument of this call appears to be missing in this copy (presumably ILuceneIndexable) — confirm against the repository
285 | }
286 |
287 | /// <summary>
288 | /// Searches for a single record and converts it to its entity.
289 | /// </summary>
290 | /// <param name="options">Search options.</param>
291 | /// <returns>The converted entity, or <c>null</c> when the search returns no document (previously this case threw a NullReferenceException).</returns>
292 | public ILuceneIndexable SearchOne(SearchOptions options)
293 | {
294 |     return LuceneIndexSearcher.ScoredSearchSingle(options) is Document doc ? GetConcreteFromDocument(doc) : null;
295 | }
296 |
297 | /// <summary>
298 | /// Searches for a single record and returns it as <c>T</c>.
299 | /// </summary>
300 | /// <param name="options">Search options.</param>
301 | /// <returns>The converted entity cast with <c>as T</c>; <c>null</c> when the entity is not a <c>T</c>.</returns>
302 | public T SearchOne(SearchOptions options) where T : class
303 | {
304 | return GetConcreteFromDocument(LuceneIndexSearcher.ScoredSearchSingle(options)) as T; // NOTE(review): ScoredSearchSingle can return null, which GetConcreteFromDocument does not check
305 | }
306 |
307 | /// <summary>
308 | /// Imports custom words by adding each of them to a newly created JiebaSegmenter.
309 | /// </summary>
310 | /// <param name="words">The words to add. NOTE(review): whether words added to this local segmenter instance affect later segmentation depends on JiebaSegmenter internals — confirm.</param>
311 | public void ImportCustomerKeywords(IEnumerable words)
312 | {
313 | var segmenter = new JiebaSegmenter();
314 | foreach (var word in words)
315 | {
316 | segmenter.AddWord(word);
317 | }
318 | }
319 | }
320 | }
--------------------------------------------------------------------------------