├── WebSearchDemo ├── Database │ ├── Post.cs │ └── DataContext.cs ├── appsettings.json ├── appsettings.Development.json ├── WebSearchDemo.csproj.user ├── Program.cs ├── Properties │ └── launchSettings.json ├── WebSearchDemo.csproj ├── WebSearchDemo.xml ├── Controllers │ └── HomeController.cs └── Startup.cs ├── .gitignore ├── Masuit.LuceneEFCore.SearchEngine ├── Properties │ └── PublishProfiles │ │ ├── FolderProfile.pubxml.user │ │ └── FolderProfile.pubxml ├── Masuit.LuceneEFCore.SearchEngine.csproj.user ├── Interfaces │ ├── ILuceneSearchResult.cs │ ├── IScoredSearchResult.cs │ ├── ILuceneSearchResultCollection.cs │ ├── ISearchResultCollection.cs │ ├── IScoredSearchResultCollection.cs │ ├── ILuceneIndexable.cs │ ├── ILuceneIndexSearcher.cs │ ├── ILuceneIndexer.cs │ └── ISearchEngine.cs ├── JiebaAnalyzer │ ├── Settings.cs │ ├── JieBaAnalyzer.cs │ └── JieBaTokenizer.cs ├── LuceneSearchResult.cs ├── ScoredSearchResult.cs ├── LuceneIndexerOptions.cs ├── LuceneIndexState.cs ├── LuceneSearchResultCollection.cs ├── ScoredSearchResultCollection.cs ├── SearchResultCollection.cs ├── LuceneIndexAttribute.cs ├── LuceneIndexChange.cs ├── Extensions │ ├── ServiceCollectionExtension.cs │ ├── StringHelpers.cs │ └── DocumentExtension.cs ├── LuceneIndexChangeset.cs ├── Linq │ └── LinqExtension.cs ├── KeywordsManager.cs ├── LuceneIndexableBaseEntity.cs ├── Masuit.LuceneEFCore.SearchEngine.csproj ├── SearchOptions.cs ├── LuceneIndexer.cs ├── LuceneIndexSearcher.cs └── SearchEngine.cs ├── SECURITY.md ├── LICENSE ├── Masuit.LuceneEFCore.SearchEngine.sln └── README.md /WebSearchDemo/Database/Post.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldqk0/Masuit.LuceneEFCore.SearchEngine/HEAD/WebSearchDemo/Database/Post.cs -------------------------------------------------------------------------------- /WebSearchDemo/appsettings.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"Logging": { 3 | "LogLevel": { 4 | "Default": "Warning" 5 | } 6 | }, 7 | "AllowedHosts": "*" 8 | } 9 | -------------------------------------------------------------------------------- /WebSearchDemo/appsettings.Development.json: -------------------------------------------------------------------------------- 1 | { 2 | "Logging": { 3 | "LogLevel": { 4 | "Default": "Debug", 5 | "System": "Information", 6 | "Microsoft": "Information" 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | */obj 2 | */bin 3 | /.vs 4 | /WebSearchDemo/lucene 5 | /Masuit.LuceneEFCore.SearchEngine.sln.DotSettings.user 6 | /Masuit.LuceneEFCore.SearchEngine/Masuit.LuceneEFCore.SearchEngine.xml 7 | Masuit.LuceneEFCore.SearchEngine.Test 8 | **/TestClass.cs 9 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Properties/PublishProfiles/FolderProfile.pubxml.user: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Masuit.LuceneEFCore.SearchEngine.csproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | <_LastSelectedProfileId>D:\Private\Masuit.LuceneEFCore.SearchEngine\Masuit.LuceneEFCore.SearchEngine\Properties\PublishProfiles\FolderProfile.pubxml 5 | 6 | -------------------------------------------------------------------------------- /WebSearchDemo/WebSearchDemo.csproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ProjectDebugger 5 | 6 | 7 | WebSearchDemo 8 | 9 | -------------------------------------------------------------------------------- /WebSearchDemo/Program.cs: 
-------------------------------------------------------------------------------- 1 | using Microsoft.AspNetCore.Hosting; 2 | using Microsoft.Extensions.Hosting; 3 | 4 | namespace WebSearchDemo; 5 | 6 | public class Program 7 | { 8 | public static void Main(string[] args) 9 | { 10 | CreateWebHostBuilder(args).Build().Run(); 11 | } 12 | 13 | public static IHostBuilder CreateWebHostBuilder(string[] args) => 14 | Host.CreateDefaultBuilder(args).ConfigureWebHostDefaults(builder => builder.UseStartup()); 15 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneSearchResult.cs: -------------------------------------------------------------------------------- 1 | using Lucene.Net.Documents; 2 | 3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces 4 | { 5 | /// 6 | /// 搜索结果 7 | /// 8 | public interface ILuceneSearchResult 9 | { 10 | /// 11 | /// 匹配度 12 | /// 13 | float Score { get; set; } 14 | 15 | /// 16 | /// 文档 17 | /// 18 | Document Document { get; set; } 19 | } 20 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Interfaces/IScoredSearchResult.cs: -------------------------------------------------------------------------------- 1 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces 2 | { 3 | /// 4 | /// 结果项 5 | /// 6 | /// 7 | public interface IScoredSearchResult 8 | { 9 | /// 10 | /// 匹配度 11 | /// 12 | float Score { get; set; } 13 | 14 | /// 15 | /// 实体 16 | /// 17 | T Entity { get; set; } 18 | } 19 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Properties/PublishProfiles/FolderProfile.pubxml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | Release 8 | Any CPU 9 | bin\Release\netstandard2.1\publish\ 10 | FileSystem 11 | 12 | 
-------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/JiebaAnalyzer/Settings.cs: -------------------------------------------------------------------------------- 1 | namespace Masuit.LuceneEFCore.SearchEngine; 2 | 3 | /// 4 | /// JieBaAnalyzer 实例化之前使用 5 | /// 6 | public static class Settings 7 | { 8 | /// 9 | /// show log 10 | /// 11 | public static bool Log { get; set; } = false; 12 | 13 | /// 14 | /// 忽略词典,每行一词 15 | /// 16 | public static string IgnoreDictFile { get; set; } 17 | /// 18 | ///自定义词典,每行一词 19 | /// 20 | public static string UserDictFile { get; set; } 21 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/LuceneSearchResult.cs: -------------------------------------------------------------------------------- 1 | using Lucene.Net.Documents; 2 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 3 | 4 | namespace Masuit.LuceneEFCore.SearchEngine 5 | { 6 | /// 7 | /// 搜索结果 8 | /// 9 | public class LuceneSearchResult : ILuceneSearchResult 10 | { 11 | /// 12 | /// 匹配度 13 | /// 14 | public float Score { get; set; } 15 | 16 | /// 17 | /// 文档 18 | /// 19 | public Document Document { get; set; } 20 | } 21 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/ScoredSearchResult.cs: -------------------------------------------------------------------------------- 1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 2 | 3 | namespace Masuit.LuceneEFCore.SearchEngine 4 | { 5 | /// 6 | /// 搜索结果项 7 | /// 8 | /// 9 | public class ScoredSearchResult : IScoredSearchResult 10 | { 11 | /// 12 | /// 匹配度 13 | /// 14 | public float Score { get; set; } 15 | 16 | /// 17 | /// 物理实体 18 | /// 19 | public T Entity { get; set; } 20 | } 21 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/LuceneIndexerOptions.cs: 
-------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace Masuit.LuceneEFCore.SearchEngine 4 | { 5 | /// 6 | /// 索引器选项 7 | /// 8 | public class LuceneIndexerOptions 9 | { 10 | /// 11 | /// 索引路径 12 | /// 13 | public string Path { get; set; } 14 | 15 | /// 16 | /// 索引列IndexId的生成函数,(Type EntityType, any IdValue) => string IndexId 17 | /// 18 | public static Func IndexIdGenerator = (type, id) => $"{type.Name}:{id}"; 19 | 20 | } 21 | } -------------------------------------------------------------------------------- /WebSearchDemo/Database/DataContext.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.EntityFrameworkCore; 2 | 3 | namespace WebSearchDemo.Database 4 | { 5 | public class DataContext : DbContext 6 | { 7 | public DataContext(DbContextOptions options) : base(options) 8 | { 9 | 10 | } 11 | 12 | protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder) 13 | { 14 | base.OnConfiguring(optionsBuilder); 15 | optionsBuilder.UseQueryTrackingBehavior(QueryTrackingBehavior.TrackAll); 16 | } 17 | 18 | public virtual DbSet Post { get; set; } 19 | 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneSearchResultCollection.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces 4 | { 5 | /// 6 | /// 搜索结果集 7 | /// 8 | public interface ILuceneSearchResultCollection 9 | { 10 | /// 11 | /// 总条数 12 | /// 13 | int TotalHits { get; set; } 14 | 15 | /// 16 | /// 耗时 17 | /// 18 | long Elapsed { get; set; } 19 | 20 | /// 21 | /// 结果集 22 | /// 23 | IList Results { get; set; } 24 | } 25 | } -------------------------------------------------------------------------------- 
/Masuit.LuceneEFCore.SearchEngine/Interfaces/ISearchResultCollection.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces 4 | { 5 | /// 6 | /// 搜索结果集 7 | /// 8 | /// 9 | public interface ISearchResultCollection 10 | { 11 | /// 12 | /// 总条数 13 | /// 14 | int TotalHits { get; set; } 15 | 16 | /// 17 | /// 耗时 18 | /// 19 | long Elapsed { get; set; } 20 | 21 | /// 22 | /// 结果集 23 | /// 24 | IList Results { get; set; } 25 | } 26 | } -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Use this section to tell people about which versions of your project are 6 | currently being supported with security updates. 7 | 8 | | Version | Supported | 9 | | ------- | ------------------ | 10 | | 5.1.x | :white_check_mark: | 11 | | 5.0.x | :x: | 12 | | 4.0.x | :white_check_mark: | 13 | | < 4.0 | :x: | 14 | 15 | ## Reporting a Vulnerability 16 | 17 | Use this section to tell people how to report a vulnerability. 18 | 19 | Tell them where to go, how often they can expect to get an update on a 20 | reported vulnerability, what to expect if the vulnerability is accepted or 21 | declined, etc. 
22 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/LuceneIndexState.cs: -------------------------------------------------------------------------------- 1 | namespace Masuit.LuceneEFCore.SearchEngine 2 | { 3 | /// 4 | /// 索引状态枚举 5 | /// 6 | public enum LuceneIndexState 7 | { 8 | /// 9 | /// 已添加 10 | /// 11 | Added, 12 | 13 | /// 14 | /// 被删除 15 | /// 16 | Removed, 17 | 18 | /// 19 | /// 被更新 20 | /// 21 | Updated, 22 | 23 | /// 24 | /// 未作修改 25 | /// 26 | Unchanged, 27 | 28 | /// 29 | /// 不需要修改 30 | /// 31 | NotSet 32 | } 33 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Interfaces/IScoredSearchResultCollection.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces 4 | { 5 | /// 6 | /// 搜索结果集 7 | /// 8 | /// 9 | public interface IScoredSearchResultCollection 10 | { 11 | /// 12 | /// 总条数 13 | /// 14 | int TotalHits { get; set; } 15 | 16 | /// 17 | /// 耗时 18 | /// 19 | long Elapsed { get; set; } 20 | 21 | /// 22 | /// 结果集 23 | /// 24 | IList> Results { get; set; } 25 | } 26 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/LuceneSearchResultCollection.cs: -------------------------------------------------------------------------------- 1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 2 | using System.Collections.Generic; 3 | 4 | namespace Masuit.LuceneEFCore.SearchEngine 5 | { 6 | /// 7 | /// 搜索结果集 8 | /// 9 | public class LuceneSearchResultCollection : ILuceneSearchResultCollection 10 | { 11 | /// 12 | /// 结果集 13 | /// 14 | public IList Results { get; set; } = new List(); 15 | 16 | /// 17 | /// 耗时 18 | /// 19 | public long Elapsed { get; set; } 20 | 21 | /// 22 | /// 总条数 23 | /// 24 | public int TotalHits { get; set; } 25 | } 26 
| } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/ScoredSearchResultCollection.cs: -------------------------------------------------------------------------------- 1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 2 | using System.Collections.Generic; 3 | 4 | namespace Masuit.LuceneEFCore.SearchEngine 5 | { 6 | /// 7 | /// 搜索结果集 8 | /// 9 | /// 10 | public class ScoredSearchResultCollection : IScoredSearchResultCollection 11 | { 12 | /// 13 | /// 结果集 14 | /// 15 | public IList> Results { get; set; } = new List>(); 16 | 17 | /// 18 | /// 耗时 19 | /// 20 | public long Elapsed { get; set; } 21 | 22 | /// 23 | /// 总条数 24 | /// 25 | public int TotalHits { get; set; } 26 | } 27 | } -------------------------------------------------------------------------------- /WebSearchDemo/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "iisSettings": { 3 | "windowsAuthentication": false, 4 | "anonymousAuthentication": true, 5 | "iisExpress": { 6 | "applicationUrl": "http://localhost:7993", 7 | "sslPort": 0 8 | } 9 | }, 10 | "$schema": "http://json.schemastore.org/launchsettings.json", 11 | "profiles": { 12 | "IIS Express": { 13 | "commandName": "IISExpress", 14 | "launchBrowser": true, 15 | "launchUrl": "api/values", 16 | "environmentVariables": { 17 | "ASPNETCORE_ENVIRONMENT": "Development" 18 | } 19 | }, 20 | "WebSearchDemo": { 21 | "commandName": "Project", 22 | "launchUrl": "api/values", 23 | "environmentVariables": { 24 | "ASPNETCORE_ENVIRONMENT": "Development" 25 | }, 26 | "applicationUrl": "http://localhost:5000" 27 | } 28 | } 29 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/SearchResultCollection.cs: -------------------------------------------------------------------------------- 1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 2 | using 
System.Collections.Generic; 3 | 4 | namespace Masuit.LuceneEFCore.SearchEngine 5 | { 6 | /// 7 | /// 搜索结果集 8 | /// 9 | /// 10 | public class SearchResultCollection : ISearchResultCollection 11 | { 12 | /// 13 | /// 实体集 14 | /// 15 | public IList Results { get; set; } 16 | 17 | /// 18 | /// 耗时 19 | /// 20 | public long Elapsed { get; set; } 21 | 22 | /// 23 | /// 总条数 24 | /// 25 | public int TotalHits { get; set; } 26 | 27 | public SearchResultCollection() 28 | { 29 | Results = new List(); 30 | } 31 | } 32 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/LuceneIndexAttribute.cs: -------------------------------------------------------------------------------- 1 | using Lucene.Net.Documents; 2 | using System; 3 | 4 | namespace Masuit.LuceneEFCore.SearchEngine 5 | { 6 | /// 7 | /// 标记该字段可被索引 8 | /// 9 | [AttributeUsage(AttributeTargets.Property)] 10 | public class LuceneIndexAttribute : Attribute 11 | { 12 | /// 13 | /// 14 | /// 15 | public LuceneIndexAttribute() 16 | { 17 | Store = Field.Store.YES; 18 | IsHtml = false; 19 | } 20 | 21 | /// 22 | /// 索引字段名 23 | /// 24 | public string Name { get; set; } 25 | 26 | /// 27 | /// 是否被存储到索引库 28 | /// 29 | public Field.Store Store { get; set; } 30 | 31 | /// 32 | /// 是否是html 33 | /// 34 | public bool IsHtml { get; set; } 35 | } 36 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 ldqk 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished 
to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/LuceneIndexChange.cs: -------------------------------------------------------------------------------- 1 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 2 | 3 | namespace Masuit.LuceneEFCore.SearchEngine 4 | { 5 | /// 6 | /// 索引修改实体 7 | /// 8 | public class LuceneIndexChange 9 | { 10 | /// 11 | /// 实体类 12 | /// 13 | public ILuceneIndexable Entity { get; set; } 14 | 15 | /// 16 | /// 变更状态 17 | /// 18 | public LuceneIndexState State { get; set; } 19 | 20 | /// 21 | /// 构造函数 22 | /// 23 | /// 实体 24 | public LuceneIndexChange(ILuceneIndexable entity) 25 | { 26 | Entity = entity; 27 | State = LuceneIndexState.NotSet; 28 | } 29 | 30 | /// 31 | /// 构造函数 32 | /// 33 | /// 实体 34 | /// 变更状态 35 | public LuceneIndexChange(ILuceneIndexable entity, LuceneIndexState state) 36 | { 37 | Entity = entity; 38 | State = state; 39 | } 40 | } 41 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/JiebaAnalyzer/JieBaAnalyzer.cs: -------------------------------------------------------------------------------- 1 | using JiebaNet.Segmenter; 2 | using Lucene.Net.Analysis; 3 | using Lucene.Net.Analysis.Core; 4 | using 
Lucene.Net.Analysis.TokenAttributes; 5 | using System.IO; 6 | 7 | namespace Masuit.LuceneEFCore.SearchEngine; 8 | 9 | public class JieBaAnalyzer : Analyzer 10 | { 11 | private readonly TokenizerMode _mode; 12 | private readonly bool _defaultUserDict; 13 | 14 | /// 15 | /// 16 | /// 17 | /// 18 | /// 19 | public JieBaAnalyzer(TokenizerMode mode, bool defaultUserDict = false) 20 | { 21 | _mode = mode; 22 | _defaultUserDict = defaultUserDict; 23 | } 24 | 25 | protected override TokenStreamComponents CreateComponents(string filedName, TextReader reader) 26 | { 27 | var tokenizer = new JieBaTokenizer(reader, _mode, _defaultUserDict); 28 | var tokenstream = new LowerCaseFilter(Lucene.Net.Util.LuceneVersion.LUCENE_48, tokenizer); 29 | tokenstream.AddAttribute(); 30 | tokenstream.AddAttribute(); 31 | return new TokenStreamComponents(tokenizer, tokenstream); 32 | } 33 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneIndexable.cs: -------------------------------------------------------------------------------- 1 | #if Guid 2 | using System; 3 | #endif 4 | 5 | using Lucene.Net.Documents; 6 | using Newtonsoft.Json; 7 | using System.ComponentModel.DataAnnotations; 8 | using System.ComponentModel.DataAnnotations.Schema; 9 | 10 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces 11 | { 12 | /// 13 | /// 需要被索引的实体基类 14 | /// 15 | public interface ILuceneIndexable 16 | { 17 | /// 18 | /// 主键id 19 | /// 20 | [LuceneIndex(Name = "Id", Store = Field.Store.YES), Key] 21 | #if Int 22 | int Id { get; set; } 23 | 24 | #endif 25 | #if Long 26 | long Id { get; set; } 27 | #endif 28 | #if String 29 | string Id { get; set; } 30 | #endif 31 | #if Guid 32 | Guid Id { get; set; } 33 | #endif 34 | 35 | /// 36 | /// 索引id 37 | /// 38 | [LuceneIndex(Name = "IndexId", Store = Field.Store.YES)] 39 | [JsonIgnore, NotMapped] 40 | internal string IndexId { get; set; } 41 | 42 | /// 43 | /// 转换成Lucene文档 44 | /// 45 
| /// 46 | Document ToDocument(); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /WebSearchDemo/WebSearchDemo.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net8.0 4 | InProcess 5 | Debug;Release;String版本;Guid版本;Long版本 6 | false 7 | false 8 | 9 | 10 | D:\Private\Masuit.LuceneEFCore.SearchEngine\WebSearchDemo\WebSearchDemo.xml 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | PreserveNewest 23 | 24 | 25 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Extensions/ServiceCollectionExtension.cs: -------------------------------------------------------------------------------- 1 | using JiebaNet.Segmenter; 2 | using Lucene.Net.Analysis; 3 | using Lucene.Net.Store; 4 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 5 | using Microsoft.EntityFrameworkCore; 6 | using Microsoft.Extensions.DependencyInjection; 7 | using Microsoft.Extensions.DependencyInjection.Extensions; 8 | using Directory = Lucene.Net.Store.Directory; 9 | 10 | namespace Masuit.LuceneEFCore.SearchEngine.Extensions; 11 | 12 | public static class ServiceCollectionExtension 13 | { 14 | /// <summary> 15 | /// Registers the search engine services (options, memory cache, index directory, analyzer, indexer, searcher and search engine) for dependency injection 16 | /// </summary> 17 | /// <param name="services">service collection the registrations are added to</param> 18 | /// <param name="option">indexer options; registered as a singleton, and its Path is the location the index directory is opened from</param> 19 | /// <returns>the same service collection, so calls can be chained</returns> 20 | public static IServiceCollection AddSearchEngine<TContext>(this IServiceCollection services, LuceneIndexerOptions option) where TContext : DbContext 21 | { 22 | services.AddSingleton(option); 23 | services.AddMemoryCache(); 24 | services.TryAddSingleton<Directory>(s => FSDirectory.Open(option.Path)); 25 | services.TryAddSingleton<Analyzer>(s => new JieBaAnalyzer(TokenizerMode.Search)); 26 | services.TryAddScoped<ILuceneIndexer, LuceneIndexer>(); 27 | services.TryAddScoped<ILuceneIndexSearcher, LuceneIndexSearcher>(); 28 | services.TryAddScoped(typeof(ISearchEngine<>), typeof(SearchEngine<>)); 29 | services.TryAddScoped<ISearchEngine<TContext>, SearchEngine<TContext>>(); 30 | return services; 31 | } 32 | } -------------------------------------------------------------------------------- 
/Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneIndexSearcher.cs: -------------------------------------------------------------------------------- 1 | using Lucene.Net.Documents; 2 | using System; 3 | using System.Collections.Generic; 4 | 5 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces 6 | { 7 | /// <summary> 8 | /// Searcher over the Lucene index 9 | /// </summary> 10 | public interface ILuceneIndexSearcher 11 | { 12 | /// <summary> 13 | /// Splits a keyword into terms (word segmentation) 14 | /// </summary> 15 | /// <param name="keyword">keyword to segment</param> 16 | /// <returns>the segmented terms</returns> 17 | List<string> CutKeywords(string keyword); 18 | 19 | /// <summary> 20 | /// Searches for a single record 21 | /// </summary> 22 | /// <param name="options">search options</param> 23 | /// <returns>the matching document</returns> 24 | Document ScoredSearchSingle(SearchOptions options); 25 | 26 | /// <summary> 27 | /// Searches ranked by score 28 | /// </summary> 29 | /// <param name="options">search options</param> 30 | /// <returns>the scored result collection</returns> 31 | ILuceneSearchResultCollection ScoredSearch(SearchOptions options); 32 | 33 | /// <summary> 34 | /// Searches ranked by score 35 | /// </summary> 36 | /// <param name="keywords">keywords</param> 37 | /// <param name="fields">fields the search is restricted to</param> 38 | /// <param name="maximumNumberOfHits">maximum number of hits to retrieve</param> 39 | /// <param name="boosts">per-field boosts applied when searching multiple fields</param> 40 | /// <param name="type">document type</param> 41 | /// <param name="sortBy">field to sort by</param> 42 | /// <param name="skip">number of results to skip</param> 43 | /// <param name="take">number of results to take</param> 44 | /// <returns>the scored result collection</returns> 45 | ILuceneSearchResultCollection ScoredSearch(string keywords, string fields, int maximumNumberOfHits, Dictionary<string, float> boosts, Type type, string sortBy, int? skip, int? take); 46 | } 47 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Extensions/StringHelpers.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text.RegularExpressions; 5 | 6 | namespace Masuit.LuceneEFCore.SearchEngine.Extensions 7 | { 8 | public static class StringHelpers 9 | { 10 | /// <summary> 11 | /// Removes the given characters from a string 12 | /// </summary> 13 | /// <param name="s">source string; null or empty yields an empty string</param> 14 | /// <param name="chars">characters to remove</param> 15 | /// <returns>the string without any of the given characters</returns> 16 | internal static string RemoveCharacters(this string s, IEnumerable<char> chars) 17 | { 18 | return string.IsNullOrEmpty(s) ? 
string.Empty : new string(s.Where(c => !chars.Contains(c)).ToArray()); 19 | } 20 | 21 | /// <summary> 22 | /// Strips HTML tags and HTML entities from a string 23 | /// </summary> 24 | /// <param name="html">source html; null is treated as an empty string, consistent with RemoveCharacters</param> 25 | /// <returns>the text with tags and entities removed</returns> 26 | internal static string RemoveHtmlTag(this string html) 27 | { 28 | var strText = Regex.Replace(html ?? string.Empty, "<[^>]+>", ""); 29 | strText = Regex.Replace(strText, "&[^;]+;", ""); 30 | return strText; 31 | } 32 | 33 | /// <summary> 34 | /// Adds every element of a sequence to the collection 35 | /// </summary> 36 | /// <typeparam name="T">element type</typeparam> 37 | /// <param name="this">collection to add to</param> 38 | /// <param name="values">elements to add</param> 39 | public static void AddRange<T>(this ICollection<T> @this, IEnumerable<T> values) 40 | { 41 | foreach (var obj in values) 42 | { 43 | @this.Add(obj); 44 | } 45 | } 46 | 47 | /// <summary> 48 | /// Removes every element that satisfies the predicate 49 | /// </summary> 50 | /// <typeparam name="T">element type</typeparam> 51 | /// <param name="this">collection to remove from</param> 52 | /// <param name="where">predicate selecting the elements to remove</param> 53 | public static void RemoveWhere<T>(this ICollection<T> @this, Func<T, bool> @where) 54 | { 55 | foreach (var obj in @this.Where(where).ToList()) 56 | { 57 | @this.Remove(obj); 58 | } 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/LuceneIndexChangeset.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Linq; 3 | 4 | namespace Masuit.LuceneEFCore.SearchEngine 5 | { 6 | /// 7 | /// 索引变更集 8 | /// 9 | public class LuceneIndexChangeset 10 | { 11 | /// 12 | /// 实体集 13 | /// 14 | public IList Entries { get; set; } 15 | 16 | /// 17 | /// 实体是否有某种状态 18 | /// 19 | /// 状态 20 | /// 21 | private bool EntriesHaveState(LuceneIndexState state) 22 | { 23 | return Entries.Any(x => x.State == state); 24 | } 25 | 26 | /// 27 | /// 已经被添加? 28 | /// 29 | public bool HasAdds => EntriesHaveState(LuceneIndexState.Added); 30 | 31 | /// 32 | /// 已经被更新? 33 | /// 34 | public bool HasUpdates => EntriesHaveState(LuceneIndexState.Updated); 35 | 36 | /// 37 | /// 已经被删除? 
38 | /// 39 | public bool HasDeletes => EntriesHaveState(LuceneIndexState.Removed); 40 | 41 | /// 42 | /// 已经被修改 43 | /// 44 | public bool HasChanges => Entries.Any() && (HasAdds || HasUpdates || HasDeletes); 45 | 46 | /// 47 | /// 构造函数 48 | /// 49 | public LuceneIndexChangeset() 50 | { 51 | Entries = new List(); 52 | } 53 | 54 | /// 55 | /// 构造函数 56 | /// 57 | /// 被修改的实体 58 | public LuceneIndexChangeset(LuceneIndexChange change) => Entries = new List 59 | { 60 | change 61 | }; 62 | } 63 | } -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Interfaces/ILuceneIndexer.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces 4 | { 5 | public interface ILuceneIndexer 6 | { 7 | /// 8 | /// 添加到索引 9 | /// 10 | /// 实体 11 | void Add(ILuceneIndexable entity); 12 | 13 | /// 14 | /// 创建索引 15 | /// 16 | /// 实体集 17 | /// 是否需要覆盖 18 | void CreateIndex(IEnumerable entities, bool recreate = true); 19 | 20 | /// 21 | /// 删除索引 22 | /// 23 | /// 实体 24 | void Delete(ILuceneIndexable entity); 25 | 26 | /// 27 | /// 删除索引 28 | /// 29 | /// 实体集 30 | void Delete(IList entries) where T : ILuceneIndexable; 31 | 32 | /// 33 | /// 删除所有索引 34 | /// 35 | /// 是否提交 36 | void DeleteAll(bool commit = true); 37 | 38 | /// 39 | /// 更新索引 40 | /// 41 | /// 实体 42 | void Update(ILuceneIndexable entity); 43 | 44 | /// 45 | /// 更新索引 46 | /// 47 | /// 实体 48 | void Update(LuceneIndexChange change); 49 | 50 | /// 51 | /// 更新索引 52 | /// 53 | /// 实体 54 | void Update(LuceneIndexChangeset changeset); 55 | 56 | /// 57 | /// 索引库数量 58 | /// 59 | /// 60 | int Count(); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Linq/LinqExtension.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using 
System.Collections.Generic; 3 | using System.Linq.Expressions; 4 | 5 | namespace Masuit.LuceneEFCore.SearchEngine.Linq 6 | { 7 | /// 8 | /// linq扩展类 9 | /// 10 | public static class LinqExtension 11 | { 12 | /// 13 | /// 与连接 14 | /// 15 | /// 类型 16 | /// 左条件 17 | /// 右条件 18 | /// 新表达式 19 | internal static Expression> And(this Expression> left, Expression> right) 20 | { 21 | return CombineLambdas(left, right, ExpressionType.AndAlso); 22 | } 23 | 24 | private static Expression> CombineLambdas(this Expression> left, Expression> right, ExpressionType expressionType) 25 | { 26 | if (IsExpressionBodyConstant(left)) 27 | { 28 | return right; 29 | } 30 | 31 | var visitor = new SubstituteParameterVisitor 32 | { 33 | Sub = 34 | { 35 | [right.Parameters[0]] = left.Parameters[0] 36 | } 37 | }; 38 | 39 | Expression body = Expression.MakeBinary(expressionType, left.Body, visitor.Visit(right.Body)); 40 | return Expression.Lambda>(body, left.Parameters[0]); 41 | } 42 | 43 | private static bool IsExpressionBodyConstant(Expression> left) 44 | { 45 | return left.Body.NodeType == ExpressionType.Constant; 46 | } 47 | 48 | internal class SubstituteParameterVisitor : ExpressionVisitor 49 | { 50 | public Dictionary Sub = new Dictionary(); 51 | 52 | protected override Expression VisitParameter(ParameterExpression node) 53 | { 54 | return Sub.TryGetValue(node, out var newValue) ? 
newValue : node; 55 | } 56 | } 57 | } 58 | } -------------------------------------------------------------------------------- /WebSearchDemo/WebSearchDemo.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | WebSearchDemo 5 | 6 | 7 | 8 | 9 | 搜索 10 | 11 | 关键词 12 | 第几页 13 | 页大小 14 | 15 | 16 | 17 | 18 | 创建索引 19 | 20 | 21 | 22 | 23 | 添加索引 24 | 25 | 26 | 27 | 28 | 删除索引 29 | 30 | 31 | 32 | 33 | 更新索引库 34 | 35 | 36 | 37 | 38 | 39 | 文章 40 | 41 | 42 | 43 | 44 | 标题 45 | 46 | 47 | 48 | 49 | 作者 50 | 51 | 52 | 53 | 54 | 内容 55 | 56 | 57 | 58 | 59 | 发表时间 60 | 61 | 62 | 63 | 64 | 作者邮箱 65 | 66 | 67 | 68 | 69 | 标签 70 | 71 | 72 | 73 | 74 | 文章关键词 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Extensions/DocumentExtension.cs: -------------------------------------------------------------------------------- 1 | using Lucene.Net.Documents; 2 | using Newtonsoft.Json; 3 | using System; 4 | using System.ComponentModel; 5 | using System.Globalization; 6 | 7 | namespace Masuit.LuceneEFCore.SearchEngine.Extensions 8 | { 9 | public static class DocumentExtension 10 | { 11 | /// <summary> 12 | /// Reads a field from a Lucene document and converts it to the requested type 13 | /// </summary> 14 | /// <param name="doc">Lucene document</param> 15 | /// <param name="key">field name</param> 16 | /// <param name="t">target type</param> 17 | /// <returns>the raw string for string-compatible targets, a converted value for value types, otherwise the JSON-deserialized object; null when the field has no value</returns> 18 | internal static object Get(this Document doc, string key, Type t) 19 | { 20 | string value = doc.Get(key); 21 | return t switch 22 | { 23 | _ when t.IsAssignableFrom(typeof(string)) => value, 24 | _ when t.IsValueType => ConvertTo(value, t), 25 | _ => value == null ? null : JsonConvert.DeserializeObject(value, t) // DeserializeObject throws on a null string, so a missing field maps to null like the other two arms 26 | }; 27 | } 28 | 29 | /// <summary> 30 | /// Converts a string directly to the given type 31 | /// </summary> 32 | /// <param name="value">string to convert; null yields null</param> 33 | /// <param name="type">target type</param> 34 | /// <returns>the converted value</returns> 35 | private static object ConvertTo(string value, Type type) 36 | { 37 | if (value == null) 38 | { 39 | return default; 40 | } 41 | 42 | if (value.GetType() == type) 43 | { 44 | return value; 45 | } 46 | 47 | if (type.IsEnum) 48 | { 49 | return Enum.Parse(type, value.ToString(CultureInfo.InvariantCulture)); 50 | } 51 | 52 | if 
(type.IsGenericType && type.GetGenericTypeDefinition() == typeof(Nullable<>)) 53 | { 54 | var underlyingType = Nullable.GetUnderlyingType(type); 55 | return ConvertTo(value, underlyingType!); // convert exactly like the non-nullable type: Convert.ChangeType cannot turn a string into Guid, TimeSpan, etc., while the TypeConverter path below can 56 | } 57 | 58 | var converter = TypeDescriptor.GetConverter(value); 59 | if (converter != null) 60 | { 61 | if (converter.CanConvertTo(type)) 62 | { 63 | return converter.ConvertTo(value, type); 64 | } 65 | } 66 | 67 | converter = TypeDescriptor.GetConverter(type); 68 | if (converter != null) 69 | { 70 | if (converter.CanConvertFrom(value.GetType())) 71 | { 72 | return converter.ConvertFrom(value); 73 | } 74 | } 75 | 76 | return Convert.ChangeType(value, type); 77 | } 78 | } 79 | } -------------------------------------------------------------------------------- /WebSearchDemo/Controllers/HomeController.cs: -------------------------------------------------------------------------------- 1 | using Masuit.LuceneEFCore.SearchEngine; 2 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 3 | using Microsoft.AspNetCore.Mvc; 4 | using System.Collections.Generic; 5 | using WebSearchDemo.Database; 6 | 7 | namespace WebSearchDemo.Controllers 8 | { 9 | [Route("[controller]/[action]")] 10 | public class HomeController : Controller 11 | { 12 | private readonly ISearchEngine _searchEngine; 13 | 14 | public HomeController(ISearchEngine searchEngine) 15 | { 16 | _searchEngine = searchEngine; 17 | } 18 | 19 | /// 20 | /// 搜索 21 | /// 22 | /// 关键词 23 | /// 第几页 24 | /// 页大小 25 | /// 26 | [HttpGet] 27 | public IActionResult Index(string s, int page, int size) 28 | { 29 | var result = _searchEngine.ScoredSearch(new SearchOptions(s, page, size, typeof(Post))); 30 | return Ok(result); 31 | } 32 | 33 | /// 34 | /// 创建索引 35 | /// 36 | [HttpGet] 37 | public void CreateIndex() 38 | { 39 | //_searchEngine.CreateIndex();//扫描所有数据表,创建符合条件的库的索引 40 | _searchEngine.CreateIndex(new List() { nameof(Post) 
});//创建指定的数据表的索引 41 | } 42 | 43 | /// 44 | /// 添加索引 45 | /// 46 | [HttpPost] 47 | public void AddIndex(Post p) 48 | { 49 | // 添加到数据库并更新索引 50 | _searchEngine.Context.Post.Add(p); 51 | _searchEngine.SaveChanges(); 52 | 53 | //_luceneIndexer.Add(p); //单纯的只添加索引库 54 | } 55 | 56 | /// 57 | /// 删除索引 58 | /// 59 | [HttpDelete] 60 | public void DeleteIndex(Post post) 61 | { 62 | //从数据库删除并更新索引库 63 | Post p = _searchEngine.Context.Post.Find(post.Id); 64 | _searchEngine.Context.Post.Remove(p); 65 | _searchEngine.SaveChanges(); 66 | 67 | //_luceneIndexer.Delete(p);// 单纯的从索引库移除 68 | } 69 | 70 | /// 71 | /// 更新索引库 72 | /// 73 | /// 74 | [HttpPatch] 75 | public void UpdateIndex(Post post) 76 | { 77 | //从数据库更新并同步索引库 78 | Post p = _searchEngine.Context.Post.Find(post.Id); 79 | 80 | // update... 81 | _searchEngine.Context.Post.Update(p); 82 | _searchEngine.SaveChanges(); 83 | 84 | //_luceneIndexer.Update(p);// 单纯的更新索引库 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30709.132 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Masuit.LuceneEFCore.SearchEngine", "Masuit.LuceneEFCore.SearchEngine\Masuit.LuceneEFCore.SearchEngine.csproj", "{1A9A907B-8254-40EF-BB80-47B716919B07}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WebSearchDemo", "WebSearchDemo\WebSearchDemo.csproj", "{12EF81D1-26CF-417C-A814-BB49A0F87AFE}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Guid版本|Any CPU = Guid版本|Any CPU 14 | Long版本|Any CPU = Long版本|Any CPU 15 | Release|Any CPU = Release|Any CPU 16 | String版本|Any CPU = String版本|Any CPU 17 | 
EndGlobalSection 18 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 19 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 20 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Debug|Any CPU.Build.0 = Debug|Any CPU 21 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Guid版本|Any CPU.ActiveCfg = Guid版本|Any CPU 22 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Guid版本|Any CPU.Build.0 = Guid版本|Any CPU 23 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Long版本|Any CPU.ActiveCfg = Long版本|Any CPU 24 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Long版本|Any CPU.Build.0 = Long版本|Any CPU 25 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Release|Any CPU.ActiveCfg = Release|Any CPU 26 | {1A9A907B-8254-40EF-BB80-47B716919B07}.Release|Any CPU.Build.0 = Release|Any CPU 27 | {1A9A907B-8254-40EF-BB80-47B716919B07}.String版本|Any CPU.ActiveCfg = String版本|Any CPU 28 | {1A9A907B-8254-40EF-BB80-47B716919B07}.String版本|Any CPU.Build.0 = String版本|Any CPU 29 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 30 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Debug|Any CPU.Build.0 = Debug|Any CPU 31 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Guid版本|Any CPU.ActiveCfg = Guid版本|Any CPU 32 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Guid版本|Any CPU.Build.0 = Guid版本|Any CPU 33 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Long版本|Any CPU.ActiveCfg = Long版本|Any CPU 34 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Long版本|Any CPU.Build.0 = Long版本|Any CPU 35 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Release|Any CPU.ActiveCfg = Release|Any CPU 36 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.Release|Any CPU.Build.0 = Release|Any CPU 37 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.String版本|Any CPU.ActiveCfg = String版本|Any CPU 38 | {12EF81D1-26CF-417C-A814-BB49A0F87AFE}.String版本|Any CPU.Build.0 = String版本|Any CPU 39 | EndGlobalSection 40 | GlobalSection(SolutionProperties) = preSolution 41 | HideSolutionNode = FALSE 42 | EndGlobalSection 43 | GlobalSection(ExtensibilityGlobals) = postSolution 44 | SolutionGuid 
= {C5C40C6B-2856-4202-A102-44DD07D576E4} 45 | EndGlobalSection 46 | EndGlobal 47 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Interfaces/ISearchEngine.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.EntityFrameworkCore; 2 | using System.Collections.Generic; 3 | using System.Threading.Tasks; 4 | 5 | namespace Masuit.LuceneEFCore.SearchEngine.Interfaces 6 | { 7 | public interface ISearchEngine where TContext : DbContext 8 | { 9 | /// 10 | /// 数据库上下文 11 | /// 12 | TContext Context { get; } 13 | 14 | /// 15 | /// 索引器 16 | /// 17 | ILuceneIndexer LuceneIndexer { get; } 18 | 19 | /// 20 | /// 索引搜索器 21 | /// 22 | ILuceneIndexSearcher LuceneIndexSearcher { get; } 23 | 24 | /// 25 | /// 索引总数 26 | /// 27 | int IndexCount { get; } 28 | 29 | /// 30 | /// 创建索引 31 | /// 32 | void CreateIndex(); 33 | 34 | /// 35 | /// 创建指定数据表的索引 36 | /// 37 | void CreateIndex(List tables); 38 | 39 | /// 40 | /// 删除索引 41 | /// 42 | void DeleteIndex(); 43 | 44 | /// 45 | /// 保存数据更改并同步索引 46 | /// 47 | /// 创建索引 48 | /// 49 | int SaveChanges(bool index = true); 50 | 51 | /// 52 | /// 保存数据更改并同步索引 53 | /// 54 | /// 创建索引 55 | /// 56 | Task SaveChangesAsync(bool index = true); 57 | 58 | /// 59 | /// 执行搜索并将结果限制为特定类型,在返回之前,搜索结果将转换为相关类型 60 | /// 61 | /// 搜索选项 62 | IScoredSearchResultCollection ScoredSearch(SearchOptions options); 63 | 64 | /// 65 | /// 执行搜索并将结果限制为特定类型,在返回之前,搜索结果将转换为相关类型 66 | /// 67 | /// 要搜索的实体类型 - 注意:必须实现ILuceneIndexable 68 | /// 搜索选项 69 | IScoredSearchResultCollection ScoredSearch(SearchOptions options); 70 | 71 | /// 72 | /// 执行搜索并将结果限制为特定类型,在返回之前,搜索结果将转换为相关类型,但不返回任何评分信息 73 | /// 74 | /// 搜索选项 75 | /// 76 | ISearchResultCollection Search(SearchOptions options); 77 | 78 | /// 79 | /// 执行搜索并将结果限制为特定类型,在返回之前,搜索结果将转换为相关类型,但不返回任何评分信息 80 | /// 81 | /// 要搜索的实体类型 - 注意:必须实现ILuceneIndexable 82 | /// 搜索选项 83 | /// 84 | ISearchResultCollection Search(SearchOptions options); 
85 | 86 | /// 87 | /// 搜索一条匹配度最高的记录 88 | /// 89 | /// 搜索选项 90 | ILuceneIndexable SearchOne(SearchOptions options); 91 | 92 | /// 93 | /// 搜索一条匹配度最高的记录 94 | /// 95 | /// 搜索选项 96 | T SearchOne(SearchOptions options) where T : class; 97 | 98 | /// 99 | /// 导入自定义词库 100 | /// 101 | /// 102 | void ImportCustomerKeywords(IEnumerable words); 103 | } 104 | } -------------------------------------------------------------------------------- /WebSearchDemo/Startup.cs: -------------------------------------------------------------------------------- 1 | using JiebaNet.Segmenter; 2 | using Masuit.LuceneEFCore.SearchEngine; 3 | using Masuit.LuceneEFCore.SearchEngine.Extensions; 4 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 5 | using Microsoft.AspNetCore.Builder; 6 | using Microsoft.AspNetCore.Hosting; 7 | using Microsoft.AspNetCore.Mvc; 8 | using Microsoft.EntityFrameworkCore; 9 | using Microsoft.Extensions.Configuration; 10 | using Microsoft.Extensions.DependencyInjection; 11 | using Microsoft.OpenApi.Models; 12 | using Newtonsoft.Json; 13 | using System; 14 | using System.Collections.Generic; 15 | using System.IO; 16 | using WebSearchDemo.Database; 17 | 18 | namespace WebSearchDemo 19 | { 20 | public class Startup 21 | { 22 | public Startup(IConfiguration configuration) 23 | { 24 | Configuration = configuration; 25 | } 26 | 27 | public IConfiguration Configuration { get; } 28 | 29 | // This method gets called by the runtime. Use this method to add services to the container. 
/// <summary>
/// Registers the demo's services: the EF Core context, the Lucene search engine,
/// Swagger generation and MVC controllers.
/// </summary>
/// <param name="services">Service collection the registrations are added to.</param>
public void ConfigureServices(IServiceCollection services)
{
    // The demo stores its data in an in-memory database named "test".
    services.AddDbContext<DataContext>(db =>
    {
        db.UseInMemoryDatabase("test");

        //db.UseSqlServer("Data Source=.;Initial Catalog=MyBlogs;Integrated Security=True");
    });

    // The search engine is configured with "lucene" as its index path.
    services.AddSearchEngine<DataContext>(new LuceneIndexerOptions()
    {
        Path = "lucene"
    });

    // Configure Swagger.
    services.AddSwaggerGen(c =>
    {
        c.SwaggerDoc("v1", new OpenApiInfo
        {
            Version = "v1",
            Title = $"接口文档",
            Description = $"HTTP API ",
            // Contact URL uses the same host as the license URL and the contact e-mail.
            Contact = new OpenApiContact { Name = "懒得勤快", Email = "admin@masuit.com", Url = new Uri("https://masuit.com") },
            License = new OpenApiLicense { Name = "懒得勤快", Url = new Uri("https://masuit.com") }
        });

        // Feed the XML documentation file next to the binaries into Swagger.
        c.IncludeXmlComments(AppContext.BaseDirectory + "WebSearchDemo.xml");
    });
    services.AddControllers();
    services.AddControllersWithViews().SetCompatibilityVersion(CompatibilityVersion.Latest);
}

// This method gets called by the runtime. Use this method to configure the HTTP request pipeline.
59 | public void Configure(IApplicationBuilder app, IHostingEnvironment env, DataContext db, ISearchEngine searchEngine) 60 | { 61 | if (env.IsDevelopment()) 62 | { 63 | app.UseDeveloperExceptionPage(); 64 | } 65 | new JiebaSegmenter().AddWord("会声会影"); //添加自定义词库 66 | new JiebaSegmenter().AddWord("思杰马克丁"); //添加自定义词库 67 | new JiebaSegmenter().AddWord("TeamViewer"); //添加自定义词库 68 | db.Post.AddRange(JsonConvert.DeserializeObject>(File.ReadAllText(AppContext.BaseDirectory + "Posts.json"))); 69 | db.SaveChanges(); 70 | searchEngine.DeleteIndex(); 71 | searchEngine.CreateIndex(new List() 72 | { 73 | nameof(Post) 74 | }); 75 | app.UseSwagger().UseSwaggerUI(c => 76 | { 77 | c.SwaggerEndpoint($"/swagger/v1/swagger.json", "懒得勤快的博客,搜索引擎测试"); 78 | }); //配置swagger 79 | app.UseRouting().UseEndpoints(endpoints => 80 | { 81 | endpoints.MapControllers(); // 属性路由 82 | endpoints.MapControllerRoute("default", "{controller=Home}/{action=Index}/{id?}"); // 默认路由 83 | }); 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/KeywordsManager.cs: -------------------------------------------------------------------------------- 1 | using JiebaNet.Segmenter; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text.RegularExpressions; 5 | using TinyPinyin; 6 | 7 | namespace Masuit.LuceneEFCore.SearchEngine 8 | { 9 | public static class KeywordsManager 10 | { 11 | /// 12 | /// 近义词组 13 | /// 14 | internal static HashSet<(string key, string value)> SynonymWords { get; set; } = new(); 15 | 16 | private static HashSet<(string key, string value)> Pinyins { get; set; } = new(); 17 | private static ILookup _pinyinsLookup; 18 | 19 | internal static ILookup PinyinsLookup => _pinyinsLookup ??= Pinyins.ToLookup(t => t.key, t => t.value); 20 | 21 | private static readonly JiebaSegmenter JiebaSegmenter = new(); 22 | 23 | /// 24 | /// 添加近义词 25 | /// 26 | /// 27 | public static void 
AddSynonyms(KeyValuePair pair) 28 | { 29 | SynonymWords.Add((pair.Key, pair.Value)); 30 | AddWords(pair.Key, pair.Value); 31 | } 32 | 33 | /// 34 | /// 添加近义词 35 | /// 36 | /// 37 | public static void AddSynonyms((string, string) pair) 38 | { 39 | SynonymWords.Add((pair.Item1, pair.Item2)); 40 | AddWords(pair.Item1, pair.Item2); 41 | } 42 | 43 | /// 44 | /// 添加近义词 45 | /// 46 | public static void AddSynonyms(string key, string value, params string[] values) 47 | { 48 | SynonymWords.Add((key, value)); 49 | AddWords(key, value); 50 | foreach (var s in values) 51 | { 52 | SynonymWords.Add((key, s)); 53 | AddWords(s); 54 | } 55 | } 56 | 57 | /// 58 | /// 添加近义词 59 | /// 60 | /// 61 | public static void AddSynonyms(IEnumerable<(string key, string value)> pairs) 62 | { 63 | foreach (var t in pairs) 64 | { 65 | SynonymWords.Add(t); 66 | AddWords(t.key, t.value); 67 | } 68 | } 69 | 70 | /// 71 | /// 添加近义词 72 | /// 73 | /// 74 | public static void AddSynonyms(IEnumerable> pairs) 75 | { 76 | foreach (var pair in pairs) 77 | { 78 | AddWords(pair.Key, pair.Value); 79 | SynonymWords.Add((pair.Key, pair.Value)); 80 | } 81 | } 82 | 83 | /// 84 | /// 添加关键词 85 | /// 86 | /// 87 | public static void AddWords(string word) 88 | { 89 | JiebaSegmenter.AddWord(word); 90 | var pinyin = PinyinHelper.GetPinyin(Regex.Replace(word, @"[^\u4e00-\u9fa5]", "")); 91 | if (!string.IsNullOrEmpty(pinyin)) 92 | { 93 | var key = pinyin.ToLower(); 94 | Pinyins.Add((key.Replace(" ", ""), word)); 95 | Pinyins.Add((new string(key.Split(' ').Select(s => s[0]).ToArray()), word)); 96 | } 97 | } 98 | 99 | /// 100 | /// 添加关键词 101 | /// 102 | /// 103 | public static void AddWords(IEnumerable words) 104 | { 105 | foreach (var s in words) 106 | { 107 | JiebaSegmenter.AddWord(s); 108 | var pinyin = PinyinHelper.GetPinyin(Regex.Replace(s, @"[^\u4e00-\u9fa5]", "")); 109 | if (!string.IsNullOrEmpty(pinyin)) 110 | { 111 | var key = pinyin.ToLower(); 112 | Pinyins.Add((key.Replace(" ", ""), s)); 113 | Pinyins.Add((new 
string(key.Split(' ').Select(ss => ss[0]).ToArray()), s)); 114 | } 115 | } 116 | } 117 | 118 | /// 119 | /// 添加关键词 120 | /// 121 | /// 122 | /// 123 | public static void AddWords(string word, params string[] words) 124 | { 125 | JiebaSegmenter.AddWord(word); 126 | foreach (var s in words) 127 | { 128 | JiebaSegmenter.AddWord(s); 129 | var pinyin = PinyinHelper.GetPinyin(Regex.Replace(s, @"[^\u4e00-\u9fa5]", "")); 130 | if (!string.IsNullOrEmpty(pinyin)) 131 | { 132 | var key = pinyin.ToLower(); 133 | Pinyins.Add((key.Replace(" ", ""), s)); 134 | Pinyins.Add((new string(key.Split(' ').Select(ss => ss[0]).ToArray()), s)); 135 | } 136 | } 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/LuceneIndexableBaseEntity.cs: -------------------------------------------------------------------------------- 1 | using Lucene.Net.Documents; 2 | using Masuit.LuceneEFCore.SearchEngine.Extensions; 3 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 4 | using Newtonsoft.Json; 5 | using System; 6 | using System.ComponentModel.DataAnnotations; 7 | using System.ComponentModel.DataAnnotations.Schema; 8 | using System.Reflection; 9 | 10 | namespace Masuit.LuceneEFCore.SearchEngine 11 | { 12 | /// 13 | /// 需要被索引的实体基类 14 | /// 15 | public abstract class LuceneIndexableBaseEntity : ILuceneIndexable 16 | { 17 | /// 18 | /// 主键id 19 | /// 20 | [LuceneIndex(Name = nameof(Id), Store = Field.Store.YES), Key] 21 | #if Int 22 | [DatabaseGenerated(DatabaseGeneratedOption.Identity)] 23 | public int Id { get; set; } 24 | 25 | #endif 26 | #if Long 27 | [DatabaseGenerated(DatabaseGeneratedOption.Identity)] 28 | public long Id { get; set; } 29 | #endif 30 | #if String 31 | public string Id { get; set; } 32 | #endif 33 | #if Guid 34 | public Guid Id { get; set; } 35 | #endif 36 | 37 | /// 38 | /// 索引唯一id 39 | /// 40 | [LuceneIndex(Name = nameof(ILuceneIndexable.IndexId), Store = Field.Store.YES)] 41 | 
[NotMapped, JsonIgnore] 42 | string ILuceneIndexable.IndexId 43 | { 44 | get => LuceneIndexerOptions.IndexIdGenerator(GetType(), Id); 45 | 46 | set 47 | { 48 | } 49 | } 50 | 51 | /// 52 | /// 转换成Lucene文档 53 | /// 54 | /// 55 | public virtual Document ToDocument() 56 | { 57 | var doc = new Document(); 58 | var type = GetType(); 59 | if (type.Assembly.IsDynamic && type.FullName.Contains("Prox")) 60 | { 61 | type = type.BaseType; 62 | } 63 | 64 | var classProperties = type.GetProperties(); 65 | doc.Add(new StringField("Type", type.AssemblyQualifiedName, Field.Store.YES)); 66 | foreach (var propertyInfo in classProperties) 67 | { 68 | var propertyValue = propertyInfo.GetValue(this); 69 | if (propertyValue == null) 70 | { 71 | continue; 72 | } 73 | 74 | //1. 该处修复用IndexId去删除索引无效的问题 75 | //2. 以Id为目标的删除放在其他处: 也利用到了IndexId 76 | if (propertyInfo.Name == nameof(ILuceneIndexable.IndexId)) 77 | { 78 | var filed = new Field(propertyInfo.Name, propertyValue.ToString(), new FieldType 79 | { 80 | IsStored = true, 81 | IsIndexed = true, 82 | IsTokenized = false 83 | }); 84 | doc.Add(filed); 85 | continue; 86 | } 87 | 88 | var attrs = propertyInfo.GetCustomAttributes(); 89 | foreach (var attr in attrs) 90 | { 91 | string name = !string.IsNullOrEmpty(attr.Name) ? attr.Name : propertyInfo.Name; 92 | switch (propertyValue) 93 | { 94 | case DateTime time: 95 | doc.Add(new StringField(name, time.ToString("yyyy-MM-dd HH:mm:ss"), attr.Store)); 96 | break; 97 | 98 | case int num: 99 | doc.Add(new Int32Field(name, num, attr.Store)); 100 | break; 101 | 102 | case long num: 103 | doc.Add(new Int64Field(name, num, attr.Store)); 104 | break; 105 | 106 | case float num: 107 | doc.Add(new SingleField(name, num, attr.Store)); 108 | break; 109 | 110 | case double num: 111 | doc.Add(new DoubleField(name, num, attr.Store)); 112 | break; 113 | 114 | case Guid guid: 115 | doc.Add(new StringField(name, guid.ToString(), attr.Store)); 116 | break; 117 | 118 | default: 119 | string value = attr.IsHtml ? 
propertyValue.ToString().RemoveHtmlTag() : propertyValue.ToString(); 120 | doc.Add(new TextField(name, value, attr.Store)); 121 | break; 122 | } 123 | } 124 | } 125 | 126 | return doc; 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/Masuit.LuceneEFCore.SearchEngine.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | netstandard2.1;net5;net6;net7;net8 4 | True 5 | 懒得勤快 6 | 懒得勤快 7 | 基于EntityFrameworkCore和Lucene.NET实现的全文检索搜索引擎,主键int版本 8 | 基于EntityFrameworkCore和Lucene.NET实现的全文检索搜索引擎 9 | 懒得勤快 10 | https://github.com/ldqk/Masuit.LuceneEFCore.SearchEngine 11 | Masuit.LuceneEFCore.SearchEngine_int 12 | 1.2.4 13 | Debug;Release;String版本;Guid版本;Long版本 14 | false 15 | false 16 | False 17 | true 18 | true 19 | snupkg 20 | latest 21 | 1.2 22 | 1.2 23 | true 24 | true 25 | snupkg 26 | README.md 27 | https://github.com/ldqk/Masuit.LuceneEFCore.SearchEngine 28 | lucene;efcore;EntityFramework;masuit 29 | Masuit.LuceneEFCore.SearchEngine 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | True 40 | \ 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | TRACE;Int 66 | .\Masuit.LuceneEFCore.SearchEngine.xml 67 | 68 | 69 | 70 | TRACE;Int 71 | true 72 | .\Masuit.LuceneEFCore.SearchEngine.xml 73 | 74 | 75 | 76 | TRACE;Long 77 | true 78 | .\Masuit.LuceneEFCore.SearchEngine.xml 79 | 80 | 81 | 82 | TRACE;Guid 83 | true 84 | .\Masuit.LuceneEFCore.SearchEngine.xml 85 | 86 | 87 | 88 | TRACE;String 89 | true 90 | .\Masuit.LuceneEFCore.SearchEngine.xml 91 | 92 | 93 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/JiebaAnalyzer/JieBaTokenizer.cs: -------------------------------------------------------------------------------- 1 | using JiebaNet.Segmenter; 2 | using JiebaNet.Segmenter.Common; 3 | using 
Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using System;
using System.Collections.Generic;
using System.IO;
using System.Reflection;
using System.Text.RegularExpressions;
using Token = JiebaNet.Segmenter.Token;

namespace Masuit.LuceneEFCore.SearchEngine;

/// <summary>
/// Lucene tokenizer that segments its input with jieba and emits one Lucene token per
/// segmented word, leaving out every word listed in <see cref="StopWords"/>.
/// </summary>
public class JieBaTokenizer : Tokenizer
{
    /// <summary>
    /// Matches characters in U+4E00–U+9FA5 or outside U+0000–U+00FF. Only used to pad the
    /// debug log line; kept static so it is not rebuilt for every token.
    /// </summary>
    private static readonly Regex WideCharRegex = new Regex(@"[\u4e00-\u9fa5]|[^\x00-\xff]");

    private string _inputText;
    private readonly string _dictPath = "Resources/dict.txt";

    private readonly JiebaSegmenter _segmenter;
    private TokenizerMode _mode;
    private ICharTermAttribute _termAtt;
    private IOffsetAttribute _offsetAtt;
    private ITypeAttribute _typeAtt;

    /// <summary>
    /// Words of the current input that survived stop-word filtering.
    /// </summary>
    private readonly List<Token> _wordList = new List<Token>();

    /// <summary>
    /// Cursor over <see cref="_wordList"/>; assigned in <see cref="Reset"/>.
    /// </summary>
    private IEnumerator<Token> _iter;

    /// <summary>
    /// Words that are dropped from the token stream.
    /// </summary>
    public List<string> StopWords { get; } = new List<string>();

    /// <summary>
    /// Creates the tokenizer, loading the stop-word file and user dictionary named in
    /// <c>Settings</c> when those settings are non-empty.
    /// </summary>
    /// <param name="input">Reader supplying the text to tokenize.</param>
    /// <param name="mode">Jieba tokenizer mode passed to the segmenter.</param>
    /// <param name="defaultUserDict">When true, loads the built-in dictionary resource.</param>
    public JieBaTokenizer(TextReader input, TokenizerMode mode, bool defaultUserDict = false) : base(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input)
    {
        _segmenter = new JiebaSegmenter();
        _mode = mode;
        if (defaultUserDict)
        {
            _segmenter.LoadUserDictForEmbedded(Assembly.GetCallingAssembly(), _dictPath);
        }

        if (!string.IsNullOrEmpty(Settings.IgnoreDictFile))
        {
            var list = FileExtension.ReadAllLines(Settings.IgnoreDictFile);
            foreach (var item in list)
            {
                // Skip blank lines and words that are already registered.
                if (string.IsNullOrEmpty(item))
                {
                    continue;
                }

                if (StopWords.Contains(item))
                {
                    continue;
                }

                StopWords.Add(item);
            }
        }

        if (!string.IsNullOrEmpty(Settings.UserDictFile))
        {
            _segmenter.LoadUserDict(Settings.UserDictFile);
        }

        Init();
    }

    #region private func

    /// <summary>
    /// Registers the token attributes this tokenizer fills in.
    /// </summary>
    private void Init()
    {
        _termAtt = AddAttribute<ICharTermAttribute>();
        _offsetAtt = AddAttribute<IOffsetAttribute>();
        _typeAtt = AddAttribute<ITypeAttribute>();

        // NOTE(review): registered without keeping a reference; presumably the
        // position-increment attribute — confirm the attribute type.
        AddAttribute<IPositionIncrementAttribute>();
    }

    private string ReadToEnd(TextReader input)
    {
        return input.ReadToEnd();
    }

    /// <summary>
    /// Advances the word cursor and wraps the next word in a Lucene token.
    /// </summary>
    /// <returns>The next token, or <c>null</c> when the word list is exhausted.</returns>
    private Lucene.Net.Analysis.Token Next()
    {
        if (!_iter.MoveNext())
        {
            return null;
        }

        var word = _iter.Current;
        var token = new Lucene.Net.Analysis.Token(word.Word, word.StartIndex, word.EndIndex);
        if (Settings.Log)
        {
            // Each wide character shortens the padding by one; words with more than
            // `len` wide characters are padded as if they had none.
            var offset = WideCharRegex.Matches(word.Word).Count;
            var len = 10;
            offset = offset > len ? 0 : offset;
            Console.WriteLine($"==分词:{word.Word.PadRight(len - offset, '=')}==起始位置:{word.StartIndex.ToString().PadLeft(3, '=')}==结束位置{word.EndIndex.ToString().PadLeft(3, '=')}");
        }

        return token;
    }

    #endregion

    /// <summary>
    /// Publishes the next word through the term, offset and type attributes.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a token was produced; <c>false</c> once the words are exhausted,
    /// after <c>End()</c> and <c>Dispose()</c> have been called.
    /// </returns>
    public sealed override bool IncrementToken()
    {
        ClearAttributes();

        var word = Next();
        if (word != null)
        {
            var buffer = word.ToString();
            _termAtt.SetEmpty().Append(buffer);
            _offsetAtt.SetOffset(CorrectOffset(word.StartOffset), CorrectOffset(word.EndOffset));
            _typeAtt.Type = word.Type;
            return true;
        }

        End();
        Dispose();
        return false;
    }

    /// <summary>
    /// Reads the whole input, segments it, filters stop words and rewinds the word cursor.
    /// </summary>
    public override void Reset()
    {
        base.Reset();

        _inputText = ReadToEnd(m_input);
        RemoveStopWords(_segmenter.Tokenize(_inputText, _mode));

        _iter = _wordList.GetEnumerator();
    }

    /// <summary>
    /// Replaces the contents of the word list with the given words minus the stop words.
    /// </summary>
    /// <param name="words">Segmented words of the current input.</param>
    private void RemoveStopWords(IEnumerable<Token> words)
    {
        _wordList.Clear();

        foreach (var x in words)
        {
            if (!StopWords.Contains(x.Word))
            {
                _wordList.Add(x);
            }
        }
    }
}
-------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/SearchOptions.cs: -------------------------------------------------------------------------------- 1 | using Lucene.Net.Search; 2 | using 
Masuit.LuceneEFCore.SearchEngine.Extensions;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;

namespace Masuit.LuceneEFCore.SearchEngine
{
    /// <summary>
    /// Options for one search request: keywords, the fields to search, per-field boosts,
    /// sort order, paging, an optional document type, a score threshold and a filter.
    /// </summary>
    public class SearchOptions
    {
        /// <summary>
        /// Boost given to a searched field that has no explicit boost.
        /// </summary>
        private const float DefaultBoost = 2.0f;

        /// <summary>
        /// Keywords to search for.
        /// </summary>
        public string Keywords { get; set; }

        /// <summary>
        /// Fields the search is restricted to.
        /// </summary>
        public List<string> Fields { get; set; }

        /// <summary>
        /// Maximum number of hits to retrieve.
        /// </summary>
        public int MaximumNumberOfHits { get; set; }

        /// <summary>
        /// Per-field search weights used for multi-field searches.
        /// </summary>
        private readonly Dictionary<string, float> _boosts;

        /// <summary>
        /// Per-field search weights. Reading the property first adds the default boost for
        /// every entry of <see cref="Fields"/> that has no boost yet; field names are
        /// matched case-insensitively with an ordinal comparison so the result does not
        /// depend on the current culture.
        /// </summary>
        internal Dictionary<string, float> Boosts
        {
            get
            {
                // Fields has a public setter, so tolerate it having been set to null.
                foreach (var field in Fields ?? Enumerable.Empty<string>())
                {
                    bool alreadyBoosted = _boosts.Keys.Any(key => string.Equals(key, field, StringComparison.OrdinalIgnoreCase));
                    if (!alreadyBoosted)
                    {
                        _boosts.Add(field, DefaultBoost);
                    }
                }

                return _boosts;
            }
        }

        /// <summary>
        /// Sort fields; always starts with the relevance score.
        /// </summary>
        public List<SortField> OrderBy { get; set; }

        /// <summary>
        /// Number of results to skip.
        /// </summary>
        public int? Skip { get; set; }

        /// <summary>
        /// Number of results to take.
        /// </summary>
        public int? Take { get; set; }

        /// <summary>
        /// Document type.
        /// </summary>
        public Type Type { get; set; }

        /// <summary>
        /// Match threshold in the range 0–1; larger values give more precise results.
        /// </summary>
        public float Score { get; set; } = 0.5f;

        /// <summary>
        /// Filter condition.
        /// </summary>
        public Filter Filter { get; set; }

        /// <summary>
        /// Creates search options.
        /// </summary>
        /// <param name="keywords">Keywords to search for; must not be null or whitespace.</param>
        /// <param name="fields">Comma-separated list of fields to search; spaces and empty entries are ignored.</param>
        /// <param name="maximumNumberOfHits">Maximum number of hits to retrieve.</param>
        /// <param name="boosts">Per-field search weights; an empty map is used when null.</param>
        /// <param name="type">Document type.</param>
        /// <param name="orderBy">Comma-separated list of string sort fields, applied after the relevance score.</param>
        /// <param name="skip">Number of results to skip.</param>
        /// <param name="take">Number of results to take.</param>
        /// <exception cref="ArgumentException"><paramref name="keywords"/> is null, empty or whitespace.</exception>
        public SearchOptions(string keywords, string fields, int maximumNumberOfHits = 1000, Dictionary<string, float> boosts = null, Type type = null, string orderBy = null, int? skip = null, int? take = null)
        {
            if (string.IsNullOrWhiteSpace(keywords))
            {
                throw new ArgumentException("搜索关键词不能为空!");
            }

            Keywords = keywords;
            MaximumNumberOfHits = maximumNumberOfHits;
            Skip = skip;
            Take = take;
            _boosts = boosts ?? new Dictionary<string, float>();
            Type = type;
            Fields = new List<string>();
            OrderBy = new List<SortField>()
            {
                SortField.FIELD_SCORE
            };

            // Searched fields. Empty entries (e.g. from "a,,b" or a trailing comma) are
            // dropped so they do not become empty field names.
            if (!string.IsNullOrEmpty(fields))
            {
                fields = fields.RemoveCharacters(" ");
                Fields.AddRange(fields.Split(',').Where(name => name.Length > 0));
            }

            // Additional sort rules, same handling of empty entries.
            if (!string.IsNullOrEmpty(orderBy))
            {
                orderBy = orderBy.RemoveCharacters(" ");
                OrderBy.AddRange(orderBy.Split(',').Where(name => name.Length > 0).Select(sortField => new SortField(sortField, SortFieldType.STRING)));
            }
        }

        /// <summary>
        /// Creates paged search options over an explicit field list.
        /// </summary>
        /// <param name="keywords">Keywords to search for.</param>
        /// <param name="page">1-based page number; values below 1 are treated as 1.</param>
        /// <param name="size">Page size; values below 1 are treated as 1.</param>
        /// <param name="fields">Comma-separated list of fields to search.</param>
        public SearchOptions(string keywords, int page, int size, string fields) : this(keywords, fields, int.MaxValue, null, null, null, null, null)
        {
            SetPaging(page, size);
        }

        /// <summary>
        /// Creates paged search options that search every property of <paramref name="t"/>
        /// carrying a <see cref="LuceneIndexAttribute"/>. The type is only used to derive
        /// the field list; <see cref="Type"/> itself is left unset.
        /// </summary>
        /// <param name="keywords">Keywords to search for.</param>
        /// <param name="page">1-based page number; values below 1 are treated as 1.</param>
        /// <param name="size">Page size; values below 1 are treated as 1.</param>
        /// <param name="t">Type whose indexed properties are searched.</param>
        /// <exception cref="ArgumentNullException"><paramref name="t"/> is null.</exception>
        public SearchOptions(string keywords, int page, int size, Type t) : this(keywords, IndexedFieldNames(t), int.MaxValue, null, null, null, null, null)
        {
            SetPaging(page, size);
        }

        /// <summary>
        /// Sets the search weight of a field, replacing any previous value.
        /// </summary>
        /// <param name="field">Field name.</param>
        /// <param name="boost">Weight to apply.</param>
        public void SetBoosts(string field, float boost)
        {
            _boosts[field] = boost;
        }

        /// <summary>
        /// Clamps page and size to at least 1 and derives Skip/Take from them.
        /// </summary>
        private void SetPaging(int page, int size)
        {
            page = Math.Max(page, 1);
            size = Math.Max(size, 1);
            Skip = (page - 1) * size;
            Take = size;
        }

        /// <summary>
        /// Returns the comma-separated names of the properties of <paramref name="t"/>
        /// that carry a <see cref="LuceneIndexAttribute"/>.
        /// </summary>
        private static string IndexedFieldNames(Type t)
        {
            if (t == null)
            {
                throw new ArgumentNullException(nameof(t));
            }

            return string.Join(",", t.GetProperties().Where(p => p.GetCustomAttributes<LuceneIndexAttribute>().Any()).Select(p => p.Name));
        }
    }
}
-------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/LuceneIndexer.cs: -------------------------------------------------------------------------------- 1 | using Lucene.Net.Analysis; 2 | using Lucene.Net.Index; 3 | using Lucene.Net.Store; 4 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 5 | 
using System;
using System.Collections.Generic;
using System.Linq;

namespace Masuit.LuceneEFCore.SearchEngine
{
    /// <summary>
    /// Adds, updates and removes documents in the Lucene index held by a <see cref="Directory"/>.
    /// Every operation opens its own <see cref="IndexWriter"/> and disposes it before returning.
    /// </summary>
    public class LuceneIndexer : ILuceneIndexer
    {
        /// <summary>
        /// Index directory.
        /// </summary>
        private readonly Directory _directory;

        /// <summary>
        /// Analyzer used when writing documents.
        /// </summary>
        private readonly Analyzer _analyzer;

        /// <summary>
        /// Creates an indexer over the given directory.
        /// </summary>
        /// <param name="directory">Index directory.</param>
        /// <param name="analyzer">Analyzer used when writing documents.</param>
        public LuceneIndexer(Directory directory, Analyzer analyzer)
        {
            _directory = directory;
            _analyzer = analyzer;
        }

        /// <summary>
        /// Adds an entity to the index.
        /// </summary>
        /// <param name="entity">Entity to index.</param>
        public void Add(ILuceneIndexable entity)
        {
            Update(new LuceneIndexChange(entity, LuceneIndexState.Added));
        }

        /// <summary>
        /// Writes the given entities to the index.
        /// </summary>
        /// <param name="entities">Entities to index.</param>
        /// <param name="recreate">
        /// When <c>true</c>, every existing document is deleted (and that deletion committed)
        /// before the entities are added.
        /// </param>
        public void CreateIndex(IEnumerable<ILuceneIndexable> entities, bool recreate = true)
        {
            var config = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, _analyzer);

            using var writer = new IndexWriter(_directory, config);

            // Wipe the index first when a rebuild was requested.
            if (recreate)
            {
                writer.DeleteAll();
                writer.Commit();
            }

            // Convert every entity to a document and add it.
            foreach (var entity in entities)
            {
                writer.AddDocument(entity.ToDocument());
            }

            writer.Flush(true, true);
        }

        /// <summary>
        /// Removes an entity from the index.
        /// </summary>
        /// <param name="entity">Entity to remove.</param>
        public void Delete(ILuceneIndexable entity)
        {
            Update(new LuceneIndexChange(entity, LuceneIndexState.Removed));
        }

        /// <summary>
        /// Removes a list of entities from the index in a single changeset.
        /// </summary>
        /// <param name="entries">Entities to remove.</param>
        public void Delete<T>(IList<T> entries) where T : ILuceneIndexable
        {
            var set = new LuceneIndexChangeset
            {
                Entries = entries.Select(e => new LuceneIndexChange(e, LuceneIndexState.Removed)).ToList()
            };
            Update(set);
        }

        /// <summary>
        /// Deletes every document in the index. Any exception raised while doing so is
        /// written to the console and suppressed.
        /// </summary>
        /// <param name="commit">Whether to commit explicitly after the deletion.</param>
        public void DeleteAll(bool commit = true)
        {
            var config = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, _analyzer);
            using var writer = new IndexWriter(_directory, config);
            try
            {
                writer.DeleteAll();
                if (commit)
                {
                    writer.Commit();
                }
                writer.Flush(true, true);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        /// <summary>
        /// Re-indexes a single entity.
        /// </summary>
        /// <param name="entity">Entity whose document should be replaced.</param>
        public void Update(ILuceneIndexable entity)
        {
            Update(new LuceneIndexChange(entity, LuceneIndexState.Updated));
        }

        /// <summary>
        /// Applies a single index change.
        /// </summary>
        /// <param name="change">Change to apply.</param>
        public void Update(LuceneIndexChange change)
        {
            var changeset = new LuceneIndexChangeset(change);
            Update(changeset);
        }

        /// <summary>
        /// Applies a set of index changes and commits them. Documents are located only through
        /// their <c>IndexId</c> term.
        /// </summary>
        /// <param name="changeset">Changes to apply.</param>
        public void Update(LuceneIndexChangeset changeset)
        {
            var config = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, _analyzer);
            using var writer = new IndexWriter(_directory, config);
            foreach (var change in changeset.Entries)
            {
                switch (change.State)
                {
                    case LuceneIndexState.Removed:
                        writer.DeleteDocuments(new Term("IndexId", change.Entity.IndexId));
                        break;

                    case LuceneIndexState.Added:
                    case LuceneIndexState.Updated:
                        // Delete before adding so an existing document with the same IndexId is replaced.
                        writer.DeleteDocuments(new Term("IndexId", change.Entity.IndexId));
                        writer.AddDocument(change.Entity.ToDocument());
                        break;

                    // Any other state leaves the index untouched.
                }
            }

            writer.Flush(true, changeset.HasDeletes);
            writer.Commit();
        }

        /// <summary>
        /// Returns the number of documents in the index.
        /// </summary>
        /// <returns>The document count, or 0 when no index exists in the directory yet.</returns>
        public int Count()
        {
            try
            {
                // Dispose the reader as soon as the count has been read; it was previously left open.
                using var reader = DirectoryReader.Open(_directory);
                return reader.NumDocs;
            }
            catch (IndexNotFoundException ex)
            {
                // No index found: clear the "write.lock" entry and report an empty index.
                _directory.ClearLock("write.lock");
                Console.WriteLine(ex.Message);
                return 0;
            }
        }
}
}

--------------------------------------------------------------------------------
/Masuit.LuceneEFCore.SearchEngine/LuceneIndexSearcher.cs:
--------------------------------------------------------------------------------
using JiebaNet.Segmenter;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Masuit.LuceneEFCore.SearchEngine.Extensions;
using Masuit.LuceneEFCore.SearchEngine.Interfaces;
using Masuit.LuceneEFCore.SearchEngine.Linq;
using Microsoft.Extensions.Caching.Memory;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Linq.Expressions;
using System.Text.RegularExpressions;
using TinyPinyin;

namespace Masuit.LuceneEFCore.SearchEngine
{
    /// <summary>
    /// Runs keyword searches against the Lucene index held by a <see cref="Directory"/>.
    /// </summary>
    public class LuceneIndexSearcher : ILuceneIndexSearcher
    {
        private readonly Directory _directory;
        private readonly Analyzer _analyzer;
        private readonly IMemoryCache _memoryCache;

        /// <summary>
        /// Creates a searcher over the given directory.
        /// </summary>
        /// <param name="directory">Index directory.</param>
        /// <param name="analyzer">Analyzer used to parse queries.</param>
        /// <param name="memoryCache">Cache that stores keyword segmentation results.</param>
        public LuceneIndexSearcher(Directory directory, Analyzer analyzer, IMemoryCache memoryCache)
        {
            _directory = directory;
            _analyzer = analyzer;
            _memoryCache = memoryCache;
        }

        /// <summary>
        /// Splits a keyword string into search terms.
        /// </summary>
        /// <remarks>
        /// Keywords of two characters or fewer are returned unchanged as the only term.
        /// Otherwise "AND ", "NOT " and "OR " are rewritten to "+", "-" and " ", and the term list
        /// is built once and cached for one hour under the rewritten keyword. At most the ten
        /// longest terms are returned.
        /// </remarks>
        /// <param name="keyword">Raw keyword string.</param>
        /// <returns>The search terms derived from <paramref name="keyword"/>.</returns>
        public List<string> CutKeywords(string keyword)
        {
            if (keyword.Length <= 2)
            {
                return new List<string>
                {
                    keyword
                };
            }

            keyword = keyword.Replace("AND ", "+").Replace("NOT ", "-").Replace("OR ", " ");
            return _memoryCache.GetOrCreate(keyword, entry =>
            {
                entry.AbsoluteExpiration = DateTimeOffset.Now.AddHours(1);
                var list = new HashSet<string>
                {
                    keyword
                };

                // The two projections below run while AddRange enumerates them and strip each
                // match from the captured keyword, so statement order matters here.
                list.AddRange(Regex.Matches(keyword, @""".+""").Cast<Match>().Select(m =>
                {
                    keyword = keyword.Replace(m.Value, "");
                    return m.Value;
                })); // quoted phrases: must be present
                list.AddRange(Regex.Matches(keyword, @"\s-.+\s?").Cast<Match>().Select(m =>
                {
                    keyword = keyword.Replace(m.Value, "");
                    return m.Value.Trim();
                })); // "-" terms: must be absent
                list.AddRange(Regex.Matches(keyword, @"[\u4e00-\u9fa5]+").Cast<Match>().Select(m => m.Value)); // Chinese runs
                list.AddRange(Regex.Matches(keyword, @"\p{P}?[A-Z]*[a-z]*[\p{P}|\p{S}]*").Cast<Match>().Select(m => m.Value)); // English words
                // NOTE(review): [A-z] also matches the ASCII characters between 'Z' and 'a'
                // ([ \ ] ^ _ `); [A-Za-z] may have been intended - confirm before changing.
                list.AddRange(Regex.Matches(keyword, "([A-z]+)([0-9.]+)").Cast<Match>().SelectMany(m => m.Groups.Cast<Group>().Select(g => g.Value))); // letters followed by digits
                list.AddRange(new JiebaSegmenter().Cut(keyword, true)); // jieba segmentation
                list.RemoveWhere(s => s.Length < 2);

                // Pull in both sides of every configured synonym pair that touches a term found so far.
                list.AddRange(KeywordsManager.SynonymWords.Where(t => list.Contains(t.key) || list.Contains(t.value)).SelectMany(t => new[] { t.key, t.value }));

                // Look up pinyin-related words for each term with punctuation and symbols removed.
                var pinyins = new HashSet<string>();
                foreach (var s in list.Select(s => Regex.Replace(s, @"\p{P}|\p{S}", "")).Distinct())
                {
                    if (!pinyins.Contains(s))
                    {
                        pinyins.AddRange(KeywordsManager.PinyinsLookup[PinyinHelper.GetPinyin(s)]);
                    }

                    var lower = s.ToLower();
                    if (KeywordsManager.PinyinsLookup.Contains(lower))
                    {
                        pinyins.AddRange(KeywordsManager.PinyinsLookup[lower]);
                    }
                }

                return list.Union(pinyins).OrderByDescending(s => s.Length).Take(10).Select(s => s.Trim('[', ']', '{', '}', '(', ')')).ToList();
            });
        }

        /// <summary>
        /// Builds a boolean query from the segmented keywords.
        /// </summary>
        /// <remarks>
        /// Terms starting with a double quote become MUST clauses, terms starting with "-" become
        /// MUST_NOT clauses, and every other term becomes a SHOULD clause with a trailing "~".
        /// A term that fails to parse is retried as a SHOULD clause with punctuation and symbols removed.
        /// </remarks>
        /// <param name="parser">Parser used for every term.</param>
        /// <param name="keywords">Raw keyword string.</param>
        /// <returns>The combined query.</returns>
        private BooleanQuery GetFuzzyquery(MultiFieldQueryParser parser, string keywords)
        {
            var finalQuery = new BooleanQuery();
            var terms = CutKeywords(keywords);
            foreach (var term in terms)
            {
                try
                {
                    // Ordinal comparison: the prefixes are syntax markers, not linguistic text.
                    if (term.StartsWith("\"", StringComparison.Ordinal))
                    {
                        finalQuery.Add(parser.Parse(term.Trim('"')), Occur.MUST);
                    }
                    else if (term.StartsWith("-", StringComparison.Ordinal))
                    {
                        finalQuery.Add(parser.Parse(term), Occur.MUST_NOT);
                    }
                    else
                    {
                        finalQuery.Add(parser.Parse(term.Replace("~", "") + "~"), Occur.SHOULD);
                    }
                }
                catch (ParseException)
                {
                    finalQuery.Add(parser.Parse(Regex.Replace(term, @"\p{P}|\p{S}", "")), Occur.SHOULD);
                }
            }

            return finalQuery;
        }

        /// <summary>
        /// Executes a search and returns the requested page of hits.
        /// </summary>
        /// <param name="options">Search options. When <paramref name="safeSearch"/> is set, its Keywords are replaced by their escaped form.</param>
        /// <param name="safeSearch">Escape query-syntax characters in the keywords before parsing.</param>
        /// <returns>
        /// The result collection. TotalHits counts the hits that pass the score/type filter
        /// before Skip/Take are applied.
        /// </returns>
        private ILuceneSearchResultCollection PerformSearch(SearchOptions options, bool safeSearch)
        {
            ILuceneSearchResultCollection results = new LuceneSearchResultCollection();
            using var reader = DirectoryReader.Open(_directory);
            var searcher = new IndexSearcher(reader);
            Query query;

            if (safeSearch)
            {
                options.Keywords = QueryParserBase.Escape(options.Keywords);
            }

            if (options.Fields.Count == 1)
            {
                // Single field: hand the keywords to the classic parser unchanged.
                var queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields[0], _analyzer);
                query = queryParser.Parse(options.Keywords);
            }
            else
            {
                // Multiple fields: segment the keywords and build a fuzzy boolean query.
                var queryParser = new MultiFieldQueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields.ToArray(), _analyzer, options.Boosts);
                query = GetFuzzyquery(queryParser, options.Keywords);
            }

            var sort = new Sort(options.OrderBy.ToArray());
            Expression<Func<ScoreDoc, bool>> where = m => m.Score >= options.Score;
            if (options.Type != null)
            {
                // Keep only documents whose stored "Type" field names the requested type.
                where = where.And(m => options.Type.AssemblyQualifiedName == searcher.Doc(m.Doc).Get("Type"));
            }

            // Materialise the filtered hits once: the type filter loads a stored document per hit,
            // so enumerating the deferred query for both the count and the page would load each twice.
            var matches = searcher.Search(query, options.Filter, options.MaximumNumberOfHits, sort, true, true)
                .ScoreDocs
                .Where(where.Compile())
                .ToList();
            results.TotalHits = matches.Count;

            // Paging.
            IEnumerable<ScoreDoc> page = matches;
            if (options.Skip.HasValue)
            {
                page = page.Skip(options.Skip.Value);
            }
            if (options.Take.HasValue)
            {
                page = page.Take(options.Take.Value);
            }

            foreach (var match in page)
            {
                results.Results.Add(new LuceneSearchResult()
                {
                    Score = match.Score,
                    Document = searcher.Doc(match.Doc)
                });
            }

            return results;
        }

        /// <summary>
        /// Searches for a single document. Sets MaximumNumberOfHits on <paramref name="options"/> to 1.
        /// </summary>
        /// <param name="options">Search options.</param>
        /// <returns>The first returned document, or <c>null</c> when the result page is empty.</returns>
        public Document ScoredSearchSingle(SearchOptions options)
        {
            options.MaximumNumberOfHits = 1;
            var results = ScoredSearch(options);

            // TotalHits is counted before paging, so the page can be empty while TotalHits > 0
            // (for example when Skip is set); FirstOrDefault returns null instead of throwing.
            return results.Results.FirstOrDefault()?.Document;
        }

        /// <summary>
        /// Runs a scored search. If the keywords fail to parse, the search is retried once with
        /// the keywords escaped.
        /// </summary>
        /// <param name="options">Search options.</param>
        /// <returns>The result collection, with Elapsed set to the total time in milliseconds.</returns>
        public ILuceneSearchResultCollection ScoredSearch(SearchOptions options)
        {
            ILuceneSearchResultCollection results;
            var sw = Stopwatch.StartNew();
            try
            {
                results = PerformSearch(options, false);
            }
            catch (ParseException)
            {
                results = PerformSearch(options, true);
            }

            sw.Stop();
            results.Elapsed = sw.ElapsedMilliseconds;
            return results;
        }

        /// <summary>
        /// Runs a scored search from individual option values.
        /// </summary>
        /// <param name="keywords">Search keywords.</param>
        /// <param name="fields">Fields to search.</param>
        /// <param name="maximumNumberOfHits">Maximum number of hits to retrieve.</param>
        /// <param name="boosts">Per-field boosts used for multi-field searches.</param>
        /// <param name="type">Document type.</param>
        /// <param name="sortBy">Sort rules.</param>
        /// <param name="skip">Number of hits to skip.</param>
        /// <param name="take">Number of hits to take.</param>
        /// <returns>The result collection.</returns>
        public ILuceneSearchResultCollection ScoredSearch(string keywords, string fields, int maximumNumberOfHits, Dictionary<string, float> boosts, Type type, string sortBy, int? skip, int?
take) 254 | { 255 | var options = new SearchOptions(keywords, fields, maximumNumberOfHits, boosts, type, sortBy, skip, take); 256 | return ScoredSearch(options); 257 | } 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### 基于EntityFrameworkCore和Lucene.NET实现的全文检索搜索引擎(码数鲁森库) 2 | 3 | **仅70KB的代码量!新手友好。** 基于EntityFrameworkCore和Lucene.NET实现的全文检索搜索引擎,可轻松实现高性能的全文检索,支持添加自定义词库,自定义同义词和同音词,搜索分词默认支持同音词搜索。可以轻松应用于任何基于EntityFrameworkCore的实体框架数据库。 4 | **`注意:该项目仅适用于单体项目的简单搜索场景,不适用于分布式应用以及复杂的搜索场景,分布式应用请考虑使用大型的搜索引擎中间件做支撑,如:ElasticSearch,或考虑数据库的正则表达式查询`** 5 | 6 | [官网页面](http://masuit.com/1437) | [实际应用案例体验](https://masuit.com/s?wd=会声会影+TeamViewer) 7 | 8 | 项目开发模式:日常代码积累+网络搜集 9 | 10 | [![LICENSE](https://img.shields.io/badge/license-Anti%20996-blue.svg)](https://github.com/996icu/996.ICU/blob/master/LICENSE) [![nuget](https://img.shields.io/nuget/v/Masuit.LuceneEFCore.SearchEngine_string.svg)](https://www.nuget.org/packages/Masuit.LuceneEFCore.SearchEngine_string) [![nuget](https://img.shields.io/nuget/dt/Masuit.LuceneEFCore.SearchEngine_string.svg)](https://www.nuget.org/packages/Masuit.LuceneEFCore.SearchEngine_string) ![codeSize](https://img.shields.io/github/languages/code-size/ldqk/Masuit.LuceneEFCore.SearchEngine.svg) ![language](https://img.shields.io/github/languages/top/ldqk/Masuit.LuceneEFCore.SearchEngine.svg) 11 | 12 | ### 请注意: 13 | 一旦使用本开源项目以及引用了本项目或包含本项目代码的公司因为违反劳动法(包括但不限定非法裁员、超时用工、雇佣童工等)在任何法律诉讼中败诉的,一经发现,本项目作者有权利追讨本项目的使用费(**公司工商注册信息认缴金额的2-5倍作为本项目的授权费**),或者直接不允许使用任何包含本项目的源代码!任何性质的`外包公司`或`996公司`需要使用本类库,请联系作者进行商业授权!其他企业或个人可随意使用不受限。996那叫用人,也是废人。8小时工作制才可以让你有时间自我提升,将来有竞争力。反对996,人人有责! 14 | 15 | ⭐⭐⭐喜欢这个项目的话就Star、Fork、Follow素质三连关♂注一下吧⭐⭐⭐ 16 | 17 | ## Stargazers over time 18 | 19 | 20 | ### 项目特点 21 | 1. 基于原生Lucene实现,轻量高效,毫秒级响应 22 | 2. 与EFCore无缝接入,配置代码少,可轻松接入现有项目 23 | 3. 支持添加自定义词库,支持同义词和同音词检索,支持添加自定义同义词和同音词 24 | 4. 
不支持分布式应用,若你能解决分布式场景中索引库的同步问题,可以选择 25 | 26 | ### 为什么没有集成到Masuit.Tools这个库? 27 | 因为这个项目又引入了几个Lucene相关的库,如果集成到[Masuit.Tools](https://github.com/ldqk/Masuit.Tools "Masuit.Tools"),这必将给原来的项目增加了更多的引用包,使用过程中也有可能没有使用Lucene的场景,这就造成了项目更加的臃肿,所以做了个新的项目。 28 | ### 为什么有这个库?现成的ElasticSearch不好么? 29 | ES确实很好用,但我想的是还有很多的小站没必要上那么重量级的中间件,于是原生lucene库不失为一种好的选择,然而原生LuceneAPI的学习成本也相对较高,所以专门封装了这个库。 30 | ### 快速开始 31 | #### EntityFrameworkCore基架搭建 32 | 新建项目,并安装EntityFrameworkCore相关库以及全文检索包: 33 | 34 | 根据你的项目情况,选择对应的后缀版本,提供了4个主键版本的库,后缀为int的代表主键是基于int自增类型的,后缀为Guid的代表主键是基于Guid类型的... 35 | ```shell 36 | PM> Install-Package Masuit.LuceneEFCore.SearchEngine_int 37 | PM> Install-Package Masuit.LuceneEFCore.SearchEngine_long 38 | PM> Install-Package Masuit.LuceneEFCore.SearchEngine_string 39 | PM> Install-Package Masuit.LuceneEFCore.SearchEngine_Guid 40 | ``` 41 | 按照套路我们需要首先搭建好EntityFrameworkCore的基架,即数据库上下文和实体对象; 42 | 43 | 准备数据库上下文对象: 44 | ```csharp 45 | public class DataContext : DbContext 46 | { 47 | public DataContext(DbContextOptions options) : base(options){} 48 | protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder) 49 | { 50 | base.OnConfiguring(optionsBuilder); 51 | optionsBuilder.UseQueryTrackingBehavior(QueryTrackingBehavior.TrackAll); 52 | } 53 | public virtual DbSet Post { get; set; } 54 | } 55 | ``` 56 | 准备实体对象,这里开始需要注意了,要想这个库的数据被全文检索,需要符合两个条件: 57 | 1. 实体必须继承自LuceneIndexableBaseEntity; 58 | 2. 
需要被检索的字段需要被LuceneIndexAttribute所标记。 59 | ```csharp 60 | /// 61 | /// 文章 62 | /// 63 | [Table("Post")] 64 | public class Post : LuceneIndexableBaseEntity 65 | { 66 | public Post() 67 | { 68 | PostDate = DateTime.Now; 69 | } 70 | 71 | /// 72 | /// 标题 73 | /// 74 | [Required(ErrorMessage = "文章标题不能为空!"), LuceneIndex] 75 | public string Title { get; set; } 76 | 77 | /// 78 | /// 作者 79 | /// 80 | [Required, MaxLength(24, ErrorMessage = "作者名最长支持24个字符!"), LuceneIndex] 81 | public string Author { get; set; } 82 | 83 | /// 84 | /// 内容 85 | /// 86 | [Required(ErrorMessage = "文章内容不能为空!"), LuceneIndex(IsHtml = true)] 87 | public string Content { get; set; } 88 | 89 | /// 90 | /// 发表时间 91 | /// 92 | public DateTime PostDate { get; set; } 93 | 94 | /// 95 | /// 作者邮箱 96 | /// 97 | [Required(ErrorMessage = "作者邮箱不能为空!"), LuceneIndex] 98 | public string Email { get; set; } 99 | 100 | /// 101 | /// 标签 102 | /// 103 | [StringLength(256, ErrorMessage = "标签最大允许255个字符"), LuceneIndex] 104 | public string Label { get; set; } 105 | 106 | /// 107 | /// 文章关键词 108 | /// 109 | [StringLength(256, ErrorMessage = "文章关键词最大允许255个字符"), LuceneIndex] 110 | public string Keyword { get; set; } 111 | 112 | } 113 | ``` 114 | LuceneIndexAttribute对应的4个自定义参数: 115 | 1. Name:自定义索引字段名,默认为空; 116 | 2. Index:索引行为,默认为Field.Index.ANALYZED; 117 | 3. Store:是否被存储到索引库,默认为Field.Store.YES; 118 | 4. IsHtml:是否是html,默认为false,若标记为true,则在索引解析时会先清空其中的html标签。 119 | #### 为什么实体类要继承LuceneIndexableBaseEntity? 
120 | LuceneIndexableBaseEntity源代码如下: 121 | ```csharp 122 | /// 123 | /// 需要被索引的实体基类 124 | /// 125 | public abstract class LuceneIndexableBaseEntity : ILuceneIndexable 126 | { 127 | /// 128 | /// 主键id 129 | /// 130 | [LuceneIndex(Name = "Id", Store = Field.Store.YES, Index = Field.Index.NOT_ANALYZED), Key] 131 | public int Id { get; set; } 132 | 133 | /// 134 | /// 索引唯一id 135 | /// 136 | [LuceneIndex(Name = "IndexId", Store = Field.Store.YES, Index = Field.Index.NOT_ANALYZED)] 137 | [NotMapped] 138 | public string IndexId 139 | { 140 | get => GetType().Name + ":" + Id; 141 | set 142 | { 143 | } 144 | } 145 | 146 | /// 147 | /// 转换成Lucene文档 148 | /// 149 | /// 150 | public virtual Document ToDocument() 151 | { 152 | // 将实体对象转换成Lucene文档的逻辑 153 | } 154 | } 155 | ``` 156 | 实体继承自LuceneIndexableBaseEntity后,方便封装的Lucene可以直接调用ToDocument方法进行存储,同时,主键Id和IndexId需要参与Lucene索引文档的唯一标识(但IndexId不会生成到数据库)。 157 | #### 搜索引擎配置、创建索引、导入自定义词库等 158 | Startup.cs 159 | ```csharp 160 | public void ConfigureServices(IServiceCollection services) 161 | { 162 | // ... 163 | services.AddDbContext(db => 164 | { 165 | db.UseSqlServer("Data Source=.;Initial Catalog=MyBlogs;Integrated Security=True"); 166 | });// 配置数据库上下文 167 | services.AddSearchEngine(new LuceneIndexerOptions() 168 | { 169 | Path = "lucene" 170 | });// 依赖注入搜索引擎,并配置索引库路径 171 | // ... 172 | } 173 | 174 | public void Configure(IApplicationBuilder app, IHostingEnvironment env, ISearchEngine searchEngine, LuceneIndexerOptions luceneIndexerOptions) 175 | { 176 | // ... 
177 | // 导入自定义词库,支持中英文词 178 | KeywordsManager.AddWords("面向对象编程语言"); 179 | KeywordsManager.AddWords("懒得勤快"); 180 | KeywordsManager.AddWords("码数科技"); 181 | KeywordsManager.AddWords("Tree New Bee"); 182 | KeywordsManager.AddWords("男♂能可贵"); 183 | 184 | // 导入自定义同义词,支持中英文词 185 | KeywordsManager.AddSynonyms("RDM","Redis Desktop Manager"); 186 | KeywordsManager.AddSynonyms("RDM","Remote Desktop Manager"); 187 | KeywordsManager.AddSynonyms("VS","Visual Studio"); 188 | KeywordsManager.AddSynonyms("Visual Studio","宇宙最强IDE"); 189 | KeywordsManager.AddSynonyms("VS","Video Studio"); 190 | KeywordsManager.AddSynonyms("难能可贵","男♂能可贵"); 191 | // 提问:以上示例配置了近义词:VS->Visual Studio和Visual Studio->宇宙最强IDE?那么分词时VS是否能够找到间接近义词“宇宙最强IDE”? 192 | // 答案是不能,为什么不能?近义词查找并没有实现递归查找,为什么不做递归查找?因为近义词库是完全不可控的动态配置,如果做了递归查找,词库的配置不当很有可能造成死递归,所以,如果需要让VS和“宇宙最强IDE”同义,则需要再单独配置 193 | 194 | // 初始化索引库,建议结合定时任务使用,定期刷新索引库 195 | string lucenePath = Path.Combine(env.ContentRootPath, luceneIndexerOptions.Path); 196 | if (!Directory.Exists(lucenePath) || Directory.GetFiles(lucenePath).Length < 1) 197 | { 198 | // 创建索引 199 | Console.WriteLine("索引库不存在,开始自动创建Lucene索引库..."); 200 | searchEngine.CreateIndex(new List() 201 | { 202 | nameof(DataContext.Post), 203 | }); 204 | var list = searchEngine.Context.Post.Where(i => i.Status != Status.Pended).ToList(); // 删除不需要被索引的数据 205 | searchEngine.LuceneIndexer.Delete(list); 206 | Console.WriteLine("索引库创建完成!"); 207 | } 208 | // ... 
209 | } 210 | 211 | ``` 212 | **同义词支持正向和反向查找,如配置了:`KeywordsManager.AddSynonyms("地大物博","弟大勿勃")`和`KeywordsManager.AddSynonyms("弟大勿勃","地大物博")`是等效的,只需要其中一条即可** 213 | HomeController.cs 214 | ```csharp 215 | [Route("[controller]/[action]")] 216 | public class HomeController : Controller 217 | { 218 | private readonly ISearchEngine _searchEngine; 219 | private readonly ILuceneIndexer _luceneIndexer; 220 | public HomeController(ISearchEngine searchEngine, ILuceneIndexer luceneIndexer) 221 | { 222 | _searchEngine = searchEngine; 223 | _luceneIndexer = luceneIndexer; 224 | } 225 | 226 | /// 227 | /// 搜索 228 | /// 229 | /// 关键词 230 | /// 第几页 231 | /// 页大小 232 | /// 233 | [HttpGet] 234 | public async Task Index(string s, int page, int size) 235 | { 236 | //var result = _searchEngine.ScoredSearch(new SearchOptions(s, page, size, "Title,Content,Email,Author")); 237 | var result = _searchEngine.ScoredSearch(new SearchOptions(s, page, size, typeof(Post))); 238 | return Ok(result); 239 | } 240 | 241 | /// 242 | /// 创建索引 243 | /// 244 | [HttpGet] 245 | public void CreateIndex() 246 | { 247 | //_searchEngine.CreateIndex();//扫描所有数据表,创建符合条件的库的索引 248 | _searchEngine.CreateIndex(new List() { nameof(Post) });//创建指定的数据表的索引 249 | } 250 | 251 | /// 252 | /// 添加索引 253 | /// 254 | [HttpPost] 255 | public void AddIndex(Post p) 256 | { 257 | // 添加到数据库并更新索引 258 | _searchEngine.Context.Post.Add(p); 259 | _searchEngine.SaveChanges(); 260 | 261 | //_luceneIndexer.Add(p); //单纯的只添加索引库 262 | } 263 | 264 | /// 265 | /// 删除索引 266 | /// 267 | [HttpDelete] 268 | public void DeleteIndex(Post post) 269 | { 270 | //从数据库删除并更新索引库 271 | Post p = _searchEngine.Context.Post.Find(post.Id); 272 | _searchEngine.Context.Post.Remove(p); 273 | _searchEngine.SaveChanges(); 274 | 275 | //_luceneIndexer.Delete(p);// 单纯的从索引库移除 276 | } 277 | 278 | /// 279 | /// 更新索引库 280 | /// 281 | /// 282 | [HttpPatch] 283 | public void UpdateIndex(Post post) 284 | { 285 | //从数据库更新并同步索引库 286 | Post p = 
_searchEngine.Context.Post.Find(post.Id); 287 | // update... 288 | _searchEngine.Context.Post.Update(p); 289 | _searchEngine.SaveChanges(); 290 | 291 | //_luceneIndexer.Update(p);// 单纯的更新索引库 292 | } 293 | } 294 | ``` 295 | #### 关于更新索引 296 | 要在执行任何CRUD操作后更新索引,只需从ISearchEngine调用SaveChanges()方法,而不是从DataContext调用SaveChanges()。 这才会更新索引,然后会自动调用DataContexts的SaveChanges()方法。如果直接调用DataContexts的SaveChanges()方法,只会保存到数据库,而不会更新索引库。 297 | #### 关于搜索结果 298 | 搜索返回IScoredSearchResultCollection,其中包括执行搜索所花费的时间,命中总数以及每个包含的对象的结果集以及在搜索中匹配度的数量。 299 | 300 | 特别注意:单元测试中使用内存RAM目录进行索引和搜索,但这仅用于测试目的,真实生产环境应使用物理磁盘的目录。 301 | 302 | #### 演示项目 303 | [点击这里](/WebSearchDemo "demo") 304 | ### 推荐项目 305 | .NET万能框架:[Masuit.Tools](https://github.com/ldqk/Masuit.Tools "Masuit.Tools") 306 | 307 | 开源博客系统:[Masuit.MyBlogs](https://github.com/ldqk/Masuit.MyBlogs "Masuit.MyBlogs") 308 | -------------------------------------------------------------------------------- /Masuit.LuceneEFCore.SearchEngine/SearchEngine.cs: -------------------------------------------------------------------------------- 1 | using JiebaNet.Segmenter; 2 | using Lucene.Net.Analysis; 3 | using Lucene.Net.Documents; 4 | using Lucene.Net.Store; 5 | using Masuit.LuceneEFCore.SearchEngine.Extensions; 6 | using Masuit.LuceneEFCore.SearchEngine.Interfaces; 7 | using Microsoft.EntityFrameworkCore; 8 | using Microsoft.Extensions.Caching.Memory; 9 | using System; 10 | using System.Collections.Generic; 11 | using System.Diagnostics; 12 | using System.Linq; 13 | using System.Linq.Expressions; 14 | using System.Reflection; 15 | using System.Threading.Tasks; 16 | 17 | namespace Masuit.LuceneEFCore.SearchEngine 18 | { 19 | /// 20 | /// 搜索引擎 21 | /// 22 | /// 23 | public class SearchEngine : ISearchEngine where TContext : DbContext 24 | { 25 | /// 26 | /// 数据库上下文 27 | /// 28 | public TContext Context { get; } 29 | 30 | /// 31 | /// 索引器 32 | /// 33 | public ILuceneIndexer LuceneIndexer { get; } 34 | 35 | /// 36 | /// 索引搜索器 37 | /// 38 | public 
ILuceneIndexSearcher LuceneIndexSearcher { get; }

        /// <summary>
        /// Number of documents currently in the index.
        /// </summary>
        public int IndexCount => LuceneIndexer.Count();

        /// <summary>
        /// Creates a search engine that pairs a database context with an indexer and a searcher
        /// over the same directory.
        /// </summary>
        /// <param name="context">Database context.</param>
        /// <param name="directory">Index directory.</param>
        /// <param name="analyzer">Analyzer used for indexing and searching.</param>
        /// <param name="memoryCache">Cache handed to the index searcher.</param>
        public SearchEngine(TContext context, Directory directory, Analyzer analyzer, IMemoryCache memoryCache)
        {
            Context = context;
            LuceneIndexer = new LuceneIndexer(directory, analyzer);
            LuceneIndexSearcher = new LuceneIndexSearcher(directory, analyzer, memoryCache);
        }

        /// <summary>
        /// Converts the pending changes tracked by the context into an index changeset.
        /// Only entities that implement <see cref="ILuceneIndexable"/> are included.
        /// </summary>
        /// <returns>The changeset describing the tracked changes.</returns>
        private LuceneIndexChangeset GetChangeset()
        {
            var changes = new LuceneIndexChangeset();
            foreach (var entity in Context.ChangeTracker.Entries().Where(x => x.State != EntityState.Unchanged))
            {
                var entityType = entity.Entity.GetType();
                if (!typeof(ILuceneIndexable).IsAssignableFrom(entityType) || entityType.GetMethod("ToDocument") is null)
                {
                    continue;
                }

                var change = new LuceneIndexChange(entity.Entity as ILuceneIndexable);
                change.State = entity.State switch
                {
                    EntityState.Added => LuceneIndexState.Added,
                    EntityState.Deleted => LuceneIndexState.Removed,
                    EntityState.Modified => LuceneIndexState.Updated,
                    _ => LuceneIndexState.Unchanged
                };

                changes.Entries.Add(change);
            }

            return changes;
        }

        /// <summary>
        /// Rebuilds an entity from an index document. The entity type is read from the document's
        /// "Type" field and instantiated through its first constructor; the indexed properties are
        /// then filled from the document.
        /// </summary>
        /// <param name="doc">Document to convert; may be <c>null</c>.</param>
        /// <returns>The rebuilt entity, or <c>null</c> when <paramref name="doc"/> is <c>null</c>.</returns>
        private ILuceneIndexable GetConcreteFromDocument(Document doc)
        {
            // A search without hits yields a null document; return null instead of dereferencing it.
            if (doc is null)
            {
                return null;
            }

            var t = Type.GetType(doc.Get("Type"));
            var obj = Expression.Lambda<Func<ILuceneIndexable>>(Expression.New(t.GetConstructors()[0])).Compile()();
            foreach (var p in t.GetProperties().Where(p => p.GetCustomAttributes<LuceneIndexAttribute>().Any()))
            {
                p.SetValue(obj, doc.Get(p.Name, p.PropertyType));
            }
            return obj;
        }

        /// <summary>
        /// Saves the pending database changes and then applies them to the index.
        /// </summary>
        /// <param name="index">Whether the index should be updated as well.</param>
        /// <returns>The value returned by the context's SaveChanges, or 0 when nothing was pending.</returns>
        public int SaveChanges(bool index = true)
        {
            int result = 0;

            if (Context.ChangeTracker.HasChanges())
            {
                // Capture the changeset before saving; it is read from the change tracker's pending state.
                var changes = GetChangeset();
                result = Context.SaveChanges();
                if (changes.HasChanges && index)
                {
                    LuceneIndexer.Update(changes);
                }
            }

            return result;
        }

        /// <summary>
        /// Asynchronously saves the pending database changes and then applies them to the index.
        /// </summary>
        /// <param name="index">Whether the index should be updated as well.</param>
        /// <returns>The value returned by the context's SaveChangesAsync, or 0 when nothing was pending.</returns>
        public async Task<int> SaveChangesAsync(bool index = true)
        {
            int result = 0;

            if (Context.ChangeTracker.HasChanges())
            {
                // Capture the changeset before saving; it is read from the change tracker's pending state.
                var changes = GetChangeset();
                result = await Context.SaveChangesAsync();
                if (changes.HasChanges && index)
                {
                    LuceneIndexer.Update(changes);
                }
            }

            return result;
        }

        /// <summary>
        /// Indexes every queryable set of <see cref="ILuceneIndexable"/> entities exposed by the context.
        /// Documents are appended; existing index content is not cleared.
        /// </summary>
        public void CreateIndex()
        {
            IndexDbSets(_ => true);
        }

        /// <summary>
        /// Indexes the queryable sets whose context property name is listed in <paramref name="tables"/>.
        /// Documents are appended; existing index content is not cleared.
        /// </summary>
        /// <param name="tables">Names of the context properties to index.</param>
        public void CreateIndex(List<string> tables)
        {
            IndexDbSets(pi => tables.Contains(pi.Name));
        }

        /// <summary>
        /// Indexes each context property that is a queryable of indexable entities and passes the filter.
        /// </summary>
        /// <param name="include">Filter applied to every candidate property.</param>
        private void IndexDbSets(Func<PropertyInfo, bool> include)
        {
            if (LuceneIndexer == null)
            {
                return;
            }

            foreach (var pi in Context.GetType().GetProperties())
            {
                if (typeof(IQueryable<ILuceneIndexable>).IsAssignableFrom(pi.PropertyType) && include(pi))
                {
                    // Read the value through the PropertyInfo already in hand rather than looking it up again by name.
                    var entities = pi.GetValue(Context) as IQueryable<ILuceneIndexable>;
                    LuceneIndexer.CreateIndex(entities, false);
                }
            }
        }

        /// <summary>
        /// Deletes the whole index.
        /// </summary>
        public void DeleteIndex()
        {
            LuceneIndexer?.DeleteAll();
        }

        /// <summary>
        /// Runs a search restricted to one entity type and returns the converted entities
        /// without score information.
        /// </summary>
        /// <typeparam name="T">Entity type to search for; must implement ILuceneIndexable.</typeparam>
        /// <param name="options">Search options; Type is overwritten unless T is ILuceneIndexable itself.</param>
        /// <returns>The matching entities, the total hit count and the elapsed time.</returns>
        public ISearchResultCollection<T> Search<T>(SearchOptions options)
        {
            // Same guard as ScoredSearch: do not restrict the search to the interface type itself.
            if (typeof(T) != typeof(ILuceneIndexable))
            {
                options.Type = typeof(T);
            }

            var indexResults = LuceneIndexSearcher.ScoredSearch(options);
            ISearchResultCollection<T> resultSet = new SearchResultCollection<T>
            {
                TotalHits = indexResults.TotalHits
            };

            var sw = Stopwatch.StartNew();
            foreach (var indexResult in indexResults.Results)
            {
                var entity = (T)GetConcreteFromDocument(indexResult.Document);
                resultSet.Results.Add(entity);
            }

            sw.Stop();
            resultSet.Elapsed = indexResults.Elapsed + sw.ElapsedMilliseconds;
            return resultSet;
        }

        /// <summary>
        /// Runs a search restricted to one entity type and returns the converted entities
        /// together with their scores.
        /// </summary>
        /// <typeparam name="T">Entity type to search for; must implement ILuceneIndexable.</typeparam>
        /// <param name="options">Search options; Type is overwritten unless T is ILuceneIndexable itself.</param>
        /// <returns>The scored entities, the total hit count and the elapsed time.</returns>
        public IScoredSearchResultCollection<T> ScoredSearch<T>(SearchOptions options)
        {
            // Restrict the search to T unless the caller asked for the interface type itself.
            if (typeof(T) != typeof(ILuceneIndexable))
            {
                options.Type = typeof(T);
            }

            var indexResults = LuceneIndexSearcher.ScoredSearch(options);
            IScoredSearchResultCollection<T> results = new ScoredSearchResultCollection<T>();
            results.TotalHits = indexResults.TotalHits;
            var sw = Stopwatch.StartNew();
            foreach (var indexResult in indexResults.Results)
            {
                IScoredSearchResult<T> result = new ScoredSearchResult<T>();
                result.Score = indexResult.Score;
                result.Entity = (T)GetConcreteFromDocument(indexResult.Document);
                results.Results.Add(result);
            }

            sw.Stop();
            results.Elapsed = indexResults.Elapsed + sw.ElapsedMilliseconds;
            return results;
        }

267 | /// 268 | /// 执行搜索并将结果限制为特定类型,在返回之前,搜索结果将转换为相关类型 269 | /// 270 | /// 搜索选项 271 | /// 272 | public IScoredSearchResultCollection ScoredSearch(SearchOptions options) 273 | { 274 | return ScoredSearch(options); 275 | } 276 | 277 | /// 278 | /// 执行搜索并将结果限制为特定类型,在返回之前,搜索结果将转换为相关类型 279 | /// 280 | /// 搜索选项 281 | /// 282 | public ISearchResultCollection Search(SearchOptions options) 283 | { 284 | return Search(options); 285 | } 286 | 287 | /// 288 | /// 搜索一条匹配度最高的记录 289 | /// 290 | /// 搜索选项 291 | /// 292 | public ILuceneIndexable SearchOne(SearchOptions options) 293 | { 294 | return GetConcreteFromDocument(LuceneIndexSearcher.ScoredSearchSingle(options)); 295 | } 296 | 297 | /// 298 | /// 搜索一条匹配度最高的记录 299 | /// 300 | /// 搜索选项 301 | /// 302 | public T SearchOne(SearchOptions options) where T : class 303 | { 304 | return GetConcreteFromDocument(LuceneIndexSearcher.ScoredSearchSingle(options)) as T; 305 | } 306 | 307 | /// 308 | /// 导入自定义词库 309 | /// 310 | /// 311 | public void ImportCustomerKeywords(IEnumerable words) 312 | { 313 | var segmenter = new JiebaSegmenter(); 314 | foreach (var word in words) 315 | { 316 | segmenter.AddWord(word); 317 | } 318 | } 319 | } 320 | } --------------------------------------------------------------------------------